From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:02:58 +0200 Subject: Adding upstream version 1.64.0+dfsg1. Signed-off-by: Daniel Baumann --- .../core_arch/src/arm_shared/neon/generated.rs | 40888 +++++++++++++++++++ .../core_arch/src/arm_shared/neon/load_tests.rs | 206 + .../crates/core_arch/src/arm_shared/neon/mod.rs | 12347 ++++++ .../src/arm_shared/neon/shift_and_insert_tests.rs | 93 + .../core_arch/src/arm_shared/neon/store_tests.rs | 389 + .../src/arm_shared/neon/table_lookup_tests.rs | 1042 + 6 files changed, 54965 insertions(+) create mode 100644 library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs create mode 100644 library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs create mode 100644 library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs create mode 100644 library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs create mode 100644 library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs create mode 100644 library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs new file mode 100644 index 000000000..d69fbd8e8 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/generated.rs @@ -0,0 +1,40888 @@ +// This code is automatically generated. DO NOT MODIFY. +// +// Instead, modify `crates/stdarch-gen/neon.spec` and run the following command to re-generate this file: +// +// ``` +// OUT_DIR=`pwd`/crates/core_arch cargo run -p stdarch-gen -- crates/stdarch-gen/neon.spec +// ``` +use super::*; +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vand_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vandq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vand_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_and(a, b) +}
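A usage aside, not part of the upstream patch: these bitwise intrinsics are lane-wise, so ordinary integer masking idioms apply across a whole vector at once (the `vorr`/`veor` families defined further down behave analogously). A minimal sketch, assuming an AArch64 target; the helper name is invented for illustration:

```
// Hypothetical demo of the lane-wise bitwise intrinsics; not from the patch.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn bitwise_demo() {
    use core::arch::aarch64::*;
    let a = vdup_n_u8(0b1010_1010); // broadcast one byte into all 8 lanes
    let m = vdup_n_u8(0b0000_1111); // low-nibble mask
    assert_eq!(vget_lane_u8::<0>(vand_u8(a, m)), 0b0000_1010); // keep low nibble
    assert_eq!(vget_lane_u8::<0>(vorr_u8(a, m)), 0b1010_1111); // set low nibble
    assert_eq!(vget_lane_u8::<0>(veor_u8(a, m)), 0b1010_0101); // flip low nibble
}
```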
"1.59.0"))] +pub unsafe fn vandq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vand_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vandq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vand_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vandq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vand_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vandq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vand_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] 
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vandq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vand_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vandq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vand_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_and(a, b) +} + +/// Vector bitwise and +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vand))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(and))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vandq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_and(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorr_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorrq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorr_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorrq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorr_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorrq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorr_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorrq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorr_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorrq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", 
since = "1.59.0"))] +pub unsafe fn vorr_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorrq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorr_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorrq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorr_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_or(a, b) +} + +/// Vector bitwise or (immediate, inclusive) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orr))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vorrq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_or(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veor_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veorq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veor_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veorq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veor_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veorq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veor_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veorq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veor_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub 
unsafe fn veorq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veor_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veorq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veor_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veorq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veor_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_xor(a, b) +} + +/// Vector bitwise exclusive or (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(veor))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(eor))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn veorq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_xor(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.sabd.v8i8")] + fn vabd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vabd_s8_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v16i8")] + fn vabdq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vabdq_s8_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v4i16")] + fn vabd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vabd_s16_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v8i16")] + fn vabdq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vabdq_s16_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v2i32")] + fn vabd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vabd_s32_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdq_s32(a: int32x4_t, 
b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sabd.v4i32")] + fn vabdq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vabdq_s32_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v8i8")] + fn vabd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vabd_u8_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v16i8")] + fn vabdq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vabdq_u8_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v4i16")] + fn vabd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vabd_u16_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v8i16")] + fn vabdq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vabdq_u16_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vabd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v2i32")] + fn vabd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vabd_u32_(a, b) +} + +/// Absolute difference between the arguments +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabdu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uabd.v4i32")] + fn vabdq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vabdq_u32_(a, b) +} + +/// Absolute difference between the arguments of Floating +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fabd.v2f32")] + fn vabd_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vabd_f32_(a, b) +} + +/// Absolute difference between the arguments of Floating +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabd.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabds.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fabd.v4f32")] + fn vabdq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vabdq_f32_(a, b) +} + +/// Unsigned Absolute difference Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabdl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + simd_cast(vabd_u8(a, b)) +} + +/// Unsigned Absolute difference Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vabdl.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabdl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + simd_cast(vabd_u16(a, b)) +} + +/// Unsigned Absolute difference Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabdl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + simd_cast(vabd_u32(a, b)) +} + +/// Signed Absolute difference Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabdl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + let c: uint8x8_t = simd_cast(vabd_s8(a, b)); + simd_cast(c) +} + +/// Signed Absolute difference Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabdl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + let c: uint16x4_t = simd_cast(vabd_s16(a, b)); + simd_cast(c) +} + +/// Signed Absolute difference Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabdl.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabdl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabdl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + let c: uint32x2_t = simd_cast(vabd_s32(a, b)); + simd_cast(c) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceq_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceqq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceq_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceqq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceq_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceqq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceq_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceqq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceq_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceqq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceq_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceqq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceq_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { + simd_eq(a, b) +} + +/// Compare bitwise Equal (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceqq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { + simd_eq(a, b) +} + +/// Floating-point compare equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceq_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_eq(a, b) +} + +/// Floating-point compare equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vceq.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmeq))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vceqq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_eq(a, b) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtst_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + let c: int8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtstq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + let c: int8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtst_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + let c: int16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtstq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + let c: int16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtst_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + let c: int32x2_t = simd_and(a, b); + let d: i32x2 = i32x2::new(0, 0); + simd_ne(c, transmute(d)) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtstq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + let c: int32x4_t = simd_and(a, b); + let d: i32x4 = i32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtst_p8(a: poly8x8_t, b: poly8x8_t) -> uint8x8_t { + let c: poly8x8_t = simd_and(a, b); + let d: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +}
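As an aside (hypothetical, not part of the patch): the test-bits intrinsics answer "is `a & b` nonzero?" per lane, returning an all-ones lane for true, which makes vectorized flag checks cheap. A sketch assuming an AArch64 target:

```
// Hypothetical demo of per-lane flag testing with vtst; not from the patch.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn flag_demo() {
    use core::arch::aarch64::*;
    const DIRTY: u8 = 0b0000_0100; // invented flag bit for the example
    let flags = vdup_n_u8(0b0000_0110);
    let hit = vtst_u8(flags, vdup_n_u8(DIRTY));
    assert_eq!(vget_lane_u8::<0>(hit), 0xFF); // the DIRTY bit was set
}
```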
"aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtstq_p8(a: poly8x16_t, b: poly8x16_t) -> uint8x16_t { + let c: poly8x16_t = simd_and(a, b); + let d: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtst_p16(a: poly16x4_t, b: poly16x4_t) -> uint16x4_t { + let c: poly16x4_t = simd_and(a, b); + let d: i16x4 = i16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Signed compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtstq_p16(a: poly16x8_t, b: poly16x8_t) -> uint16x8_t { + let c: poly16x8_t = simd_and(a, b); + let d: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Unsigned compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtst_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let c: uint8x8_t = simd_and(a, b); + let d: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Unsigned compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtstq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + let c: uint8x16_t = simd_and(a, b); + let d: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Unsigned compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtst_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let c: uint16x4_t = simd_and(a, b); + let d: u16x4 = u16x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Unsigned compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = 
"1.59.0"))] +pub unsafe fn vtstq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + let c: uint16x8_t = simd_and(a, b); + let d: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Unsigned compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtst_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let c: uint32x2_t = simd_and(a, b); + let d: u32x2 = u32x2::new(0, 0); + simd_ne(c, transmute(d)) +} + +/// Unsigned compare bitwise Test bits nonzero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtst))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmtst))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + let c: uint32x4_t = simd_and(a, b); + let d: u32x4 = u32x4::new(0, 0, 0, 0); + simd_ne(c, transmute(d)) +} + +/// Floating-point absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabs_f32(a: float32x2_t) -> float32x2_t { + simd_fabs(a) +} + +/// Floating-point absolute value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fabs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabsq_f32(a: float32x4_t) -> float32x4_t { + simd_fabs(a) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgtq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub 
unsafe fn vcgt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgtq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_gt(a, b) +} + +/// Compare signed greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgtq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgtq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgtq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_gt(a, b) +}
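One more usage aside (not from the patch): all of these comparisons return a mask vector in which each lane is either all ones or all zeros, which is exactly the shape `vbsl_*` (bitwise select) expects, so a branchless per-lane max falls out directly. A sketch assuming an AArch64 target, with an invented helper name:

```
// Hypothetical demo: a comparison mask driving a bitwise select; not from the patch.
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn select_demo() {
    use core::arch::aarch64::*;
    let a = vdup_n_s8(5);
    let b = vdup_n_s8(3);
    let gt = vcgt_s8(a, b); // 0xFF in every lane, since 5 > 3
    assert_eq!(vget_lane_u8::<0>(gt), 0xFF);
    let max = vbsl_s8(gt, a, b); // pick a where the mask is set, else b
    assert_eq!(vget_lane_s8::<0>(max), 5);
}
```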
+/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_gt(a, b) +} + +/// Compare unsigned higher +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgtq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_gt(a, b) +} + +/// Floating-point compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_gt(a, b) +} + +/// Floating-point compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_gt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclt_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcltq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclt_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcltq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_lt(a, b) +} + +/// Compare signed less 
than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclt_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_lt(a, b) +} + +/// Compare signed less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcltq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclt_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcltq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclt_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcltq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclt_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_lt(a, b) +} + +/// Compare unsigned less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhi))] +#[cfg_attr(target_arch = 
"aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcltq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_lt(a, b) +} + +/// Floating-point compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_lt(a, b) +} + +/// Floating-point compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_lt(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcle_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcleq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcle_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcleq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcle_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_le(a, b) +} + +/// Compare signed less than or equal +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcleq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcle_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcleq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcle_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcleq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcle_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_le(a, b) +} + +/// Compare unsigned less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcleq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_le(a, b) +} + +/// Floating-point compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmge))] 
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcle_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_le(a, b) +} + +/// Floating-point compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_le(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcge_s8(a: int8x8_t, b: int8x8_t) -> uint8x8_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgeq_s8(a: int8x16_t, b: int8x16_t) -> uint8x16_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcge_s16(a: int16x4_t, b: int16x4_t) -> uint16x4_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgeq_s16(a: int16x8_t, b: int16x8_t) -> uint16x8_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcge_s32(a: int32x2_t, b: int32x2_t) -> uint32x2_t { + simd_ge(a, b) +} + +/// Compare signed greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgeq_s32(a: int32x4_t, b: int32x4_t) -> uint32x4_t { + simd_ge(a, b) +} + +/// Compare unsigned greater 
than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcge_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgeq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcge_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgeq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcge_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_ge(a, b) +} + +/// Compare unsigned greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cmhs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgeq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_ge(a, b) +} + +/// Floating-point compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcge_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + simd_ge(a, b) +} + +/// Floating-point compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr("vcge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcmge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcgeq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + simd_ge(a, b) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcls_s8(a: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v8i8")] + fn vcls_s8_(a: int8x8_t) -> int8x8_t; + } +vcls_s8_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclsq_s8(a: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v16i8")] + fn vclsq_s8_(a: int8x16_t) -> int8x16_t; + } +vclsq_s8_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcls_s16(a: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v4i16")] + fn vcls_s16_(a: int16x4_t) -> int16x4_t; + } +vcls_s16_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclsq_s16(a: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v8i16")] + fn vclsq_s16_(a: int16x8_t) -> int16x8_t; + } +vclsq_s16_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcls_s32(a: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern 
"unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v2i32")] + fn vcls_s32_(a: int32x2_t) -> int32x2_t; + } +vcls_s32_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vcls.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclsq_s32(a: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcls.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.cls.v4i32")] + fn vclsq_s32_(a: int32x4_t) -> int32x4_t; + } +vclsq_s32_(a) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcls_u8(a: uint8x8_t) -> int8x8_t { + transmute(vcls_s8(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclsq_u8(a: uint8x16_t) -> int8x16_t { + transmute(vclsq_s8(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcls_u16(a: uint16x4_t) -> int16x4_t { + transmute(vcls_s16(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclsq_u16(a: uint16x8_t) -> int16x8_t { + transmute(vclsq_s16(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcls_u32(a: uint32x2_t) -> int32x2_t { + transmute(vcls_s32(transmute(a))) +} + +/// Count leading sign bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcls))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cls))] +#[cfg_attr(target_arch = 
"aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclsq_u32(a: uint32x4_t) -> int32x4_t { + transmute(vclsq_s32(transmute(a))) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclz_s8(a: int8x8_t) -> int8x8_t { + vclz_s8_(a) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclzq_s8(a: int8x16_t) -> int8x16_t { + vclzq_s8_(a) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclz_s16(a: int16x4_t) -> int16x4_t { + vclz_s16_(a) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclzq_s16(a: int16x8_t) -> int16x8_t { + vclzq_s16_(a) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclz_s32(a: int32x2_t) -> int32x2_t { + vclz_s32_(a) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclzq_s32(a: int32x4_t) -> int32x4_t { + vclzq_s32_(a) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclz_u8(a: uint8x8_t) -> uint8x8_t { + transmute(vclz_s8_(transmute(a))) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i8"))] +#[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclzq_u8(a: uint8x16_t) -> uint8x16_t { + transmute(vclzq_s8_(transmute(a))) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclz_u16(a: uint16x4_t) -> uint16x4_t { + transmute(vclz_s16_(transmute(a))) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclzq_u16(a: uint16x8_t) -> uint16x8_t { + transmute(vclzq_s16_(transmute(a))) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclz_u32(a: uint32x2_t) -> uint32x2_t { + transmute(vclz_s32_(transmute(a))) +} + +/// Count leading zero bits +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vclz.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(clz))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vclzq_u32(a: uint32x4_t) -> uint32x4_t { + transmute(vclzq_s32_(transmute(a))) +} + +/// Floating-point absolute compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcagt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v2i32.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facgt.v2i32.v2f32")] + fn vcagt_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t; + } +vcagt_f32_(a, b) +} + +/// Floating-point absolute compare greater than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcagtq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacgt.v4i32.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.facgt.v4i32.v4f32")] + fn vcagtq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t; + } +vcagtq_f32_(a, b) +} + +/// Floating-point absolute compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcage_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v2i32.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v2i32.v2f32")] + fn vcage_f32_(a: float32x2_t, b: float32x2_t) -> uint32x2_t; + } +vcage_f32_(a, b) +} + +/// Floating-point absolute compare greater than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcageq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vacge.v4i32.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.facge.v4i32.v4f32")] + fn vcageq_f32_(a: float32x4_t, b: float32x4_t) -> uint32x4_t; + } +vcageq_f32_(a, b) +} + +/// Floating-point absolute compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcalt_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + vcagt_f32(b, a) +} + +/// Floating-point absolute compare less than +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacgt.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facgt))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcaltq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + vcagtq_f32(b, a) +} + +/// Floating-point absolute compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcale_f32(a: float32x2_t, b: float32x2_t) -> uint32x2_t { + vcage_f32(b, a) +} + +/// Floating-point absolute compare less than or equal +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vacge.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(facge))] +#[cfg_attr(target_arch = "aarch64", 
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcaleq_f32(a: float32x4_t, b: float32x4_t) -> uint32x4_t { + vcageq_f32(b, a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_s8(a: u64) -> int8x8_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_s16(a: u64) -> int16x4_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_s32(a: u64) -> int32x2_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_s64(a: u64) -> int64x1_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_u8(a: u64) -> uint8x8_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_u16(a: u64) -> uint16x4_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_u32(a: u64) -> uint32x2_t { + transmute(a) +} +
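+// Editorial aside, not part of the generated file: vcreate_* reinterprets the
+// 64 bits of `a` as a vector, so on a little-endian target the
+// least-significant bits of the `u64` land in lane 0. A minimal sketch,
+// assuming a little-endian AArch64 test target; the `_example_*` name is
+// illustrative only:
+#[cfg(all(test, target_arch = "aarch64"))]
+unsafe fn _example_vcreate_u16() {
+    let v: uint16x4_t = vcreate_u16(0x0004_0003_0002_0001);
+    assert_eq!(vget_lane_u16::<0>(v), 1); // low 16 bits of the u64 -> lane 0
+    assert_eq!(vget_lane_u16::<3>(v), 4); // high 16 bits of the u64 -> lane 3
+}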
+/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_u64(a: u64) -> uint64x1_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_p8(a: u64) -> poly8x8_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_p16(a: u64) -> poly16x4_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_p64(a: u64) -> poly64x1_t { + transmute(a) +} + +/// Create a vector from a 64-bit bit pattern +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcreate_f32(a: u64) -> float32x2_t { + transmute(a) +} + +/// Fixed-point convert to floating-point +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(scvtf))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcvt_f32_s32(a: int32x2_t) -> float32x2_t { + simd_cast(a) +} + +/// Fixed-point convert to floating-point +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(scvtf))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcvtq_f32_s32(a: int32x4_t) -> float32x4_t { + simd_cast(a) +} + +/// Fixed-point convert to floating-point +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ucvtf))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcvt_f32_u32(a: uint32x2_t) -> float32x2_t { + simd_cast(a) +} + +/// 
Fixed-point convert to floating-point +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ucvtf))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcvtq_f32_u32(a: uint32x4_t) -> float32x4_t { + simd_cast(a) +} + +/// Fixed-point convert to floating-point +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vcvt_n_f32_s32(a: int32x2_t) -> float32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxs2fp.v2f32.v2i32")] + fn vcvt_n_f32_s32_(a: int32x2_t, n: i32) -> float32x2_t; + } +vcvt_n_f32_s32_(a, N) +} + +/// Fixed-point convert to floating-point +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvt_n_f32_s32(a: int32x2_t) -> float32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.v2f32.v2i32")] + fn vcvt_n_f32_s32_(a: int32x2_t, n: i32) -> float32x2_t; + } +vcvt_n_f32_s32_(a, N) +} + +/// Fixed-point convert to floating-point +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vcvtq_n_f32_s32(a: int32x4_t) -> float32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxs2fp.v4f32.v4i32")] + fn vcvtq_n_f32_s32_(a: int32x4_t, n: i32) -> float32x4_t; + } +vcvtq_n_f32_s32_(a, N) +} + +/// Fixed-point convert to floating-point +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(scvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtq_n_f32_s32(a: int32x4_t) -> float32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxs2fp.v4f32.v4i32")] + fn vcvtq_n_f32_s32_(a: int32x4_t, n: i32) -> float32x4_t; + } +vcvtq_n_f32_s32_(a, N) +} + +/// Fixed-point convert to floating-point +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vcvt_n_f32_u32(a: uint32x2_t) -> float32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxu2fp.v2f32.v2i32")] + fn vcvt_n_f32_u32_(a: uint32x2_t, n: i32) -> float32x2_t; + } +vcvt_n_f32_u32_(a, N) +} + +/// Fixed-point convert to floating-point +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvt_n_f32_u32(a: uint32x2_t) -> float32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.v2f32.v2i32")] + fn vcvt_n_f32_u32_(a: uint32x2_t, n: i32) -> float32x2_t; + } +vcvt_n_f32_u32_(a, N) +} + +/// Fixed-point convert to floating-point +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfxu2fp.v4f32.v4i32")] + fn vcvtq_n_f32_u32_(a: uint32x4_t, n: i32) -> float32x4_t; + } +vcvtq_n_f32_u32_(a, N) +} + +/// Fixed-point convert to floating-point +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ucvtf, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtq_n_f32_u32(a: uint32x4_t) -> float32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfxu2fp.v4f32.v4i32")] + fn vcvtq_n_f32_u32_(a: uint32x4_t, n: i32) -> float32x4_t; + } +vcvtq_n_f32_u32_(a, N) +} + +/// Floating-point convert to fixed-point, rounding toward zero +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vcvt_n_s32_f32(a: float32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxs.v2i32.v2f32")] + fn vcvt_n_s32_f32_(a: float32x2_t, n: i32) -> int32x2_t; + } +vcvt_n_s32_f32_(a, N) +} + +/// Floating-point convert to fixed-point, rounding toward zero +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvt_n_s32_f32(a: float32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.v2i32.v2f32")] + fn vcvt_n_s32_f32_(a: float32x2_t, n: i32) -> int32x2_t; + } +vcvt_n_s32_f32_(a, N) +} + +/// Floating-point convert to fixed-point, rounding toward zero +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxs.v4i32.v4f32")] + fn vcvtq_n_s32_f32_(a: float32x4_t, n: i32) -> int32x4_t; + } +vcvtq_n_s32_f32_(a, N) +} + +/// Floating-point convert to fixed-point, rounding toward zero +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtzs, N = 2))] 
+#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtq_n_s32_f32(a: float32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxs.v4i32.v4f32")] + fn vcvtq_n_s32_f32_(a: float32x4_t, n: i32) -> int32x4_t; + } +vcvtq_n_s32_f32_(a, N) +} + +/// Floating-point convert to fixed-point, rounding toward zero +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vcvt_n_u32_f32(a: float32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxu.v2i32.v2f32")] + fn vcvt_n_u32_f32_(a: float32x2_t, n: i32) -> uint32x2_t; + } +vcvt_n_u32_f32_(a, N) +} + +/// Floating-point convert to fixed-point, rounding toward zero +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvt_n_u32_f32(a: float32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.v2i32.v2f32")] + fn vcvt_n_u32_f32_(a: float32x2_t, n: i32) -> uint32x2_t; + } +vcvt_n_u32_f32_(a, N) +} + +/// Floating-point convert to fixed-point, rounding toward zero +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vcvt, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vcvtfp2fxu.v4i32.v4f32")] + fn vcvtq_n_u32_f32_(a: float32x4_t, n: i32) -> uint32x4_t; + } +vcvtq_n_u32_f32_(a, N) +} + +/// Floating-point convert to fixed-point, rounding toward zero +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(fcvtzu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vcvtq_n_u32_f32(a: float32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.vcvtfp2fxu.v4i32.v4f32")] + fn vcvtq_n_u32_f32_(a: float32x4_t, n: i32) -> uint32x4_t; + } +vcvtq_n_u32_f32_(a, N) +} + +/// Floating-point convert to signed fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcvt_s32_f32(a: float32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v2i32.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v2i32.v2f32")] + fn 
vcvt_s32_f32_(a: float32x2_t) -> int32x2_t; + } +vcvt_s32_f32_(a) +} + +/// Floating-point convert to signed fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzs))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcvtq_s32_f32(a: float32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fptosi.sat.v4i32.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptosi.sat.v4i32.v4f32")] + fn vcvtq_s32_f32_(a: float32x4_t) -> int32x4_t; + } +vcvtq_s32_f32_(a) +} + +/// Floating-point convert to unsigned fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcvt_u32_f32(a: float32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v2i32.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v2i32.v2f32")] + fn vcvt_u32_f32_(a: float32x2_t) -> uint32x2_t; + } +vcvt_u32_f32_(a) +} + +/// Floating-point convert to unsigned fixed-point, rounding toward zero +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcvt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fcvtzu))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fptoui.sat.v4i32.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fptoui.sat.v4i32.v4f32")] + fn vcvtq_u32_f32_(a: float32x4_t) -> uint32x4_t; + } +vcvtq_u32_f32_(a) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vdup_lane_s8(a: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32]) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vdupq_laneq_s8(a: int8x16_t) -> int8x16_t { + static_assert_imm4!(N); + simd_shuffle16!(a, a, [N as u32, N as 
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert_imm3!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert_imm4!(N);
+    simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(N);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_s8<const N: i32>(a: int8x16_t) -> int8x8_t {
+    static_assert_imm4!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_s16<const N: i32>(a: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_s32<const N: i32>(a: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(N);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_s8<const N: i32>(a: int8x8_t) -> int8x16_t {
+    static_assert_imm3!(N);
+    simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_s16<const N: i32>(a: int16x4_t) -> int16x8_t {
+    static_assert_imm2!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_s32<const N: i32>(a: int32x2_t) -> int32x4_t {
+    static_assert_imm1!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert_imm3!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert_imm4!(N);
+    simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert_imm2!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert_imm3!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert_imm1!(N);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert_imm2!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_u8<const N: i32>(a: uint8x16_t) -> uint8x8_t {
+    static_assert_imm4!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_u16<const N: i32>(a: uint16x8_t) -> uint16x4_t {
+    static_assert_imm3!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_u32<const N: i32>(a: uint32x4_t) -> uint32x2_t {
+    static_assert_imm2!(N);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_u8<const N: i32>(a: uint8x8_t) -> uint8x16_t {
+    static_assert_imm3!(N);
+    simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_u16<const N: i32>(a: uint16x4_t) -> uint16x8_t {
+    static_assert_imm2!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_u32<const N: i32>(a: uint32x2_t) -> uint32x4_t {
+    static_assert_imm1!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
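Every `vdup_lane*` function above is the same shuffle pattern: one source lane, selected by the const generic `N`, is repeated across all result lanes (`lane` reads from a 64-bit vector, `laneq` from a 128-bit one). A short usage sketch (editorial illustration, not part of the patch), assuming an AArch64 target with NEON:

#[cfg(target_arch = "aarch64")]
fn dup_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let bytes: [i8; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
        let v = vld1_s8(bytes.as_ptr());
        // Broadcast lane 3 into every lane: [3, 3, 3, 3, 3, 3, 3, 3].
        let d = vdup_lane_s8::<3>(v);
        assert_eq!(vget_lane_s8::<0>(d), 3);
        assert_eq!(vget_lane_s8::<7>(d), 3);
    }
}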
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x8_t {
+    static_assert_imm3!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x16_t {
+    static_assert_imm4!(N);
+    simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x4_t {
+    static_assert_imm2!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x8_t {
+    static_assert_imm3!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 8))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_p8<const N: i32>(a: poly8x16_t) -> poly8x8_t {
+    static_assert_imm4!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_p16<const N: i32>(a: poly16x8_t) -> poly16x4_t {
+    static_assert_imm3!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 4))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_p8<const N: i32>(a: poly8x8_t) -> poly8x16_t {
+    static_assert_imm3!(N);
+    simd_shuffle16!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_p16<const N: i32>(a: poly16x4_t) -> poly16x8_t {
+    static_assert_imm2!(N);
+    simd_shuffle8!(a, a, [N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_s64<const N: i32>(a: int64x1_t) -> int64x2_t {
+    static_assert!(N : i32 where N == 0);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t {
+    static_assert_imm1!(N);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x2_t {
+    static_assert!(N : i32 where N == 0);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_f32<const N: i32>(a: float32x2_t) -> float32x2_t {
+    static_assert_imm1!(N);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_laneq_f32<const N: i32>(a: float32x4_t) -> float32x4_t {
+    static_assert_imm2!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_laneq_f32<const N: i32>(a: float32x4_t) -> float32x2_t {
+    static_assert_imm2!(N);
+    simd_shuffle2!(a, a, [N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32", N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup, N = 1))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdupq_lane_f32<const N: i32>(a: float32x2_t) -> float32x4_t {
+    static_assert_imm1!(N);
+    simd_shuffle4!(a, a, [N as u32, N as u32, N as u32, N as u32])
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N == 0);
+    a
+}
+
+/// Set all vector lanes to the same value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, N = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, N = 0))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vdup_lane_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t {
+    static_assert!(N : i32 where N == 0);
+    a
+}
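A 64-bit D register holds exactly one lane, which is why the `vdup_lane_{s,u}64` forms above assert `N == 0` and return `a` unchanged (hence the `nop` in their test asserts), while the widening `vdupq_lane_{s,u}64` forms splat that single lane across a Q register. A sketch (editorial illustration, not part of the patch), assuming an AArch64 target:

#[cfg(target_arch = "aarch64")]
unsafe fn splat_to_q(a: core::arch::aarch64::int64x1_t) -> core::arch::aarch64::int64x2_t {
    // 0 is the only valid lane index for a one-lane source.
    core::arch::aarch64::vdupq_lane_s64::<0>(a)
}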
= "aarch64"), assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vdup_laneq_s64(a: int64x2_t) -> int64x1_t { + static_assert_imm1!(N); + transmute::(simd_extract(a, N as u32)) +} + +/// Set all vector lanes to the same value +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, N = 1))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vdup_laneq_u64(a: uint64x2_t) -> uint64x1_t { + static_assert_imm1!(N); + transmute::(simd_extract(a, N as u32)) +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vext_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + match N & 0b111 { + 0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 8))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vextq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert_imm4!(N); + match N & 0b1111 { + 0 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]), + 1 => simd_shuffle16!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]), + 2 => simd_shuffle16!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]), + 3 => simd_shuffle16!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]), + 4 => simd_shuffle16!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]), + 5 => simd_shuffle16!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]), + 6 => simd_shuffle16!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]), + 7 => simd_shuffle16!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]), + 8 => simd_shuffle16!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]), + 9 => simd_shuffle16!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]), + 10 => simd_shuffle16!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]), + 11 => simd_shuffle16!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]), + 12 => 
simd_shuffle16!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]), + 13 => simd_shuffle16!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]), + 14 => simd_shuffle16!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]), + 15 => simd_shuffle16!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vext_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_imm2!(N); + match N & 0b11 { + 0 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4!(a, b, [2, 3, 4, 5]), + 3 => simd_shuffle4!(a, b, [3, 4, 5, 6]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vextq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_imm3!(N); + match N & 0b111 { + 0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]), + 1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]), + 2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]), + 3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]), + 4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]), + 5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]), + 6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]), + 7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vext_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_imm1!(N); + match N & 0b1 { + 0 => simd_shuffle2!(a, b, [0, 1]), + 1 => simd_shuffle2!(a, b, [1, 2]), + _ => unreachable_unchecked(), + } +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vextq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_imm2!(N); + match N & 0b11 { + 0 => simd_shuffle4!(a, b, [0, 1, 2, 3]), + 1 => simd_shuffle4!(a, b, [1, 2, 3, 4]), + 2 => simd_shuffle4!(a, b, [2, 3, 4, 5]), + 
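`vext` treats `(a, b)` as one double-length vector and returns the window of consecutive lanes starting at index `N`, which is why every match arm above is a run of adjacent indices. A usage sketch (editorial illustration, not part of the patch), assuming an AArch64 target with NEON:

#[cfg(target_arch = "aarch64")]
fn ext_demo() {
    use core::arch::aarch64::*;
    unsafe {
        let a = vld1_s16([10i16, 11, 12, 13].as_ptr());
        let b = vld1_s16([20i16, 21, 22, 23].as_ptr());
        // N = 1 drops the lowest lane of `a` and pulls one lane in from `b`:
        // the result is [11, 12, 13, 20].
        let r = vext_s16::<1>(a, b);
        assert_eq!(vget_lane_s16::<3>(r), 20);
    }
}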
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    match N & 0b11 {
+        0 => simd_shuffle4!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle4!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle4!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle4!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vext_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t {
+    static_assert_imm3!(N);
+    match N & 0b111 {
+        0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 8))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t {
+    static_assert_imm4!(N);
+    match N & 0b1111 {
+        0 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+        1 => simd_shuffle16!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]),
+        2 => simd_shuffle16!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]),
+        3 => simd_shuffle16!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]),
+        4 => simd_shuffle16!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]),
+        5 => simd_shuffle16!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]),
+        6 => simd_shuffle16!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]),
+        7 => simd_shuffle16!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]),
+        8 => simd_shuffle16!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]),
+        9 => simd_shuffle16!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]),
+        10 => simd_shuffle16!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]),
+        11 => simd_shuffle16!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]),
+        12 => simd_shuffle16!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]),
+        13 => simd_shuffle16!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]),
+        14 => simd_shuffle16!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]),
+        15 => simd_shuffle16!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vext_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
+    static_assert_imm2!(N);
+    match N & 0b11 {
+        0 => simd_shuffle4!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle4!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle4!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle4!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
+    static_assert_imm3!(N);
+    match N & 0b111 {
+        0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vext_u32<const N: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
+    static_assert_imm1!(N);
+    match N & 0b1 {
+        0 => simd_shuffle2!(a, b, [0, 1]),
+        1 => simd_shuffle2!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
+    static_assert_imm2!(N);
+    match N & 0b11 {
+        0 => simd_shuffle4!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle4!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle4!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle4!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
+    }
+}
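Passing the same vector as both operands turns `vext` into a lane rotation, a common idiom for realigning data inside a register. A sketch (editorial illustration, not part of the patch), assuming an AArch64 target:

#[cfg(target_arch = "aarch64")]
unsafe fn rotate_left_3(v: core::arch::aarch64::uint8x8_t) -> core::arch::aarch64::uint8x8_t {
    // Lanes [3..8) of `v` followed by lanes [0..3): rotate left by three lanes.
    core::arch::aarch64::vext_u8::<3>(v, v)
}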
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vext_p8<const N: i32>(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t {
+    static_assert_imm3!(N);
+    match N & 0b111 {
+        0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 8))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 8))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_p8<const N: i32>(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t {
+    static_assert_imm4!(N);
+    match N & 0b1111 {
+        0 => simd_shuffle16!(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]),
+        1 => simd_shuffle16!(a, b, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]),
+        2 => simd_shuffle16!(a, b, [2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]),
+        3 => simd_shuffle16!(a, b, [3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18]),
+        4 => simd_shuffle16!(a, b, [4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]),
+        5 => simd_shuffle16!(a, b, [5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]),
+        6 => simd_shuffle16!(a, b, [6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21]),
+        7 => simd_shuffle16!(a, b, [7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22]),
+        8 => simd_shuffle16!(a, b, [8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23]),
+        9 => simd_shuffle16!(a, b, [9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]),
+        10 => simd_shuffle16!(a, b, [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25]),
+        11 => simd_shuffle16!(a, b, [11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26]),
+        12 => simd_shuffle16!(a, b, [12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]),
+        13 => simd_shuffle16!(a, b, [13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28]),
+        14 => simd_shuffle16!(a, b, [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29]),
+        15 => simd_shuffle16!(a, b, [15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vext_p16<const N: i32>(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t {
+    static_assert_imm2!(N);
+    match N & 0b11 {
+        0 => simd_shuffle4!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle4!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle4!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle4!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 4))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 4))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_p16<const N: i32>(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t {
+    static_assert_imm3!(N);
+    match N & 0b111 {
+        0 => simd_shuffle8!(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
+        1 => simd_shuffle8!(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
+        2 => simd_shuffle8!(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
+        3 => simd_shuffle8!(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
+        4 => simd_shuffle8!(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
+        5 => simd_shuffle8!(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
+        6 => simd_shuffle8!(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
+        7 => simd_shuffle8!(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    match N & 0b1 {
+        0 => simd_shuffle2!(a, b, [0, 1]),
+        1 => simd_shuffle2!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmov, N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t {
+    static_assert_imm1!(N);
+    match N & 0b1 {
+        0 => simd_shuffle2!(a, b, [0, 1]),
+        1 => simd_shuffle2!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vext_f32<const N: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    static_assert_imm1!(N);
+    match N & 0b1 {
+        0 => simd_shuffle2!(a, b, [0, 1]),
+        1 => simd_shuffle2!(a, b, [1, 2]),
+        _ => unreachable_unchecked(),
+    }
+}
+
+/// Extract vector from pair of vectors
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vext.8", N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vextq_f32<const N: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    static_assert_imm2!(N);
+    match N & 0b11 {
+        0 => simd_shuffle4!(a, b, [0, 1, 2, 3]),
+        1 => simd_shuffle4!(a, b, [1, 2, 3, 4]),
+        2 => simd_shuffle4!(a, b, [2, 3, 4, 5]),
+        3 => simd_shuffle4!(a, b, [3, 4, 5, 6]),
+        _ => unreachable_unchecked(),
+    }
+}
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i8"))] 
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + simd_add(a, simd_mul(b, c)) +} + +/// Multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Floating-point multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + simd_add(a, simd_mul(b, c)) +} + +/// Floating-point multiply-add to accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + simd_add(a, simd_mul(b, c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmla_s16(a, b, vdup_n_s16(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlaq_s16(a, b, vdupq_n_s16(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmla_s32(a, b, vdup_n_s32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlaq_s32(a, b, vdupq_n_s32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmla_u16(a, b, vdup_n_u16(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlaq_u16(a, b, vdupq_n_u16(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmla_u32(a, b, vdup_n_u32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32"))] +#[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(mla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t { + vmlaq_u32(a, b, vdupq_n_u32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vmla_f32(a, b, vdup_n_f32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vmlaq_f32(a, b, vdupq_n_f32(c)) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_lane_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + static_assert_imm2!(LANE); + vmla_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_laneq_s16(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t { + static_assert_imm3!(LANE); + vmla_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_lane_s16(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t { + static_assert_imm2!(LANE); + vmlaq_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_laneq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + static_assert_imm3!(LANE); + vmlaq_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_lane_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + static_assert_imm1!(LANE); + vmla_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_laneq_s32(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t { + static_assert_imm2!(LANE); + vmla_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_lane_s32(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t { + static_assert_imm1!(LANE); + vmlaq_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlaq_laneq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlaq_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector multiply accumulate with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmla_lane_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> 
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmla_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    static_assert_imm2!(LANE);
+    vmla_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmla_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
+    static_assert_imm3!(LANE);
+    vmla_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlaq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
+    static_assert_imm2!(LANE);
+    vmlaq_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlaq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    static_assert_imm3!(LANE);
+    vmlaq_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmla_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    static_assert_imm1!(LANE);
+    vmla_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmla_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
+    static_assert_imm2!(LANE);
+    vmla_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlaq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
+    static_assert_imm1!(LANE);
+    vmlaq_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mla, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlaq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    vmlaq_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmla_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    static_assert_imm1!(LANE);
+    vmla_f32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmla_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+    static_assert_imm2!(LANE);
+    vmla_f32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlaq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+    static_assert_imm1!(LANE);
+    vmlaq_f32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmla.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlaq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t {
+    static_assert_imm2!(LANE);
+    vmlaq_f32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+    simd_add(a, vmull_s8(b, c))
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    simd_add(a, vmull_s16(b, c))
+}
+
+/// Signed multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    simd_add(a, vmull_s32(b, c))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t {
+    simd_add(a, vmull_u8(b, c))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    simd_add(a, vmull_u16(b, c))
+}
+
+/// Unsigned multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    simd_add(a, vmull_u32(b, c))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vmlal_s16(a, b, vdup_n_s16(c))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vmlal_s32(a, b, vdup_n_s32(c))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t {
+    vmlal_u16(a, b, vdup_n_u16(c))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t {
+    vmlal_u32(a, b, vdup_n_u32(c))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_lane_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vmlal_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_laneq_s16<const LANE: i32>(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t {
+    static_assert_imm3!(LANE);
+    vmlal_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_lane_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(LANE);
+    vmlal_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.s32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_laneq_s32<const LANE: i32>(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t {
+    static_assert_imm2!(LANE);
+    vmlal_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_lane_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    vmlal_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_laneq_u16<const LANE: i32>(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t {
+    static_assert_imm3!(LANE);
+    vmlal_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_lane_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+    static_assert_imm1!(LANE);
+    vmlal_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector widening multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlal.u32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlal, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlal_laneq_u32<const LANE: i32>(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t {
+    static_assert_imm2!(LANE);
+    vmlal_u32(a, b, 
simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + simd_sub(a, simd_mul(b, c)) +} + +/// 
Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Floating-point multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + simd_sub(a, simd_mul(b, c)) +} + +/// Floating-point multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + simd_sub(a, 
simd_mul(b, c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_n_s16(a: int16x4_t, b: int16x4_t, c: i16) -> int16x4_t { + vmls_s16(a, b, vdup_n_s16(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_n_s16(a: int16x8_t, b: int16x8_t, c: i16) -> int16x8_t { + vmlsq_s16(a, b, vdupq_n_s16(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_n_s32(a: int32x2_t, b: int32x2_t, c: i32) -> int32x2_t { + vmls_s32(a, b, vdup_n_s32(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_n_s32(a: int32x4_t, b: int32x4_t, c: i32) -> int32x4_t { + vmlsq_s32(a, b, vdupq_n_s32(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_n_u16(a: uint16x4_t, b: uint16x4_t, c: u16) -> uint16x4_t { + vmls_u16(a, b, vdup_n_u16(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_n_u16(a: uint16x8_t, b: uint16x8_t, c: u16) -> uint16x8_t { + vmlsq_u16(a, b, vdupq_n_u16(c)) +} + +/// Vector multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmls_n_u32(a: uint32x2_t, b: uint32x2_t, c: u32) -> uint32x2_t { + vmls_u32(a, b, vdup_n_u32(c)) +} + +/// Vector 
multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_n_u32(a: uint32x4_t, b: uint32x4_t, c: u32) -> uint32x4_t {
+    vmlsq_u32(a, b, vdupq_n_u32(c))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t {
+    vmls_f32(a, b, vdup_n_f32(c))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t {
+    vmlsq_f32(a, b, vdupq_n_f32(c))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    vmls_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t, c: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(LANE);
+    vmls_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x4_t) -> int16x8_t {
+    static_assert_imm2!(LANE);
+    vmlsq_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    vmlsq_s16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    vmls_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t, c: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    vmls_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x2_t) -> int32x4_t {
+    static_assert_imm1!(LANE);
+    vmlsq_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vmlsq_s32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
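+
+// Illustrative usage sketch (editorial addition, not emitted by stdarch-gen; the
+// `example_*` helper name is hypothetical). The `laneq` variants select the
+// broadcast lane from a 128-bit vector, so the index range is wider (here
+// 0..=7, enforced by `static_assert_imm3!`).
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn example_vmls_laneq_s16() {
+    let a = vld1_s16([100i16, 100, 100, 100].as_ptr());
+    let b = vld1_s16([1i16, 2, 3, 4].as_ptr());
+    let c = vld1q_s16([0i16, 0, 0, 0, 0, 0, 0, 5].as_ptr());
+    // LANE = 7 broadcasts c[7] (= 5), so r[i] = a[i] - b[i] * 5.
+    let r = vmls_laneq_s16::<7>(a, b, c);
+    assert_eq!(core::mem::transmute::<int16x4_t, [i16; 4]>(r), [95, 90, 85, 80]);
+}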
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t {
+    static_assert_imm2!(LANE);
+    vmls_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t, c: uint16x8_t) -> uint16x4_t {
+    static_assert_imm3!(LANE);
+    vmls_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x4_t) -> uint16x8_t {
+    static_assert_imm2!(LANE);
+    vmlsq_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i16", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t {
+    static_assert_imm3!(LANE);
+    vmlsq_u16(a, b, simd_shuffle8!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t {
+    static_assert_imm1!(LANE);
+    vmls_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t, c: uint32x4_t) -> uint32x2_t {
+    static_assert_imm2!(LANE);
+    vmls_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x2_t) -> uint32x4_t {
+    static_assert_imm1!(LANE);
+    vmlsq_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.i32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mls, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t {
+    static_assert_imm2!(LANE);
+    vmlsq_u32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t {
+    static_assert_imm1!(LANE);
+    vmls_f32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmls_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t, c: float32x4_t) -> float32x2_t {
+    static_assert_imm2!(LANE);
+    vmls_f32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32]))
+}
+
+/// Vector multiply subtract with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vmlsq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t, c: float32x2_t) -> float32x4_t {
+    static_assert_imm1!(LANE);
+    vmlsq_f32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]))
+}
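+
+// Illustrative usage sketch (editorial addition, not emitted by stdarch-gen; the
+// `example_*` helper name is hypothetical). Besides the turbofish form shown
+// here, the `#[rustc_legacy_const_generics(3)]` attribute above lets call sites
+// pass the lane index as a fourth, C-style argument instead.
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn example_vmls_lane_f32() {
+    let a = vdup_n_f32(10.0);
+    let b = vld1_f32([2.0f32, 3.0].as_ptr());
+    let c = vld1_f32([0.5f32, 4.0].as_ptr());
+    // LANE = 0 broadcasts c[0] (= 0.5), so r[i] = a[i] - b[i] * 0.5.
+    let r = vmls_lane_f32::<0>(a, b, c);
+    assert_eq!(core::mem::transmute::<float32x2_t, [f32; 2]>(r), [9.0, 8.5]);
+}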
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmls.f32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsq_laneq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + static_assert_imm2!(LANE); + vmlsq_f32(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Signed multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t { + simd_sub(a, vmull_s8(b, c)) +} + +/// Signed multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + simd_sub(a, vmull_s16(b, c)) +} + +/// Signed multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + simd_sub(a, vmull_s32(b, c)) +} + +/// Unsigned multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + simd_sub(a, vmull_u8(b, c)) +} + +/// Unsigned multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + simd_sub(a, vmull_u16(b, c)) +} + +/// Unsigned multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + simd_sub(a, vmull_u32(b, c)) 
+} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vmlsl_s16(a, b, vdup_n_s16(c)) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vmlsl_s32(a, b, vdup_n_s32(c)) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_n_u16(a: uint32x4_t, b: uint16x4_t, c: u16) -> uint32x4_t { + vmlsl_u16(a, b, vdup_n_u16(c)) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_n_u32(a: uint64x2_t, b: uint32x2_t, c: u32) -> uint64x2_t { + vmlsl_u32(a, b, vdup_n_u32(c)) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmlsl_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_laneq_s16(a: int32x4_t, b: int16x4_t, c: int16x8_t) -> int32x4_t { + static_assert_imm3!(LANE); + vmlsl_s16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(LANE); + vmlsl_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.s32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_laneq_s32(a: int64x2_t, b: int32x2_t, c: int32x4_t) -> int64x2_t { + static_assert_imm2!(LANE); + vmlsl_s32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_lane_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmlsl_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u16", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_laneq_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x8_t) -> uint32x4_t { + static_assert_imm3!(LANE); + vmlsl_u16(a, b, simd_shuffle4!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_lane_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + vmlsl_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) +} + +/// Vector widening multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmlsl.u32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umlsl, LANE = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", 
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmlsl_laneq_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x4_t) -> uint64x2_t { + static_assert_imm2!(LANE); + vmlsl_u32(a, b, simd_shuffle2!(c, c, [LANE as u32, LANE as u32])) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vneg_s8(a: int8x8_t) -> int8x8_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vnegq_s8(a: int8x16_t) -> int8x16_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vneg_s16(a: int16x4_t) -> int16x4_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vnegq_s16(a: int16x8_t) -> int16x8_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vneg_s32(a: int32x2_t) -> int32x2_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(neg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vnegq_s32(a: int32x4_t) -> int32x4_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vneg_f32(a: float32x2_t) -> float32x2_t { + simd_neg(a) +} + +/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))] +#[cfg_attr(target_arch = "aarch64", 
+/// Negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vneg.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fneg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vnegq_f32(a: float32x4_t) -> float32x4_t { + simd_neg(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqneg_s8(a: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i8")] + fn vqneg_s8_(a: int8x8_t) -> int8x8_t; + } +vqneg_s8_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqnegq_s8(a: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v16i8")] + fn vqnegq_s8_(a: int8x16_t) -> int8x16_t; + } +vqnegq_s8_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqneg_s16(a: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i16")] + fn vqneg_s16_(a: int16x4_t) -> int16x4_t; + } +vqneg_s16_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqnegq_s16(a: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v8i16")] + fn vqnegq_s16_(a: int16x8_t) -> int16x8_t; + } +vqnegq_s16_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqneg_s32(a: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v2i32")] + #[cfg_attr(target_arch =
"aarch64", link_name = "llvm.aarch64.neon.sqneg.v2i32")] + fn vqneg_s32_(a: int32x2_t) -> int32x2_t; + } +vqneg_s32_(a) +} + +/// Signed saturating negate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqneg.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqneg))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqnegq_s32(a: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqneg.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqneg.v4i32")] + fn vqnegq_s32_(a: int32x4_t) -> int32x4_t; + } +vqnegq_s32_(a) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i8")] + fn vqsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vqsub_u8_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v16i8")] + fn vqsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vqsubq_u8_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i16")] + fn vqsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vqsub_u16_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + 
#[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v8i16")] + fn vqsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vqsubq_u16_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i32")] + fn vqsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vqsub_u32_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v4i32")] + fn vqsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vqsubq_u32_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v1i64")] + fn vqsub_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; + } +vqsub_u64_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.u64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.usub.sat.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqsub.v2i64")] + fn vqsubq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } +vqsubq_u64_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = 
"1.59.0"))] +pub unsafe fn vqsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i8")] + fn vqsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vqsub_s8_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v16i8")] + fn vqsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vqsubq_s8_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i16")] + fn vqsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqsub_s16_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v8i16")] + fn vqsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqsubq_s16_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i32")] + fn vqsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqsub_s32_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
+/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v4i32")] + fn vqsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vqsubq_s32_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v1i64")] + fn vqsub_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vqsub_s64_(a, b) +} + +/// Saturating subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqsub.s64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.ssub.sat.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqsub.v2i64")] + fn vqsubq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vqsubq_s64_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i8")] + fn vhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vhadd_u8_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v16i8")] + fn vhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vhaddq_u8_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test,
target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i16")] + fn vhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vhadd_u16_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v8i16")] + fn vhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vhaddq_u16_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v2i32")] + fn vhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vhadd_u32_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhaddu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhadd.v4i32")] + fn vhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vhaddq_u32_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i8")] + fn vhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vhadd_s8_(a, b) +} + +/// Halving add +#[inline] 
+/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v16i8")] + fn vhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vhaddq_s8_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i16")] + fn vhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vhadd_s16_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v8i16")] + fn vhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vhaddq_s16_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v2i32")] + fn vhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vhadd_s32_(a, b) +} + +/// Halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhadds.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shadd.v4i32")] + fn
vhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vhaddq_s32_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i8")] + fn vrhadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vrhadd_u8_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v16i8")] + fn vrhaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vrhaddq_u8_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i16")] + fn vrhadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vrhadd_u16_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v8i16")] + fn vrhaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vrhaddq_u16_(a, b) +} +
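+// Editorial note: illustrative sketch, not emitted by stdarch-gen.
+// Rounding halving add computes `(a + b + 1) >> 1` per lane, i.e. it rounds
+// the half-sum up where `vhadd_*` truncates. A rough sketch, assuming an
+// aarch64 target with NEON available:
+#[cfg(all(test, target_arch = "aarch64"))]
+#[allow(dead_code)]
+unsafe fn vrhadd_example() {
+    // (255 + 254 + 1) / 2 = 255; compare vhadd_u8, which yields 254
+    assert_eq!(vget_lane_u8::<0>(vrhadd_u8(vdup_n_u8(255), vdup_n_u8(254))), 255);
+}
+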
+ extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v2i32")] + fn vrhadd_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vrhadd_u32_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhaddu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urhadd.v4i32")] + fn vrhaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vrhaddq_u32_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i8")] + fn vrhadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vrhadd_s8_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v16i8")] + fn vrhaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vrhaddq_s8_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i16")] + fn vrhadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vrhadd_s16_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] 
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v8i16")] + fn vrhaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vrhaddq_s16_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v2i32")] + fn vrhadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vrhadd_s32_(a, b) +} + +/// Rounding halving add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrhadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srhadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrhaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrhadds.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srhadd.v4i32")] + fn vrhaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vrhaddq_s32_(a, b) +} + +/// Floating-point round to integral, to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrndn_f32(a: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v2f32")] + fn vrndn_f32_(a: float32x2_t) -> float32x2_t; + } +vrndn_f32_(a) +} + +/// Floating-point round to integral, to nearest with ties to even +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrintn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frintn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrndnq_f32(a: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrintn.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frintn.v4f32")] + fn vrndnq_f32_(a: float32x4_t) -> float32x4_t; + } +vrndnq_f32_(a) +} +
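+// Editorial note: illustrative sketch, not emitted by stdarch-gen.
+// `vrndn_*` rounds each lane to the nearest integral value with ties going
+// to the even choice (IEEE roundTiesToEven). A rough sketch, assuming an
+// aarch64 target with NEON available:
+#[cfg(all(test, target_arch = "aarch64"))]
+#[allow(dead_code)]
+unsafe fn vrndn_example() {
+    let r0 = vrndn_f32(vdup_n_f32(0.5));
+    assert_eq!(vget_lane_f32::<0>(r0), 0.0); // 0.5 -> 0 (tie rounds to even)
+    let r1 = vrndn_f32(vdup_n_f32(1.5));
+    assert_eq!(vget_lane_f32::<0>(r1), 2.0); // 1.5 -> 2 (tie rounds to even)
+}
+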
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i8")] + fn vqadd_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vqadd_u8_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v16i8")] + fn vqaddq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vqaddq_u8_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i16")] + fn vqadd_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vqadd_u16_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v8i16")] + fn vqaddq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vqaddq_u16_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i32")] + fn vqadd_u32_(a: uint32x2_t, b: uint32x2_t) -> 
uint32x2_t; + } +vqadd_u32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v4i32")] + fn vqaddq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vqaddq_u32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqadd_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v1i64")] + fn vqadd_u64_(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t; + } +vqadd_u64_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.u64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.uadd.sat.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqadd.v2i64")] + fn vqaddq_u64_(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t; + } +vqaddq_u64_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i8")] + fn vqadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vqadd_s8_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v16i8")] + #[cfg_attr(target_arch = "aarch64", 
link_name = "llvm.aarch64.neon.sqadd.v16i8")] + fn vqaddq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vqaddq_s8_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i16")] + fn vqadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqadd_s16_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v8i16")] + fn vqaddq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqaddq_s16_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i32")] + fn vqadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqadd_s32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v4i32")] + fn vqaddq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vqaddq_s32_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqadd_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.sadd.sat.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v1i64")] + fn vqadd_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vqadd_s64_(a, b) +} + +/// Saturating add +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqadd.s64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqadd))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.sadd.sat.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqadd.v2i64")] + fn vqaddq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vqaddq_s64_(a, b) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s8_x2(a: *const i8) -> int8x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v8i8.p0i8")] + fn vld1_s8_x2_(a: *const i8) -> int8x8x2_t; + } +vld1_s8_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s16_x2(a: *const i16) -> int16x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v4i16.p0i16")] + fn vld1_s16_x2_(a: *const i16) -> int16x4x2_t; + } +vld1_s16_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s32_x2(a: *const i32) -> int32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2i32.p0i32")] + fn vld1_s32_x2_(a: *const i32) -> int32x2x2_t; + } +vld1_s32_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, 
target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s64_x2(a: *const i64) -> int64x1x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v1i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v1i64.p0i64")] + fn vld1_s64_x2_(a: *const i64) -> int64x1x2_t; + } +vld1_s64_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s8_x2(a: *const i8) -> int8x16x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v16i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v16i8.p0i8")] + fn vld1q_s8_x2_(a: *const i8) -> int8x16x2_t; + } +vld1q_s8_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s16_x2(a: *const i16) -> int16x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v8i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v8i16.p0i16")] + fn vld1q_s16_x2_(a: *const i16) -> int16x8x2_t; + } +vld1q_s16_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s32_x2(a: *const i32) -> int32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v4i32.p0i32")] + fn vld1q_s32_x2_(a: *const i32) -> int32x4x2_t; + } +vld1q_s32_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s64_x2(a: *const i64) -> int64x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2i64.p0i64")] + fn 
vld1q_s64_x2_(a: *const i64) -> int64x2x2_t; + } +vld1q_s64_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s8_x3(a: *const i8) -> int8x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v8i8.p0i8")] + fn vld1_s8_x3_(a: *const i8) -> int8x8x3_t; + } +vld1_s8_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s16_x3(a: *const i16) -> int16x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v4i16.p0i16")] + fn vld1_s16_x3_(a: *const i16) -> int16x4x3_t; + } +vld1_s16_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s32_x3(a: *const i32) -> int32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2i32.p0i32")] + fn vld1_s32_x3_(a: *const i32) -> int32x2x3_t; + } +vld1_s32_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s64_x3(a: *const i64) -> int64x1x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v1i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v1i64.p0i64")] + fn vld1_s64_x3_(a: *const i64) -> int64x1x3_t; + } +vld1_s64_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", 
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s8_x3(a: *const i8) -> int8x16x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v16i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v16i8.p0i8")] + fn vld1q_s8_x3_(a: *const i8) -> int8x16x3_t; + } +vld1q_s8_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s16_x3(a: *const i16) -> int16x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v8i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v8i16.p0i16")] + fn vld1q_s16_x3_(a: *const i16) -> int16x8x3_t; + } +vld1q_s16_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s32_x3(a: *const i32) -> int32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v4i32.p0i32")] + fn vld1q_s32_x3_(a: *const i32) -> int32x4x3_t; + } +vld1q_s32_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s64_x3(a: *const i64) -> int64x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2i64.p0i64")] + fn vld1q_s64_x3_(a: *const i64) -> int64x2x3_t; + } +vld1q_s64_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s8_x4(a: *const i8) -> int8x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v8i8.p0i8")] + fn vld1_s8_x4_(a: *const i8) -> int8x8x4_t; + } +vld1_s8_x4_(a) +} + +/// Load multiple 
single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s16_x4(a: *const i16) -> int16x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v4i16.p0i16")] + fn vld1_s16_x4_(a: *const i16) -> int16x4x4_t; + } +vld1_s16_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s32_x4(a: *const i32) -> int32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2i32.p0i32")] + fn vld1_s32_x4_(a: *const i32) -> int32x2x4_t; + } +vld1_s32_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_s64_x4(a: *const i64) -> int64x1x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v1i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v1i64.p0i64")] + fn vld1_s64_x4_(a: *const i64) -> int64x1x4_t; + } +vld1_s64_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s8_x4(a: *const i8) -> int8x16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v16i8.p0i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v16i8.p0i8")] + fn vld1q_s8_x4_(a: *const i8) -> int8x16x4_t; + } +vld1q_s8_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s16_x4(a: 
*const i16) -> int16x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v8i16.p0i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v8i16.p0i16")] + fn vld1q_s16_x4_(a: *const i16) -> int16x8x4_t; + } +vld1q_s16_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s32_x4(a: *const i32) -> int32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4i32.p0i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v4i32.p0i32")] + fn vld1q_s32_x4_(a: *const i32) -> int32x4x4_t; + } +vld1q_s32_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_s64_x4(a: *const i64) -> int64x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2i64.p0i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2i64.p0i64")] + fn vld1q_s64_x4_(a: *const i64) -> int64x2x4_t; + } +vld1q_s64_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u8_x2(a: *const u8) -> uint8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u16_x2(a: *const u16) -> uint16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u32_x2(a: *const u32) -> uint32x2x2_t { + transmute(vld1_s32_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers 
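+// Illustrative sketch (assumption, not part of the stdarch-gen output): the
+// unsigned, poly and float x2/x3/x4 loads that follow all reuse the signed
+// loads via `transmute`, since the load itself is type-oblivious. For the
+// function below, a caller might write:
+//
+//     let buf: [u64; 2] = [1, 2];
+//     let pair: uint64x1x2_t = unsafe { vld1_u64_x2(buf.as_ptr()) };
+//     // pair.0 now holds buf[0] and pair.1 holds buf[1].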
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u64_x2(a: *const u64) -> uint64x1x2_t { + transmute(vld1_s64_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u8_x2(a: *const u8) -> uint8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u16_x2(a: *const u16) -> uint16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u32_x2(a: *const u32) -> uint32x4x2_t { + transmute(vld1q_s32_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u64_x2(a: *const u64) -> uint64x2x2_t { + transmute(vld1q_s64_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u8_x3(a: *const u8) -> uint8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u16_x3(a: *const u16) -> uint16x4x3_t { + 
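+    // Two bit-for-bit reinterpretations: the inner `transmute` turns the
+    // `*const u16` argument into the `*const i16` expected by `vld1_s16_x3`,
+    // and the outer one reinterprets the loaded `int16x4x3_t` as
+    // `uint16x4x3_t`. No element values are converted.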
transmute(vld1_s16_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u32_x3(a: *const u32) -> uint32x2x3_t { + transmute(vld1_s32_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u64_x3(a: *const u64) -> uint64x1x3_t { + transmute(vld1_s64_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u8_x3(a: *const u8) -> uint8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u16_x3(a: *const u16) -> uint16x8x3_t { + transmute(vld1q_s16_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u32_x3(a: *const u32) -> uint32x4x3_t { + transmute(vld1q_s32_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u64_x3(a: *const u64) -> uint64x2x3_t { + transmute(vld1q_s64_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = 
"aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u8_x4(a: *const u8) -> uint8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u16_x4(a: *const u16) -> uint16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u32_x4(a: *const u32) -> uint32x2x4_t { + transmute(vld1_s32_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_u64_x4(a: *const u64) -> uint64x1x4_t { + transmute(vld1_s64_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u8_x4(a: *const u8) -> uint8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u16_x4(a: *const u16) -> uint16x8x4_t { + transmute(vld1q_s16_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u32_x4(a: *const u32) -> uint32x4x4_t { + transmute(vld1q_s32_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_u64_x4(a: *const u64) -> uint64x2x4_t { + transmute(vld1q_s64_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p8_x2(a: *const p8) -> poly8x8x2_t { + transmute(vld1_s8_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p8_x3(a: *const p8) -> poly8x8x3_t { + transmute(vld1_s8_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p8_x4(a: *const p8) -> poly8x8x4_t { + transmute(vld1_s8_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p8_x2(a: *const p8) -> poly8x16x2_t { + transmute(vld1q_s8_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p8_x3(a: *const p8) -> poly8x16x3_t { + transmute(vld1q_s8_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p8_x4(a: *const p8) -> poly8x16x4_t { + transmute(vld1q_s8_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p16_x2(a: *const p16) -> poly16x4x2_t { + transmute(vld1_s16_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p16_x3(a: *const p16) -> poly16x4x3_t { + transmute(vld1_s16_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p16_x4(a: *const p16) -> poly16x4x4_t { + transmute(vld1_s16_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p16_x2(a: *const p16) -> poly16x8x2_t { + transmute(vld1q_s16_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p16_x3(a: *const p16) -> poly16x8x3_t { + transmute(vld1q_s16_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p16_x4(a: *const p16) -> poly16x8x4_t { + transmute(vld1q_s16_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p64_x2(a: *const p64) -> poly64x1x2_t { + 
transmute(vld1_s64_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p64_x3(a: *const p64) -> poly64x1x3_t { + transmute(vld1_s64_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_p64_x4(a: *const p64) -> poly64x1x4_t { + transmute(vld1_s64_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p64_x2(a: *const p64) -> poly64x2x2_t { + transmute(vld1q_s64_x2(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p64_x3(a: *const p64) -> poly64x2x3_t { + transmute(vld1q_s64_x3(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_p64_x4(a: *const p64) -> poly64x2x4_t { + transmute(vld1q_s64_x4(transmute(a))) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_f32_x2(a: *const f32) -> float32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v2f32.p0f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v2f32.p0f32")] + fn vld1_f32_x2_(a: *const f32) -> float32x2x2_t; + } +vld1_f32_x2_(a) +} + +/// Load multiple single-element structures to one, two, 
three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_f32_x2(a: *const f32) -> float32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x2.v4f32.p0f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x2.v4f32.p0f32")] + fn vld1q_f32_x2_(a: *const f32) -> float32x4x2_t; + } +vld1q_f32_x2_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_f32_x3(a: *const f32) -> float32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v2f32.p0f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v2f32.p0f32")] + fn vld1_f32_x3_(a: *const f32) -> float32x2x3_t; + } +vld1_f32_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_f32_x3(a: *const f32) -> float32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x3.v4f32.p0f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x3.v4f32.p0f32")] + fn vld1q_f32_x3_(a: *const f32) -> float32x4x3_t; + } +vld1q_f32_x3_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1_f32_x4(a: *const f32) -> float32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v2f32.p0f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v2f32.p0f32")] + fn vld1_f32_x4_(a: *const f32) -> float32x2x4_t; + } +vld1_f32_x4_(a) +} + +/// Load multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld1q_f32_x4(a: *const f32) -> 
float32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld1x4.v4f32.p0f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld1x4.v4f32.p0f32")] + fn vld1q_f32_x4_(a: *const f32) -> float32x4x4_t; + } +vld1q_f32_x4_(a) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i8.p0i8")] + fn vld2_s8_(ptr: *const i8, size: i32) -> int8x8x2_t; + } +vld2_s8_(a as *const i8, 1) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_s8(a: *const i8) -> int8x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v8i8.p0v8i8")] + fn vld2_s8_(ptr: *const int8x8_t) -> int8x8x2_t; + } +vld2_s8_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i16.p0i8")] + fn vld2_s16_(ptr: *const i8, size: i32) -> int16x4x2_t; + } +vld2_s16_(a as *const i8, 2) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_s16(a: *const i16) -> int16x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v4i16.p0v4i16")] + fn vld2_s16_(ptr: *const int16x4_t) -> int16x4x2_t; + } +vld2_s16_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2i32.p0i8")] + fn vld2_s32_(ptr: *const i8, size: i32) -> int32x2x2_t; + } +vld2_s32_(a as *const i8, 4) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_s32(a: *const i32) -> int32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v2i32.p0v2i32")] + fn vld2_s32_(ptr: *const int32x2_t) -> int32x2x2_t; + } +vld2_s32_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vld2.v16i8.p0i8")] + fn vld2q_s8_(ptr: *const i8, size: i32) -> int8x16x2_t; + } +vld2q_s8_(a as *const i8, 1) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_s8(a: *const i8) -> int8x16x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v16i8.p0v16i8")] + fn vld2q_s8_(ptr: *const int8x16_t) -> int8x16x2_t; + } +vld2q_s8_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v8i16.p0i8")] + fn vld2q_s16_(ptr: *const i8, size: i32) -> int16x8x2_t; + } +vld2q_s16_(a as *const i8, 2) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_s16(a: *const i16) -> int16x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v8i16.p0v8i16")] + fn vld2q_s16_(ptr: *const int16x8_t) -> int16x8x2_t; + } +vld2q_s16_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4i32.p0i8")] + fn vld2q_s32_(ptr: *const i8, size: i32) -> int32x4x2_t; + } +vld2q_s32_(a as *const i8, 4) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_s32(a: *const i32) -> int32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v4i32.p0v4i32")] + fn vld2q_s32_(ptr: *const int32x4_t) -> int32x4x2_t; + } +vld2q_s32_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v1i64.p0i8")] + fn vld2_s64_(ptr: *const i8, size: i32) -> int64x1x2_t; + } +vld2_s64_(a as *const i8, 8) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_s64(a: *const i64) -> int64x1x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v1i64.p0v1i64")] + fn vld2_s64_(ptr: *const int64x1_t) -> int64x1x2_t; + } 
+vld2_s64_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_s8(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_s16(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_s32(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_s8(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_s16(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_s32(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_s8(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] 
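+// Illustrative sketch (assumption, not part of the stdarch-gen output): the
+// vld2 family deinterleaves adjacent element pairs. For the function below:
+//
+//     let data: [p16; 8] = [0, 1, 2, 3, 4, 5, 6, 7];
+//     let d: poly16x4x2_t = unsafe { vld2_p16(data.as_ptr()) };
+//     // d.0 holds the even-indexed elements {0, 2, 4, 6} and
+//     // d.1 holds the odd-indexed elements {1, 3, 5, 7}.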
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_s16(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_s8(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_s16(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_s64(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_s64(transmute(a))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v2f32.p0i8")] + fn vld2_f32_(ptr: *const i8, size: i32) -> float32x2x2_t; + } +vld2_f32_(a as *const i8, 4) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_f32(a: *const f32) -> float32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v2f32.p0v2f32")] + fn vld2_f32_(ptr: *const float32x2_t) -> float32x2x2_t; + } +vld2_f32_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, 
assert_instr(vld2))] +pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2.v4f32.p0i8")] + fn vld2q_f32_(ptr: *const i8, size: i32) -> float32x4x2_t; + } +vld2q_f32_(a as *const i8, 4) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_f32(a: *const f32) -> float32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2.v4f32.p0v4f32")] + fn vld2q_f32_(ptr: *const float32x4_t) -> float32x4x2_t; + } +vld2q_f32_(a as _) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i8.p0i8")] + fn vld2_dup_s8_(ptr: *const i8, size: i32) -> int8x8x2_t; + } +vld2_dup_s8_(a as *const i8, 1) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_dup_s8(a: *const i8) -> int8x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v8i8.p0i8")] + fn vld2_dup_s8_(ptr: *const i8) -> int8x8x2_t; + } +vld2_dup_s8_(a as _) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i16.p0i8")] + fn vld2_dup_s16_(ptr: *const i8, size: i32) -> int16x4x2_t; + } +vld2_dup_s16_(a as *const i8, 2) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_dup_s16(a: *const i16) -> int16x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v4i16.p0i16")] + fn vld2_dup_s16_(ptr: *const i16) -> int16x4x2_t; + } +vld2_dup_s16_(a as _) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2i32.p0i8")] + fn vld2_dup_s32_(ptr: *const i8, size: i32) -> int32x2x2_t; + } +vld2_dup_s32_(a as *const i8, 4) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = 
"neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_dup_s32(a: *const i32) -> int32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v2i32.p0i32")] + fn vld2_dup_s32_(ptr: *const i32) -> int32x2x2_t; + } +vld2_dup_s32_(a as _) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v16i8.p0i8")] + fn vld2q_dup_s8_(ptr: *const i8, size: i32) -> int8x16x2_t; + } +vld2q_dup_s8_(a as *const i8, 1) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_dup_s8(a: *const i8) -> int8x16x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v16i8.p0i8")] + fn vld2q_dup_s8_(ptr: *const i8) -> int8x16x2_t; + } +vld2q_dup_s8_(a as _) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v8i16.p0i8")] + fn vld2q_dup_s16_(ptr: *const i8, size: i32) -> int16x8x2_t; + } +vld2q_dup_s16_(a as *const i8, 2) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_dup_s16(a: *const i16) -> int16x8x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v8i16.p0i16")] + fn vld2q_dup_s16_(ptr: *const i16) -> int16x8x2_t; + } +vld2q_dup_s16_(a as _) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4i32.p0i8")] + fn vld2q_dup_s32_(ptr: *const i8, size: i32) -> int32x4x2_t; + } +vld2q_dup_s32_(a as *const i8, 4) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_dup_s32(a: *const i32) -> int32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v4i32.p0i32")] + fn vld2q_dup_s32_(ptr: *const i32) -> int32x4x2_t; + } +vld2q_dup_s32_(a as _) +} + +/// Load single 2-element 
structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v1i64.p0i8")] + fn vld2_dup_s64_(ptr: *const i8, size: i32) -> int64x1x2_t; + } +vld2_dup_s64_(a as *const i8, 8) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_dup_s64(a: *const i64) -> int64x1x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v1i64.p0i64")] + fn vld2_dup_s64_(ptr: *const i64) -> int64x1x2_t; + } +vld2_dup_s64_(a as _) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_dup_u8(a: *const u8) -> uint8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_dup_u16(a: *const u16) -> uint16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_dup_u32(a: *const u32) -> uint32x2x2_t { + transmute(vld2_dup_s32(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_dup_u8(a: *const u8) -> uint8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn 
vld2q_dup_u16(a: *const u16) -> uint16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_dup_u32(a: *const u32) -> uint32x4x2_t { + transmute(vld2q_dup_s32(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_dup_p8(a: *const p8) -> poly8x8x2_t { + transmute(vld2_dup_s8(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_dup_p16(a: *const p16) -> poly16x4x2_t { + transmute(vld2_dup_s16(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_dup_p8(a: *const p8) -> poly8x16x2_t { + transmute(vld2q_dup_s8(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_dup_p16(a: *const p16) -> poly16x8x2_t { + transmute(vld2q_dup_s16(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_dup_u64(a: *const u64) -> uint64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(ld2r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_dup_p64(a: *const p64) -> poly64x1x2_t { + transmute(vld2_dup_s64(transmute(a))) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v2f32.p0i8")] + fn vld2_dup_f32_(ptr: *const i8, size: i32) -> float32x2x2_t; + } +vld2_dup_f32_(a as *const i8, 4) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_dup_f32(a: *const f32) -> float32x2x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v2f32.p0f32")] + fn vld2_dup_f32_(ptr: *const f32) -> float32x2x2_t; + } +vld2_dup_f32_(a as _) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2))] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2dup.v4f32.p0i8")] + fn vld2q_dup_f32_(ptr: *const i8, size: i32) -> float32x4x2_t; + } +vld2q_dup_f32_(a as *const i8, 4) +} + +/// Load single 2-element structure and replicate to all lanes of two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_dup_f32(a: *const f32) -> float32x4x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2r.v4f32.p0f32")] + fn vld2q_dup_f32_(ptr: *const f32) -> float32x4x2_t; + } +vld2q_dup_f32_(a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i8.p0i8")] + fn vld2_lane_s8_(ptr: *const i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32) -> int8x8x2_t; + } +vld2_lane_s8_(a as _, b.0, b.1, LANE, 1) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s8(a: *const i8, b: int8x8x2_t) -> int8x8x2_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v8i8.p0i8")] + fn vld2_lane_s8_(a: int8x8_t, b: int8x8_t, n: i64, ptr: *const i8) -> int8x8x2_t; + } 
+vld2_lane_s8_(b.0, b.1, LANE as i64, a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i16.p0i8")] + fn vld2_lane_s16_(ptr: *const i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32) -> int16x4x2_t; + } +vld2_lane_s16_(a as _, b.0, b.1, LANE, 2) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x2_t) -> int16x4x2_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v4i16.p0i8")] + fn vld2_lane_s16_(a: int16x4_t, b: int16x4_t, n: i64, ptr: *const i8) -> int16x4x2_t; + } +vld2_lane_s16_(b.0, b.1, LANE as i64, a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2i32.p0i8")] + fn vld2_lane_s32_(ptr: *const i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32) -> int32x2x2_t; + } +vld2_lane_s32_(a as _, b.0, b.1, LANE, 4) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x2_t) -> int32x2x2_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v2i32.p0i8")] + fn vld2_lane_s32_(a: int32x2_t, b: int32x2_t, n: i64, ptr: *const i8) -> int32x2x2_t; + } +vld2_lane_s32_(b.0, b.1, LANE as i64, a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v8i16.p0i8")] + fn vld2q_lane_s16_(ptr: *const i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32) -> int16x8x2_t; + } +vld2q_lane_s16_(a as _, b.0, b.1, LANE, 2) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s16<const LANE: i32>(a: *const
i16, b: int16x8x2_t) -> int16x8x2_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v8i16.p0i8")] + fn vld2q_lane_s16_(a: int16x8_t, b: int16x8_t, n: i64, ptr: *const i8) -> int16x8x2_t; + } +vld2q_lane_s16_(b.0, b.1, LANE as i64, a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4i32.p0i8")] + fn vld2q_lane_s32_(ptr: *const i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32) -> int32x4x2_t; + } +vld2q_lane_s32_(a as _, b.0, b.1, LANE, 4) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_s32<const LANE: i32>(a: *const i32, b: int32x4x2_t) -> int32x4x2_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v4i32.p0i8")] + fn vld2q_lane_s32_(a: int32x4_t, b: int32x4_t, n: i64, ptr: *const i8) -> int32x4x2_t; + } +vld2q_lane_s32_(b.0, b.1, LANE as i64, a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_lane_u8<const LANE: i32>(a: *const u8, b: uint8x8x2_t) -> uint8x8x2_t { + static_assert_imm3!(LANE); + transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x2_t) -> uint16x4x2_t { + static_assert_imm2!(LANE); + transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x2_t) -> uint32x2x2_t { + static_assert_imm1!(LANE); + transmute(vld2_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 2-element structures to two
registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x2_t) -> uint16x8x2_t { + static_assert_imm3!(LANE); + transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x2_t) -> uint32x4x2_t { + static_assert_imm2!(LANE); + transmute(vld2q_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x2_t) -> poly8x8x2_t { + static_assert_imm3!(LANE); + transmute(vld2_lane_s8::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x2_t) -> poly16x4x2_t { + static_assert_imm2!(LANE); + transmute(vld2_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld2q_lane_p16<const LANE: i32>(a: *const p16, b: poly16x8x2_t) -> poly16x8x2_t { + static_assert_imm3!(LANE); + transmute(vld2q_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v2f32.p0i8")] + fn vld2_lane_f32_(ptr: *const
i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32) -> float32x2x2_t; + } +vld2_lane_f32_(a as _, b.0, b.1, LANE, 4) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2_lane_f32<const LANE: i32>(a: *const f32, b: float32x2x2_t) -> float32x2x2_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v2f32.p0i8")] + fn vld2_lane_f32_(a: float32x2_t, b: float32x2_t, n: i64, ptr: *const i8) -> float32x2x2_t; + } +vld2_lane_f32_(b.0, b.1, LANE as i64, a as _) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld2lane.v4f32.p0i8")] + fn vld2q_lane_f32_(ptr: *const i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32) -> float32x4x2_t; + } +vld2q_lane_f32_(a as _, b.0, b.1, LANE, 4) +} + +/// Load multiple 2-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld2q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x2_t) -> float32x4x2_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld2lane.v4f32.p0i8")] + fn vld2q_lane_f32_(a: float32x4_t, b: float32x4_t, n: i64, ptr: *const i8) -> float32x4x2_t; + } +vld2q_lane_f32_(b.0, b.1, LANE as i64, a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i8.p0i8")] + fn vld3_s8_(ptr: *const i8, size: i32) -> int8x8x3_t; + } +vld3_s8_(a as *const i8, 1) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_s8(a: *const i8) -> int8x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v8i8.p0v8i8")] + fn vld3_s8_(ptr: *const int8x8_t) -> int8x8x3_t; + } +vld3_s8_(a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i16.p0i8")] + fn vld3_s16_(ptr: *const i8, size: i32) -> int16x4x3_t; + } +vld3_s16_(a as *const i8, 2) +} + +/// Load 
multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_s16(a: *const i16) -> int16x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v4i16.p0v4i16")] + fn vld3_s16_(ptr: *const int16x4_t) -> int16x4x3_t; + } +vld3_s16_(a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2i32.p0i8")] + fn vld3_s32_(ptr: *const i8, size: i32) -> int32x2x3_t; + } +vld3_s32_(a as *const i8, 4) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_s32(a: *const i32) -> int32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v2i32.p0v2i32")] + fn vld3_s32_(ptr: *const int32x2_t) -> int32x2x3_t; + } +vld3_s32_(a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v16i8.p0i8")] + fn vld3q_s8_(ptr: *const i8, size: i32) -> int8x16x3_t; + } +vld3q_s8_(a as *const i8, 1) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_s8(a: *const i8) -> int8x16x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v16i8.p0v16i8")] + fn vld3q_s8_(ptr: *const int8x16_t) -> int8x16x3_t; + } +vld3q_s8_(a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v8i16.p0i8")] + fn vld3q_s16_(ptr: *const i8, size: i32) -> int16x8x3_t; + } +vld3q_s16_(a as *const i8, 2) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_s16(a: *const i16) -> int16x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v8i16.p0v8i16")] + fn vld3q_s16_(ptr: *const int16x8_t) -> int16x8x3_t; + } +vld3q_s16_(a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] 
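+// Editor's note (illustrative, not part of the generated file): the plain
+// `vld3*` loads de-interleave three element streams, so for memory holding
+// [x0, y0, z0, x1, y1, z1, ...] the result has .0 = [x0, x1, ...],
+// .1 = [y0, y1, ...] and .2 = [z0, z1, ...].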
+#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4i32.p0i8")] + fn vld3q_s32_(ptr: *const i8, size: i32) -> int32x4x3_t; + } +vld3q_s32_(a as *const i8, 4) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_s32(a: *const i32) -> int32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v4i32.p0v4i32")] + fn vld3q_s32_(ptr: *const int32x4_t) -> int32x4x3_t; + } +vld3q_s32_(a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v1i64.p0i8")] + fn vld3_s64_(ptr: *const i8, size: i32) -> int64x1x3_t; + } +vld3_s64_(a as *const i8, 8) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_s64(a: *const i64) -> int64x1x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v1i64.p0v1i64")] + fn vld3_s64_(ptr: *const int64x1_t) -> int64x1x3_t; + } +vld3_s64_(a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_s8(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_s16(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_s32(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = 
"arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_s8(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_s16(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_s32(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_s8(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_p16(a: *const p16) -> poly16x4x3_t { + transmute(vld3_s16(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_s8(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_s16(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_s64(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_s64(transmute(a))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v2f32.p0i8")] + fn vld3_f32_(ptr: *const i8, size: i32) -> float32x2x3_t; + } +vld3_f32_(a as *const i8, 4) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_f32(a: *const f32) -> float32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v2f32.p0v2f32")] + fn vld3_f32_(ptr: *const float32x2_t) -> float32x2x3_t; + } +vld3_f32_(a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3.v4f32.p0i8")] + fn vld3q_f32_(ptr: *const i8, size: i32) -> float32x4x3_t; + } +vld3q_f32_(a as *const i8, 4) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_f32(a: *const f32) -> float32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3.v4f32.p0v4f32")] + fn vld3q_f32_(ptr: *const float32x4_t) -> float32x4x3_t; + } +vld3q_f32_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i8.p0i8")] + fn vld3_dup_s8_(ptr: *const i8, size: i32) -> int8x8x3_t; + } +vld3_dup_s8_(a as *const i8, 1) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_dup_s8(a: *const i8) -> int8x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" 
{ + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v8i8.p0i8")] + fn vld3_dup_s8_(ptr: *const i8) -> int8x8x3_t; + } +vld3_dup_s8_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i16.p0i8")] + fn vld3_dup_s16_(ptr: *const i8, size: i32) -> int16x4x3_t; + } +vld3_dup_s16_(a as *const i8, 2) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_dup_s16(a: *const i16) -> int16x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v4i16.p0i16")] + fn vld3_dup_s16_(ptr: *const i16) -> int16x4x3_t; + } +vld3_dup_s16_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2i32.p0i8")] + fn vld3_dup_s32_(ptr: *const i8, size: i32) -> int32x2x3_t; + } +vld3_dup_s32_(a as *const i8, 4) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_dup_s32(a: *const i32) -> int32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v2i32.p0i32")] + fn vld3_dup_s32_(ptr: *const i32) -> int32x2x3_t; + } +vld3_dup_s32_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v16i8.p0i8")] + fn vld3q_dup_s8_(ptr: *const i8, size: i32) -> int8x16x3_t; + } +vld3q_dup_s8_(a as *const i8, 1) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_dup_s8(a: *const i8) -> int8x16x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v16i8.p0i8")] + fn vld3q_dup_s8_(ptr: *const i8) -> int8x16x3_t; + } +vld3q_dup_s8_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> 
int16x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v8i16.p0i8")] + fn vld3q_dup_s16_(ptr: *const i8, size: i32) -> int16x8x3_t; + } +vld3q_dup_s16_(a as *const i8, 2) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_dup_s16(a: *const i16) -> int16x8x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v8i16.p0i16")] + fn vld3q_dup_s16_(ptr: *const i16) -> int16x8x3_t; + } +vld3q_dup_s16_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4i32.p0i8")] + fn vld3q_dup_s32_(ptr: *const i8, size: i32) -> int32x4x3_t; + } +vld3q_dup_s32_(a as *const i8, 4) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_dup_s32(a: *const i32) -> int32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v4i32.p0i32")] + fn vld3q_dup_s32_(ptr: *const i32) -> int32x4x3_t; + } +vld3q_dup_s32_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v1i64.p0i8")] + fn vld3_dup_s64_(ptr: *const i8, size: i32) -> int64x1x3_t; + } +vld3_dup_s64_(a as *const i8, 8) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_dup_s64(a: *const i64) -> int64x1x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v1i64.p0i64")] + fn vld3_dup_s64_(ptr: *const i64) -> int64x1x3_t; + } +vld3_dup_s64_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_dup_u8(a: *const u8) -> uint8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_dup_u16(a: *const u16) -> uint16x4x3_t { + transmute(vld3_dup_s16(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_dup_u32(a: *const u32) -> uint32x2x3_t { + transmute(vld3_dup_s32(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_dup_u8(a: *const u8) -> uint8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_dup_u16(a: *const u16) -> uint16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_dup_u32(a: *const u32) -> uint32x4x3_t { + transmute(vld3q_dup_s32(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_dup_p8(a: *const p8) -> poly8x8x3_t { + transmute(vld3_dup_s8(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_dup_p16(a: *const p16) -> poly16x4x3_t { + 
transmute(vld3_dup_s16(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_dup_p8(a: *const p8) -> poly8x16x3_t { + transmute(vld3q_dup_s8(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_dup_p16(a: *const p16) -> poly16x8x3_t { + transmute(vld3q_dup_s16(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_dup_u64(a: *const u64) -> uint64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_dup_p64(a: *const p64) -> poly64x1x3_t { + transmute(vld3_dup_s64(transmute(a))) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v2f32.p0i8")] + fn vld3_dup_f32_(ptr: *const i8, size: i32) -> float32x2x3_t; + } +vld3_dup_f32_(a as *const i8, 4) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_dup_f32(a: *const f32) -> float32x2x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v2f32.p0f32")] + fn vld3_dup_f32_(ptr: *const f32) -> float32x2x3_t; + } +vld3_dup_f32_(a as _) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3))] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + 
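+// Editor's note: the ARM-side shims bind the LLVM intrinsic by name and
+// take an untyped byte pointer plus a trailing `size` argument giving the
+// element size in bytes (4 for f32 in the call below).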
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3dup.v4f32.p0i8")] + fn vld3q_dup_f32_(ptr: *const i8, size: i32) -> float32x4x3_t; + } +vld3q_dup_f32_(a as *const i8, 4) +} + +/// Load single 3-element structure and replicate to all lanes of three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_dup_f32(a: *const f32) -> float32x4x3_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3r.v4f32.p0f32")] + fn vld3q_dup_f32_(ptr: *const f32) -> float32x4x3_t; + } +vld3q_dup_f32_(a as _) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i8.p0i8")] + fn vld3_lane_s8_(ptr: *const i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32) -> int8x8x3_t; + } +vld3_lane_s8_(a as _, b.0, b.1, b.2, LANE, 1) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s8(a: *const i8, b: int8x8x3_t) -> int8x8x3_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v8i8.p0i8")] + fn vld3_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *const i8) -> int8x8x3_t; + } +vld3_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i16.p0i8")] + fn vld3_lane_s16_(ptr: *const i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i32, size: i32) -> int16x4x3_t; + } +vld3_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s16(a: *const i16, b: int16x4x3_t) -> int16x4x3_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v4i16.p0i8")] + fn vld3_lane_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *const i8) -> int16x4x3_t; + } +vld3_lane_s16_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] 
+#[rustc_legacy_const_generics(2)] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2i32.p0i8")] + fn vld3_lane_s32_(ptr: *const i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i32, size: i32) -> int32x2x3_t; + } +vld3_lane_s32_(a as _, b.0, b.1, b.2, LANE, 4) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_s32(a: *const i32, b: int32x2x3_t) -> int32x2x3_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v2i32.p0i8")] + fn vld3_lane_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *const i8) -> int32x2x3_t; + } +vld3_lane_s32_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v8i16.p0i8")] + fn vld3q_lane_s16_(ptr: *const i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i32, size: i32) -> int16x8x3_t; + } +vld3q_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_s16(a: *const i16, b: int16x8x3_t) -> int16x8x3_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v8i16.p0i8")] + fn vld3q_lane_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *const i8) -> int16x8x3_t; + } +vld3q_lane_s16_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4i32.p0i8")] + fn vld3q_lane_s32_(ptr: *const i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i32, size: i32) -> int32x4x3_t; + } +vld3q_lane_s32_(a as _, b.0, b.1, b.2, LANE, 4) +} + +/// Load multiple 3-element structures to two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_s32(a: *const i32, b: int32x4x3_t) -> int32x4x3_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + 
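+// Editor's note: the AArch64 `ld3lane` bindings use a different argument
+// order from the ARM shims above: the three vectors come first, then the
+// lane index widened to `i64`, and the source pointer last.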
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v4i32.p0i8")] + fn vld3q_lane_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *const i8) -> int32x4x3_t; + } +vld3q_lane_s32_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_lane_u8(a: *const u8, b: uint8x8x3_t) -> uint8x8x3_t { + static_assert_imm3!(LANE); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_lane_u16(a: *const u16, b: uint16x4x3_t) -> uint16x4x3_t { + static_assert_imm2!(LANE); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_lane_u32(a: *const u32, b: uint32x2x3_t) -> uint32x2x3_t { + static_assert_imm1!(LANE); + transmute(vld3_lane_s32::(transmute(a), transmute(b))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_lane_u16(a: *const u16, b: uint16x8x3_t) -> uint16x8x3_t { + static_assert_imm3!(LANE); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_lane_u32(a: *const u32, b: uint32x4x3_t) -> uint32x4x3_t { + static_assert_imm2!(LANE); + transmute(vld3q_lane_s32::(transmute(a), transmute(b))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_lane_p8(a: *const p8, b: poly8x8x3_t) -> poly8x8x3_t { + static_assert_imm3!(LANE); + transmute(vld3_lane_s8::(transmute(a), transmute(b))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3_lane_p16(a: *const p16, b: poly16x4x3_t) -> poly16x4x3_t { + static_assert_imm2!(LANE); + transmute(vld3_lane_s16::(transmute(a), transmute(b))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld3q_lane_p16(a: *const p16, b: poly16x8x3_t) -> poly16x8x3_t { + static_assert_imm3!(LANE); + transmute(vld3q_lane_s16::(transmute(a), transmute(b))) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v2f32.p0i8")] + fn vld3_lane_f32_(ptr: *const i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i32, size: i32) -> float32x2x3_t; + } +vld3_lane_f32_(a as _, b.0, b.1, b.2, LANE, 4) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3_lane_f32(a: *const f32, b: float32x2x3_t) -> float32x2x3_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v2f32.p0i8")] + fn vld3_lane_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *const i8) -> float32x2x3_t; + } +vld3_lane_f32_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld3q_lane_f32(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0i8")] + fn vld3q_lane_f32_(ptr: *const i8, a: 
+ +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld3lane.v4f32.p0i8")] + fn vld3q_lane_f32_(ptr: *const i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i32, size: i32) -> float32x4x3_t; + } +vld3q_lane_f32_(a as _, b.0, b.1, b.2, LANE, 4) +} + +/// Load multiple 3-element structures to three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld3q_lane_f32<const LANE: i32>(a: *const f32, b: float32x4x3_t) -> float32x4x3_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld3lane.v4f32.p0i8")] + fn vld3q_lane_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *const i8) -> float32x4x3_t; + } +vld3q_lane_f32_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i8.p0i8")] + fn vld4_s8_(ptr: *const i8, size: i32) -> int8x8x4_t; + } +vld4_s8_(a as *const i8, 1) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_s8(a: *const i8) -> int8x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v8i8.p0v8i8")] + fn vld4_s8_(ptr: *const int8x8_t) -> int8x8x4_t; + } +vld4_s8_(a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i16.p0i8")] + fn vld4_s16_(ptr: *const i8, size: i32) -> int16x4x4_t; + } +vld4_s16_(a as *const i8, 2) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_s16(a: *const i16) -> int16x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v4i16.p0v4i16")] + fn vld4_s16_(ptr: *const int16x4_t) -> int16x4x4_t; + } +vld4_s16_(a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_s32(a: *const i32) -> int32x2x4_t { +
#[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v2i32.p0v2i32")] + fn vld4_s32_(ptr: *const int32x2_t) -> int32x2x4_t; + } +vld4_s32_(a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v16i8.p0i8")] + fn vld4q_s8_(ptr: *const i8, size: i32) -> int8x16x4_t; + } +vld4q_s8_(a as *const i8, 1) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_s8(a: *const i8) -> int8x16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v16i8.p0v16i8")] + fn vld4q_s8_(ptr: *const int8x16_t) -> int8x16x4_t; + } +vld4q_s8_(a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v8i16.p0i8")] + fn vld4q_s16_(ptr: *const i8, size: i32) -> int16x8x4_t; + } +vld4q_s16_(a as *const i8, 2) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_s16(a: *const i16) -> int16x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v8i16.p0v8i16")] + fn vld4q_s16_(ptr: *const int16x8_t) -> int16x8x4_t; + } +vld4q_s16_(a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4i32.p0i8")] + fn vld4q_s32_(ptr: *const i8, size: i32) -> int32x4x4_t; + } +vld4q_s32_(a as *const i8, 4) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_s32(a: *const i32) -> int32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v4i32.p0v4i32")] + fn vld4q_s32_(ptr: *const int32x4_t) -> int32x4x4_t; + } +vld4q_s32_(a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v1i64.p0i8")] + fn vld4_s64_(ptr: *const i8, 
size: i32) -> int64x1x4_t; + } +vld4_s64_(a as *const i8, 8) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_s64(a: *const i64) -> int64x1x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v1i64.p0v1i64")] + fn vld4_s64_(ptr: *const int64x1_t) -> int64x1x4_t; + } +vld4_s64_(a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_s8(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_s16(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_s32(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_u8(a: *const u8) -> uint8x16x4_t { + transmute(vld4q_s8(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_s16(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_u32(a: *const u32) -> uint32x4x4_t { + transmute(vld4q_s32(transmute(a))) +} + +/// Load 
multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_s8(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_s16(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_s8(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_s16(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_s64(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_p64(a: *const p64) -> poly64x1x4_t { + transmute(vld4_s64(transmute(a))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v2f32.p0i8")] + fn vld4_f32_(ptr: *const i8, size: i32) -> float32x2x4_t; + } +vld4_f32_(a as *const i8, 4) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = 
"neon")] +#[cfg_attr(test, assert_instr(ld4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_f32(a: *const f32) -> float32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v2f32.p0v2f32")] + fn vld4_f32_(ptr: *const float32x2_t) -> float32x2x4_t; + } +vld4_f32_(a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4.v4f32.p0i8")] + fn vld4q_f32_(ptr: *const i8, size: i32) -> float32x4x4_t; + } +vld4q_f32_(a as *const i8, 4) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_f32(a: *const f32) -> float32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4.v4f32.p0v4f32")] + fn vld4q_f32_(ptr: *const float32x4_t) -> float32x4x4_t; + } +vld4q_f32_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i8.p0i8")] + fn vld4_dup_s8_(ptr: *const i8, size: i32) -> int8x8x4_t; + } +vld4_dup_s8_(a as *const i8, 1) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s8(a: *const i8) -> int8x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v8i8.p0i8")] + fn vld4_dup_s8_(ptr: *const i8) -> int8x8x4_t; + } +vld4_dup_s8_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i16.p0i8")] + fn vld4_dup_s16_(ptr: *const i8, size: i32) -> int16x4x4_t; + } +vld4_dup_s16_(a as *const i8, 2) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s16(a: *const i16) -> int16x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v4i16.p0i16")] + fn vld4_dup_s16_(ptr: *const i16) -> int16x4x4_t; + } +vld4_dup_s16_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] 
+#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2i32.p0i8")] + fn vld4_dup_s32_(ptr: *const i8, size: i32) -> int32x2x4_t; + } +vld4_dup_s32_(a as *const i8, 4) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s32(a: *const i32) -> int32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v2i32.p0i32")] + fn vld4_dup_s32_(ptr: *const i32) -> int32x2x4_t; + } +vld4_dup_s32_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v16i8.p0i8")] + fn vld4q_dup_s8_(ptr: *const i8, size: i32) -> int8x16x4_t; + } +vld4q_dup_s8_(a as *const i8, 1) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s8(a: *const i8) -> int8x16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v16i8.p0i8")] + fn vld4q_dup_s8_(ptr: *const i8) -> int8x16x4_t; + } +vld4q_dup_s8_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v8i16.p0i8")] + fn vld4q_dup_s16_(ptr: *const i8, size: i32) -> int16x8x4_t; + } +vld4q_dup_s16_(a as *const i8, 2) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s16(a: *const i16) -> int16x8x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v8i16.p0i16")] + fn vld4q_dup_s16_(ptr: *const i16) -> int16x8x4_t; + } +vld4q_dup_s16_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4i32.p0i8")] + fn vld4q_dup_s32_(ptr: *const i8, size: i32) -> int32x4x4_t; + } +vld4q_dup_s32_(a as *const i8, 4) +} + +/// Load 
single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_s32(a: *const i32) -> int32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v4i32.p0i32")] + fn vld4q_dup_s32_(ptr: *const i32) -> int32x4x4_t; + } +vld4q_dup_s32_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v1i64.p0i8")] + fn vld4_dup_s64_(ptr: *const i8, size: i32) -> int64x1x4_t; + } +vld4_dup_s64_(a as *const i8, 8) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_s64(a: *const i64) -> int64x1x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v1i64.p0i64")] + fn vld4_dup_s64_(ptr: *const i64) -> int64x1x4_t; + } +vld4_dup_s64_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_dup_u8(a: *const u8) -> uint8x8x4_t { + transmute(vld4_dup_s8(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_dup_u16(a: *const u16) -> uint16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_dup_u32(a: *const u32) -> uint32x2x4_t { + transmute(vld4_dup_s32(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = 
"neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_dup_u8(a: *const u8) -> uint8x16x4_t { + transmute(vld4q_dup_s8(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_dup_u16(a: *const u16) -> uint16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_dup_u32(a: *const u32) -> uint32x4x4_t { + transmute(vld4q_dup_s32(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_dup_p8(a: *const p8) -> poly8x8x4_t { + transmute(vld4_dup_s8(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_dup_p16(a: *const p16) -> poly16x4x4_t { + transmute(vld4_dup_s16(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_dup_p8(a: *const p8) -> poly8x16x4_t { + transmute(vld4q_dup_s8(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_dup_p16(a: *const p16) -> poly16x8x4_t { + transmute(vld4q_dup_s16(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_dup_u64(a: *const u64) -> uint64x1x4_t { + transmute(vld4_dup_s64(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4r))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_dup_p64(a: *const p64) -> poly64x1x4_t { + transmute(vld4_dup_s64(transmute(a))) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v2f32.p0i8")] + fn vld4_dup_f32_(ptr: *const i8, size: i32) -> float32x2x4_t; + } +vld4_dup_f32_(a as *const i8, 4) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_dup_f32(a: *const f32) -> float32x2x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v2f32.p0f32")] + fn vld4_dup_f32_(ptr: *const f32) -> float32x2x4_t; + } +vld4_dup_f32_(a as _) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4))] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4dup.v4f32.p0i8")] + fn vld4q_dup_f32_(ptr: *const i8, size: i32) -> float32x4x4_t; + } +vld4q_dup_f32_(a as *const i8, 4) +} + +/// Load single 4-element structure and replicate to all lanes of four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4r))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_dup_f32(a: *const f32) -> float32x4x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4r.v4f32.p0f32")] + fn vld4q_dup_f32_(ptr: *const f32) -> float32x4x4_t; + } +vld4q_dup_f32_(a as _) +}
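The `_dup` variants read one 4-element structure and replicate it across every lane of four result vectors. A minimal sketch of what that looks like from user code, assuming aarch64 and the stable `std::arch::aarch64` API; the helper name and data are illustrative:

```rust
#[cfg(target_arch = "aarch64")]
unsafe fn broadcast_rgba() {
    use std::arch::aarch64::*;
    let rgba: [u8; 4] = [10, 20, 30, 40];
    // One ld4r: c.0 == [10; 8], c.1 == [20; 8], c.2 == [30; 8], c.3 == [40; 8]
    let c: uint8x8x4_t = vld4_dup_u8(rgba.as_ptr());
    assert_eq!(vget_lane_u8::<7>(c.0), 10);
    assert_eq!(vget_lane_u8::<0>(c.3), 40);
}
```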
+ +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i8.p0i8")] + fn vld4_lane_s8_(ptr: *const i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i32, size: i32) -> int8x8x4_t; + } +vld4_lane_s8_(a as _, b.0, b.1, b.2, b.3, LANE, 1) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s8<const LANE: i32>(a: *const i8, b: int8x8x4_t) -> int8x8x4_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v8i8.p0i8")] + fn vld4_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *const i8) -> int8x8x4_t; + } +vld4_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i16.p0i8")] + fn vld4_lane_s16_(ptr: *const i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i32, size: i32) -> int16x4x4_t; + } +vld4_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s16<const LANE: i32>(a: *const i16, b: int16x4x4_t) -> int16x4x4_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v4i16.p0i8")] + fn vld4_lane_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i64, ptr: *const i8) -> int16x4x4_t; + } +vld4_lane_s16_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2i32.p0i8")] + fn vld4_lane_s32_(ptr: *const i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, n: i32, size: i32) -> int32x2x4_t; + } +vld4_lane_s32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_s32<const LANE: i32>(a: *const i32, b: int32x2x4_t) -> int32x2x4_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v2i32.p0i8")] + fn vld4_lane_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, n: i64, ptr: *const i8) -> int32x2x4_t; + } +vld4_lane_s32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v8i16.p0i8")] + fn vld4q_lane_s16_(ptr: *const i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, n: i32, size: i32) -> int16x8x4_t; + } +vld4q_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_s16(a: *const i16, b: int16x8x4_t) -> int16x8x4_t { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v8i16.p0i8")] + fn vld4q_lane_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, n: i64, ptr: *const i8) -> int16x8x4_t; + } +vld4q_lane_s16_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4i32.p0i8")] + fn vld4q_lane_s32_(ptr: *const i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, n: i32, size: i32) -> int32x4x4_t; + } +vld4q_lane_s32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_s32(a: *const i32, b: int32x4x4_t) -> int32x4x4_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v4i32.p0i8")] + fn vld4q_lane_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, n: i64, ptr: *const i8) -> int32x4x4_t; + } +vld4q_lane_s32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_lane_u8(a: *const u8, b: uint8x8x4_t) -> uint8x8x4_t { + static_assert_imm3!(LANE); + transmute(vld4_lane_s8::(transmute(a), transmute(b))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_lane_u16<const LANE: i32>(a: *const u16, b: uint16x4x4_t) -> uint16x4x4_t { + static_assert_imm2!(LANE); + transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_lane_u32<const LANE: i32>(a: *const u32, b: uint32x2x4_t) -> uint32x2x4_t { + static_assert_imm1!(LANE); + transmute(vld4_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_lane_u16<const LANE: i32>(a: *const u16, b: uint16x8x4_t) -> uint16x8x4_t { + static_assert_imm3!(LANE); + transmute(vld4q_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_lane_u32<const LANE: i32>(a: *const u32, b: uint32x4x4_t) -> uint32x4x4_t { + static_assert_imm2!(LANE); + transmute(vld4q_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_lane_p8<const LANE: i32>(a: *const p8, b: poly8x8x4_t) -> poly8x8x4_t { + static_assert_imm3!(LANE); + transmute(vld4_lane_s8::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4_lane_p16<const LANE: i32>(a: *const p16, b: poly16x4x4_t) -> poly16x4x4_t { + static_assert_imm2!(LANE); + transmute(vld4_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vld4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vld4q_lane_p16(a: *const p16, b: poly16x8x4_t) -> poly16x8x4_t { + static_assert_imm3!(LANE); + transmute(vld4q_lane_s16::(transmute(a), transmute(b))) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v2f32.p0i8")] + fn vld4_lane_f32_(ptr: *const i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, n: i32, size: i32) -> float32x2x4_t; + } +vld4_lane_f32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4_lane_f32(a: *const f32, b: float32x2x4_t) -> float32x2x4_t { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v2f32.p0i8")] + fn vld4_lane_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, n: i64, ptr: *const i8) -> float32x2x4_t; + } +vld4_lane_f32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vld4lane.v4f32.p0i8")] + fn vld4q_lane_f32_(ptr: *const i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, n: i32, size: i32) -> float32x4x4_t; + } +vld4q_lane_f32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} + +/// Load multiple 4-element structures to four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(ld4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vld4q_lane_f32(a: *const f32, b: float32x4x4_t) -> float32x4x4_t { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ld4lane.v4f32.p0i8")] + fn vld4q_lane_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, n: i64, ptr: *const i8) -> float32x4x4_t; + } +vld4q_lane_f32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
+ +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8_t) { + static_assert_imm3!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4_t) { + static_assert_imm2!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2_t) { + static_assert_imm1!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_s64<const LANE: i32>(a: *mut i64, b: int64x1_t) { + static_assert!(LANE : i32 where LANE == 0); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_s8<const LANE: i32>(a: *mut i8, b: int8x16_t) { + static_assert_imm4!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8_t) { + static_assert_imm3!(LANE); + *a = simd_extract(b, LANE as u32); +}
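These lane stores lower to a scalar store of one extracted lane, which is why the test attributes expect `nop` rather than a vector store instruction. A sketch of that equivalence, assuming aarch64 and the stable `std::arch::aarch64` API (the helper name is hypothetical):

```rust
#[cfg(target_arch = "aarch64")]
unsafe fn lane_store_equivalence() {
    use std::arch::aarch64::*;
    let v = vld1q_s32([1, 2, 3, 4].as_ptr());
    let mut stored = 0i32;
    vst1q_lane_s32::<2>(&mut stored, v); // intrinsic lane store
    let extracted = vgetq_lane_s32::<2>(v); // manual extract, then a plain write
    assert_eq!(stored, extracted); // both observe 3
}
```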
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_s32(a: *mut i32, b: int32x4_t) { + static_assert_imm2!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_s64(a: *mut i64, b: int64x2_t) { + static_assert_imm1!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_u8(a: *mut u8, b: uint8x8_t) { + static_assert_imm3!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_u16(a: *mut u16, b: uint16x4_t) { + static_assert_imm2!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_u32(a: *mut u32, b: uint32x2_t) { + static_assert_imm1!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_u64(a: *mut u64, b: uint64x1_t) { + static_assert!(LANE : i32 where LANE == 0); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple 
+ +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x16_t) { + static_assert_imm4!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8_t) { + static_assert_imm3!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4_t) { + static_assert_imm2!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_u64<const LANE: i32>(a: *mut u64, b: uint64x2_t) { + static_assert_imm1!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8_t) { + static_assert_imm3!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4_t)
{ + static_assert_imm2!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x16_t) { + static_assert_imm4!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8_t) { + static_assert_imm3!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x1_t) { + static_assert!(LANE : i32 where LANE == 0); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_p64<const LANE: i32>(a: *mut p64, b: poly64x2_t) { + static_assert_imm1!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2_t) { + static_assert_imm1!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_lane_f32(a: *mut f32, b: float32x4_t) { + static_assert_imm2!(LANE); + *a = simd_extract(b, LANE as u32); +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v8i8")] + fn vst1_s8_x2_(ptr: *mut i8, a: int8x8_t, b: int8x8_t); + } +vst1_s8_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s8_x2(a: *mut i8, b: int8x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v8i8.p0i8")] + fn vst1_s8_x2_(a: int8x8_t, b: int8x8_t, ptr: *mut i8); + } +vst1_s8_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v4i16")] + fn vst1_s16_x2_(ptr: *mut i16, a: int16x4_t, b: int16x4_t); + } +vst1_s16_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s16_x2(a: *mut i16, b: int16x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v4i16.p0i16")] + fn vst1_s16_x2_(a: int16x4_t, b: int16x4_t, ptr: *mut i16); + } +vst1_s16_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v2i32")] + fn vst1_s32_x2_(ptr: *mut i32, a: int32x2_t, b: int32x2_t); + } +vst1_s32_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s32_x2(a: *mut i32, b: int32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2i32.p0i32")] + fn vst1_s32_x2_(a: int32x2_t, b: int32x2_t, ptr: *mut i32); + } +vst1_s32_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, 
assert_instr(vst1))] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v1i64")] + fn vst1_s64_x2_(ptr: *mut i64, a: int64x1_t, b: int64x1_t); + } +vst1_s64_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s64_x2(a: *mut i64, b: int64x1x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v1i64.p0i64")] + fn vst1_s64_x2_(a: int64x1_t, b: int64x1_t, ptr: *mut i64); + } +vst1_s64_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i8.v16i8")] + fn vst1q_s8_x2_(ptr: *mut i8, a: int8x16_t, b: int8x16_t); + } +vst1q_s8_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s8_x2(a: *mut i8, b: int8x16x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v16i8.p0i8")] + fn vst1q_s8_x2_(a: int8x16_t, b: int8x16_t, ptr: *mut i8); + } +vst1q_s8_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i16.v8i16")] + fn vst1q_s16_x2_(ptr: *mut i16, a: int16x8_t, b: int16x8_t); + } +vst1q_s16_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s16_x2(a: *mut i16, b: int16x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v8i16.p0i16")] + fn vst1q_s16_x2_(a: int16x8_t, b: int16x8_t, ptr: *mut i16); + } +vst1q_s16_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i32.v4i32")] + fn vst1q_s32_x2_(ptr: *mut i32, a: int32x4_t, b: int32x4_t); + } +vst1q_s32_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or 
four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s32_x2(a: *mut i32, b: int32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v4i32.p0i32")] + fn vst1q_s32_x2_(a: int32x4_t, b: int32x4_t, ptr: *mut i32); + } +vst1q_s32_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0i64.v2i64")] + fn vst1q_s64_x2_(ptr: *mut i64, a: int64x2_t, b: int64x2_t); + } +vst1q_s64_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s64_x2(a: *mut i64, b: int64x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2i64.p0i64")] + fn vst1q_s64_x2_(a: int64x2_t, b: int64x2_t, ptr: *mut i64); + } +vst1q_s64_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v8i8")] + fn vst1_s8_x3_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t); + } +vst1_s8_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s8_x3(a: *mut i8, b: int8x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v8i8.p0i8")] + fn vst1_s8_x3_(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); + } +vst1_s8_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v4i16")] + fn vst1_s16_x3_(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t); + } +vst1_s16_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s16_x3(a: *mut i16, b: int16x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", 
link_name = "llvm.aarch64.neon.st1x3.v4i16.p0i16")] + fn vst1_s16_x3_(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i16); + } +vst1_s16_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v2i32")] + fn vst1_s32_x3_(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t); + } +vst1_s32_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s32_x3(a: *mut i32, b: int32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2i32.p0i32")] + fn vst1_s32_x3_(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i32); + } +vst1_s32_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v1i64")] + fn vst1_s64_x3_(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t); + } +vst1_s64_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s64_x3(a: *mut i64, b: int64x1x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v1i64.p0i64")] + fn vst1_s64_x3_(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i64); + } +vst1_s64_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i8.v16i8")] + fn vst1q_s8_x3_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t); + } +vst1q_s8_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s8_x3(a: *mut i8, b: int8x16x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v16i8.p0i8")] + fn vst1q_s8_x3_(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); + } +vst1q_s8_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] 
+#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i16.v8i16")] + fn vst1q_s16_x3_(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t); + } +vst1q_s16_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s16_x3(a: *mut i16, b: int16x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v8i16.p0i16")] + fn vst1q_s16_x3_(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i16); + } +vst1q_s16_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i32.v4i32")] + fn vst1q_s32_x3_(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t); + } +vst1q_s32_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s32_x3(a: *mut i32, b: int32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v4i32.p0i32")] + fn vst1q_s32_x3_(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i32); + } +vst1q_s32_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0i64.v2i64")] + fn vst1q_s64_x3_(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t); + } +vst1q_s64_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s64_x3(a: *mut i64, b: int64x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2i64.p0i64")] + fn vst1q_s64_x3_(a: int64x2_t, b: int64x2_t, c: int64x2_t, ptr: *mut i64); + } +vst1q_s64_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name 
= "llvm.arm.neon.vst1x4.p0i8.v8i8")] + fn vst1_s8_x4_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t); + } +vst1_s8_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s8_x4(a: *mut i8, b: int8x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v8i8.p0i8")] + fn vst1_s8_x4_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + } +vst1_s8_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v4i16")] + fn vst1_s16_x4_(ptr: *mut i16, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t); + } +vst1_s16_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s16_x4(a: *mut i16, b: int16x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v4i16.p0i16")] + fn vst1_s16_x4_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i16); + } +vst1_s16_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v2i32")] + fn vst1_s32_x4_(ptr: *mut i32, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t); + } +vst1_s32_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s32_x4(a: *mut i32, b: int32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2i32.p0i32")] + fn vst1_s32_x4_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i32); + } +vst1_s32_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v1i64")] + fn vst1_s64_x4_(ptr: *mut i64, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t); + } +vst1_s64_x4_(a, b.0, b.1, b.2, b.3) +} + +/// 
Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_s64_x4(a: *mut i64, b: int64x1x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v1i64.p0i64")] + fn vst1_s64_x4_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i64); + } +vst1_s64_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i8.v16i8")] + fn vst1q_s8_x4_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t); + } +vst1q_s8_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s8_x4(a: *mut i8, b: int8x16x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v16i8.p0i8")] + fn vst1q_s8_x4_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + } +vst1q_s8_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i16.v8i16")] + fn vst1q_s16_x4_(ptr: *mut i16, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t); + } +vst1q_s16_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s16_x4(a: *mut i16, b: int16x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v8i16.p0i16")] + fn vst1q_s16_x4_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i16); + } +vst1q_s16_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i32.v4i32")] + fn vst1q_s32_x4_(ptr: *mut i32, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t); + } +vst1q_s32_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = 
"neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s32_x4(a: *mut i32, b: int32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v4i32.p0i32")] + fn vst1q_s32_x4_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i32); + } +vst1q_s32_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0i64.v2i64")] + fn vst1q_s64_x4_(ptr: *mut i64, a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t); + } +vst1q_s64_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures from one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_s64_x4(a: *mut i64, b: int64x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2i64.p0i64")] + fn vst1q_s64_x4_(a: int64x2_t, b: int64x2_t, c: int64x2_t, d: int64x2_t, ptr: *mut i64); + } +vst1q_s64_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u8_x2(a: *mut u8, b: uint8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u16_x2(a: *mut u16, b: uint16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u32_x2(a: *mut u32, b: uint32x2x2_t) { + vst1_s32_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = 
"neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u64_x2(a: *mut u64, b: uint64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u8_x2(a: *mut u8, b: uint8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u16_x2(a: *mut u16, b: uint16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u32_x2(a: *mut u32, b: uint32x4x2_t) { + vst1q_s32_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u64_x2(a: *mut u64, b: uint64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u8_x3(a: *mut u8, b: uint8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u16_x3(a: *mut u16, b: uint16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u32_x3(a: *mut u32, b: uint32x2x3_t) { + vst1_s32_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u64_x3(a: *mut u64, b: uint64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u8_x3(a: *mut u8, b: uint8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u16_x3(a: *mut u16, b: uint16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u32_x3(a: *mut u32, b: uint32x4x3_t) { + vst1q_s32_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u64_x3(a: *mut u64, b: uint64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u8_x4(a: *mut u8, b: uint8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] 
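+// Editorial note, not part of the generated output: the unsigned `_xN`
+// wrappers in this block reinterpret their pointer and vector-tuple
+// arguments with `transmute` and defer to the signed implementations,
+// which is sound because the signed and unsigned vector types share a
+// bit-for-bit identical layout.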
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u16_x4(a: *mut u16, b: uint16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u32_x4(a: *mut u32, b: uint32x2x4_t) { + vst1_s32_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_u64_x4(a: *mut u64, b: uint64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u8_x4(a: *mut u8, b: uint8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u16_x4(a: *mut u16, b: uint16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u32_x4(a: *mut u32, b: uint32x4x4_t) { + vst1q_s32_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_u64_x4(a: *mut u64, b: 
uint64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p8_x2(a: *mut p8, b: poly8x8x2_t) { + vst1_s8_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p8_x3(a: *mut p8, b: poly8x8x3_t) { + vst1_s8_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p8_x4(a: *mut p8, b: poly8x8x4_t) { + vst1_s8_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p8_x2(a: *mut p8, b: poly8x16x2_t) { + vst1q_s8_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p8_x3(a: *mut p8, b: poly8x16x3_t) { + vst1q_s8_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p8_x4(a: *mut p8, b: poly8x16x4_t) { + vst1q_s8_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] 
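+// Editorial note, not part of the generated output: the polynomial `p8`
+// and `p16` stores below reuse the signed paths the same way; the `p64`
+// variants further down additionally require the `aes` target feature,
+// the extension under which 64-bit polynomial support is gated.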
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p16_x2(a: *mut p16, b: poly16x4x2_t) { + vst1_s16_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p16_x3(a: *mut p16, b: poly16x4x3_t) { + vst1_s16_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p16_x4(a: *mut p16, b: poly16x4x4_t) { + vst1_s16_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p16_x2(a: *mut p16, b: poly16x8x2_t) { + vst1q_s16_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p16_x3(a: *mut p16, b: poly16x8x3_t) { + vst1q_s16_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p16_x4(a: *mut p16, b: poly16x8x4_t) { + vst1q_s16_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p64_x2(a: *mut p64, b: poly64x1x2_t) { + vst1_s64_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p64_x3(a: *mut p64, b: poly64x1x3_t) { + vst1_s64_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1_p64_x4(a: *mut p64, b: poly64x1x4_t) { + vst1_s64_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p64_x2(a: *mut p64, b: poly64x2x2_t) { + vst1q_s64_x2(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p64_x3(a: *mut p64, b: poly64x2x3_t) { + vst1q_s64_x3(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st1))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst1q_p64_x4(a: *mut p64, b: poly64x2x4_t) { + vst1q_s64_x4(transmute(a), transmute(b)) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v2f32")] + fn vst1_f32_x2_(ptr: *mut f32, a: float32x2_t, b: float32x2_t); + } +vst1_f32_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x2(a: *mut f32, b: float32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v2f32.p0f32")] + fn vst1_f32_x2_(a: float32x2_t, b: float32x2_t, ptr: *mut f32); + } 
+vst1_f32_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x2.p0f32.v4f32")] + fn vst1q_f32_x2_(ptr: *mut f32, a: float32x4_t, b: float32x4_t); + } +vst1q_f32_x2_(a, b.0, b.1) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x2(a: *mut f32, b: float32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x2.v4f32.p0f32")] + fn vst1q_f32_x2_(a: float32x4_t, b: float32x4_t, ptr: *mut f32); + } +vst1q_f32_x2_(b.0, b.1, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v2f32")] + fn vst1_f32_x3_(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t); + } +vst1_f32_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x3(a: *mut f32, b: float32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v2f32.p0f32")] + fn vst1_f32_x3_(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut f32); + } +vst1_f32_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x3.p0f32.v4f32")] + fn vst1q_f32_x3_(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t); + } +vst1q_f32_x3_(a, b.0, b.1, b.2) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x3(a: *mut f32, b: float32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x3.v4f32.p0f32")] + fn vst1q_f32_x3_(a: float32x4_t, b: float32x4_t, c: float32x4_t, ptr: *mut f32); + } +vst1q_f32_x3_(b.0, b.1, b.2, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1_f32_x4(a: *mut 
f32, b: float32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v2f32")] + fn vst1_f32_x4_(ptr: *mut f32, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t); + } +vst1_f32_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1_f32_x4(a: *mut f32, b: float32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v2f32.p0f32")] + fn vst1_f32_x4_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut f32); + } +vst1_f32_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst1))] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst1x4.p0f32.v4f32")] + fn vst1q_f32_x4_(ptr: *mut f32, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t); + } +vst1q_f32_x4_(a, b.0, b.1, b.2, b.3) +} + +/// Store multiple single-element structures to one, two, three, or four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st1))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst1q_f32_x4(a: *mut f32, b: float32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st1x4.v4f32.p0f32")] + fn vst1q_f32_x4_(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut f32); + } +vst1q_f32_x4_(b.0, b.1, b.2, b.3, a) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i8")] + fn vst2_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, size: i32); + } +vst2_s8_(a as _, b.0, b.1, 1) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_s8(a: *mut i8, b: int8x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v8i8.p0i8")] + fn vst2_s8_(a: int8x8_t, b: int8x8_t, ptr: *mut i8); + } +vst2_s8_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4i16")] + fn vst2_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, size: i32); + } +vst2_s16_(a as _, b.0, b.1, 2) +} + +/// Store multiple 2-element 
structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_s16(a: *mut i16, b: int16x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v4i16.p0i8")] + fn vst2_s16_(a: int16x4_t, b: int16x4_t, ptr: *mut i8); + } +vst2_s16_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v2i32")] + fn vst2_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, size: i32); + } +vst2_s32_(a as _, b.0, b.1, 4) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_s32(a: *mut i32, b: int32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v2i32.p0i8")] + fn vst2_s32_(a: int32x2_t, b: int32x2_t, ptr: *mut i8); + } +vst2_s32_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v16i8")] + fn vst2q_s8_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, size: i32); + } +vst2q_s8_(a as _, b.0, b.1, 1) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_s8(a: *mut i8, b: int8x16x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v16i8.p0i8")] + fn vst2q_s8_(a: int8x16_t, b: int8x16_t, ptr: *mut i8); + } +vst2q_s8_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v8i16")] + fn vst2q_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, size: i32); + } +vst2q_s16_(a as _, b.0, b.1, 2) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_s16(a: *mut i16, b: int16x8x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v8i16.p0i8")] + fn vst2q_s16_(a: int16x8_t, b: int16x8_t, ptr: *mut i8); + } +vst2q_s16_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two 
registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4i32")] + fn vst2q_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, size: i32); + } +vst2q_s32_(a as _, b.0, b.1, 4) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_s32(a: *mut i32, b: int32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v4i32.p0i8")] + fn vst2q_s32_(a: int32x4_t, b: int32x4_t, ptr: *mut i8); + } +vst2q_s32_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v1i64")] + fn vst2_s64_(ptr: *mut i8, a: int64x1_t, b: int64x1_t, size: i32); + } +vst2_s64_(a as _, b.0, b.1, 8) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_s64(a: *mut i64, b: int64x1x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v1i64.p0i8")] + fn vst2_s64_(a: int64x1_t, b: int64x1_t, ptr: *mut i8); + } +vst2_s64_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_u8(a: *mut u8, b: uint8x8x2_t) { + transmute(vst2_s8(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_u16(a: *mut u16, b: uint16x4x2_t) { + transmute(vst2_s16(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_u32(a: *mut u32, b: uint32x2x2_t) { + transmute(vst2_s32(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] 
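+// A minimal usage sketch of the interleaving stores above (illustrative only;
+// the `pair` value and the output buffer are hypothetical, not part of this file):
+//
+//     let pair = int32x2x2_t(vdup_n_s32(0), vdup_n_s32(1));
+//     let mut out = [0i32; 4];
+//     vst2_s32(out.as_mut_ptr(), pair);
+//     // out == [0, 1, 0, 1]: element i of each register forms group i.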
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2q_u8(a: *mut u8, b: uint8x16x2_t) { + transmute(vst2q_s8(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2q_u16(a: *mut u16, b: uint16x8x2_t) { + transmute(vst2q_s16(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2q_u32(a: *mut u32, b: uint32x4x2_t) { + transmute(vst2q_s32(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_p8(a: *mut p8, b: poly8x8x2_t) { + transmute(vst2_s8(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_p16(a: *mut p16, b: poly16x4x2_t) { + transmute(vst2_s16(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2q_p8(a: *mut p8, b: poly8x16x2_t) { + transmute(vst2q_s8(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2q_p16(a: *mut p16, b: poly16x8x2_t) { + transmute(vst2q_s16(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers 
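+// The unsigned and polynomial wrappers above and below reinterpret the pointer
+// and registers with `transmute` and defer to the signed implementations; this
+// is sound because the store only copies bit patterns and the layouts match.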
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_u64(a: *mut u64, b: uint64x1x2_t) { + transmute(vst2_s64(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_p64(a: *mut p64, b: poly64x1x2_t) { + transmute(vst2_s64(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v2f32")] + fn vst2_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, size: i32); + } +vst2_f32_(a as _, b.0, b.1, 4) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_f32(a: *mut f32, b: float32x2x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v2f32.p0i8")] + fn vst2_f32_(a: float32x2_t, b: float32x2_t, ptr: *mut i8); + } +vst2_f32_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2))] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2.p0i8.v4f32")] + fn vst2q_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, size: i32); + } +vst2q_f32_(a as _, b.0, b.1, 4) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_f32(a: *mut f32, b: float32x4x2_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2.v4f32.p0i8")] + fn vst2q_f32_(a: float32x4_t, b: float32x4_t, ptr: *mut i8); + } +vst2q_f32_(b.0, b.1, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8i8")] + fn vst2_lane_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, n: i32, size: i32); + } +vst2_lane_s8_(a as _, b.0, 
b.1, LANE, 1) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x2_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v8i8.p0i8")] + fn vst2_lane_s8_(a: int8x8_t, b: int8x8_t, n: i64, ptr: *mut i8); + } +vst2_lane_s8_(b.0, b.1, LANE as i64, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4i16")] + fn vst2_lane_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, n: i32, size: i32); + } +vst2_lane_s16_(a as _, b.0, b.1, LANE, 2) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x2_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v4i16.p0i8")] + fn vst2_lane_s16_(a: int16x4_t, b: int16x4_t, n: i64, ptr: *mut i8); + } +vst2_lane_s16_(b.0, b.1, LANE as i64, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2i32")] + fn vst2_lane_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, n: i32, size: i32); + } +vst2_lane_s32_(a as _, b.0, b.1, LANE, 4) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x2_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v2i32.p0i8")] + fn vst2_lane_s32_(a: int32x2_t, b: int32x2_t, n: i64, ptr: *mut i8); + } +vst2_lane_s32_(b.0, b.1, LANE as i64, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v8i16")] + fn 
vst2q_lane_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, n: i32, size: i32); + } +vst2q_lane_s16_(a as _, b.0, b.1, LANE, 2) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x2_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v8i16.p0i8")] + fn vst2q_lane_s16_(a: int16x8_t, b: int16x8_t, n: i64, ptr: *mut i8); + } +vst2q_lane_s16_(b.0, b.1, LANE as i64, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4i32")] + fn vst2q_lane_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, n: i32, size: i32); + } +vst2q_lane_s32_(a as _, b.0, b.1, LANE, 4) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x2_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v4i32.p0i8")] + fn vst2q_lane_s32_(a: int32x4_t, b: int32x4_t, n: i64, ptr: *mut i8); + } +vst2q_lane_s32_(b.0, b.1, LANE as i64, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x2_t) { + static_assert_imm3!(LANE); + transmute(vst2_lane_s8::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x2_t) { + static_assert_imm2!(LANE); + transmute(vst2_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2, LANE = 0))] 
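+// Sketch of how the `_lane` variants are called (hypothetical buffer; the const
+// generic picks which element of each register is stored):
+//
+//     let pair = int16x4x2_t(vdup_n_s16(7), vdup_n_s16(8));
+//     let mut out = [0i16; 2];
+//     vst2_lane_s16::<3>(out.as_mut_ptr(), pair); // out == [7, 8]
+//
+// `rustc_legacy_const_generics(2)` additionally lets the lane be written as a
+// third call argument, and `static_assert_imm2!` rejects lanes outside 0..=3.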
+#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x2_t) { + static_assert_imm1!(LANE); + transmute(vst2_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x2_t) { + static_assert_imm3!(LANE); + transmute(vst2q_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x2_t) { + static_assert_imm2!(LANE); + transmute(vst2q_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x2_t) { + static_assert_imm3!(LANE); + transmute(vst2_lane_s8::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x2_t) { + static_assert_imm2!(LANE); + transmute(vst2_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst2, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst2q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x2_t) { + static_assert_imm3!(LANE); + transmute(vst2q_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn 
vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v2f32")] + fn vst2_lane_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, n: i32, size: i32); + } +vst2_lane_f32_(a as _, b.0, b.1, LANE, 4) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x2_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v2f32.p0i8")] + fn vst2_lane_f32_(a: float32x2_t, b: float32x2_t, n: i64, ptr: *mut i8); + } +vst2_lane_f32_(b.0, b.1, LANE as i64, a as _) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst2lane.p0i8.v4f32")] + fn vst2q_lane_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, n: i32, size: i32); + } +vst2q_lane_f32_(a as _, b.0, b.1, LANE, 4) +} + +/// Store multiple 2-element structures from two registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st2, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst2q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x2_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st2lane.v4f32.p0i8")] + fn vst2q_lane_f32_(a: float32x4_t, b: float32x4_t, n: i64, ptr: *mut i8); + } +vst2q_lane_f32_(b.0, b.1, LANE as i64, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v8i8")] + fn vst3_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, size: i32); + } +vst3_s8_(a as _, b.0, b.1, b.2, 1) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_s8(a: *mut i8, b: int8x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v8i8.p0i8")] + fn vst3_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, ptr: *mut i8); + } +vst3_s8_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + 
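+// Note the two foreign-function conventions used throughout this file: the ARM
+// intrinsics take the destination pointer first plus a trailing i32 alignment
+// hint (1, 2, 4 or 8, matching the element size in bytes here), while the
+// AArch64 intrinsics take the data registers first and the pointer last.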
#[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4i16")] + fn vst3_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, size: i32); + } +vst3_s16_(a as _, b.0, b.1, b.2, 2) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_s16(a: *mut i16, b: int16x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v4i16.p0i8")] + fn vst3_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, ptr: *mut i8); + } +vst3_s16_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v2i32")] + fn vst3_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, size: i32); + } +vst3_s32_(a as _, b.0, b.1, b.2, 4) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_s32(a: *mut i32, b: int32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v2i32.p0i8")] + fn vst3_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, ptr: *mut i8); + } +vst3_s32_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v16i8")] + fn vst3q_s8_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, size: i32); + } +vst3q_s8_(a as _, b.0, b.1, b.2, 1) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_s8(a: *mut i8, b: int8x16x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v16i8.p0i8")] + fn vst3q_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, ptr: *mut i8); + } +vst3q_s8_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v8i16")] + fn vst3q_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, size: i32); + } +vst3q_s16_(a as _, b.0, b.1, b.2, 2) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3))] +#[stable(feature = 
"neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_s16(a: *mut i16, b: int16x8x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v8i16.p0i8")] + fn vst3q_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, ptr: *mut i8); + } +vst3q_s16_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4i32")] + fn vst3q_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, size: i32); + } +vst3q_s32_(a as _, b.0, b.1, b.2, 4) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_s32(a: *mut i32, b: int32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v4i32.p0i8")] + fn vst3q_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, ptr: *mut i8); + } +vst3q_s32_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v1i64")] + fn vst3_s64_(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, size: i32); + } +vst3_s64_(a as _, b.0, b.1, b.2, 8) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_s64(a: *mut i64, b: int64x1x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v1i64.p0i8")] + fn vst3_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, ptr: *mut i8); + } +vst3_s64_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_u8(a: *mut u8, b: uint8x8x3_t) { + transmute(vst3_s8(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_u16(a: *mut u16, b: uint16x4x3_t) { + transmute(vst3_s16(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_u32(a: *mut u32, b: uint32x2x3_t) { + transmute(vst3_s32(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3q_u8(a: *mut u8, b: uint8x16x3_t) { + transmute(vst3q_s8(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3q_u16(a: *mut u16, b: uint16x8x3_t) { + transmute(vst3q_s16(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3q_u32(a: *mut u32, b: uint32x4x3_t) { + transmute(vst3q_s32(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_p8(a: *mut p8, b: poly8x8x3_t) { + transmute(vst3_s8(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_p16(a: *mut p16, b: poly16x4x3_t) { + transmute(vst3_s16(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3q_p8(a: *mut p8, b: poly8x16x3_t) { + transmute(vst3q_s8(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three 
registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3q_p16(a: *mut p16, b: poly16x8x3_t) { + transmute(vst3q_s16(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_u64(a: *mut u64, b: uint64x1x3_t) { + transmute(vst3_s64(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_p64(a: *mut p64, b: poly64x1x3_t) { + transmute(vst3_s64(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v2f32")] + fn vst3_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, size: i32); + } +vst3_f32_(a as _, b.0, b.1, b.2, 4) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_f32(a: *mut f32, b: float32x2x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v2f32.p0i8")] + fn vst3_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, ptr: *mut i8); + } +vst3_f32_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3))] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3.p0i8.v4f32")] + fn vst3q_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, size: i32); + } +vst3q_f32_(a as _, b.0, b.1, b.2, 4) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_f32(a: *mut f32, b: float32x4x3_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3.v4f32.p0i8")] + fn vst3q_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, 
ptr: *mut i8); + } +vst3q_f32_(b.0, b.1, b.2, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v8i8")] + fn vst3_lane_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i32, size: i32); + } +vst3_lane_s8_(a as _, b.0, b.1, b.2, LANE, 1) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x3_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v8i8.p0i8")] + fn vst3_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, n: i64, ptr: *mut i8); + } +vst3_lane_s8_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4i16")] + fn vst3_lane_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i32, size: i32); + } +vst3_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x3_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v4i16.p0i8")] + fn vst3_lane_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, n: i64, ptr: *mut i8); + } +vst3_lane_s16_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v2i32")] + fn vst3_lane_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i32, size: i32); + } +vst3_lane_s32_(a as _, b.0, b.1, b.2, LANE, 4) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x3_t) { + static_assert_imm1!(LANE); + 
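+// The lane bound tracks the register width: `static_assert_imm1!` (0..=1) for
+// two-lane vectors such as int32x2_t here, `imm2` (0..=3) for four lanes, and
+// `imm3` (0..=7) for eight. For example, `vst3_lane_s32::<1>(ptr, triple)` would
+// write element 1 of each of the three registers (illustrative call only).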
#[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v2i32.p0i8")] + fn vst3_lane_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, n: i64, ptr: *mut i8); + } +vst3_lane_s32_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v8i16")] + fn vst3q_lane_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i32, size: i32); + } +vst3q_lane_s16_(a as _, b.0, b.1, b.2, LANE, 2) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x3_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v8i16.p0i8")] + fn vst3q_lane_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, n: i64, ptr: *mut i8); + } +vst3q_lane_s16_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4i32")] + fn vst3q_lane_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i32, size: i32); + } +vst3q_lane_s32_(a as _, b.0, b.1, b.2, LANE, 4) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x3_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v4i32.p0i8")] + fn vst3q_lane_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, n: i64, ptr: *mut i8); + } +vst3q_lane_s32_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x3_t) { + static_assert_imm3!(LANE); + transmute(vst3_lane_s8::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x3_t) { + static_assert_imm2!(LANE); + transmute(vst3_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x3_t) { + static_assert_imm1!(LANE); + transmute(vst3_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x3_t) { + static_assert_imm3!(LANE); + transmute(vst3q_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3q_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x4x3_t) { + static_assert_imm2!(LANE); + transmute(vst3q_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_lane_p8<const LANE: i32>(a: *mut p8, b: poly8x8x3_t) { + static_assert_imm3!(LANE); + transmute(vst3_lane_s8::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x4x3_t) { + static_assert_imm2!(LANE); + transmute(vst3_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 
3-element structures from three registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst3, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst3q_lane_p16<const LANE: i32>(a: *mut p16, b: poly16x8x3_t) { + static_assert_imm3!(LANE); + transmute(vst3q_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v2f32")] + fn vst3_lane_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i32, size: i32); + } +vst3_lane_f32_(a as _, b.0, b.1, b.2, LANE, 4) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3_lane_f32<const LANE: i32>(a: *mut f32, b: float32x2x3_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v2f32.p0i8")] + fn vst3_lane_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, n: i64, ptr: *mut i8); + } +vst3_lane_f32_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst3lane.p0i8.v4f32")] + fn vst3q_lane_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i32, size: i32); + } +vst3q_lane_f32_(a as _, b.0, b.1, b.2, LANE, 4) +} + +/// Store multiple 3-element structures from three registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st3, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst3q_lane_f32<const LANE: i32>(a: *mut f32, b: float32x4x3_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st3lane.v4f32.p0i8")] + fn vst3q_lane_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, n: i64, ptr: *mut i8); + } +vst3q_lane_f32_(b.0, b.1, b.2, LANE as i64, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v8i8")] + fn vst4_s8_(ptr: *mut i8, a: 
int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, size: i32); + } +vst4_s8_(a as _, b.0, b.1, b.2, b.3, 1) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_s8(a: *mut i8, b: int8x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v8i8.p0i8")] + fn vst4_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, ptr: *mut i8); + } +vst4_s8_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4i16")] + fn vst4_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, size: i32); + } +vst4_s16_(a as _, b.0, b.1, b.2, b.3, 2) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_s16(a: *mut i16, b: int16x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v4i16.p0i8")] + fn vst4_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, ptr: *mut i8); + } +vst4_s16_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v2i32")] + fn vst4_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, size: i32); + } +vst4_s32_(a as _, b.0, b.1, b.2, b.3, 4) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_s32(a: *mut i32, b: int32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v2i32.p0i8")] + fn vst4_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, ptr: *mut i8); + } +vst4_s32_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v16i8")] + fn vst4q_s8_(ptr: *mut i8, a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, size: i32); + } +vst4q_s8_(a as _, b.0, b.1, b.2, b.3, 1) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4))] +#[stable(feature = "neon_intrinsics", 
since = "1.59.0")] +pub unsafe fn vst4q_s8(a: *mut i8, b: int8x16x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v16i8.p0i8")] + fn vst4q_s8_(a: int8x16_t, b: int8x16_t, c: int8x16_t, d: int8x16_t, ptr: *mut i8); + } +vst4q_s8_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v8i16")] + fn vst4q_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, size: i32); + } +vst4q_s16_(a as _, b.0, b.1, b.2, b.3, 2) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_s16(a: *mut i16, b: int16x8x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v8i16.p0i8")] + fn vst4q_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, ptr: *mut i8); + } +vst4q_s16_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4i32")] + fn vst4q_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, size: i32); + } +vst4q_s32_(a as _, b.0, b.1, b.2, b.3, 4) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_s32(a: *mut i32, b: int32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v4i32.p0i8")] + fn vst4q_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, ptr: *mut i8); + } +vst4q_s32_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(nop))] +pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v1i64")] + fn vst4_s64_(ptr: *mut i8, a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, size: i32); + } +vst4_s64_(a as _, b.0, b.1, b.2, b.3, 8) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(nop))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_s64(a: *mut i64, b: int64x1x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v1i64.p0i8")] + fn vst4_s64_(a: int64x1_t, b: int64x1_t, c: int64x1_t, d: int64x1_t, ptr: *mut i8); + } 
+vst4_s64_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_u8(a: *mut u8, b: uint8x8x4_t) { + transmute(vst4_s8(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_u16(a: *mut u16, b: uint16x4x4_t) { + transmute(vst4_s16(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_u32(a: *mut u32, b: uint32x2x4_t) { + transmute(vst4_s32(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4q_u8(a: *mut u8, b: uint8x16x4_t) { + transmute(vst4q_s8(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4q_u16(a: *mut u16, b: uint16x8x4_t) { + transmute(vst4q_s16(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4q_u32(a: *mut u32, b: uint32x4x4_t) { + transmute(vst4q_s32(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_p8(a: *mut p8, b: poly8x8x4_t) { 
+ transmute(vst4_s8(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_p16(a: *mut p16, b: poly16x4x4_t) { + transmute(vst4_s16(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4q_p8(a: *mut p8, b: poly8x16x4_t) { + transmute(vst4q_s8(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4q_p16(a: *mut p16, b: poly16x8x4_t) { + transmute(vst4q_s16(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_u64(a: *mut u64, b: uint64x1x4_t) { + transmute(vst4_s64(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_p64(a: *mut p64, b: poly64x1x4_t) { + transmute(vst4_s64(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v2f32")] + fn vst4_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, size: i32); + } +vst4_f32_(a as _, b.0, b.1, b.2, b.3, 4) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_f32(a: *mut f32, b: float32x2x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.aarch64.neon.st4.v2f32.p0i8")] + fn vst4_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, ptr: *mut i8); + } +vst4_f32_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4))] +pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4.p0i8.v4f32")] + fn vst4q_f32_(ptr: *mut i8, a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, size: i32); + } +vst4q_f32_(a as _, b.0, b.1, b.2, b.3, 4) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4))] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_f32(a: *mut f32, b: float32x4x4_t) { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4.v4f32.p0i8")] + fn vst4q_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t, d: float32x4_t, ptr: *mut i8); + } +vst4q_f32_(b.0, b.1, b.2, b.3, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8i8")] + fn vst4_lane_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i32, size: i32); + } +vst4_lane_s8_(a as _, b.0, b.1, b.2, b.3, LANE, 1) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s8(a: *mut i8, b: int8x8x4_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v8i8.p0i8")] + fn vst4_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); + } +vst4_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4i16")] + fn vst4_lane_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i32, size: i32); + } +vst4_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s16(a: *mut i16, b: int16x4x4_t) { + 
/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x4_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8i8")] + fn vst4_lane_s8_(ptr: *mut i8, a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i32, size: i32); + } +vst4_lane_s8_(a as _, b.0, b.1, b.2, b.3, LANE, 1) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s8<const LANE: i32>(a: *mut i8, b: int8x8x4_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v8i8.p0i8")] + fn vst4_lane_s8_(a: int8x8_t, b: int8x8_t, c: int8x8_t, d: int8x8_t, n: i64, ptr: *mut i8); + } +vst4_lane_s8_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x4_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4i16")] + fn vst4_lane_s16_(ptr: *mut i8, a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i32, size: i32); + } +vst4_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s16<const LANE: i32>(a: *mut i16, b: int16x4x4_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v4i16.p0i8")] + fn vst4_lane_s16_(a: int16x4_t, b: int16x4_t, c: int16x4_t, d: int16x4_t, n: i64, ptr: *mut i8); + } +vst4_lane_s16_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x4_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v2i32")] + fn vst4_lane_s32_(ptr: *mut i8, a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, n: i32, size: i32); + } +vst4_lane_s32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_s32<const LANE: i32>(a: *mut i32, b: int32x2x4_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v2i32.p0i8")] + fn vst4_lane_s32_(a: int32x2_t, b: int32x2_t, c: int32x2_t, d: int32x2_t, n: i64, ptr: *mut i8); + } +vst4_lane_s32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x4_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v8i16")] + fn vst4q_lane_s16_(ptr: *mut i8, a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, n: i32, size: i32); + } +vst4q_lane_s16_(a as _, b.0, b.1, b.2, b.3, LANE, 2) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_s16<const LANE: i32>(a: *mut i16, b: int16x8x4_t) { + static_assert_imm3!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v8i16.p0i8")] + fn vst4q_lane_s16_(a: int16x8_t, b: int16x8_t, c: int16x8_t, d: int16x8_t, n: i64, ptr: *mut i8); + } +vst4q_lane_s16_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x4_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v4i32")] + fn vst4q_lane_s32_(ptr: *mut i8, a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, n: i32, size: i32); + } +vst4q_lane_s32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +}
 + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4q_lane_s32<const LANE: i32>(a: *mut i32, b: int32x4x4_t) { + static_assert_imm2!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v4i32.p0i8")] + fn vst4q_lane_s32_(a: int32x4_t, b: int32x4_t, c: int32x4_t, d: int32x4_t, n: i64, ptr: *mut i8); + } +vst4q_lane_s32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_lane_u8<const LANE: i32>(a: *mut u8, b: uint8x8x4_t) { + static_assert_imm3!(LANE); + transmute(vst4_lane_s8::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x4x4_t) { + static_assert_imm2!(LANE); + transmute(vst4_lane_s16::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_lane_u32<const LANE: i32>(a: *mut u32, b: uint32x2x4_t) { + static_assert_imm1!(LANE); + transmute(vst4_lane_s32::<LANE>(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4q_lane_u16<const LANE: i32>(a: *mut u16, b: uint16x8x4_t) { + static_assert_imm3!(LANE); + transmute(vst4q_lane_s16::<LANE>(transmute(a), transmute(b))) +}
since = "1.59.0"))] +pub unsafe fn vst4q_lane_u32(a: *mut u32, b: uint32x4x4_t) { + static_assert_imm2!(LANE); + transmute(vst4q_lane_s32::(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_lane_p8(a: *mut p8, b: poly8x8x4_t) { + static_assert_imm3!(LANE); + transmute(vst4_lane_s8::(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4_lane_p16(a: *mut p16, b: poly16x4x4_t) { + static_assert_imm2!(LANE); + transmute(vst4_lane_s16::(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vst4, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vst4q_lane_p16(a: *mut p16, b: poly16x8x4_t) { + static_assert_imm3!(LANE); + transmute(vst4q_lane_s16::(transmute(a), transmute(b))) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vst4lane.p0i8.v2f32")] + fn vst4_lane_f32_(ptr: *mut i8, a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, n: i32, size: i32); + } +vst4_lane_f32_(a as _, b.0, b.1, b.2, b.3, LANE, 4) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(st4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vst4_lane_f32(a: *mut f32, b: float32x2x4_t) { + static_assert_imm1!(LANE); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.st4lane.v2f32.p0i8")] + fn vst4_lane_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t, d: float32x2_t, n: i64, ptr: *mut i8); + } +vst4_lane_f32_(b.0, b.1, b.2, b.3, LANE as i64, a as _) +} + +/// Store multiple 4-element structures from four registers +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vst4, LANE = 0))] +#[rustc_legacy_const_generics(2)] +pub unsafe fn 
/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_mul(a, b) +} + +/// Polynomial multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v8i8")] + fn 
vmul_p8_(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t; + } +vmul_p8_(a, b) +} + +/// Polynomial multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulp.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmul.v16i8")] + fn vmulq_p8_(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t; + } +vmulq_p8_(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_mul(a, b) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmul.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_mul(a, b) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_n_s16(a: int16x4_t, b: i16) -> int16x4_t { + simd_mul(a, vdup_n_s16(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_n_s16(a: int16x8_t, b: i16) -> int16x8_t { + simd_mul(a, vdupq_n_s16(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_n_s32(a: int32x2_t, b: i32) -> int32x2_t { + simd_mul(a, vdup_n_s32(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_n_s32(a: int32x4_t, b: i32) -> int32x4_t { + 
simd_mul(a, vdupq_n_s32(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_n_u16(a: uint16x4_t, b: u16) -> uint16x4_t { + simd_mul(a, vdup_n_u16(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_n_u16(a: uint16x8_t, b: u16) -> uint16x8_t { + simd_mul(a, vdupq_n_u16(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_n_u32(a: uint32x2_t, b: u32) -> uint32x2_t { + simd_mul(a, vdup_n_u32(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_n_u32(a: uint32x4_t, b: u32) -> uint32x4_t { + simd_mul(a, vdupq_n_u32(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_n_f32(a: float32x2_t, b: f32) -> float32x2_t { + simd_mul(a, vdup_n_f32(b)) +} + +/// Vector multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_n_f32(a: float32x4_t, b: f32) -> float32x4_t { + simd_mul(a, vdupq_n_f32(b)) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +}
 + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t { + static_assert_imm3!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert_imm3!(LANE); + simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert_imm1!(LANE); + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t { + static_assert_imm1!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +}
 + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t { + static_assert_imm3!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(LANE); + simd_mul(a, simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +}
since = "1.59.0"))] +pub unsafe fn vmul_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert_imm1!(LANE); + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_lane_u32(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t { + static_assert_imm1!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mul, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_laneq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Floating-point multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_lane_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + static_assert_imm1!(LANE); + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Floating-point multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmul_laneq_f32(a: float32x2_t, b: float32x4_t) -> float32x2_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Floating-point multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", 
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_lane_f32(a: float32x4_t, b: float32x2_t) -> float32x4_t { + static_assert_imm1!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Floating-point multiply +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmul, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmul, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmulq_laneq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + static_assert_imm2!(LANE); + simd_mul(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Signed multiply long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v8i8")] + fn vmull_s8_(a: int8x8_t, b: int8x8_t) -> int16x8_t; + } +vmull_s8_(a, b) +} + +/// Signed multiply long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v4i16")] + fn vmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t; + } +vmull_s16_(a, b) +} + +/// Signed multiply long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmulls.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smull.v2i32")] + fn vmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t; + } +vmull_s32_(a, b) +} + +/// Unsigned multiply long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + 
#[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v8i8")] + fn vmull_u8_(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t; + } +vmull_u8_(a, b) +} + +/// Unsigned multiply long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v4i16")] + fn vmull_u16_(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t; + } +vmull_u16_(a, b) +} + +/// Unsigned multiply long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umull.v2i32")] + fn vmull_u32_(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t; + } +vmull_u32_(a, b) +} + +/// Polynomial multiply long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmull.p8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(pmull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_p8(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmullp.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.pmull.v8i8")] + fn vmull_p8_(a: poly8x8_t, b: poly8x8_t) -> poly16x8_t; + } +vmull_p8_(a, b) +} + +/// Vector long multiply with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t { + vmull_s16(a, vdup_n_s16(b)) +} + +/// Vector long multiply with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t { + vmull_s32(a, vdup_n_s32(b)) +} + +/// Vector long multiply with scalar +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_n_u16(a: uint16x4_t, b: u16) -> uint32x4_t { + vmull_u16(a, vdup_n_u16(b)) +} + +/// Vector long multiply with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_n_u32(a: uint32x2_t, b: u32) -> uint64x2_t { + vmull_u32(a, vdup_n_u32(b)) +} + +/// Vector long multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + vmull_s16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector long multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t { + static_assert_imm3!(LANE); + vmull_s16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector long multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t { + static_assert_imm1!(LANE); + vmull_s32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Vector long multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smull, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t { + static_assert_imm2!(LANE); + vmull_s32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +}
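+// NOTE (editor's illustrative sketch, not emitted by stdarch-gen): the vmull_*
+// intrinsics widen as they multiply (e.g. int16x4_t * int16x4_t -> int32x4_t),
+// so the products cannot overflow the source element type. A minimal sketch,
+// assuming a NEON-capable target:
+#[cfg(test)]
+#[allow(dead_code)]
+unsafe fn vmull_widening_sketch(a: int16x4_t) -> int32x4_t {
+    // i16::MAX * i16::MAX fits comfortably in the widened i32 lanes
+    vmull_n_s16(a, i16::MAX)
+}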
target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_lane_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + vmull_u16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector long multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_laneq_u16(a: uint16x4_t, b: uint16x8_t) -> uint32x4_t { + static_assert_imm3!(LANE); + vmull_u16(a, simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32])) +} + +/// Vector long multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_lane_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + vmull_u32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Vector long multiply by scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmull, LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umull, LANE = 1))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmull_laneq_u32(a: uint32x2_t, b: uint32x4_t) -> uint64x2_t { + static_assert_imm2!(LANE); + vmull_u32(a, simd_shuffle2!(b, b, [LANE as u32, LANE as u32])) +} + +/// Floating-point fused Multiply-Add to accumulator(vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vfma_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v2f32")] + fn vfma_f32_(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t; + } +vfma_f32_(b, c, a) +} + +/// Floating-point fused Multiply-Add to accumulator(vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vfmaq_f32(a: float32x4_t, b: 
float32x4_t, c: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.fma.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.fma.v4f32")] + fn vfmaq_f32_(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t; + } +vfmaq_f32_(b, c, a) +} + +/// Floating-point fused Multiply-Add to accumulator (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vfma_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfma_f32(a, b, vdup_n_f32_vfp4(c)) +} + +/// Floating-point fused Multiply-Add to accumulator (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfma))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmla))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vfmaq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmaq_f32(a, b, vdupq_n_f32_vfp4(c)) +} + +/// Floating-point fused multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vfms_f32(a: float32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + let b: float32x2_t = simd_neg(b); + vfma_f32(a, b, c) +} + +/// Floating-point fused multiply-subtract from accumulator +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vfmsq_f32(a: float32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + let b: float32x4_t = simd_neg(b); + vfmaq_f32(a, b, c) +} + +/// Floating-point fused Multiply-subtract to accumulator (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vfms_n_f32(a: float32x2_t, b: float32x2_t, c: f32) -> float32x2_t { + vfms_f32(a, b, vdup_n_f32_vfp4(c)) +} + +/// Floating-point fused Multiply-subtract to accumulator (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vfms))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmls))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vfmsq_n_f32(a: float32x4_t, b: float32x4_t, c: f32) -> float32x4_t { + vfmsq_f32(a, b,
vdupq_n_f32_vfp4(c)) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubq_u8(a: uint8x16_t, b: 
uint8x16_t) -> uint8x16_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.i64"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] 
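+// A minimal usage sketch (illustrative only, not part of the generated
+// output; assumes a NEON-enabled target and `use core::arch::aarch64::*;`):
+// `simd_sub` is lane-wise *wrapping* subtraction, so underflow wraps
+// around rather than saturating:
+//
+//     let r = vsubq_u64(vdupq_n_u64(1), vdupq_n_u64(3));
+//     // both lanes now hold u64::MAX - 1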
+pub unsafe fn vsubq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsub_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_sub(a, b) +} + +/// Subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vsub.f32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_sub(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vadd_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vadd_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vaddq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vaddq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vadd_p64(a: poly64x1_t, b: poly64x1_t) -> poly64x1_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] 
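+// Sketch of the polynomial-add semantics (illustrative only): addition in
+// GF(2) is carry-less, which is why these `vadd_p*` intrinsics reduce to a
+// plain lane-wise XOR (and assert no particular instruction above):
+//
+//     let r = vadd_p8(vdup_n_p8(0b1010), vdup_n_p8(0b0110));
+//     // each lane is 0b1100, i.e. 0b1010 ^ 0b0110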
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vaddq_p64(a: poly64x2_t, b: poly64x2_t) -> poly64x2_t { + simd_xor(a, b) +} + +/// Bitwise exclusive OR +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vaddq_p128(a: p128, b: p128) -> p128 { + a ^ b +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + let c: i16x8 = i16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + let c: i32x4 = i32x4::new(16, 16, 16, 16); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + let c: i64x2 = i64x2::new(32, 32); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + let c: u16x8 = u16x8::new(8, 8, 8, 8, 8, 8, 8, 8); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + let c: u32x4 = u32x4::new(16, 16, 16, 16); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch 
= "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + let c: u64x2 = u64x2::new(32, 32); + simd_cast(simd_shr(simd_sub(a, b), transmute(c))) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_high_s16(a: int8x8_t, b: int16x8_t, c: int16x8_t) -> int8x16_t { + let d: int8x8_t = vsubhn_s16(b, c); + simd_shuffle16!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_high_s32(a: int16x4_t, b: int32x4_t, c: int32x4_t) -> int16x8_t { + let d: int16x4_t = vsubhn_s32(b, c); + simd_shuffle8!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_high_s64(a: int32x2_t, b: int64x2_t, c: int64x2_t) -> int32x4_t { + let d: int32x2_t = vsubhn_s64(b, c); + simd_shuffle4!(a, d, [0, 1, 2, 3]) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_high_u16(a: uint8x8_t, b: uint16x8_t, c: uint16x8_t) -> uint8x16_t { + let d: uint8x8_t = vsubhn_u16(b, c); + simd_shuffle16!(a, d, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_high_u32(a: uint16x4_t, b: uint32x4_t, c: uint32x4_t) -> uint16x8_t { + let d: uint16x4_t = vsubhn_u32(b, c); + simd_shuffle8!(a, d, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(subhn2))] +#[cfg_attr(target_arch = "aarch64", 
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubhn_high_u64(a: uint32x2_t, b: uint64x2_t, c: uint64x2_t) -> uint32x4_t { + let d: uint32x2_t = vsubhn_u64(b, c); + simd_shuffle4!(a, d, [0, 1, 2, 3]) +} + +/// Unsigned halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsub_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i8")] + fn vhsub_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vhsub_u8_(a, b) +} + +/// Unsigned halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsubq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v16i8")] + fn vhsubq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vhsubq_u8_(a, b) +} + +/// Unsigned halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsub_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i16")] + fn vhsub_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vhsub_u16_(a, b) +} + +/// Unsigned halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsubq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v8i16")] + fn vhsubq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vhsubq_u16_(a, b) +} + +/// Unsigned halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature =
"neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsub_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v2i32")] + fn vhsub_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vhsub_u32_(a, b) +} + +/// Unsigned halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uhsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsubq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uhsub.v4i32")] + fn vhsubq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vhsubq_u32_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsub_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i8")] + fn vhsub_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vhsub_s8_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsubq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v16i8")] + fn vhsubq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vhsubq_s8_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsub_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i16")] + fn vhsub_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vhsub_s16_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch =
"arm"), assert_instr("vhsub.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsubq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v8i16")] + fn vhsubq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vhsubq_s16_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsub_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v2i32")] + fn vhsub_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vhsub_s32_(a, b) +} + +/// Signed halving subtract +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vhsub.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shsub))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vhsubq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vhsubs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.shsub.v4i32")] + fn vhsubq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vhsubq_s32_(a, b) +} + +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + simd_sub(a, simd_cast(b)) +} + +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + simd_sub(a, simd_cast(b)) +} + +/// Signed Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubw))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + simd_sub(a, simd_cast(b)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + simd_sub(a, simd_cast(b)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + simd_sub(a, simd_cast(b)) +} + +/// Unsigned Subtract Wide +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubw))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + simd_sub(a, simd_cast(b)) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + let c: int16x8_t = simd_cast(a); + let d: int16x8_t = simd_cast(b); + simd_sub(c, d) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + let c: int32x4_t = simd_cast(a); + let d: int32x4_t = simd_cast(b); + simd_sub(c, d) +} + +/// Signed Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssubl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + let c: int64x2_t = simd_cast(a); + let d: int64x2_t = simd_cast(b); + simd_sub(c, d) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + let c: uint16x8_t = simd_cast(a); + let d: uint16x8_t = simd_cast(b); + simd_sub(c, d) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = 
"arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + let c: uint32x4_t = simd_cast(a); + let d: uint32x4_t = simd_cast(b); + simd_sub(c, d) +} + +/// Unsigned Subtract Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsubl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usubl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsubl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + let c: uint64x2_t = simd_cast(a); + let d: uint64x2_t = simd_cast(b); + simd_sub(c, d) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i8")] + fn vmax_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vmax_s8_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v16i8")] + fn vmaxq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vmaxq_s8_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i16")] + fn vmax_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vmax_s16_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + 
#[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v8i16")] + fn vmaxq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vmaxq_s16_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v2i32")] + fn vmax_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vmax_s32_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smax.v4i32")] + fn vmaxq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vmaxq_s32_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i8")] + fn vmax_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vmax_u8_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v16i8")] + fn vmaxq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vmaxq_u8_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub 
unsafe fn vmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i16")] + fn vmax_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vmax_u16_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v8i16")] + fn vmaxq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vmaxq_u16_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v2i32")] + fn vmax_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vmax_u32_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umax.v4i32")] + fn vmaxq_u32_(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vmaxq_u32_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v2f32")] + fn vmax_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vmax_f32_(a, b) +} + +/// Maximum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmax))] 
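+// NaN-handling note (a summary sketch of the Arm semantics, illustrative
+// only): `vmax`/`fmax` return NaN in any lane where either input is NaN,
+// while the `vmaxnm` variants defined just after implement IEEE 754-2008
+// maxNum and prefer the numeric operand:
+//
+//     let nan = vdupq_n_f32(f32::NAN);
+//     let one = vdupq_n_f32(1.0);
+//     vmaxq_f32(nan, one);   // NaN in every lane
+//     vmaxnmq_f32(nan, one); // 1.0 in every lane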
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxs.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmax.v4f32")] + fn vmaxq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vmaxq_f32_(a, b) +} + +/// Floating-point Maximum Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v2f32")] + fn vmaxnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vmaxnm_f32_(a, b) +} + +/// Floating-point Maximum Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmaxnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxnm))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmaxnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmaxnm.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxnm.v4f32")] + fn vmaxnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vmaxnmq_f32_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i8")] + fn vmin_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vmin_s8_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v16i8")] + fn vminq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vminq_s8_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch 
= "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i16")] + fn vmin_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vmin_s16_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v8i16")] + fn vminq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vminq_s16_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v2i32")] + fn vmin_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vmin_s32_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smin.v4i32")] + fn vminq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vminq_s32_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i8")] + fn vmin_u8_(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + } +vmin_u8_(a, b) +} + +/// Minimum (vector) +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v16i8")] + fn vminq_u8_(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t; + } +vminq_u8_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i16")] + fn vmin_u16_(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + } +vmin_u16_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v8i16")] + fn vminq_u16_(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t; + } +vminq_u16_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v2i32")] + fn vmin_u32_(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + } +vmin_u32_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umin.v4i32")] + fn vminq_u32_(a: 
uint32x4_t, b: uint32x4_t) -> uint32x4_t; + } +vminq_u32_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v2f32")] + fn vmin_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vmin_f32_(a, b) +} + +/// Minimum (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmin))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vmins.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmin.v4f32")] + fn vminq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vminq_f32_(a, b) +} + +/// Floating-point Minimum Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminnm_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v2f32")] + fn vminnm_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vminnm_f32_(a, b) +} + +/// Floating-point Minimum Number (vector) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "fp-armv8,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vminnm))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminnm))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vminnmq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vminnm.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminnm.v4f32")] + fn vminnmq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vminnmq_f32_(a, b) +} + +/// Floating-point add pairwise +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(faddp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + 
+/// Floating-point add pairwise
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(faddp))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vpadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.faddp.v2f32")]
+        fn vpadd_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+vpadd_f32_(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmull_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v4i32")]
+        fn vqdmull_s16_(a: int16x4_t, b: int16x4_t) -> int32x4_t;
+    }
+vqdmull_s16_(a, b)
+}
+
+/// Signed saturating doubling multiply long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmull_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmull.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmull.v2i64")]
+        fn vqdmull_s32_(a: int32x2_t, b: int32x2_t) -> int64x2_t;
+    }
+vqdmull_s32_(a, b)
+}
+
+/// Vector saturating doubling long multiply with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmull_n_s16(a: int16x4_t, b: i16) -> int32x4_t {
+    vqdmull_s16(a, vdup_n_s16(b))
+}
+
+/// Vector saturating doubling long multiply with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmull_n_s32(a: int32x2_t, b: i32) -> int64x2_t {
+    vqdmull_s32(a, vdup_n_s32(b))
+}
+
+/// Vector saturating doubling long multiply by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 2))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmull_lane_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    let b: int16x4_t = simd_shuffle4!(b, b, [N as u32, N as u32, N as u32, N as u32]);
+    vqdmull_s16(a, b)
+}
+
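+// Usage sketch (illustrative only; values are assumed): the doubling in
+// vqdmull is what makes saturation reachable even for a widening multiply.
+//
+//     let a = vdup_n_s16(i16::MIN);
+//     let r = vqdmull_s16(a, a); // 2 * (-32768)^2 exceeds i32::MAX, so
+//                                // every lane saturates to i32::MAX
+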
+/// Vector saturating doubling long multiply by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmull, N = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmull, N = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmull_lane_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int64x2_t {
+    static_assert_imm1!(N);
+    let b: int32x2_t = simd_shuffle2!(b, b, [N as u32, N as u32]);
+    vqdmull_s32(a, b)
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmlal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_s16(b, c))
+}
+
+/// Signed saturating doubling multiply-add long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmlal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_s32(b, c))
+}
+
+/// Vector widening saturating doubling multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmlal_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t {
+    vqaddq_s32(a, vqdmull_n_s16(b, c))
+}
+
+/// Vector widening saturating doubling multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmlal_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t {
+    vqaddq_s64(a, vqdmull_n_s32(b, c))
+}
+
+/// Vector widening saturating doubling multiply accumulate with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlal, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 2))]
+#[rustc_legacy_const_generics(3)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmlal_lane_s16<const N: i32>(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+    static_assert_imm2!(N);
+    vqaddq_s32(a, vqdmull_lane_s16::<N>(b, c))
+}
+
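+// Usage sketch (illustrative only; values are assumed): vqdmlal computes
+// a + 2*b*c per lane, saturating both the multiply and the accumulate.
+//
+//     let acc = vdupq_n_s32(10);
+//     let r = vqdmlal_s16(acc, vdup_n_s16(3), vdup_n_s16(4)); // lanes = 34
+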
target_arch = "arm"), assert_instr(vqdmlal, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlal, N = 1))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmlal_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(N); + vqaddq_s64(a, vqdmull_lane_s32::(b, c)) +} + +/// Signed saturating doubling multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmlsl_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + vqsubq_s32(a, vqdmull_s16(b, c)) +} + +/// Signed saturating doubling multiply-subtract long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmlsl_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + vqsubq_s64(a, vqdmull_s32(b, c)) +} + +/// Vector widening saturating doubling multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmlsl_n_s16(a: int32x4_t, b: int16x4_t, c: i16) -> int32x4_t { + vqsubq_s32(a, vqdmull_n_s16(b, c)) +} + +/// Vector widening saturating doubling multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmlsl_n_s32(a: int64x2_t, b: int32x2_t, c: i32) -> int64x2_t { + vqsubq_s64(a, vqdmull_n_s32(b, c)) +} + +/// Vector widening saturating doubling multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 2))] +#[rustc_legacy_const_generics(3)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmlsl_lane_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t { + static_assert_imm2!(N); + vqsubq_s32(a, vqdmull_lane_s16::(b, c)) +} + +/// Vector widening saturating doubling multiply subtract with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmlsl, N = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmlsl, N = 1))] +#[rustc_legacy_const_generics(3)] 
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmlsl_lane_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t { + static_assert_imm1!(N); + vqsubq_s64(a, vqdmull_lane_s32::(b, c)) +} + +/// Signed saturating doubling multiply returning high half +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i16")] + fn vqdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqdmulh_s16_(a, b) +} + +/// Signed saturating doubling multiply returning high half +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v8i16")] + fn vqdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqdmulhq_s16_(a, b) +} + +/// Signed saturating doubling multiply returning high half +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v2i32")] + fn vqdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqdmulh_s32_(a, b) +} + +/// Signed saturating doubling multiply returning high half +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqdmulh.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqdmulh.v4i32")] + fn vqdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vqdmulhq_s32_(a, b) +} + +/// Vector saturating doubling multiply high with scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
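+// Usage sketch (illustrative only; values are assumed): vqdmulh is the
+// classic Q15 fixed-point multiply, keeping the high half of 2*a*b.
+//
+//     let a = vdup_n_s16(16384); // 0.5 in Q15
+//     let r = vqdmulh_s16(a, a); // every lane is 8192, i.e. 0.25 in Q15
+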
+/// Vector saturating doubling multiply high with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    let b: int16x4_t = vdup_n_s16(b);
+    vqdmulh_s16(a, b)
+}
+
+/// Vector saturating doubling multiply high with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    let b: int32x2_t = vdup_n_s32(b);
+    vqdmulh_s32(a, b)
+}
+
+/// Vector saturating doubling multiply high with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    let b: int16x8_t = vdupq_n_s16(b);
+    vqdmulhq_s16(a, b)
+}
+
+/// Vector saturating doubling multiply high with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    let b: int32x4_t = vdupq_n_s32(b);
+    vqdmulhq_s32(a, b)
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    vqdmulhq_s16(a, vdupq_n_s16(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(LANE);
+    vqdmulh_s16(a, vdup_n_s16(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    vqdmulhq_s32(a, vdupq_n_s32(simd_extract(b, LANE as u32)))
+}
+
+/// Vector saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqdmulh, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqdmulh, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    vqdmulh_s32(a, vdup_n_s32(simd_extract(b, LANE as u32)))
+}
+
+/// Signed saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqmovn_s16(a: int16x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v8i8")]
+        fn vqmovn_s16_(a: int16x8_t) -> int8x8_t;
+    }
+vqmovn_s16_(a)
+}
+
+/// Signed saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqmovn_s32(a: int32x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v4i16")]
+        fn vqmovn_s32_(a: int32x4_t) -> int16x4_t;
+    }
+vqmovn_s32_(a)
+}
+
+/// Signed saturating extract narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqmovn_s64(a: int64x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovns.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtn.v2i32")]
+        fn vqmovn_s64_(a: int64x2_t) -> int32x2_t;
+    }
+vqmovn_s64_(a)
+}
+
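+// Usage sketch (illustrative only; values are assumed): the saturating
+// narrows clamp to the target range instead of truncating bits.
+//
+//     let a = vdupq_n_s16(300);
+//     let r = vqmovn_s16(a); // every i8 lane saturates to 127
+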
"unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v8i8")] + fn vqmovn_u16_(a: uint16x8_t) -> uint8x8_t; + } +vqmovn_u16_(a) +} + +/// Unsigned saturating extract narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqmovn_u32(a: uint32x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v4i16")] + fn vqmovn_u32_(a: uint32x4_t) -> uint16x4_t; + } +vqmovn_u32_(a) +} + +/// Unsigned saturating extract narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqxtn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqmovn_u64(a: uint64x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqxtn.v2i32")] + fn vqmovn_u64_(a: uint64x2_t) -> uint32x2_t; + } +vqmovn_u64_(a) +} + +/// Signed saturating extract unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqmovun_s16(a: int16x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v8i8")] + fn vqmovun_s16_(a: int16x8_t) -> uint8x8_t; + } +vqmovun_s16_(a) +} + +/// Signed saturating extract unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqmovun_s32(a: int32x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v4i16")] + fn vqmovun_s32_(a: int32x4_t) -> uint16x4_t; + } +vqmovun_s32_(a) +} + +/// Signed saturating extract unsigned narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqmovun))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqxtun))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = 
"1.59.0"))] +pub unsafe fn vqmovun_s64(a: int64x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqmovnsu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqxtun.v2i32")] + fn vqmovun_s64_(a: int64x2_t) -> uint32x2_t; + } +vqmovun_s64_(a) +} + +/// Signed saturating rounding doubling multiply returning high half +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrdmulh_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v4i16")] + fn vqrdmulh_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqrdmulh_s16_(a, b) +} + +/// Signed saturating rounding doubling multiply returning high half +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrdmulhq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v8i16")] + fn vqrdmulhq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqrdmulhq_s16_(a, b) +} + +/// Signed saturating rounding doubling multiply returning high half +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrdmulh_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v2i32")] + fn vqrdmulh_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqrdmulh_s32_(a, b) +} + +/// Signed saturating rounding doubling multiply returning high half +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrdmulhq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrdmulh.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrdmulh.v4i32")] + fn vqrdmulhq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vqrdmulhq_s32_(a, b) +} + 
+/// Vector saturating rounding doubling multiply high with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulh_n_s16(a: int16x4_t, b: i16) -> int16x4_t {
+    vqrdmulh_s16(a, vdup_n_s16(b))
+}
+
+/// Vector saturating rounding doubling multiply high with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulhq_n_s16(a: int16x8_t, b: i16) -> int16x8_t {
+    vqrdmulhq_s16(a, vdupq_n_s16(b))
+}
+
+/// Vector saturating rounding doubling multiply high with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulh_n_s32(a: int32x2_t, b: i32) -> int32x2_t {
+    vqrdmulh_s32(a, vdup_n_s32(b))
+}
+
+/// Vector saturating rounding doubling multiply high with scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulhq_n_s32(a: int32x4_t, b: i32) -> int32x4_t {
+    vqrdmulhq_s32(a, vdupq_n_s32(b))
+}
+
+/// Vector rounding saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulh_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    let b: int16x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulh_s16(a, b)
+}
+
+/// Vector rounding saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulh_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
+    static_assert_imm3!(LANE);
+    let b: int16x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulh_s16(a, b)
+}
+
+/// Vector rounding saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulhq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
+    static_assert_imm2!(LANE);
+    let b: int16x8_t = simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulhq_s16(a, b)
+}
+
+/// Vector rounding saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulhq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    static_assert_imm3!(LANE);
+    let b: int16x8_t = simd_shuffle8!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulhq_s16(a, b)
+}
+
+/// Vector rounding saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulh_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    let b: int32x2_t = simd_shuffle2!(b, b, [LANE as u32, LANE as u32]);
+    vqrdmulh_s32(a, b)
+}
+
+/// Vector rounding saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulh_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
+    static_assert_imm2!(LANE);
+    let b: int32x2_t = simd_shuffle2!(b, b, [LANE as u32, LANE as u32]);
+    vqrdmulh_s32(a, b)
+}
+
+/// Vector rounding saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulhq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
+    static_assert_imm1!(LANE);
+    let b: int32x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulhq_s32(a, b)
+}
+
+/// Vector rounding saturating doubling multiply high by scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrdmulh, LANE = 1))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrdmulh, LANE = 1))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrdmulhq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    static_assert_imm2!(LANE);
+    let b: int32x4_t = simd_shuffle4!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
+    vqrdmulhq_s32(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v8i8")]
+        fn vqrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vqrshl_s8_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v16i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v16i8")]
+        fn vqrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t;
+    }
+vqrshlq_s8_(a, b)
+}
+
+/// Signed saturating rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i16")]
+        fn vqrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    }
+vqrshl_s16_(a, b)
+}
+
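+// Usage sketch (illustrative only; values are assumed): the per-lane shift
+// count is signed, and negative counts shift right with rounding.
+//
+//     let a = vdup_n_s8(5);
+//     let n = vdup_n_s8(-1);
+//     let r = vqrshl_s8(a, n); // (5 + 1) >> 1 = 3 in every lane
+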
"llvm.aarch64.neon.sqrshl.v8i16")] + fn vqrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqrshlq_s16_(a, b) +} + +/// Signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i32")] + fn vqrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqrshl_s32_(a, b) +} + +/// Signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v4i32")] + fn vqrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vqrshlq_s32_(a, b) +} + +/// Signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v1i64")] + fn vqrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vqrshl_s64_(a, b) +} + +/// Signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshifts.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshl.v2i64")] + fn vqrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vqrshlq_s64_(a, b) +} + +/// Unsigned signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = 
"1.59.0"))] +pub unsafe fn vqrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i8")] + fn vqrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vqrshl_u8_(a, b) +} + +/// Unsigned signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v16i8")] + fn vqrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } +vqrshlq_u8_(a, b) +} + +/// Unsigned signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i16")] + fn vqrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vqrshl_u16_(a, b) +} + +/// Unsigned signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v8i16")] + fn vqrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vqrshlq_u16_(a, b) +} + +/// Unsigned signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i32")] + fn vqrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vqrshl_u32_(a, b) +} + +/// Unsigned signed saturating rounding shift left +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v4i32")] + fn vqrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vqrshlq_u32_(a, b) +} + +/// Unsigned signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v1i64")] + fn vqrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vqrshl_u64_(a, b) +} + +/// Unsigned signed saturating rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqrshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshl.v2i64")] + fn vqrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vqrshlq_u64_(a, b) +} + +/// Signed saturating rounded shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v8i8")] + fn vqrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } +vqrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +} + +/// Signed saturating rounded shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v8i8")] + fn vqrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; + } +vqrshrn_n_s16_(a, N) +} + +/// Signed saturating rounded shift right narrow 
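+// Usage sketch (illustrative only; values are assumed): on ARM the constant
+// N is lowered as a vector of negated shift counts, while AArch64 takes it
+// directly; either way the result is rounded, shifted right by N, then
+// saturate-narrowed.
+//
+//     let a = vdupq_n_s16(1000);
+//     let r = vqrshrn_n_s16::<2>(a); // 1000 >> 2 = 250, which saturates
+//                                    // to 127 in every i8 lane
+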
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v4i16")]
+        fn vqrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+vqrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v4i16")]
+        fn vqrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+vqrshrn_n_s32_(a, N)
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftns.v2i32")]
+        fn vqrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+vqrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+}
+
+/// Signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrn.v2i32")]
+        fn vqrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+vqrshrn_n_s64_(a, N)
+}
+
+/// Unsigned signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v8i8")]
+        fn vqrshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+    }
+vqrshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
+}
+
+/// Unsigned signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v8i8")]
+        fn vqrshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t;
+    }
+vqrshrn_n_u16_(a, N)
+}
+
+/// Unsigned signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v4i16")]
+        fn vqrshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+vqrshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
+}
+
+/// Unsigned signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v4i16")]
+        fn vqrshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
+    }
+vqrshrn_n_u32_(a, N)
+}
+
+/// Unsigned signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnu.v2i32")]
+        fn vqrshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+vqrshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
+}
+
+/// Unsigned signed saturating rounded shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqrshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqrshrn.v2i32")]
+        fn vqrshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+vqrshrn_n_u64_(a, N)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v8i8")]
+        fn vqrshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+vqrshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v8i8")]
+        fn vqrshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+vqrshrun_n_s16_(a, N)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v4i16")]
+        fn vqrshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+vqrshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v4i16")]
+        fn vqrshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+vqrshrun_n_s32_(a, N)
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqrshiftnsu.v2i32")]
+        fn vqrshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+vqrshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+}
+
+/// Signed saturating rounded shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqrshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqrshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqrshrun.v2i32")]
+        fn vqrshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+vqrshrun_n_s64_(a, N)
+}
+
+/// Signed saturating shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i8")]
+        fn vqshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    }
+vqshl_s8_(a, b)
+}
+
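+// Usage sketch (illustrative only; values are assumed): vqshl saturates a
+// left shift that overflows, but unlike vqrshl it does not round.
+//
+//     let a = vdup_n_s8(64);
+//     let n = vdup_n_s8(2);
+//     let r = vqshl_s8(a, n); // 64 << 2 overflows i8; lanes saturate to 127
+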
"v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v16i8")] + fn vqshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vqshlq_s8_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i16")] + fn vqshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vqshl_s16_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v8i16")] + fn vqshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vqshlq_s16_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i32")] + fn vqshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vqshl_s32_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v4i32")] + fn vqshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; 
+ } +vqshlq_s32_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v1i64")] + fn vqshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vqshl_s64_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshifts.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshl.v2i64")] + fn vqshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vqshlq_s64_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i8")] + fn vqshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vqshl_u8_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v16i8")] + fn vqshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } +vqshlq_u8_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vqshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i16")] + fn vqshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vqshl_u16_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v8i16")] + fn vqshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vqshlq_u16_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i32")] + fn vqshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vqshl_u32_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v4i32")] + fn vqshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vqshlq_u32_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v1i64")] + fn vqshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vqshl_u64_(a, b) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = 
"neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshl.v2i64")] + fn vqshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vqshlq_u64_(a, b) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_n_s8(a: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + vqshl_s8(a, vdup_n_s8(N as _)) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_n_s8(a: int8x16_t) -> int8x16_t { + static_assert_imm3!(N); + vqshlq_s8(a, vdupq_n_s8(N as _)) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_n_s16(a: int16x4_t) -> int16x4_t { + static_assert_imm4!(N); + vqshl_s16(a, vdup_n_s16(N as _)) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_n_s16(a: int16x8_t) -> int16x8_t { + static_assert_imm4!(N); + vqshlq_s16(a, vdupq_n_s16(N as _)) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_n_s32(a: int32x2_t) -> int32x2_t { + static_assert_imm5!(N); + vqshl_s32(a, vdup_n_s32(N as _)) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = 
"1.59.0"))] +pub unsafe fn vqshlq_n_s32(a: int32x4_t) -> int32x4_t { + static_assert_imm5!(N); + vqshlq_s32(a, vdupq_n_s32(N as _)) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_n_s64(a: int64x1_t) -> int64x1_t { + static_assert_imm6!(N); + vqshl_s64(a, vdup_n_s64(N as _)) +} + +/// Signed saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_n_s64(a: int64x2_t) -> int64x2_t { + static_assert_imm6!(N); + vqshlq_s64(a, vdupq_n_s64(N as _)) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_n_u8(a: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + vqshl_u8(a, vdup_n_s8(N as _)) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_n_u8(a: uint8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + vqshlq_u8(a, vdupq_n_s8(N as _)) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_n_u16(a: uint16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + vqshl_u16(a, vdup_n_s16(N as _)) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_n_u16(a: uint16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + vqshlq_u16(a, vdupq_n_s16(N as _)) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_n_u32(a: uint32x2_t) -> uint32x2_t { + static_assert_imm5!(N); + vqshl_u32(a, vdup_n_s32(N as _)) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_n_u32(a: uint32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + vqshlq_u32(a, vdupq_n_s32(N as _)) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshl_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + vqshl_u64(a, vdup_n_s64(N as _)) +} + +/// Unsigned saturating shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vqshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uqshl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vqshlq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + vqshlq_u64(a, vdupq_n_s64(N as _)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")] + fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } +vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshlu_n_s8(a: int8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v8i8")] + fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t; + } +vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn 
+/// Signed saturating shift left unsigned
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_imm3!(N);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i8")]
+        fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
+    }
+vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8))
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s8<const N: i32>(a: int8x8_t) -> uint8x8_t {
+    static_assert_imm3!(N);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v8i8")]
+        fn vqshlu_n_s8_(a: int8x8_t, n: int8x8_t) -> uint8x8_t;
+    }
+vqshlu_n_s8_(a, int8x8_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8))
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_imm4!(N);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i16")]
+        fn vqshlu_n_s16_(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
+    }
+vqshlu_n_s16_(a, int16x4_t(N as i16, N as i16, N as i16, N as i16))
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s16<const N: i32>(a: int16x4_t) -> uint16x4_t {
+    static_assert_imm4!(N);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v4i16")]
+        fn vqshlu_n_s16_(a: int16x4_t, n: int16x4_t) -> uint16x4_t;
+    }
+vqshlu_n_s16_(a, int16x4_t(N as i16, N as i16, N as i16, N as i16))
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_imm5!(N);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v2i32")]
+        fn vqshlu_n_s32_(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
+    }
+vqshlu_n_s32_(a, int32x2_t(N as i32, N as i32))
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s32<const N: i32>(a: int32x2_t) -> uint32x2_t {
+    static_assert_imm5!(N);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v2i32")]
+        fn vqshlu_n_s32_(a: int32x2_t, n: int32x2_t) -> uint32x2_t;
+    }
+vqshlu_n_s32_(a, int32x2_t(N as i32, N as i32))
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_imm6!(N);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v1i64")]
+        fn vqshlu_n_s64_(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
+    }
+vqshlu_n_s64_(a, int64x1_t(N as i64))
+}
+
+/// Signed saturating shift left unsigned
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshlu, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshlu_n_s64<const N: i32>(a: int64x1_t) -> uint64x1_t {
+    static_assert_imm6!(N);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v1i64")]
+        fn vqshlu_n_s64_(a: int64x1_t, n: int64x1_t) -> uint64x1_t;
+    }
+vqshlu_n_s64_(a, int64x1_t(N as i64))
+}
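+
+// Editor's sketch, not from the upstream patch: vqshlu_n takes signed input
+// but produces an unsigned result, so negative lanes clamp to zero. Assumes
+// an aarch64 target; the test name is illustrative.
+#[cfg(all(test, target_arch = "aarch64"))]
+#[test]
+fn qshlu_n_clamps_negative_input_to_zero() {
+    unsafe {
+        // Negative input saturates to 0 in the unsigned result.
+        assert_eq!(vget_lane_u8::<0>(vqshlu_n_s8::<2>(vdup_n_s8(-5))), 0);
+        // 40 << 2 = 160 still fits in u8.
+        assert_eq!(vget_lane_u8::<0>(vqshlu_n_s8::<2>(vdup_n_s8(40))), 160);
+    }
+}
+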
+ extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v16i8")] + fn vqshluq_n_s8_(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } +vqshluq_n_s8_(a, int8x16_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshluq_n_s8(a: int8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v16i8")] + fn vqshluq_n_s8_(a: int8x16_t, n: int8x16_t) -> uint8x16_t; + } +vqshluq_n_s8_(a, int8x16_t(N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8, N as i8)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v8i16")] + fn vqshluq_n_s16_(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } +vqshluq_n_s16_(a, int16x8_t(N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshluq_n_s16(a: int16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v8i16")] + fn vqshluq_n_s16_(a: int16x8_t, n: int16x8_t) -> uint16x8_t; + } +vqshluq_n_s16_(a, int16x8_t(N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16, N as i16)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftsu.v4i32")] + fn vqshluq_n_s32_(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } +vqshluq_n_s32_(a, int32x4_t(N as i32, N as i32, N as i32, N as i32)) +} + +/// Signed saturating shift left unsigned +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshlu, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshluq_n_s32(a: int32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshlu.v4i32")] + fn vqshluq_n_s32_(a: int32x4_t, n: int32x4_t) -> uint32x4_t; + } +vqshluq_n_s32_(a, int32x4_t(N as i32, N as i32, N as i32, N as i32)) +} + +/// Signed 
+/// Signed saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v8i8")]
+        fn vqshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t;
+    }
+vqshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v8i8")]
+        fn vqshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t;
+    }
+vqshrn_n_s16_(a, N)
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v4i16")]
+        fn vqshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t;
+    }
+vqshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v4i16")]
+        fn vqshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t;
+    }
+vqshrn_n_s32_(a, N)
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftns.v2i32")]
+        fn vqshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t;
+    }
+vqshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+}
+
+/// Signed saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrn.v2i32")]
+        fn vqshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t;
+    }
+vqshrn_n_s64_(a, N)
+}
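+
+// Editor's sketch, not from the upstream patch: vqshrn_n shifts right, then
+// narrows to half-width lanes with saturation. Assumes an aarch64 target;
+// the test name is illustrative.
+#[cfg(all(test, target_arch = "aarch64"))]
+#[test]
+fn qshrn_n_narrows_with_saturation() {
+    unsafe {
+        // 1000 >> 4 = 62 fits in the narrowed i8 lane.
+        assert_eq!(vget_lane_s8::<0>(vqshrn_n_s16::<4>(vdupq_n_s16(1000))), 62);
+        // 1000 >> 1 = 500 does not, so it saturates to i8::MAX.
+        assert_eq!(vget_lane_s8::<0>(vqshrn_n_s16::<1>(vdupq_n_s16(1000))), 127);
+    }
+}
+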
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v8i8")]
+        fn vqshrn_n_u16_(a: uint16x8_t, n: uint16x8_t) -> uint8x8_t;
+    }
+vqshrn_n_u16_(a, uint16x8_t(-N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16, -N as u16))
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v8i8")]
+        fn vqshrn_n_u16_(a: uint16x8_t, n: i32) -> uint8x8_t;
+    }
+vqshrn_n_u16_(a, N)
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v4i16")]
+        fn vqshrn_n_u32_(a: uint32x4_t, n: uint32x4_t) -> uint16x4_t;
+    }
+vqshrn_n_u32_(a, uint32x4_t(-N as u32, -N as u32, -N as u32, -N as u32))
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v4i16")]
+        fn vqshrn_n_u32_(a: uint32x4_t, n: i32) -> uint16x4_t;
+    }
+vqshrn_n_u32_(a, N)
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnu.v2i32")]
+        fn vqshrn_n_u64_(a: uint64x2_t, n: uint64x2_t) -> uint32x2_t;
+    }
+vqshrn_n_u64_(a, uint64x2_t(-N as u64, -N as u64))
+}
+
+/// Unsigned saturating shift right narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(uqshrn, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uqshrn.v2i32")]
+        fn vqshrn_n_u64_(a: uint64x2_t, n: i32) -> uint32x2_t;
+    }
+vqshrn_n_u64_(a, N)
+}
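+
+// Editor's sketch, not from the upstream patch: the unsigned narrow clamps
+// to the narrowed lane's maximum. Assumes an aarch64 target; the test name
+// is illustrative.
+#[cfg(all(test, target_arch = "aarch64"))]
+#[test]
+fn qshrn_n_unsigned_narrow() {
+    unsafe {
+        // 10000 >> 4 = 625 overflows u8, so the lane saturates to u8::MAX.
+        assert_eq!(vget_lane_u8::<0>(vqshrn_n_u16::<4>(vdupq_n_u16(10000))), 255);
+    }
+}
+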
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v8i8")]
+        fn vqshrun_n_s16_(a: int16x8_t, n: int16x8_t) -> uint8x8_t;
+    }
+vqshrun_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16))
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_n_s16<const N: i32>(a: int16x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v8i8")]
+        fn vqshrun_n_s16_(a: int16x8_t, n: i32) -> uint8x8_t;
+    }
+vqshrun_n_s16_(a, N)
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v4i16")]
+        fn vqshrun_n_s32_(a: int32x4_t, n: int32x4_t) -> uint16x4_t;
+    }
+vqshrun_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32))
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_n_s32<const N: i32>(a: int32x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v4i16")]
+        fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t;
+    }
+vqshrun_n_s32_(a, N)
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "arm")]
+#[target_feature(enable = "neon,v7")]
+#[cfg_attr(test, assert_instr(vqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")]
+        fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t;
+    }
+vqshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64))
+}
+
+/// Signed saturating shift right unsigned narrow
+#[inline]
+#[cfg(target_arch = "aarch64")]
+#[target_feature(enable = "neon")]
+#[cfg_attr(test, assert_instr(sqshrun, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "neon_intrinsics", since = "1.59.0")]
+pub unsafe fn vqshrun_n_s64<const N: i32>(a: int64x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v2i32")]
+        fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t;
+    }
+vqshrun_n_s64_(a, N)
+}
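+
+// Editor's sketch, not from the upstream patch: vqshrun_n narrows a signed
+// source into unsigned lanes, so negative results clamp to zero. Assumes an
+// aarch64 target; the test name is illustrative.
+#[cfg(all(test, target_arch = "aarch64"))]
+#[test]
+fn qshrun_n_signed_to_unsigned_narrow() {
+    unsafe {
+        // Negative input clamps to 0 in the unsigned result.
+        assert_eq!(vget_lane_u8::<0>(vqshrun_n_s16::<2>(vdupq_n_s16(-200))), 0);
+        // 600 >> 2 = 150 fits in u8.
+        assert_eq!(vget_lane_u8::<0>(vqshrun_n_s16::<2>(vdupq_n_s16(600))), 150);
+    }
+}
+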
"llvm.aarch64.neon.sqshrun.v4i16")] + fn vqshrun_n_s32_(a: int32x4_t, n: i32) -> uint16x4_t; + } +vqshrun_n_s32_(a, N) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqshiftnsu.v2i32")] + fn vqshrun_n_s64_(a: int64x2_t, n: int64x2_t) -> uint32x2_t; + } +vqshrun_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +} + +/// Signed saturating shift right unsigned narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(sqshrun, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vqshrun_n_s64(a: int64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqshrun.v2i32")] + fn vqshrun_n_s64_(a: int64x2_t, n: i32) -> uint32x2_t; + } +vqshrun_n_s64_(a, N) +} + +/// Reciprocal square-root estimate. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsqrte_f32(a: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v2f32")] + fn vrsqrte_f32_(a: float32x2_t) -> float32x2_t; + } +vrsqrte_f32_(a) +} + +/// Reciprocal square-root estimate. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrte))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsqrteq_f32(a: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrte.v4f32")] + fn vrsqrteq_f32_(a: float32x4_t) -> float32x4_t; + } +vrsqrteq_f32_(a) +} + +/// Unsigned reciprocal square root estimate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursqrte))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsqrte_u32(a: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ursqrte.v2i32")] + fn vrsqrte_u32_(a: uint32x2_t) -> uint32x2_t; + } +vrsqrte_u32_(a) +} + +/// Unsigned reciprocal square root estimate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrte))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursqrte))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsqrteq_u32(a: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrte.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ursqrte.v4i32")] + fn vrsqrteq_u32_(a: uint32x4_t) -> uint32x4_t; + } +vrsqrteq_u32_(a) +} + +/// Floating-point reciprocal square root step +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrts))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsqrts_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsqrts.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.v2f32")] + fn vrsqrts_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t; + } +vrsqrts_f32_(a, b) +} + +/// Floating-point reciprocal square root step +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsqrts))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frsqrts))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsqrtsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = 
"llvm.arm.neon.vrsqrts.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frsqrts.v4f32")] + fn vrsqrtsq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t; + } +vrsqrtsq_f32_(a, b) +} + +/// Reciprocal estimate. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrecpe_f32(a: float32x2_t) -> float32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v2f32")] + fn vrecpe_f32_(a: float32x2_t) -> float32x2_t; + } +vrecpe_f32_(a) +} + +/// Reciprocal estimate. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecpe))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrecpeq_f32(a: float32x4_t) -> float32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecpe.v4f32")] + fn vrecpeq_f32_(a: float32x4_t) -> float32x4_t; + } +vrecpeq_f32_(a) +} + +/// Unsigned reciprocal estimate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urecpe))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrecpe_u32(a: uint32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urecpe.v2i32")] + fn vrecpe_u32_(a: uint32x2_t) -> uint32x2_t; + } +vrecpe_u32_(a) +} + +/// Unsigned reciprocal estimate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecpe))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urecpe))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrecpeq_u32(a: uint32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecpe.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urecpe.v4i32")] + fn vrecpeq_u32_(a: uint32x4_t) -> uint32x4_t; + } +vrecpeq_u32_(a) +} + +/// Floating-point reciprocal step +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecps))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrecps_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + 
#[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v2f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.v2f32")]
+        fn vrecps_f32_(a: float32x2_t, b: float32x2_t) -> float32x2_t;
+    }
+vrecps_f32_(a, b)
+}
+
+/// Floating-point reciprocal step
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrecps))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(frecps))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrecpsq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrecps.v4f32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.frecps.v4f32")]
+        fn vrecpsq_f32_(a: float32x4_t, b: float32x4_t) -> float32x4_t;
+    }
+vrecpsq_f32_(a, b)
+}
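+
+// Editor's sketch, not from the upstream patch: one Newton-Raphson refinement
+// of the reciprocal estimate, the intended use of the vrecpe/vrecps pair.
+// vrecps computes 2 - a*b, so e * vrecps(x, e) sharpens the estimate e of
+// 1/x. Assumes an aarch64 target; the test name is illustrative.
+#[cfg(all(test, target_arch = "aarch64"))]
+#[test]
+fn recpe_recps_refinement() {
+    unsafe {
+        let x = vdup_n_f32(4.0);
+        // Rough estimate of 1/4, then one refinement step.
+        let e = vrecpe_f32(x);
+        let e1 = vmul_f32(e, vrecps_f32(x, e));
+        let r = vget_lane_f32::<0>(e1);
+        assert!(r > 0.2499 && r < 0.2501);
+    }
+}
+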
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_u64(a: uint64x1_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_u8(a: uint8x16_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_p8(a: poly8x16_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_p16(a: poly16x8_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_u16(a: uint16x8_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_u32(a: uint32x4_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_u64(a: uint64x2_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_p8(a: poly8x8_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast 
operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_s8(a: int8x8_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_p16(a: poly16x4_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_s16(a: int16x4_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_s32(a: int32x2_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_s64(a: int64x1_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_p8(a: poly8x16_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_s8(a: int8x16_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = 
"neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_p16(a: poly16x8_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_s16(a: int16x8_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_s32(a: int32x4_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_s64(a: int64x2_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_s8(a: int8x8_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_u8(a: uint8x8_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_s16(a: int16x4_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_u16(a: uint16x4_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_s8(a: int8x16_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_u8(a: uint8x16_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_s16(a: int16x8_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_u16(a: uint16x8_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_s16(a: int16x4_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_u16(a: uint16x4_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_p16(a: poly16x4_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_s32(a: int32x2_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] 
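+// Every vreinterpret* intrinsic in this block is a pure bit-pattern cast
+// between vector types of the same size: the transmute compiles to no
+// machine code at all, which is what the assert_instr(nop) test attributes
+// verify on both arm and aarch64.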
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_u32(a: uint32x2_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_s64(a: int64x1_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_u64(a: uint64x1_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_s16(a: int16x8_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_u16(a: uint16x8_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_p16(a: poly16x8_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_s32(a: int32x4_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = 
"1.59.0"))] +pub unsafe fn vreinterpretq_s16_u32(a: uint32x4_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_s64(a: int64x2_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_u64(a: uint64x2_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_p16(a: poly16x4_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_s16(a: int16x4_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_u16(a: uint16x4_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_s32(a: int32x2_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_u32(a: uint32x2_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] 
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_s64(a: int64x1_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_u64(a: uint64x1_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_p16(a: poly16x8_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_s16(a: int16x8_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_u16(a: uint16x8_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_s32(a: int32x4_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_u32(a: uint32x4_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_s64(a: int64x2_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] 
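+// The q-suffixed variants below perform the same zero-cost cast over the
+// full 128-bit registers (e.g. uint64x2_t -> uint32x4_t) instead of the
+// 64-bit ones.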
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_u64(a: uint64x2_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_p16(a: poly16x4_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_s16(a: int16x4_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_u16(a: uint16x4_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_s32(a: int32x2_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_u32(a: uint32x2_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_p16(a: poly16x8_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = 
"1.59.0"))] +pub unsafe fn vreinterpretq_p8_s16(a: int16x8_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_u16(a: uint16x8_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_s32(a: int32x4_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_u32(a: uint32x4_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_p64(a: poly64x1_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_p64(a: poly64x1_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_p64(a: poly64x2_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_p64(a: poly64x2_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_p128(a: p128) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_p128(a: p128) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_p128(a: p128) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_p8(a: poly8x8_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_s8(a: int8x8_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_u8(a: uint8x8_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_p16(a: poly16x4_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_s16(a: int16x4_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast 
operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_u16(a: uint16x4_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_s32(a: int32x2_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_u32(a: uint32x2_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_p8(a: poly8x16_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_s8(a: int8x16_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_u8(a: uint8x16_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_p16(a: poly16x8_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = 
"neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_s16(a: int16x8_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_u16(a: uint16x8_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_s32(a: int32x4_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_u32(a: uint32x4_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_p8(a: poly8x8_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_s8(a: int8x8_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_u8(a: uint8x8_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_p16(a: poly16x4_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_s16(a: int16x4_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_u16(a: uint16x4_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_s32(a: int32x2_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_u32(a: uint32x2_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_p8(a: poly8x16_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_s8(a: int8x16_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_u8(a: uint8x16_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_p16(a: poly16x8_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation 
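+// Reinterpreting between different lane widths (such as int16x8_t as
+// uint32x4_t below) preserves only the raw bits; which narrow lanes end up
+// in which wide lane depends on the target's byte order.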
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_s16(a: int16x8_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_u16(a: uint16x8_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_s32(a: int32x4_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_u32(a: uint32x4_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_p8(a: poly8x8_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_s8(a: int8x8_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_u8(a: uint8x8_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", 
since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_p8(a: poly8x16_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_s8(a: int8x16_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_u8(a: uint8x16_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p64_s32(a: int32x2_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p64_u32(a: uint32x2_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_s32(a: int32x4_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_u32(a: uint32x4_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_s64(a: int64x2_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_u64(a: uint64x2_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_p64(a: poly64x2_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_s32(a: int32x2_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_u32(a: uint32x2_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_s64(a: int64x1_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_u64(a: uint64x1_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_s32(a: int32x4_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_u32(a: uint32x4_t) -> int8x16_t { + transmute(a) +} + +/// Vector 
reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_s64(a: int64x2_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_u64(a: uint64x2_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_s32(a: int32x2_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_u32(a: uint32x2_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_s64(a: int64x1_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_u64(a: uint64x1_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_s32(a: int32x4_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", 
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_u32(a: uint32x4_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_s64(a: int64x2_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_u64(a: uint64x2_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_s32(a: int32x2_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_u32(a: uint32x2_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_s64(a: int64x1_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_u64(a: uint64x1_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_s32(a: int32x4_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_u32(a: uint32x4_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_s64(a: int64x2_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_u64(a: uint64x2_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_p64(a: poly64x1_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_p64(a: poly64x1_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_p64(a: poly64x1_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_p64(a: poly64x2_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_p64(a: poly64x2_t) -> uint16x8_t { + 
transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_p64(a: poly64x2_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_p128(a: p128) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_p128(a: p128) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_p8(a: poly8x8_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_s8(a: int8x8_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_u8(a: uint8x8_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_p16(a: poly16x4_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] 
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_s16(a: int16x4_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_u16(a: uint16x4_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_p8(a: poly8x16_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_s8(a: int8x16_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_u8(a: uint8x16_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_p16(a: poly16x8_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_s16(a: int16x8_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_u16(a: uint16x8_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable 
= "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_p8(a: poly8x8_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_s8(a: int8x8_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_u8(a: uint8x8_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_p16(a: poly16x4_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_s16(a: int16x4_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_u16(a: uint16x4_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_p8(a: poly8x16_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_s8(a: int8x16_t) -> uint32x4_t { + transmute(a) +} + 
+/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_u8(a: uint8x16_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_p16(a: poly16x8_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_s16(a: int16x8_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_u16(a: uint16x8_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p64_p16(a: poly16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p64_s16(a: int16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p64_u16(a: uint16x4_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_p16(a: poly16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_s16(a: int16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_u16(a: uint16x8_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_s32(a: int32x4_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_u32(a: uint32x4_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_s64(a: int64x1_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_u64(a: uint64x1_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_s64(a: int64x1_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_u64(a: uint64x1_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_s64(a: int64x1_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_u64(a: uint64x1_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_s64(a: int64x2_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_u64(a: uint64x2_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_s64(a: int64x2_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_u64(a: uint64x2_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn 
vreinterpretq_p8_s64(a: int64x2_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_u64(a: uint64x2_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_p64(a: poly64x1_t) -> int8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_p64(a: poly64x1_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_p64(a: poly64x1_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_p64(a: poly64x2_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_p64(a: poly64x2_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_p64(a: poly64x2_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), 
assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_p128(a: p128) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_p128(a: p128) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_p128(a: p128) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_p8(a: poly8x8_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_s8(a: int8x8_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_u8(a: uint8x8_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_p8(a: poly8x8_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_s8(a: int8x8_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_u8(a: uint8x8_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_p8(a: poly8x16_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_s8(a: int8x16_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_u8(a: uint8x16_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_p8(a: poly8x16_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_s8(a: int8x16_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_u8(a: uint8x16_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since 
= "1.59.0"))] +pub unsafe fn vreinterpret_p64_p8(a: poly8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p64_s8(a: int8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p64_u8(a: uint8x8_t) -> poly64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_p8(a: poly8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_s8(a: int8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p64_u8(a: uint8x16_t) -> poly64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_s16(a: int16x8_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_u16(a: uint16x8_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_p16(a: poly16x8_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_s8(a: int8x16_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_u8(a: uint8x16_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_p8(a: poly8x16_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_p128(a: p128) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_p128(a: p128) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_p128(a: p128) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s8_f32(a: float32x2_t) -> int8x8_t { + 
transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s16_f32(a: float32x2_t) -> int16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s32_f32(a: float32x2_t) -> int32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_s64_f32(a: float32x2_t) -> int64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s8_f32(a: float32x4_t) -> int8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s16_f32(a: float32x4_t) -> int16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s32_f32(a: float32x4_t) -> int32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_s64_f32(a: float32x4_t) -> int64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] 
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u8_f32(a: float32x2_t) -> uint8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u16_f32(a: float32x2_t) -> uint16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u32_f32(a: float32x2_t) -> uint32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_u64_f32(a: float32x2_t) -> uint64x1_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u8_f32(a: float32x4_t) -> uint8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u16_f32(a: float32x4_t) -> uint16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u32_f32(a: float32x4_t) -> uint32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_u64_f32(a: float32x4_t) -> uint64x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p8_f32(a: float32x2_t) -> poly8x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_p16_f32(a: float32x2_t) -> poly16x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p8_f32(a: float32x4_t) -> poly8x16_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p16_f32(a: float32x4_t) -> poly16x8_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_p128_f32(a: float32x4_t) -> p128 { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_s8(a: int8x8_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_s16(a: int16x4_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_s32(a: int32x2_t) -> 
float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_s64(a: int64x1_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_s8(a: int8x16_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_s16(a: int16x8_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_s32(a: int32x4_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_s64(a: int64x2_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_u8(a: uint8x8_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_u16(a: uint16x4_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_u32(a: uint32x2_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_u64(a: uint64x1_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_u8(a: uint8x16_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_u16(a: uint16x8_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_u32(a: uint32x4_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_u64(a: uint64x2_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_p8(a: poly8x8_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpret_f32_p16(a: poly16x4_t) -> float32x2_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] 
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_p8(a: poly8x16_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_p16(a: poly16x8_t) -> float32x4_t { + transmute(a) +} + +/// Vector reinterpret cast operation +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vreinterpretq_f32_p128(a: p128) -> float32x4_t { + transmute(a) +} + +/// Signed rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshl_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i8")] + fn vrshl_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + } +vrshl_s8_(a, b) +} + +/// Signed rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshlq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v16i8")] + fn vrshlq_s8_(a: int8x16_t, b: int8x16_t) -> int8x16_t; + } +vrshlq_s8_(a, b) +} + +/// Signed rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i16")] + fn vrshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vrshl_s16_(a, b) +} +
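Reviewer note (not part of the upstream file): in the vrshl family the second operand is a per-lane signed shift count. A positive count behaves like an ordinary left shift, while a negative count performs a rounding shift right: for a right shift by n it adds 2^(n-1) before the arithmetic shift instead of truncating (srshl on AArch64, vrshl on ARM). A hypothetical sketch, assuming an AArch64 target; rounding_shift_demo is an invented name.

// Hypothetical usage sketch; not part of generated.rs.
#[cfg(target_arch = "aarch64")]
unsafe fn rounding_shift_demo() {
    use core::arch::aarch64::*;
    let a = vdup_n_s32(5);
    // Positive count: plain left shift, 5 << 1 == 10.
    let shifted_left = vrshl_s32(a, vdup_n_s32(1));
    assert_eq!(vget_lane_s32::<0>(shifted_left), 10);
    // Negative count: rounding right shift, (5 + 1) >> 1 == 3,
    // i.e. 5 / 2 = 2.5 rounds to 3 instead of truncating to 2.
    let shifted_right = vrshl_s32(a, vdup_n_s32(-1));
    assert_eq!(vget_lane_s32::<0>(shifted_right), 3);
}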
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v8i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v8i16")]
+        fn vrshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t;
+    }
+vrshlq_s16_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i32")]
+        fn vrshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    }
+vrshl_s32_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v4i32")]
+        fn vrshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t;
+    }
+vrshlq_s32_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v1i64")]
+        fn vrshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t;
+    }
+vrshl_s64_(a, b)
+}
+
+/// Signed rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshifts.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.srshl.v2i64")]
+        fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t;
+    }
+vrshlq_s64_(a, b)
+}
+
link_name = "llvm.aarch64.neon.srshl.v2i64")] + fn vrshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vrshlq_s64_(a, b) +} + +/// Unsigned rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i8")] + fn vrshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vrshl_u8_(a, b) +} + +/// Unsigned rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v16i8")] + fn vrshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } +vrshlq_u8_(a, b) +} + +/// Unsigned rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i16")] + fn vrshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vrshl_u16_(a, b) +} + +/// Unsigned rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v8i16")] + fn vrshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vrshlq_u16_(a, b) +} + +/// Unsigned rounding shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> 
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i32")]
+        fn vrshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t;
+    }
+vrshl_u32_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v4i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v4i32")]
+        fn vrshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t;
+    }
+vrshlq_u32_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v1i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v1i64")]
+        fn vrshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t;
+    }
+vrshl_u64_(a, b)
+}
+
+/// Unsigned rounding shift left
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshl))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshl))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftu.v2i64")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.urshl.v2i64")]
+        fn vrshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t;
+    }
+vrshlq_u64_(a, b)
+}
+
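Because the shift-count vector is signed and read per lane, the `vrshl`/`vrshlq` intrinsics above double as rounding right shifts when a count is negative. A hedged sketch (editor's example under the same hypothetical test gating as before):

#[cfg(all(test, target_arch = "aarch64"))]
fn vrshl_sketch() {
    unsafe {
        let a = vdup_n_s32(5);
        // Negative count => rounding right shift: (5 + 1) >> 1 = 3.
        let r = vrshl_s32(a, vdup_n_s32(-1));
        assert_eq!(vget_lane_s32::<0>(r), 3);
    }
}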
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    vrshl_s8(a, vdup_n_s8((-N) as _))
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    vrshlq_s8(a, vdupq_n_s8((-N) as _))
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    vrshl_s16(a, vdup_n_s16((-N) as _))
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    vrshlq_s16(a, vdupq_n_s16((-N) as _))
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    vrshl_s32(a, vdup_n_s32((-N) as _))
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    vrshlq_s32(a, vdupq_n_s32((-N) as _))
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshl_s64(a, vdup_n_s64((-N) as _))
+}
+
+/// Signed rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 64);
+    vrshlq_s64(a, vdupq_n_s64((-N) as _))
+}
+
+/// Unsigned rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    vrshl_u8(a, vdup_n_s8((-N) as _))
+}
+
+/// Unsigned rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t {
+    static_assert!(N : i32 where N >= 1 && N <= 8);
+    vrshlq_u8(a, vdupq_n_s8((-N) as _))
+}
+
+/// Unsigned rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    vrshl_u16(a, vdup_n_s16((-N) as _))
+}
+
+/// Unsigned rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t {
+    static_assert!(N : i32 where N >= 1 && N <= 16);
+    vrshlq_u16(a, vdupq_n_s16((-N) as _))
+}
+
+/// Unsigned rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    vrshl_u32(a, vdup_n_s32((-N) as _))
+}
+
+/// Unsigned rounding shift right
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t {
+    static_assert!(N : i32 where N >= 1 && N <= 32);
+    vrshlq_u32(a, vdupq_n_s32((-N) as _))
+}
+
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshr_n_u64(a: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshl_u64(a, vdup_n_s64((-N) as _)) +} + +/// Unsigned rounding shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshr, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(urshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshrq_n_u64(a: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + vrshlq_u64(a, vdupq_n_s64((-N) as _)) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v8i8")] + fn vrshrn_n_s16_(a: int16x8_t, n: int16x8_t) -> int8x8_t; + } +vrshrn_n_s16_(a, int16x8_t(-N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16, -N as i16)) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vrshrn_n_s16(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v8i8")] + fn vrshrn_n_s16_(a: int16x8_t, n: i32) -> int8x8_t; + } +vrshrn_n_s16_(a, N) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "arm")] +#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v4i16")] + fn vrshrn_n_s32_(a: int32x4_t, n: int32x4_t) -> int16x4_t; + } +vrshrn_n_s32_(a, int32x4_t(-N as i32, -N as i32, -N as i32, -N as i32)) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vrshrn_n_s32(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v4i16")] + fn vrshrn_n_s32_(a: int32x4_t, n: i32) -> int16x4_t; + } +vrshrn_n_s32_(a, N) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "arm")] 
+#[target_feature(enable = "neon,v7")] +#[cfg_attr(test, assert_instr(vrshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +pub unsafe fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrshiftn.v2i32")] + fn vrshrn_n_s64_(a: int64x2_t, n: int64x2_t) -> int32x2_t; + } +vrshrn_n_s64_(a, int64x2_t(-N as i64, -N as i64)) +} + +/// Rounding shift right narrow +#[inline] +#[cfg(target_arch = "aarch64")] +#[target_feature(enable = "neon")] +#[cfg_attr(test, assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[stable(feature = "neon_intrinsics", since = "1.59.0")] +pub unsafe fn vrshrn_n_s64(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rshrn.v2i32")] + fn vrshrn_n_s64_(a: int64x2_t, n: i32) -> int32x2_t; + } +vrshrn_n_s64_(a, N) +} + +/// Rounding shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshrn_n_u16(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + transmute(vrshrn_n_s16::(transmute(a))) +} + +/// Rounding shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshrn_n_u32(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + transmute(vrshrn_n_s32::(transmute(a))) +} + +/// Rounding shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrshrn, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rshrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrshrn_n_u64(a: uint64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + transmute(vrshrn_n_s64::(transmute(a))) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsra_n_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshr_n_s8::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsraq_n_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshrq_n_s8::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsra_n_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshr_n_s16::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsraq_n_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshrq_n_s16::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsra_n_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vrshr_n_s32::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsraq_n_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vrshrq_n_s32::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsra_n_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshr_n_s64::(b)) +} + +/// Signed rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(srsra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsraq_n_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshrq_n_s64::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsra_n_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshr_n_u8::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsraq_n_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vrshrq_n_u8::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsra_n_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshr_n_u16::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsraq_n_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vrshrq_n_u16::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsra_n_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vrshr_n_u32::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsraq_n_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vrshrq_n_u32::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsra_n_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshr_n_u64::(b)) +} + +/// Unsigned rounding shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ursra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsraq_n_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vrshrq_n_u64::(b)) +} + +/// Rounding subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rsubhn.v8i8")] + fn vrsubhn_s16_(a: int16x8_t, b: int16x8_t) -> int8x8_t; + } +vrsubhn_s16_(a, b) +} + +/// Rounding subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rsubhn.v4i16")] + fn vrsubhn_s32_(a: int32x4_t, b: int32x4_t) -> int16x4_t; + } +vrsubhn_s32_(a, b) +} + +/// Rounding subtract returning high narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), 
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrsubhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v8i8")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rsubhn.v8i8")]
+        fn vrsubhn_s16_(a: int16x8_t, b: int16x8_t) -> int8x8_t;
+    }
+vrsubhn_s16_(a, b)
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrsubhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v4i16")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rsubhn.v4i16")]
+        fn vrsubhn_s32_(a: int32x4_t, b: int32x4_t) -> int16x4_t;
+    }
+vrsubhn_s32_(a, b)
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrsubhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t {
+    #[allow(improper_ctypes)]
+    extern "unadjusted" {
+        #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vrsubhn.v2i32")]
+        #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.rsubhn.v2i32")]
+        fn vrsubhn_s64_(a: int64x2_t, b: int64x2_t) -> int32x2_t;
+    }
+vrsubhn_s64_(a, b)
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrsubhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t {
+    transmute(vrsubhn_s16(transmute(a), transmute(b)))
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrsubhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t {
+    transmute(vrsubhn_s32(transmute(a), transmute(b)))
+}
+
+/// Rounding subtract returning high narrow
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vrsubhn))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rsubhn))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vrsubhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t {
+    transmute(vrsubhn_s64(transmute(a), transmute(b)))
+}
+
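`vrsubhn_*` subtracts, adds the rounding constant 2^(esize/2 - 1), and keeps the high half of each lane. A sketch (editor's example; assumes the usual RSUBHN rounding semantics, name and gating hypothetical):

#[cfg(all(test, target_arch = "aarch64"))]
fn vrsubhn_sketch() {
    unsafe {
        let a = vdupq_n_s16(0x1234);
        let b = vdupq_n_s16(0x0034);
        // ((0x1234 - 0x0034) + 0x80) >> 8 = 0x12, kept as the narrow half.
        let r: int8x8_t = vrsubhn_s16(a, b);
        assert_eq!(vget_lane_s8::<0>(r), 0x12);
    }
}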
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_s8<const LANE: i32>(a: i8, b: int8x8_t) -> int8x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_s16<const LANE: i32>(a: i16, b: int16x4_t) -> int16x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_s32<const LANE: i32>(a: i32, b: int32x2_t) -> int32x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_s64<const LANE: i32>(a: i64, b: int64x1_t) -> int64x1_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_u8<const LANE: i32>(a: u8, b: uint8x8_t) -> uint8x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_u16<const LANE: i32>(a: u16, b: uint16x4_t) -> uint16x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_u32<const LANE: i32>(a: u32, b: uint32x2_t) -> uint32x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_u64<const LANE: i32>(a: u64, b: uint64x1_t) -> uint64x1_t {
+    static_assert!(LANE : i32 where LANE == 0);
+    simd_insert(b, LANE as u32, a)
+}
+
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vset_lane_p8(a: p8, b: poly8x8_t) -> poly8x8_t { + static_assert_imm3!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vset_lane_p16(a: p16, b: poly16x4_t) -> poly16x4_t { + static_assert_imm2!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vset_lane_p64(a: p64, b: poly64x1_t) -> poly64x1_t { + static_assert!(LANE : i32 where LANE == 0); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_s8(a: i8, b: int8x16_t) -> int8x16_t { + static_assert_imm4!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_s16(a: i16, b: int16x8_t) -> int16x8_t { + static_assert_imm3!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_s32(a: i32, b: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", 
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_s64(a: i64, b: int64x2_t) -> int64x2_t { + static_assert_imm1!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_u8(a: u8, b: uint8x16_t) -> uint8x16_t { + static_assert_imm4!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_u16(a: u16, b: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_u32(a: u32, b: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_u64(a: u64, b: uint64x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsetq_lane_p8(a: p8, b: poly8x16_t) -> poly8x16_t { + static_assert_imm4!(LANE); + simd_insert(b, LANE as u32, a) +} + +/// Insert vector element from another vector element +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub 
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vsetq_lane_p16<const LANE: i32>(a: p16, b: poly16x8_t) -> poly16x8_t {
+    static_assert_imm3!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon,aes")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "aes,v8"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vsetq_lane_p64<const LANE: i32>(a: p64, b: poly64x2_t) -> poly64x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vset_lane_f32<const LANE: i32>(a: f32, b: float32x2_t) -> float32x2_t {
+    static_assert_imm1!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
+/// Insert vector element from another vector element
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop, LANE = 0))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop, LANE = 0))]
+#[rustc_legacy_const_generics(2)]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vsetq_lane_f32<const LANE: i32>(a: f32, b: float32x4_t) -> float32x4_t {
+    static_assert_imm2!(LANE);
+    simd_insert(b, LANE as u32, a)
+}
+
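`vset_lane`/`vsetq_lane` write one lane selected by the const generic and leave the rest intact; the `static_assert_imm*` macros bound `LANE` at compile time. A sketch (editor's example; name and gating hypothetical):

#[cfg(all(test, target_arch = "aarch64"))]
fn vset_lane_sketch() {
    unsafe {
        let v = vdup_n_u16(0);
        // Write 42 into lane 3; the other lanes are preserved.
        let r = vset_lane_u16::<3>(42, v);
        assert_eq!(vget_lane_u16::<3>(r), 42);
        assert_eq!(vget_lane_u16::<0>(r), 0);
    }
}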
target_arch = "aarch64"), assert_instr(sshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i16")] + fn vshl_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + } +vshl_s16_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v8i16")] + fn vshlq_s16_(a: int16x8_t, b: int16x8_t) -> int16x8_t; + } +vshlq_s16_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i32")] + fn vshl_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + } +vshl_s32_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v4i32")] + fn vshlq_s32_(a: int32x4_t, b: int32x4_t) -> int32x4_t; + } +vshlq_s32_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v1i64")] + fn vshl_s64_(a: int64x1_t, b: int64x1_t) -> int64x1_t; + } +vshl_s64_(a, b) +} + +/// Signed Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshifts.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sshl.v2i64")] + fn vshlq_s64_(a: int64x2_t, b: int64x2_t) -> int64x2_t; + } +vshlq_s64_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_u8(a: uint8x8_t, b: int8x8_t) -> uint8x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i8")] + fn vshl_u8_(a: uint8x8_t, b: int8x8_t) -> uint8x8_t; + } +vshl_u8_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_u8(a: uint8x16_t, b: int8x16_t) -> uint8x16_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v16i8")] + fn vshlq_u8_(a: uint8x16_t, b: int8x16_t) -> uint8x16_t; + } +vshlq_u8_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_u16(a: uint16x4_t, b: int16x4_t) -> uint16x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i16")] + fn vshl_u16_(a: uint16x4_t, b: int16x4_t) -> uint16x4_t; + } +vshl_u16_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_u16(a: uint16x8_t, b: int16x8_t) -> uint16x8_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v8i16")] + fn vshlq_u16_(a: uint16x8_t, b: int16x8_t) -> uint16x8_t; + } +vshlq_u16_(a, b) +} + +/// Unsigned Shift left +#[inline] 
+#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_u32(a: uint32x2_t, b: int32x2_t) -> uint32x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i32")] + fn vshl_u32_(a: uint32x2_t, b: int32x2_t) -> uint32x2_t; + } +vshl_u32_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_u32(a: uint32x4_t, b: int32x4_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v4i32")] + fn vshlq_u32_(a: uint32x4_t, b: int32x4_t) -> uint32x4_t; + } +vshlq_u32_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_u64(a: uint64x1_t, b: int64x1_t) -> uint64x1_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v1i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v1i64")] + fn vshl_u64_(a: uint64x1_t, b: int64x1_t) -> uint64x1_t; + } +vshl_u64_(a, b) +} + +/// Unsigned Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushl))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_u64(a: uint64x2_t, b: int64x2_t) -> uint64x2_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vshiftu.v2i64")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.ushl.v2i64")] + fn vshlq_u64_(a: uint64x2_t, b: int64x2_t) -> uint64x2_t; + } +vshlq_u64_(a, b) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t { + static_assert_imm3!(N); + simd_shl(a, vdup_n_s8(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
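+// Usage sketch (illustrative): the `_n_` immediate shifts take the shift
+// amount as a const generic, validated at compile time by the
+// `static_assert_imm*!` macros and passed via turbofish:
+//
+//     let x = vdup_n_s8(3);
+//     let r = vshl_n_s8::<2>(x);   // each lane: 3 << 2 == 12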
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t { + static_assert_imm3!(N); + simd_shl(a, vdupq_n_s8(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t { + static_assert_imm4!(N); + simd_shl(a, vdup_n_s16(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t { + static_assert_imm4!(N); + simd_shl(a, vdupq_n_s16(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t { + static_assert_imm5!(N); + simd_shl(a, vdup_n_s32(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t { + static_assert_imm5!(N); + simd_shl(a, vdupq_n_s32(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(N); + simd_shl(a, vdup_n_u8(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t { + static_assert_imm3!(N); + simd_shl(a, vdupq_n_u8(N as _)) +} + +/// Shift left
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t { + static_assert_imm4!(N); + simd_shl(a, vdup_n_u16(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t { + static_assert_imm4!(N); + simd_shl(a, vdupq_n_u16(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t { + static_assert_imm5!(N); + simd_shl(a, vdup_n_u32(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t { + static_assert_imm5!(N); + simd_shl(a, vdupq_n_u32(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshl_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t { + static_assert_imm6!(N); + simd_shl(a, vdup_n_s64(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t { + static_assert_imm6!(N); + simd_shl(a, vdupq_n_s64(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn
vshl_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t { + static_assert_imm6!(N); + simd_shl(a, vdup_n_u64(N as _)) +} + +/// Shift left +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vshl, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shl, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshlq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t { + static_assert_imm6!(N); + simd_shl(a, vdupq_n_u64(N as _)) +} + +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshll_n_s8<const N: i32>(a: int8x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 0 && N <= 8); + simd_shl(simd_cast(a), vdupq_n_s16(N as _)) +} + +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshll_n_s16<const N: i32>(a: int16x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 0 && N <= 16); + simd_shl(simd_cast(a), vdupq_n_s32(N as _)) +} + +/// Signed shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshll, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshll_n_s32<const N: i32>(a: int32x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 0 && N <= 32); + simd_shl(simd_cast(a), vdupq_n_s64(N as _)) +} + +/// Unsigned shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshll_n_u8<const N: i32>(a: uint8x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 0 && N <= 8); + simd_shl(simd_cast(a), vdupq_n_u16(N as _)) +} + +/// Unsigned shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshll_n_u16<const N: i32>(a: uint16x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 0 && N <= 16); + simd_shl(simd_cast(a), vdupq_n_u32(N as _)) +} + +/// Unsigned
shift left long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshll.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushll, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshll_n_u32<const N: i32>(a: uint32x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 0 && N <= 32); + simd_shl(simd_cast(a), vdupq_n_u64(N as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshr_n_s8<const N: i32>(a: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + let n: i32 = if N == 8 { 7 } else { N }; + simd_shr(a, vdup_n_s8(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrq_n_s8<const N: i32>(a: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + let n: i32 = if N == 8 { 7 } else { N }; + simd_shr(a, vdupq_n_s8(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshr_n_s16<const N: i32>(a: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + let n: i32 = if N == 16 { 15 } else { N }; + simd_shr(a, vdup_n_s16(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrq_n_s16<const N: i32>(a: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + let n: i32 = if N == 16 { 15 } else { N }; + simd_shr(a, vdupq_n_s16(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshr_n_s32<const N: i32>(a: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + let n: i32 = if N == 32 { 31 } else { N }; + simd_shr(a, vdup_n_s32(n
as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrq_n_s32<const N: i32>(a: int32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + let n: i32 = if N == 32 { 31 } else { N }; + simd_shr(a, vdupq_n_s32(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshr_n_s64<const N: i32>(a: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + let n: i32 = if N == 64 { 63 } else { N }; + simd_shr(a, vdup_n_s64(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.s64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sshr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrq_n_s64<const N: i32>(a: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + let n: i32 = if N == 64 { 63 } else { N }; + simd_shr(a, vdupq_n_s64(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshr_n_u8<const N: i32>(a: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + let n: i32 = if N == 8 { return vdup_n_u8(0); } else { N }; + simd_shr(a, vdup_n_u8(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u8", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrq_n_u8<const N: i32>(a: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + let n: i32 = if N == 8 { return vdupq_n_u8(0); } else { N }; + simd_shr(a, vdupq_n_u8(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshr_n_u16<const N: i32>(a: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N
>= 1 && N <= 16); + let n: i32 = if N == 16 { return vdup_n_u16(0); } else { N }; + simd_shr(a, vdup_n_u16(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrq_n_u16<const N: i32>(a: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + let n: i32 = if N == 16 { return vdupq_n_u16(0); } else { N }; + simd_shr(a, vdupq_n_u16(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshr_n_u32<const N: i32>(a: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + let n: i32 = if N == 32 { return vdup_n_u32(0); } else { N }; + simd_shr(a, vdup_n_u32(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrq_n_u32<const N: i32>(a: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + let n: i32 = if N == 32 { return vdupq_n_u32(0); } else { N }; + simd_shr(a, vdupq_n_u32(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshr_n_u64<const N: i32>(a: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + let n: i32 = if N == 64 { return vdup_n_u64(0); } else { N }; + simd_shr(a, vdup_n_u64(n as _)) +} + +/// Shift right +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshr.u64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ushr, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrq_n_u64<const N: i32>(a: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + let n: i32 = if N == 64 { return vdupq_n_u64(0); } else { N }; + simd_shr(a, vdupq_n_u64(n as _)) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))]
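+// Behavioural note: in the `vshr_n_*` bodies above, N may equal the lane
+// width; signed lanes then clamp to a shift by width - 1 (filling with the
+// sign bit), while unsigned lanes return zero. The narrowing shifts below
+// shift right and then truncate each lane to half width, e.g. (illustrative):
+//
+//     let x = vdupq_n_s16(0x0234);
+//     let r = vshrn_n_s16::<8>(x); // each i8 lane: (0x0234 >> 8) as i8 == 2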
+#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrn_n_s16<const N: i32>(a: int16x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_cast(simd_shr(a, vdupq_n_s16(N as _))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrn_n_s32<const N: i32>(a: int32x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_cast(simd_shr(a, vdupq_n_s32(N as _))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrn_n_s64<const N: i32>(a: int64x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_cast(simd_shr(a, vdupq_n_s64(N as _))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i16", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrn_n_u16<const N: i32>(a: uint16x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_cast(simd_shr(a, vdupq_n_u16(N as _))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i32", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrn_n_u32<const N: i32>(a: uint32x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_cast(simd_shr(a, vdupq_n_u32(N as _))) +} + +/// Shift right narrow +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vshrn.i64", N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(shrn, N = 2))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vshrn_n_u64<const N: i32>(a: uint64x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_cast(simd_shr(a, vdupq_n_u64(N as _))) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64",
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsra_n_s8<const N: i32>(a: int8x8_t, b: int8x8_t) -> int8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vshr_n_s8::<N>(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsraq_n_s8<const N: i32>(a: int8x16_t, b: int8x16_t) -> int8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vshrq_n_s8::<N>(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsra_n_s16<const N: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vshr_n_s16::<N>(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsraq_n_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vshrq_n_s16::<N>(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsra_n_s32<const N: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vshr_n_s32::<N>(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsraq_n_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vshrq_n_s32::<N>(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vsra_n_s64<const N: i32>(a: int64x1_t, b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vshr_n_s64::<N>(b)) +} + +/// Signed shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ssra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsraq_n_s64<const N: i32>(a: int64x2_t, b: int64x2_t) -> int64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vshrq_n_s64::<N>(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsra_n_u8<const N: i32>(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vshr_n_u8::<N>(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsraq_n_u8<const N: i32>(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + static_assert!(N : i32 where N >= 1 && N <= 8); + simd_add(a, vshrq_n_u8::<N>(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsra_n_u16<const N: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vshr_n_u16::<N>(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsraq_n_u16<const N: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + static_assert!(N : i32 where N >= 1 && N <= 16); + simd_add(a, vshrq_n_u16::<N>(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsra_n_u32<const N: i32>(a:
uint32x2_t, b: uint32x2_t) -> uint32x2_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vshr_n_u32::<N>(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsraq_n_u32<const N: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + static_assert!(N : i32 where N >= 1 && N <= 32); + simd_add(a, vshrq_n_u32::<N>(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsra_n_u64<const N: i32>(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vshr_n_u64::<N>(b)) +} + +/// Unsigned shift right and accumulate +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vsra, N = 2))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usra, N = 2))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vsraq_n_u64<const N: i32>(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + static_assert!(N : i32 where N >= 1 && N <= 64); + simd_add(a, vshrq_n_u64::<N>(b)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + let a1: int8x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int8x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + let a1: int16x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: int16x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + let a1: int8x16_t = simd_shuffle16!(a, b, [0, 16,
2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]); + let b1: int8x16_t = simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a1: int16x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: int16x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a1: int32x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: int32x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a1: uint8x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint8x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a1: uint16x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: uint16x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a1: uint8x16_t = simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]); + let b1: uint8x16_t = simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = 
"neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a1: uint16x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: uint16x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a1: uint32x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: uint32x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a1: poly8x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly8x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a1: poly16x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: poly16x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a1: poly8x16_t = simd_shuffle16!(a, b, [0, 16, 2, 18, 4, 20, 6, 22, 8, 24, 10, 26, 12, 28, 14, 30]); + let b1: poly8x16_t = simd_shuffle16!(a, b, [1, 17, 3, 19, 5, 21, 7, 23, 9, 25, 11, 27, 13, 29, 15, 31]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a1: poly16x8_t = simd_shuffle8!(a, b, [0, 8, 2, 10, 4, 12, 6, 14]); + let b1: poly16x8_t = simd_shuffle8!(a, b, [1, 9, 3, 11, 5, 13, 7, 15]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] 
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a1: int32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b1: int32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a1: uint32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b1: uint32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrn_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + let a1: float32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b1: float32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a1, b1)) +} + +/// Transpose elements +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(trn))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vtrnq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + let a1: float32x4_t = simd_shuffle4!(a, b, [0, 4, 2, 6]); + let b1: float32x4_t = simd_shuffle4!(a, b, [1, 5, 3, 7]); + transmute((a1, b1)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + let a0: int8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + let a0: int16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: int16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", 
stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a0: uint8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a0: uint16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: uint16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a0: poly8x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly8x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vzip))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a0: poly16x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: poly16x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a0: int32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a0: uint32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_s8(a: int8x16_t, b: 
int8x16_t) -> int8x16x2_t { + let a0: int8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: int8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a0: int16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: int16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a0: int32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: int32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a0: uint8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: uint8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a0: uint16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: uint16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a0: uint32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: uint32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] 
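+// Usage sketch (illustrative): the q-form zips interleave the low halves of
+// `a` and `b` into the first result and the high halves into the second,
+// e.g. for `vzipq_u16`:
+//
+//     a == [a0, ..., a7], b == [b0, ..., b7]
+//     r.0 == [a0, b0, a1, b1, a2, b2, a3, b3]
+//     r.1 == [a4, b4, a5, b5, a6, b6, a7, b7]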
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a0: poly8x16_t = simd_shuffle16!(a, b, [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23]); + let b0: poly8x16_t = simd_shuffle16!(a, b, [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a0: poly16x8_t = simd_shuffle8!(a, b, [0, 8, 1, 9, 2, 10, 3, 11]); + let b0: poly16x8_t = simd_shuffle8!(a, b, [4, 12, 5, 13, 6, 14, 7, 15]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzip_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + let a0: float32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Zip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorr))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vzipq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + let a0: float32x4_t = simd_shuffle4!(a, b, [0, 4, 1, 5]); + let b0: float32x4_t = simd_shuffle4!(a, b, [2, 6, 3, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_s8(a: int8x8_t, b: int8x8_t) -> int8x8x2_t { + let a0: int8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int8x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_s16(a: int16x4_t, b: int16x4_t) -> int16x4x2_t { + let a0: int16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: int16x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_s8(a: int8x16_t, b: int8x16_t) -> int8x16x2_t { + let a0: int8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); + let b0: int8x16_t = simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_s16(a: int16x8_t, b: int16x8_t) -> int16x8x2_t { + let a0: int16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: int16x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_s32(a: int32x4_t, b: int32x4_t) -> int32x4x2_t { + let a0: int32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: int32x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8x2_t { + let a0: uint8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint8x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4x2_t { + let a0: uint16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: uint16x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16x2_t { + let a0: uint8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); + let b0: uint8x16_t = simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8x2_t { + let a0: uint16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: uint16x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4x2_t { + let a0: uint32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: uint32x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_p8(a: poly8x8_t, b: poly8x8_t) -> poly8x8x2_t { + let a0: poly8x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly8x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_p16(a: poly16x4_t, b: poly16x4_t) -> poly16x4x2_t { + let a0: poly16x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: poly16x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_p8(a: poly8x16_t, b: poly8x16_t) -> poly8x16x2_t { + let a0: poly8x16_t = simd_shuffle16!(a, b, [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]); + let b0: poly8x16_t = simd_shuffle16!(a, b, [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_p16(a: poly16x8_t, b: poly16x8_t) -> poly16x8x2_t { + let a0: poly16x8_t = simd_shuffle8!(a, b, [0, 2, 4, 6, 8, 10, 12, 14]); + let b0: poly16x8_t = simd_shuffle8!(a, b, [1, 3, 5, 7, 9, 11, 13, 15]); + transmute((a0, b0)) +} + +/// Unzip vectors 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_s32(a: int32x2_t, b: int32x2_t) -> int32x2x2_t { + let a0: int32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: int32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2x2_t { + let a0: uint32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: uint32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vtrn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(zip))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzp_f32(a: float32x2_t, b: float32x2_t) -> float32x2x2_t { + let a0: float32x2_t = simd_shuffle2!(a, b, [0, 2]); + let b0: float32x2_t = simd_shuffle2!(a, b, [1, 3]); + transmute((a0, b0)) +} + +/// Unzip vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vuzp))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uzp))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vuzpq_f32(a: float32x4_t, b: float32x4_t) -> float32x4x2_t { + let a0: float32x4_t = simd_shuffle4!(a, b, [0, 2, 4, 6]); + let b0: float32x4_t = simd_shuffle4!(a, b, [1, 3, 5, 7]); + transmute((a0, b0)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabal_u8(a: uint16x8_t, b: uint8x8_t, c: uint8x8_t) -> uint16x8_t { + let d: uint8x8_t = vabd_u8(b, c); + simd_add(a, simd_cast(d)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))] +#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] +pub unsafe fn vabal_u16(a: uint32x4_t, b: uint16x4_t, c: uint16x4_t) -> uint32x4_t { + let d: uint16x4_t = vabd_u16(b, c); + simd_add(a, simd_cast(d)) +} + +/// Unsigned Absolute difference and Accumulate Long +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr("vabal.u32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uabal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vabal_u32(a: uint64x2_t, b: uint32x2_t, c: uint32x2_t) -> uint64x2_t {
+ let d: uint32x2_t = vabd_u32(b, c);
+ simd_add(a, simd_cast(d))
+}
+
+/// Signed Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vabal_s8(a: int16x8_t, b: int8x8_t, c: int8x8_t) -> int16x8_t {
+ let d: int8x8_t = vabd_s8(b, c);
+ let e: uint8x8_t = simd_cast(d);
+ simd_add(a, simd_cast(e))
+}
+
+/// Signed Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vabal_s16(a: int32x4_t, b: int16x4_t, c: int16x4_t) -> int32x4_t {
+ let d: int16x4_t = vabd_s16(b, c);
+ let e: uint16x4_t = simd_cast(d);
+ simd_add(a, simd_cast(e))
+}
+
+/// Signed Absolute difference and Accumulate Long
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vabal.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sabal))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vabal_s32(a: int64x2_t, b: int32x2_t, c: int32x2_t) -> int64x2_t {
+ let d: int32x2_t = vabd_s32(b, c);
+ let e: uint32x2_t = simd_cast(d);
+ simd_add(a, simd_cast(e))
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqabs_s8(a: int8x8_t) -> int8x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i8")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i8")]
+ fn vqabs_s8_(a: int8x8_t) -> int8x8_t;
+ }
+ vqabs_s8_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s8"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqabsq_s8(a: int8x16_t) -> int8x16_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v16i8")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v16i8")]
+ fn vqabsq_s8_(a: int8x16_t) -> int8x16_t;
+ }
+ vqabsq_s8_(a)
+}
+
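+// Editor's sketch, not emitted by stdarch-gen: a hedged illustration of the
+// saturating behaviour documented above. A plain two's-complement abs maps the
+// most negative lane back to itself, whereas vqabs_s8 clamps it to i8::MAX.
+// The function name is ours; vdup_n_s8 and vget_lane_s8 are assumed to be in
+// scope from the surrounding module.
+#[cfg(all(test, target_arch = "aarch64"))]
+#[allow(dead_code)]
+unsafe fn example_vqabs_s8_saturates() {
+ // every lane holds -128; a wrapping abs would leave it at -128
+ let a = vdup_n_s8(i8::MIN);
+ // the saturating form clamps each lane to 127 instead
+ let r = vqabs_s8(a);
+ assert_eq!(vget_lane_s8::<0>(r), i8::MAX);
+}
+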
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqabs_s16(a: int16x4_t) -> int16x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i16")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i16")]
+ fn vqabs_s16_(a: int16x4_t) -> int16x4_t;
+ }
+ vqabs_s16_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s16"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqabsq_s16(a: int16x8_t) -> int16x8_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v8i16")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v8i16")]
+ fn vqabsq_s16_(a: int16x8_t) -> int16x8_t;
+ }
+ vqabsq_s16_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqabs_s32(a: int32x2_t) -> int32x2_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v2i32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v2i32")]
+ fn vqabs_s32_(a: int32x2_t) -> int32x2_t;
+ }
+ vqabs_s32_(a)
+}
+
+/// Signed saturating Absolute value
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vqabs.s32"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sqabs))]
+#[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+pub unsafe fn vqabsq_s32(a: int32x4_t) -> int32x4_t {
+ #[allow(improper_ctypes)]
+ extern "unadjusted" {
+ #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vqabs.v4i32")]
+ #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sqabs.v4i32")]
+ fn vqabsq_s32_(a: int32x4_t) -> int32x4_t;
+ }
+ vqabsq_s32_(a)
+}
+
+#[cfg(test)]
+#[allow(overflowing_literals)]
+mod test {
+ use super::*;
+ use crate::core_arch::simd::*;
+ use std::mem::transmute;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vand_s8() {
+ let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+ let b: i8x8 = i8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F);
+ let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+ let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b)));
+ assert_eq!(r, e);
+
+ let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07);
+ let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i8x8 = transmute(vand_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: i8x16 = i8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i8x16 = transmute(vandq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let r: i16x4 = transmute(vand_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: i16x8 = transmute(vandq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x0F, 0x0F); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x00); + let r: i32x2 = transmute(vand_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let r: i32x4 = transmute(vandq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vand_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u8x8 = transmute(vand_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: u8x16 = u8x16::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x00); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u8x16 = transmute(vandq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x0F, 0x0F, 0x0F, 0x0F); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let r: u16x4 = transmute(vand_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F, 0x0F); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let r: u16x8 = transmute(vandq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x0F, 0x0F); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x00); + let r: u32x2 = transmute(vand_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x0F, 0x0F, 0x0F, 
0x0F); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let r: u32x4 = transmute(vandq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x0F); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vand_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x0F, 0x0F); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x00); + let r: i64x2 = transmute(vandq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x0F); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vand_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x0F, 0x0F); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x00); + let r: u64x2 = transmute(vandq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(vorr_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: i8x16 = transmute(vorrq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(vorr_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vorrq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(vorrq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(vorr_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(vorrq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(vorr_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(vorrq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(vorr_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(vorrq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(vorr_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(vorrq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(vorr_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = 
i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(vorrq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(vorr_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(vorrq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s8() { + let a: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i8x8 = transmute(veor_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s8() { + let a: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: i8x16 = i8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x16 = i8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: i8x16 = transmute(veorq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s16() { + let a: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(0x00, 0x00, 0x00, 0x00); + let e: i16x4 = i16x4::new(0x00, 0x01, 0x02, 0x03); + let r: i16x4 = transmute(veor_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s16() { + let a: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: i16x8 = transmute(veorq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s32() { + let a: i32x2 = i32x2::new(0x00, 0x01); + let b: i32x2 = i32x2::new(0x00, 0x00); + let e: i32x2 = i32x2::new(0x00, 0x01); + let r: i32x2 = transmute(veor_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s32() { + let a: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(0x00, 0x00, 0x00, 0x00); + let e: i32x4 = i32x4::new(0x00, 0x01, 0x02, 0x03); + let r: i32x4 = transmute(veorq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u8() { + let a: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x8 = u8x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u8x8 = transmute(veor_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u8() { + let a: u8x16 = u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let b: u8x16 = u8x16::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u8x16 
= u8x16::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F); + let r: u8x16 = transmute(veorq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u16() { + let a: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0x00, 0x00, 0x00, 0x00); + let e: u16x4 = u16x4::new(0x00, 0x01, 0x02, 0x03); + let r: u16x4 = transmute(veor_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u16() { + let a: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: u16x8 = u16x8::new(0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let r: u16x8 = transmute(veorq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u32() { + let a: u32x2 = u32x2::new(0x00, 0x01); + let b: u32x2 = u32x2::new(0x00, 0x00); + let e: u32x2 = u32x2::new(0x00, 0x01); + let r: u32x2 = transmute(veor_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u32() { + let a: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0x00, 0x00, 0x00, 0x00); + let e: u32x4 = u32x4::new(0x00, 0x01, 0x02, 0x03); + let r: u32x4 = transmute(veorq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s64() { + let a: i64x1 = i64x1::new(0x00); + let b: i64x1 = i64x1::new(0x00); + let e: i64x1 = i64x1::new(0x00); + let r: i64x1 = transmute(veor_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s64() { + let a: i64x2 = i64x2::new(0x00, 0x01); + let b: i64x2 = i64x2::new(0x00, 0x00); + let e: i64x2 = i64x2::new(0x00, 0x01); + let r: i64x2 = transmute(veorq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u64() { + let a: u64x1 = u64x1::new(0x00); + let b: u64x1 = u64x1::new(0x00); + let e: u64x1 = u64x1::new(0x00); + let r: u64x1 = transmute(veor_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u64() { + let a: u64x2 = u64x2::new(0x00, 0x01); + let b: u64x2 = u64x2::new(0x00, 0x00); + let e: u64x2 = u64x2::new(0x00, 0x01); + let r: u64x2 = transmute(veorq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: i8x8 = i8x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: i8x8 = transmute(vabd_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let e: i8x16 = i8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); + let r: i8x16 = transmute(vabdq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(16, 15, 14, 13); + let e: i16x4 = i16x4::new(15, 13, 11, 9); + let r: i16x4 = transmute(vabd_s16(transmute(a), transmute(b))); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: i16x8 = i16x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: i16x8 = transmute(vabdq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(16, 15); + let e: i32x2 = i32x2::new(15, 13); + let r: i32x2 = transmute(vabd_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(16, 15, 14, 13); + let e: i32x4 = i32x4::new(15, 13, 11, 9); + let r: i32x4 = transmute(vabdq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: u8x8 = u8x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: u8x8 = transmute(vabd_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1); + let e: u8x16 = u8x16::new(15, 13, 11, 9, 7, 5, 3, 1, 1, 3, 5, 7, 9, 11, 13, 15); + let r: u8x16 = transmute(vabdq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(16, 15, 14, 13); + let e: u16x4 = u16x4::new(15, 13, 11, 9); + let r: u16x4 = transmute(vabd_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9); + let e: u16x8 = u16x8::new(15, 13, 11, 9, 7, 5, 3, 1); + let r: u16x8 = transmute(vabdq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(16, 15); + let e: u32x2 = u32x2::new(15, 13); + let r: u32x2 = transmute(vabd_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(16, 15, 14, 13); + let e: u32x4 = u32x4::new(15, 13, 11, 9); + let r: u32x4 = transmute(vabdq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabd_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(9.0, 3.0); + let e: f32x2 = f32x2::new(8.0, 1.0); + let r: f32x2 = transmute(vabd_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 5.0, -4.0); + let b: f32x4 = f32x4::new(9.0, 3.0, 2.0, 8.0); + let e: f32x4 = f32x4::new(8.0, 1.0, 3.0, 12.0); + let r: f32x4 = transmute(vabdq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 4, 3, 2, 1); + let b: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let e: u16x8 = u16x8::new(9, 
8, 7, 6, 6, 7, 8, 9); + let r: u16x8 = transmute(vabdl_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(10, 10, 10, 10); + let e: u32x4 = u32x4::new(9, 8, 7, 6); + let r: u32x4 = transmute(vabdl_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(10, 10); + let e: u64x2 = u64x2::new(9, 8); + let r: u64x2 = transmute(vabdl_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 4, 3, 2, 1); + let b: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let e: i16x8 = i16x8::new(9, 8, 7, 6, 6, 7, 8, 9); + let r: i16x8 = transmute(vabdl_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_s16() { + let a: i16x4 = i16x4::new(1, 2, 11, 12); + let b: i16x4 = i16x4::new(10, 10, 10, 10); + let e: i32x4 = i32x4::new(9, 8, 1, 2); + let r: i32x4 = transmute(vabdl_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabdl_s32() { + let a: i32x2 = i32x2::new(1, 11); + let b: i32x2 = i32x2::new(10, 10); + let e: i64x2 = i64x2::new(9, 1); + let r: i64x2 = transmute(vabdl_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u8() { + let a: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x8 = u8x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u8x8 = u8x8::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u8() { + let a: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + let b: u8x16 = u8x16::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0xFF); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u8x16 = u8x16::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0xFF); + let b: u8x16 = u8x16::new(0, 0xFF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, 0); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u16() { + let a: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + let b: u16x4 = u16x4::new(0, 0x01, 0x02, 0x03); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x4 = u16x4::new(0, 0, 0x02, 0x03); + let b: u16x4 = u16x4::new(0, 0xFF_FF, 0x02, 0x04); + 
let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x4 = transmute(vceq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u16() { + let a: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u16x8 = u16x8::new(0, 0, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: u16x8 = u16x8::new(0, 0xFF_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x8 = transmute(vceqq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u32() { + let a: u32x2 = u32x2::new(0, 0x01); + let b: u32x2 = u32x2::new(0, 0x01); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x2 = u32x2::new(0, 0); + let b: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u32() { + let a: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + let b: u32x4 = u32x4::new(0, 0x01, 0x02, 0x03); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: u32x4 = u32x4::new(0, 0, 0x02, 0x03); + let b: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x02, 0x04); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vceqq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s8() { + let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s8() { + let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = 
transmute(vceqq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s16() { + let a: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + let b: i16x4 = i16x4::new(-32768, 0x01, 0x02, 0x03); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x4 = i16x4::new(-32768, -32768, 0x02, 0x03); + let b: i16x4 = i16x4::new(-32768, 0x7F_FF, 0x02, 0x04); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x4 = transmute(vceq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(-32768, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i16x8 = i16x8::new(-32768, -32768, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i16x8 = i16x8::new(-32768, 0x7F_FF, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0); + let r: u16x8 = transmute(vceqq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x01); + let b: i32x2 = i32x2::new(-2147483648, 0x01); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x2 = i32x2::new(-2147483648, -2147483648); + let b: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vceq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + let b: i32x4 = i32x4::new(-2147483648, 0x01, 0x02, 0x03); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i32x4 = i32x4::new(-2147483648, -2147483648, 0x02, 0x03); + let b: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, 0x02, 0x04); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vceqq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_p8() { + let a: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x8 = i8x8::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07); + let b: i8x8 = i8x8::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x8 = transmute(vceq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_p8() { + let a: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + + let a: i8x16 = i8x16::new(-128, -128, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0xCC, 0x0D, 0xEE, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x7F, 0x02, 0x04, 0x04, 0x00, 0x06, 0x08, 0x08, 0x00, 0x0A, 0x0A, 0xCC, 0xD0, 0xEE, -128); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0); + let r: u8x16 = transmute(vceqq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_f32() { + let a: f32x2 = f32x2::new(1.2, 3.4); + let b: f32x2 = f32x2::new(1.2, 3.4); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vceq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_f32() { + let a: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + let b: f32x4 = f32x4::new(1.2, 3.4, 5.6, 7.8); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vceqq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_s8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_s8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_s16() { + let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let b: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vtst_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vtstq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x00); + let b: i32x2 = i32x2::new(-2147483648, 0x00); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0); + let r: u32x2 = transmute(vtst_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); + let b: i32x4 = i32x4::new(-2147483648, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = 
transmute(vtstq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_p8() { + let a: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i8x8 = i8x8::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_p8() { + let a: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let b: i8x16 = i8x16::new(-128, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x7F); + let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_p16() { + let a: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let b: i16x4 = i16x4::new(-32768, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vtst_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_p16() { + let a: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: i16x8 = i16x8::new(-32768, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vtstq_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: u8x8 = u8x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u8x8 = u8x8::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vtst_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); + let b: u8x16 = u8x16::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0xFF); + let e: u8x16 = u8x16::new(0, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vtstq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); + let b: u16x4 = u16x4::new(0, 0x00, 0x01, 0x02); + let e: u16x4 = u16x4::new(0, 0, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vtst_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtstq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let b: u16x8 = u16x8::new(0, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06); + let e: u16x8 = u16x8::new(0, 0, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vtstq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtst_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let b: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vtst_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + 
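// Editor's sketch, not generated from neon.spec: this extra round-trip check
+ // and its name are ours. vuzpq_s16 must de-interleave exactly what vzipq_s16
+ // interleaved above, so both inputs come back unchanged.
+ #[simd_test(enable = "neon")]
+ unsafe fn test_vzipq_vuzpq_s16_roundtrip() {
+ let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+ let b: i16x8 = i16x8::new(8, 9, 10, 11, 12, 13, 14, 15);
+ let z = vzipq_s16(transmute(a), transmute(b));
+ let u = vuzpq_s16(z.0, z.1);
+ let r0: i16x8 = transmute(u.0);
+ let r1: i16x8 = transmute(u.1);
+ assert_eq!(r0, a);
+ assert_eq!(r1, b);
+ }
+
+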
#[simd_test(enable = "neon")] + unsafe fn test_vtstq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); + let b: u32x4 = u32x4::new(0, 0x00, 0x01, 0x02); + let e: u32x4 = u32x4::new(0, 0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vtstq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabs_f32() { + let a: f32x2 = f32x2::new(-0.1, -2.2); + let e: f32x2 = f32x2::new(0.1, 2.2); + let r: f32x2 = transmute(vabs_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabsq_f32() { + let a: f32x4 = f32x4::new(-0.1, -2.2, -3.3, -6.6); + let e: f32x4 = f32x4::new(0.1, 2.2, 3.3, 6.6); + let r: f32x4 = transmute(vabsq_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgt_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgtq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcgt_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgtq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcgt_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = 
transmute(vcgtq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcgt_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgtq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f32() { + let a: f32x2 = f32x2::new(1.2, 2.3); + let b: f32x2 = f32x2::new(0.1, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcgt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_f32() { + let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgtq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vclt_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcltq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vclt_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcltq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(1, 2); + 
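// vclt: a result lane is all ones when a < b; every a-lane here is one + // less than its b-lane, so the expected mask is all ones. + 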
let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vclt_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcltq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vclt_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcltq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_f32() { + let a: f32x2 = f32x2::new(0.1, 1.2); + let b: f32x2 = f32x2::new(1.2, 2.3); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vclt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_f32() { + let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcltq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcle_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s8() { + let 
a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcleq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcle_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcleq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcle_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcleq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcle_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcleq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = 
u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f32() { + let a: f32x2 = f32x2::new(0.1, 1.2); + let b: f32x2 = f32x2::new(1.2, 2.3); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcle_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_f32() { + let a: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let b: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcleq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcge_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = transmute(vcgeq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcge_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgeq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x8 = transmute(vcge_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e: u8x16 = u8x16::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF); + let r: u8x16 = 
transmute(vcgeq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 1, 2, 3); + let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x4 = transmute(vcge_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF); + let r: u16x8 = transmute(vcgeq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 1); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 1, 2, 3); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_f32() { + let a: f32x2 = f32x2::new(1.2, 2.3); + let b: f32x2 = f32x2::new(0.1, 1.2); + let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcge_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_f32() { + let a: f32x4 = f32x4::new(1.2, 2.3, 3.4, 4.5); + let b: f32x4 = f32x4::new(0.1, 1.2, 2.3, 3.4); + let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcgeq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcls_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(0, 7, 7, 7, 7, 7, 7, 7); + let r: i8x8 = transmute(vcls_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7F); + let e: i8x16 = i8x16::new(0, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); + let r: i8x16 = transmute(vclsq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcls_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x00); + let e: i16x4 = i16x4::new(0, 15, 15, 15); + let r: i16x4 = transmute(vcls_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(0, 15, 15, 15, 15, 15, 15, 15); + let r: i16x8 = transmute(vclsq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcls_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: i32x2 = i32x2::new(0, 31); + let r: i32x2 = transmute(vcls_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x00); + let e: i32x4 = i32x4::new(0, 31, 31, 31); + let r: i32x4 = transmute(vclsq_s32(transmute(a))); + 
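// vcls counts the leading sign bits that follow the sign bit, so + // i32::MIN yields 0 while -1 and 0 both yield 31. + 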
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcls_u8() { + let a: u8x8 = u8x8::new(0, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i8x8 = i8x8::new(7, 7, 7, 7, 7, 7, 7, 7); + let r: i8x8 = transmute(vcls_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_u8() { + let a: u8x16 = u8x16::new(0, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF); + let e: i8x16 = i8x16::new(7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7); + let r: i8x16 = transmute(vclsq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcls_u16() { + let a: u16x4 = u16x4::new(0, 0xFF_FF, 0x00, 0x00); + let e: i16x4 = i16x4::new(15, 15, 15, 15); + let r: i16x4 = transmute(vcls_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_u16() { + let a: u16x8 = u16x8::new(0, 0xFF_FF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); + let e: i16x8 = i16x8::new(15, 15, 15, 15, 15, 15, 15, 15); + let r: i16x8 = transmute(vclsq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcls_u32() { + let a: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let e: i32x2 = i32x2::new(31, 31); + let r: i32x2 = transmute(vcls_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclsq_u32() { + let a: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0x00, 0x00); + let e: i32x4 = i32x4::new(31, 31, 31, 31); + let r: i32x4 = transmute(vclsq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_s8() { + let a: i8x8 = i8x8::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: i8x8 = i8x8::new(0, 0, 8, 7, 7, 7, 7, 7); + let r: i8x8 = transmute(vclz_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s8() { + let a: i8x16 = i8x16::new(-128, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x7F); + let e: i8x16 = i8x16::new(0, 0, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 1); + let r: i8x16 = transmute(vclzq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_s16() { + let a: i16x4 = i16x4::new(-32768, -1, 0x00, 0x01); + let e: i16x4 = i16x4::new(0, 0, 16, 15); + let r: i16x4 = transmute(vclz_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s16() { + let a: i16x8 = i16x8::new(-32768, -1, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: i16x8 = i16x8::new(0, 0, 16, 15, 15, 15, 15, 15); + let r: i16x8 = transmute(vclzq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_s32() { + let a: i32x2 = i32x2::new(-2147483648, -1); + let e: i32x2 = i32x2::new(0, 0); + let r: i32x2 = transmute(vclz_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_s32() { + let a: i32x4 = i32x4::new(-2147483648, -1, 0x00, 0x01); + let e: i32x4 = i32x4::new(0, 0, 32, 31); + let r: i32x4 = transmute(vclzq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u8() { + let a: u8x8 = u8x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: u8x8 = u8x8::new(8, 8, 7, 7, 7, 7, 7, 7); + let r: u8x8 = transmute(vclz_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u8() { + let a: u8x16 = u8x16::new(0, 0x00, 0x01, 
0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0xFF); + let e: u8x16 = u8x16::new(8, 8, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 0); + let r: u8x16 = transmute(vclzq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u16() { + let a: u16x4 = u16x4::new(0, 0x00, 0x01, 0x01); + let e: u16x4 = u16x4::new(16, 16, 15, 15); + let r: u16x4 = transmute(vclz_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u16() { + let a: u16x8 = u16x8::new(0, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01); + let e: u16x8 = u16x8::new(16, 16, 15, 15, 15, 15, 15, 15); + let r: u16x8 = transmute(vclzq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclz_u32() { + let a: u32x2 = u32x2::new(0, 0x00); + let e: u32x2 = u32x2::new(32, 32); + let r: u32x2 = transmute(vclz_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclzq_u32() { + let a: u32x4 = u32x4::new(0, 0x00, 0x01, 0x01); + let e: u32x4 = u32x4::new(32, 32, 31, 31); + let r: u32x4 = transmute(vclzq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcagt_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(!0, 0); + let r: u32x2 = transmute(vcagt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcagtq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(!0, 0, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vcagtq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcage_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(!0, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcage_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcageq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(!0, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0); + let r: u32x4 = transmute(vcageq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcalt_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vcalt_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcaltq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0, 0, 0, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcaltq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcale_f32() { + let a: f32x2 = f32x2::new(-1.2, 0.0); + let b: f32x2 = f32x2::new(-1.1, 0.0); + let e: u32x2 = u32x2::new(0, 0xFF_FF_FF_FF); + let r: u32x2 = transmute(vcale_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcaleq_f32() { + let a: f32x4 = f32x4::new(-1.2, 0.0, 1.2, 2.3); + let b: f32x4 = f32x4::new(-1.1, 0.0, 1.1, 2.4); + let e: u32x4 = u32x4::new(0, 0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF); + let r: u32x4 = transmute(vcaleq_f32(transmute(a), transmute(b))); + 
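// vcale is an absolute compare (|a| <= |b|) per lane: |-1.2| <= |-1.1| + // and |1.2| <= |1.1| fail, while |0.0| <= |0.0| and |2.3| <= |2.4| hold. + 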
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_s8() { + let a: u64 = 1; + let e: i8x8 = i8x8::new(1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vcreate_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_s16() { + let a: u64 = 1; + let e: i16x4 = i16x4::new(1, 0, 0, 0); + let r: i16x4 = transmute(vcreate_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_s32() { + let a: u64 = 1; + let e: i32x2 = i32x2::new(1, 0); + let r: i32x2 = transmute(vcreate_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_s64() { + let a: u64 = 1; + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vcreate_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_u8() { + let a: u64 = 1; + let e: u8x8 = u8x8::new(1, 0, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vcreate_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_u16() { + let a: u64 = 1; + let e: u16x4 = u16x4::new(1, 0, 0, 0); + let r: u16x4 = transmute(vcreate_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_u32() { + let a: u64 = 1; + let e: u32x2 = u32x2::new(1, 0); + let r: u32x2 = transmute(vcreate_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_u64() { + let a: u64 = 1; + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vcreate_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_p8() { + let a: u64 = 1; + let e: i8x8 = i8x8::new(1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vcreate_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_p16() { + let a: u64 = 1; + let e: i16x4 = i16x4::new(1, 0, 0, 0); + let r: i16x4 = transmute(vcreate_p16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_p64() { + let a: u64 = 1; + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vcreate_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcreate_f32() { + let a: u64 = 0; + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vcreate_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_f32_s32() { + let a: i32x2 = i32x2::new(1, 2); + let e: f32x2 = f32x2::new(1., 2.); + let r: f32x2 = transmute(vcvt_f32_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_f32_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let e: f32x4 = f32x4::new(1., 2., 3., 4.); + let r: f32x4 = transmute(vcvtq_f32_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_f32_u32() { + let a: u32x2 = u32x2::new(1, 2); + let e: f32x2 = f32x2::new(1., 2.); + let r: f32x2 = transmute(vcvt_f32_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_f32_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let e: f32x4 = f32x4::new(1., 2., 3., 4.); + let r: f32x4 = transmute(vcvtq_f32_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_n_f32_s32() { + let a: i32x2 = i32x2::new(1, 2); + let e: f32x2 = f32x2::new(0.25, 0.5); + let r: f32x2 = 
transmute(vcvt_n_f32_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_n_f32_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let e: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); + let r: f32x4 = transmute(vcvtq_n_f32_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_n_f32_u32() { + let a: u32x2 = u32x2::new(1, 2); + let e: f32x2 = f32x2::new(0.25, 0.5); + let r: f32x2 = transmute(vcvt_n_f32_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_n_f32_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let e: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); + let r: f32x4 = transmute(vcvtq_n_f32_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_n_s32_f32() { + let a: f32x2 = f32x2::new(0.25, 0.5); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vcvt_n_s32_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_n_s32_f32() { + let a: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); + let e: i32x4 = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vcvtq_n_s32_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_n_u32_f32() { + let a: f32x2 = f32x2::new(0.25, 0.5); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vcvt_n_u32_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_n_u32_f32() { + let a: f32x4 = f32x4::new(0.25, 0.5, 0.75, 1.); + let e: u32x4 = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vcvtq_n_u32_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_s32_f32() { + let a: f32x2 = f32x2::new(-1.1, 2.1); + let e: i32x2 = i32x2::new(-1, 2); + let r: i32x2 = transmute(vcvt_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_s32_f32() { + let a: f32x4 = f32x4::new(-1.1, 2.1, -2.9, 3.9); + let e: i32x4 = i32x4::new(-1, 2, -2, 3); + let r: i32x4 = transmute(vcvtq_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvt_u32_f32() { + let a: f32x2 = f32x2::new(1.1, 2.1); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vcvt_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcvtq_u32_f32() { + let a: f32x4 = f32x4::new(1.1, 2.1, 2.9, 3.9); + let e: u32x4 = u32x4::new(1, 2, 2, 3); + let r: u32x4 = transmute(vcvtq_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_lane_s8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_laneq_s8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_lane_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vdupq_laneq_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_laneq_s16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_s32() { + let a: i32x2 = i32x2::new(1, 1); + let e: i32x2 = i32x2::new(1, 1); + let r: i32x2 = transmute(vdup_lane_s32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 4); + let e: i32x4 = i32x4::new(1, 1, 1, 1); + let r: i32x4 = transmute(vdupq_laneq_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_laneq_s8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_laneq_s16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 4); + let e: i32x2 = i32x2::new(1, 1); + let r: i32x2 = transmute(vdup_laneq_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_lane_s8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_lane_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_s32() { + let a: i32x2 = i32x2::new(1, 1); + let e: i32x4 = i32x4::new(1, 1, 1, 1); + let r: i32x4 = transmute(vdupq_lane_s32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x8 = transmute(vdup_lane_u8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x16 = transmute(vdupq_laneq_u8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 4); + let e: u16x4 = u16x4::new(1, 1, 1, 1); + let r: u16x4 = transmute(vdup_lane_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u16x8 = transmute(vdupq_laneq_u16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_u32() { + let a: u32x2 = u32x2::new(1, 1); + let e: u32x2 = u32x2::new(1, 1); + let r: u32x2 = transmute(vdup_lane_u32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = 
"neon")] + unsafe fn test_vdupq_laneq_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 4); + let e: u32x4 = u32x4::new(1, 1, 1, 1); + let r: u32x4 = transmute(vdupq_laneq_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x8 = transmute(vdup_laneq_u8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u16x4 = u16x4::new(1, 1, 1, 1); + let r: u16x4 = transmute(vdup_laneq_u16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 4); + let e: u32x2 = u32x2::new(1, 1); + let r: u32x2 = transmute(vdup_laneq_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: u8x16 = transmute(vdupq_lane_u8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 4); + let e: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: u16x8 = transmute(vdupq_lane_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_u32() { + let a: u32x2 = u32x2::new(1, 1); + let e: u32x4 = u32x4::new(1, 1, 1, 1); + let r: u32x4 = transmute(vdupq_lane_u32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_p8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_lane_p8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_p8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_laneq_p8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_p16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_lane_p16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_p16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_laneq_p16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_p8() { + let a: i8x16 = i8x16::new(1, 1, 1, 4, 1, 6, 7, 8, 1, 10, 11, 12, 13, 14, 15, 16); + let e: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i8x8 = transmute(vdup_laneq_p8::<8>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_p16() { + let a: i16x8 = i16x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i16x4 = i16x4::new(1, 1, 1, 1); + let r: i16x4 = transmute(vdup_laneq_p16::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_p8() { + let a: i8x8 = i8x8::new(1, 1, 1, 4, 1, 6, 7, 8); + let e: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1); + let r: i8x16 = transmute(vdupq_lane_p8::<4>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_p16() { + let a: i16x4 = i16x4::new(1, 1, 1, 4); + let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let r: i16x8 = transmute(vdupq_lane_p16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_s64() { + let a: i64x2 = i64x2::new(1, 1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_laneq_s64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_s64() { + let a: i64x1 = i64x1::new(1); + let e: i64x2 = i64x2::new(1, 1); + let r: i64x2 = transmute(vdupq_lane_s64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_u64() { + let a: u64x2 = u64x2::new(1, 1); + let e: u64x2 = u64x2::new(1, 1); + let r: u64x2 = transmute(vdupq_laneq_u64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_u64() { + let a: u64x1 = u64x1::new(1); + let e: u64x2 = u64x2::new(1, 1); + let r: u64x2 = transmute(vdupq_lane_u64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_f32() { + let a: f32x2 = f32x2::new(1., 1.); + let e: f32x2 = f32x2::new(1., 1.); + let r: f32x2 = transmute(vdup_lane_f32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_laneq_f32() { + let a: f32x4 = f32x4::new(1., 1., 1., 4.); + let e: f32x4 = f32x4::new(1., 1., 1., 1.); + let r: f32x4 = transmute(vdupq_laneq_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_f32() { + let a: f32x4 = f32x4::new(1., 1., 1., 4.); + let e: f32x2 = f32x2::new(1., 1.); + let r: f32x2 = transmute(vdup_laneq_f32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_lane_f32() { + let a: f32x2 = f32x2::new(1., 1.); + let e: f32x4 = f32x4::new(1., 1., 1., 1.); + let r: f32x4 = transmute(vdupq_lane_f32::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_s64() { + let a: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vdup_lane_s64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_lane_u64() { + let a: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vdup_lane_u64::<0>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vdup_laneq_s64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_laneq_u64() { + let a: u64x2 = u64x2::new(0, 1); + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vdup_laneq_u64::<1>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_s8() { + let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i8x8 = i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i8x8 = transmute(vext_s8::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_s8() { + let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 
11, 14, 15); + let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vextq_s8::<8>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_s16() { + let a: i16x4 = i16x4::new(0, 8, 8, 9); + let b: i16x4 = i16x4::new(9, 11, 14, 15); + let e: i16x4 = i16x4::new(8, 9, 9, 11); + let r: i16x4 = transmute(vext_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_s16() { + let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i16x8 = transmute(vextq_s16::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_s32() { + let a: i32x2 = i32x2::new(0, 8); + let b: i32x2 = i32x2::new(9, 11); + let e: i32x2 = i32x2::new(8, 9); + let r: i32x2 = transmute(vext_s32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_s32() { + let a: i32x4 = i32x4::new(0, 8, 8, 9); + let b: i32x4 = i32x4::new(9, 11, 14, 15); + let e: i32x4 = i32x4::new(8, 9, 9, 11); + let r: i32x4 = transmute(vextq_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_u8() { + let a: u8x8 = u8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: u8x8 = u8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: u8x8 = u8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: u8x8 = transmute(vext_u8::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u8() { + let a: u8x16 = u8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); + let b: u8x16 = u8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: u8x16 = u8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: u8x16 = transmute(vextq_u8::<8>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_u16() { + let a: u16x4 = u16x4::new(0, 8, 8, 9); + let b: u16x4 = u16x4::new(9, 11, 14, 15); + let e: u16x4 = u16x4::new(8, 9, 9, 11); + let r: u16x4 = transmute(vext_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u16() { + let a: u16x8 = u16x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: u16x8 = u16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: u16x8 = u16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: u16x8 = transmute(vextq_u16::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_u32() { + let a: u32x2 = u32x2::new(0, 8); + let b: u32x2 = u32x2::new(9, 11); + let e: u32x2 = u32x2::new(8, 9); + let r: u32x2 = transmute(vext_u32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u32() { + let a: u32x4 = u32x4::new(0, 8, 8, 9); + let b: u32x4 = u32x4::new(9, 11, 14, 15); + let e: u32x4 = u32x4::new(8, 9, 9, 11); + let r: u32x4 = transmute(vextq_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_p8() { + let a: i8x8 = i8x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i8x8 = i8x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i8x8 = 
i8x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i8x8 = transmute(vext_p8::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_p8() { + let a: i8x16 = i8x16::new(0, 8, 8, 9, 8, 9, 9, 11, 8, 9, 9, 11, 9, 11, 14, 15); + let b: i8x16 = i8x16::new(9, 11, 14, 15, 16, 17, 18, 19, 0, 8, 8, 9, 8, 9, 9, 11); + let e: i8x16 = i8x16::new(8, 9, 9, 11, 9, 11, 14, 15, 9, 11, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vextq_p8::<8>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_p16() { + let a: i16x4 = i16x4::new(0, 8, 8, 9); + let b: i16x4 = i16x4::new(9, 11, 14, 15); + let e: i16x4 = i16x4::new(8, 9, 9, 11); + let r: i16x4 = transmute(vext_p16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_p16() { + let a: i16x8 = i16x8::new(0, 8, 8, 9, 8, 9, 9, 11); + let b: i16x8 = i16x8::new(9, 11, 14, 15, 16, 17, 18, 19); + let e: i16x8 = i16x8::new(8, 9, 9, 11, 9, 11, 14, 15); + let r: i16x8 = transmute(vextq_p16::<4>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_s64() { + let a: i64x2 = i64x2::new(0, 8); + let b: i64x2 = i64x2::new(9, 11); + let e: i64x2 = i64x2::new(8, 9); + let r: i64x2 = transmute(vextq_s64::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_u64() { + let a: u64x2 = u64x2::new(0, 8); + let b: u64x2 = u64x2::new(9, 11); + let e: u64x2 = u64x2::new(8, 9); + let r: u64x2 = transmute(vextq_u64::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_f32() { + let a: f32x2 = f32x2::new(0., 2.); + let b: f32x2 = f32x2::new(3., 4.); + let e: f32x2 = f32x2::new(2., 3.); + let r: f32x2 = transmute(vext_f32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vextq_f32() { + let a: f32x4 = f32x4::new(0., 2., 2., 3.); + let b: f32x4 = f32x4::new(3., 4., 5., 6.); + let e: f32x4 = f32x4::new(2., 3., 3., 4.); + let r: f32x4 = transmute(vextq_f32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_s8() { + let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i8x8 = transmute(vmla_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_s8() { + let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let r: i8x16 = transmute(vmlaq_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(3, 3, 3, 3); + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_s16() { + let a: i16x8 = 
i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(3, 3); + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32x4 = i32x4::new(3, 3, 3, 3); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_u8() { + let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u8x8 = transmute(vmla_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_u8() { + let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let r: u8x16 = transmute(vmlaq_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(3, 3, 3, 3); + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(3, 3); + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32x4 = u32x4::new(3, 3, 3, 3); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32x2 = f32x2::new(3., 3.); + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_f32() { 
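+ // vmla is a lane-wise multiply-accumulate: each result lane is a + (b * c).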
+ let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32x4 = f32x4::new(3., 3., 3., 3.); + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_s16() { + let a: i16x4 = i16x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16 = 3; + let e: i16x4 = i16x4::new(6, 7, 8, 9); + let r: i16x4 = transmute(vmla_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_s16() { + let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i16 = 3; + let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: i16x8 = transmute(vmlaq_n_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_s32() { + let a: i32x2 = i32x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32 = 3; + let e: i32x2 = i32x2::new(6, 7); + let r: i32x2 = transmute(vmla_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_s32() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let c: i32 = 3; + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlaq_n_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_u16() { + let a: u16x4 = u16x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16 = 3; + let e: u16x4 = u16x4::new(6, 7, 8, 9); + let r: u16x4 = transmute(vmla_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_u16() { + let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: u16 = 3; + let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let r: u16x8 = transmute(vmlaq_n_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_u32() { + let a: u32x2 = u32x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32 = 3; + let e: u32x2 = u32x2::new(6, 7); + let r: u32x2 = transmute(vmla_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_u32() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u32x4 = u32x4::new(2, 2, 2, 2); + let c: u32 = 3; + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlaq_n_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_n_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let b: f32x2 = f32x2::new(2., 2.); + let c: f32 = 3.; + let e: f32x2 = f32x2::new(6., 7.); + let r: f32x2 = transmute(vmla_n_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlaq_n_f32() { + let a: f32x4 = f32x4::new(0., 1., 2., 3.); + let b: f32x4 = f32x4::new(2., 2., 2., 2.); + let c: f32 = 3.; + let e: f32x4 = f32x4::new(6., 7., 8., 9.); + let r: f32x4 = transmute(vmlaq_n_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmla_lane_s16() { + let a: i16x4 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_lane_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(0, 3, 0, 0);
+        let e: i16x4 = i16x4::new(6, 7, 8, 9);
+        let r: i16x4 = transmute(vmla_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_laneq_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: i16x4 = i16x4::new(6, 7, 8, 9);
+        let r: i16x4 = transmute(vmla_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_lane_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(0, 3, 0, 0);
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vmlaq_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_laneq_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vmlaq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_lane_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(0, 3);
+        let e: i32x2 = i32x2::new(6, 7);
+        let r: i32x2 = transmute(vmla_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_laneq_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x4 = i32x4::new(0, 3, 0, 0);
+        let e: i32x2 = i32x2::new(6, 7);
+        let r: i32x2 = transmute(vmla_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_lane_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let c: i32x2 = i32x2::new(0, 3);
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlaq_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_laneq_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let c: i32x4 = i32x4::new(0, 3, 0, 0);
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlaq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_lane_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(0, 3, 0, 0);
+        let e: u16x4 = u16x4::new(6, 7, 8, 9);
+        let r: u16x4 = transmute(vmla_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_laneq_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: u16x4 = u16x4::new(6, 7, 8, 9);
+        let r: u16x4 = transmute(vmla_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_lane_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(0, 3, 0, 0);
+        let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: u16x8 = transmute(vmlaq_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_laneq_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: u16x8 = transmute(vmlaq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_lane_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x2 = u32x2::new(0, 3);
+        let e: u32x2 = u32x2::new(6, 7);
+        let r: u32x2 = transmute(vmla_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_laneq_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x4 = u32x4::new(0, 3, 0, 0);
+        let e: u32x2 = u32x2::new(6, 7);
+        let r: u32x2 = transmute(vmla_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_lane_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u32x4 = u32x4::new(2, 2, 2, 2);
+        let c: u32x2 = u32x2::new(0, 3);
+        let e: u32x4 = u32x4::new(6, 7, 8, 9);
+        let r: u32x4 = transmute(vmlaq_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_laneq_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u32x4 = u32x4::new(2, 2, 2, 2);
+        let c: u32x4 = u32x4::new(0, 3, 0, 0);
+        let e: u32x4 = u32x4::new(6, 7, 8, 9);
+        let r: u32x4 = transmute(vmlaq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_lane_f32() {
+        let a: f32x2 = f32x2::new(0., 1.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32x2 = f32x2::new(0., 3.);
+        let e: f32x2 = f32x2::new(6., 7.);
+        let r: f32x2 = transmute(vmla_lane_f32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmla_laneq_f32() {
+        let a: f32x2 = f32x2::new(0., 1.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32x4 = f32x4::new(0., 3., 0., 0.);
+        let e: f32x2 = f32x2::new(6., 7.);
+        let r: f32x2 = transmute(vmla_laneq_f32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_lane_f32() {
+        let a: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32x2 = f32x2::new(0., 3.);
+        let e: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let r: f32x4 = transmute(vmlaq_lane_f32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlaq_laneq_f32() {
+        let a: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32x4 = f32x4::new(0., 3., 0., 0.);
+        let e: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let r: f32x4 = transmute(vmlaq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
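+    // vmlal_* ("multiply-accumulate long") widen: the lane-wise products of the
+    // two narrow inputs are added into an accumulator of twice the element width,
+    // e.g. i8x8 * i8x8 accumulated into an i16x8.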
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_s8() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: i16x8 = transmute(vmlal_s8(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_s16() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(3, 3, 3, 3);
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlal_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_s32() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(3, 3);
+        let e: i64x2 = i64x2::new(6, 7);
+        let r: i64x2 = transmute(vmlal_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_u8() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let r: u16x8 = transmute(vmlal_u8(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_u16() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(3, 3, 3, 3);
+        let e: u32x4 = u32x4::new(6, 7, 8, 9);
+        let r: u32x4 = transmute(vmlal_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_u32() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x2 = u32x2::new(3, 3);
+        let e: u64x2 = u64x2::new(6, 7);
+        let r: u64x2 = transmute(vmlal_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_n_s16() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16 = 3;
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlal_n_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_n_s32() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32 = 3;
+        let e: i64x2 = i64x2::new(6, 7);
+        let r: i64x2 = transmute(vmlal_n_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_n_u16() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16 = 3;
+        let e: u32x4 = u32x4::new(6, 7, 8, 9);
+        let r: u32x4 = transmute(vmlal_n_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_n_u32() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32 = 3;
+        let e: u64x2 = u64x2::new(6, 7);
+        let r: u64x2 = transmute(vmlal_n_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlal_lane_s16() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(0, 3, 0, 0);
+        let e: i32x4 = i32x4::new(6, 7, 8, 9);
+        let r: i32x4 = transmute(vmlal_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
"neon")] + unsafe fn test_vmlal_laneq_s16() { + let a: i32x4 = i32x4::new(0, 1, 2, 3); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(6, 7, 8, 9); + let r: i32x4 = transmute(vmlal_laneq_s16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_lane_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x2 = i32x2::new(0, 3); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_laneq_s32() { + let a: i64x2 = i64x2::new(0, 1); + let b: i32x2 = i32x2::new(2, 2); + let c: i32x4 = i32x4::new(0, 3, 0, 0); + let e: i64x2 = i64x2::new(6, 7); + let r: i64x2 = transmute(vmlal_laneq_s32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_lane_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x4 = u16x4::new(0, 3, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_lane_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_laneq_u16() { + let a: u32x4 = u32x4::new(0, 1, 2, 3); + let b: u16x4 = u16x4::new(2, 2, 2, 2); + let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(6, 7, 8, 9); + let r: u32x4 = transmute(vmlal_laneq_u16::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_lane_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x2 = u32x2::new(0, 3); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_lane_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlal_laneq_u32() { + let a: u64x2 = u64x2::new(0, 1); + let b: u32x2 = u32x2::new(2, 2); + let c: u32x4 = u32x4::new(0, 3, 0, 0); + let e: u64x2 = u64x2::new(6, 7); + let r: u64x2 = transmute(vmlal_laneq_u32::<1>(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_s8() { + let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r: i8x8 = transmute(vmls_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmlsq_s8() { + let a: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3); + let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r: i8x16 = transmute(vmlsq_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmls_s16() { + let a: i16x4 = i16x4::new(6, 7, 8, 9); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let c: i16x4 = i16x4::new(3, 3, 3, 3); + let e: i16x4 = i16x4::new(0, 1, 2, 3); + let r: i16x4 = transmute(vmls_s16(transmute(a), transmute(b), transmute(c))); + 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_s8() {
+        let a: i8x8 = i8x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i8x8 = transmute(vmls_s8(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_s8() {
+        let a: i8x16 = i8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
+        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i8x16 = i8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: i8x16 = transmute(vmlsq_s8(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_s16() {
+        let a: i16x4 = i16x4::new(6, 7, 8, 9);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(3, 3, 3, 3);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vmls_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_s16() {
+        let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vmlsq_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_s32() {
+        let a: i32x2 = i32x2::new(6, 7);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(3, 3);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vmls_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_s32() {
+        let a: i32x4 = i32x4::new(6, 7, 8, 9);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let c: i32x4 = i32x4::new(3, 3, 3, 3);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmlsq_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_u8() {
+        let a: u8x8 = u8x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vmls_u8(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_u8() {
+        let a: u8x16 = u8x16::new(6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21);
+        let b: u8x16 = u8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u8x16 = u8x16::new(3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: u8x16 = transmute(vmlsq_u8(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_u16() {
+        let a: u16x4 = u16x4::new(6, 7, 8, 9);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(3, 3, 3, 3);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vmls_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_u16() {
+        let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u16x8 = u16x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vmlsq_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_u32() {
+        let a: u32x2 = u32x2::new(6, 7);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x2 = u32x2::new(3, 3);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vmls_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_u32() {
+        let a: u32x4 = u32x4::new(6, 7, 8, 9);
+        let b: u32x4 = u32x4::new(2, 2, 2, 2);
+        let c: u32x4 = u32x4::new(3, 3, 3, 3);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmlsq_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_f32() {
+        let a: f32x2 = f32x2::new(6., 7.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32x2 = f32x2::new(3., 3.);
+        let e: f32x2 = f32x2::new(0., 1.);
+        let r: f32x2 = transmute(vmls_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_f32() {
+        let a: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32x4 = f32x4::new(3., 3., 3., 3.);
+        let e: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let r: f32x4 = transmute(vmlsq_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_n_s16() {
+        let a: i16x4 = i16x4::new(6, 7, 8, 9);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16 = 3;
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vmls_n_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_n_s16() {
+        let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16 = 3;
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vmlsq_n_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_n_s32() {
+        let a: i32x2 = i32x2::new(6, 7);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32 = 3;
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vmls_n_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_n_s32() {
+        let a: i32x4 = i32x4::new(6, 7, 8, 9);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let c: i32 = 3;
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmlsq_n_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_n_u16() {
+        let a: u16x4 = u16x4::new(6, 7, 8, 9);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16 = 3;
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vmls_n_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_n_u16() {
+        let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u16 = 3;
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vmlsq_n_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_n_u32() {
+        let a: u32x2 = u32x2::new(6, 7);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32 = 3;
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vmls_n_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_n_u32() {
+        let a: u32x4 = u32x4::new(6, 7, 8, 9);
+        let b: u32x4 = u32x4::new(2, 2, 2, 2);
+        let c: u32 = 3;
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmlsq_n_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_n_f32() {
+        let a: f32x2 = f32x2::new(6., 7.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32 = 3.;
+        let e: f32x2 = f32x2::new(0., 1.);
+        let r: f32x2 = transmute(vmls_n_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_n_f32() {
+        let a: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32 = 3.;
+        let e: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let r: f32x4 = transmute(vmlsq_n_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_lane_s16() {
+        let a: i16x4 = i16x4::new(6, 7, 8, 9);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(0, 3, 0, 0);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vmls_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_laneq_s16() {
+        let a: i16x4 = i16x4::new(6, 7, 8, 9);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vmls_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_lane_s16() {
+        let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(0, 3, 0, 0);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vmlsq_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_laneq_s16() {
+        let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vmlsq_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_lane_s32() {
+        let a: i32x2 = i32x2::new(6, 7);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(0, 3);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vmls_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_laneq_s32() {
+        let a: i32x2 = i32x2::new(6, 7);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x4 = i32x4::new(0, 3, 0, 0);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vmls_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_lane_s32() {
+        let a: i32x4 = i32x4::new(6, 7, 8, 9);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let c: i32x2 = i32x2::new(0, 3);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmlsq_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_laneq_s32() {
+        let a: i32x4 = i32x4::new(6, 7, 8, 9);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let c: i32x4 = i32x4::new(0, 3, 0, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmlsq_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_lane_u16() {
+        let a: u16x4 = u16x4::new(6, 7, 8, 9);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(0, 3, 0, 0);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vmls_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_laneq_u16() {
+        let a: u16x4 = u16x4::new(6, 7, 8, 9);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vmls_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_lane_u16() {
+        let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(0, 3, 0, 0);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vmlsq_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_laneq_u16() {
+        let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: u16x8 = u16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vmlsq_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_lane_u32() {
+        let a: u32x2 = u32x2::new(6, 7);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x2 = u32x2::new(0, 3);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vmls_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_laneq_u32() {
+        let a: u32x2 = u32x2::new(6, 7);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x4 = u32x4::new(0, 3, 0, 0);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vmls_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_lane_u32() {
+        let a: u32x4 = u32x4::new(6, 7, 8, 9);
+        let b: u32x4 = u32x4::new(2, 2, 2, 2);
+        let c: u32x2 = u32x2::new(0, 3);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmlsq_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_laneq_u32() {
+        let a: u32x4 = u32x4::new(6, 7, 8, 9);
+        let b: u32x4 = u32x4::new(2, 2, 2, 2);
+        let c: u32x4 = u32x4::new(0, 3, 0, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmlsq_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_lane_f32() {
+        let a: f32x2 = f32x2::new(6., 7.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32x2 = f32x2::new(0., 3.);
+        let e: f32x2 = f32x2::new(0., 1.);
+        let r: f32x2 = transmute(vmls_lane_f32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmls_laneq_f32() {
+        let a: f32x2 = f32x2::new(6., 7.);
+        let b: f32x2 = f32x2::new(2., 2.);
+        let c: f32x4 = f32x4::new(0., 3., 0., 0.);
+        let e: f32x2 = f32x2::new(0., 1.);
+        let r: f32x2 = transmute(vmls_laneq_f32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_lane_f32() {
+        let a: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32x2 = f32x2::new(0., 3.);
+        let e: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let r: f32x4 = transmute(vmlsq_lane_f32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsq_laneq_f32() {
+        let a: f32x4 = f32x4::new(6., 7., 8., 9.);
+        let b: f32x4 = f32x4::new(2., 2., 2., 2.);
+        let c: f32x4 = f32x4::new(0., 3., 0., 0.);
+        let e: f32x4 = f32x4::new(0., 1., 2., 3.);
+        let r: f32x4 = transmute(vmlsq_laneq_f32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
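+    // vmlsl_* ("multiply-subtract long") are the widening counterparts: the
+    // narrow lane-wise products are subtracted from a double-width accumulator.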
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_s8() {
+        let a: i16x8 = i16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: i8x8 = i8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vmlsl_s8(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_s16() {
+        let a: i32x4 = i32x4::new(6, 7, 8, 9);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(3, 3, 3, 3);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmlsl_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_s32() {
+        let a: i64x2 = i64x2::new(6, 7);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(3, 3);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vmlsl_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_u8() {
+        let a: u16x8 = u16x8::new(6, 7, 8, 9, 10, 11, 12, 13);
+        let b: u8x8 = u8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let c: u8x8 = u8x8::new(3, 3, 3, 3, 3, 3, 3, 3);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vmlsl_u8(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_u16() {
+        let a: u32x4 = u32x4::new(6, 7, 8, 9);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(3, 3, 3, 3);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmlsl_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_u32() {
+        let a: u64x2 = u64x2::new(6, 7);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x2 = u32x2::new(3, 3);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vmlsl_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_n_s16() {
+        let a: i32x4 = i32x4::new(6, 7, 8, 9);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16 = 3;
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmlsl_n_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_n_s32() {
+        let a: i64x2 = i64x2::new(6, 7);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32 = 3;
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vmlsl_n_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_n_u16() {
+        let a: u32x4 = u32x4::new(6, 7, 8, 9);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16 = 3;
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmlsl_n_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_n_u32() {
+        let a: u64x2 = u64x2::new(6, 7);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32 = 3;
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vmlsl_n_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_lane_s16() {
+        let a: i32x4 = i32x4::new(6, 7, 8, 9);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x4 = i16x4::new(0, 3, 0, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmlsl_lane_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_laneq_s16() {
+        let a: i32x4 = i32x4::new(6, 7, 8, 9);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let c: i16x8 = i16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vmlsl_laneq_s16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_lane_s32() {
+        let a: i64x2 = i64x2::new(6, 7);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x2 = i32x2::new(0, 3);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_laneq_s32() {
+        let a: i64x2 = i64x2::new(6, 7);
+        let b: i32x2 = i32x2::new(2, 2);
+        let c: i32x4 = i32x4::new(0, 3, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vmlsl_laneq_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_lane_u16() {
+        let a: u32x4 = u32x4::new(6, 7, 8, 9);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x4 = u16x4::new(0, 3, 0, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmlsl_lane_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_laneq_u16() {
+        let a: u32x4 = u32x4::new(6, 7, 8, 9);
+        let b: u16x4 = u16x4::new(2, 2, 2, 2);
+        let c: u16x8 = u16x8::new(0, 3, 0, 0, 0, 0, 0, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vmlsl_laneq_u16::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_lane_u32() {
+        let a: u64x2 = u64x2::new(6, 7);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x2 = u32x2::new(0, 3);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vmlsl_lane_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmlsl_laneq_u32() {
+        let a: u64x2 = u64x2::new(6, 7);
+        let b: u32x2 = u32x2::new(2, 2);
+        let c: u32x4 = u32x4::new(0, 3, 0, 0);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vmlsl_laneq_u32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
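+    // vneg_* negate each lane. The saturating vqneg_* variants below differ only
+    // at the type minimum: negating i8::MIN (-128) saturates to i8::MAX (0x7F)
+    // rather than wrapping, as the first lane of each vqneg test checks.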
"neon")] + unsafe fn test_vneg_s32() { + let a: i32x2 = i32x2::new(0, 1); + let e: i32x2 = i32x2::new(0, -1); + let r: i32x2 = transmute(vneg_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vnegq_s32() { + let a: i32x4 = i32x4::new(0, 1, -1, 2); + let e: i32x4 = i32x4::new(0, -1, 1, -2); + let r: i32x4 = transmute(vnegq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vneg_f32() { + let a: f32x2 = f32x2::new(0., 1.); + let e: f32x2 = f32x2::new(0., -1.); + let r: f32x2 = transmute(vneg_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vnegq_f32() { + let a: f32x4 = f32x4::new(0., 1., -1., 2.); + let e: f32x4 = f32x4::new(0., -1., 1., -2.); + let r: f32x4 = transmute(vnegq_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqneg_s8() { + let a: i8x8 = i8x8::new(-128, 0, 1, -1, 2, -2, 3, -3); + let e: i8x8 = i8x8::new(0x7F, 0, -1, 1, -2, 2, -3, 3); + let r: i8x8 = transmute(vqneg_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqnegq_s8() { + let a: i8x16 = i8x16::new(-128, 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, 6, -6, 7, -7); + let e: i8x16 = i8x16::new(0x7F, 0, -1, 1, -2, 2, -3, 3, -4, 4, -5, 5, -6, 6, -7, 7); + let r: i8x16 = transmute(vqnegq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqneg_s16() { + let a: i16x4 = i16x4::new(-32768, 0, 1, -1); + let e: i16x4 = i16x4::new(0x7F_FF, 0, -1, 1); + let r: i16x4 = transmute(vqneg_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqnegq_s16() { + let a: i16x8 = i16x8::new(-32768, 0, 1, -1, 2, -2, 3, -3); + let e: i16x8 = i16x8::new(0x7F_FF, 0, -1, 1, -2, 2, -3, 3); + let r: i16x8 = transmute(vqnegq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqneg_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0); + let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0); + let r: i32x2 = transmute(vqneg_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqnegq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0, 1, -1); + let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, -1, 1); + let r: i32x4 = transmute(vqnegq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u8() { + let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(41, 40, 39, 38, 37, 36, 35, 34); + let r: u8x8 = transmute(vqsub_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u8() { + let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26); + let r: u8x16 = transmute(vqsubq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u16() { + let a: u16x4 = u16x4::new(42, 42, 42, 42); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(41, 40, 39, 38); + let r: u16x4 = transmute(vqsub_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u16() { + let a: u16x8 = u16x8::new(42, 
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsub_u8() {
+        let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u8x8 = u8x8::new(41, 40, 39, 38, 37, 36, 35, 34);
+        let r: u8x8 = transmute(vqsub_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsubq_u8() {
+        let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e: u8x16 = u8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26);
+        let r: u8x16 = transmute(vqsubq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsub_u16() {
+        let a: u16x4 = u16x4::new(42, 42, 42, 42);
+        let b: u16x4 = u16x4::new(1, 2, 3, 4);
+        let e: u16x4 = u16x4::new(41, 40, 39, 38);
+        let r: u16x4 = transmute(vqsub_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsubq_u16() {
+        let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u16x8 = u16x8::new(41, 40, 39, 38, 37, 36, 35, 34);
+        let r: u16x8 = transmute(vqsubq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsub_u32() {
+        let a: u32x2 = u32x2::new(42, 42);
+        let b: u32x2 = u32x2::new(1, 2);
+        let e: u32x2 = u32x2::new(41, 40);
+        let r: u32x2 = transmute(vqsub_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsubq_u32() {
+        let a: u32x4 = u32x4::new(42, 42, 42, 42);
+        let b: u32x4 = u32x4::new(1, 2, 3, 4);
+        let e: u32x4 = u32x4::new(41, 40, 39, 38);
+        let r: u32x4 = transmute(vqsubq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsub_u64() {
+        let a: u64x1 = u64x1::new(42);
+        let b: u64x1 = u64x1::new(1);
+        let e: u64x1 = u64x1::new(41);
+        let r: u64x1 = transmute(vqsub_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsubq_u64() {
+        let a: u64x2 = u64x2::new(42, 42);
+        let b: u64x2 = u64x2::new(1, 2);
+        let e: u64x2 = u64x2::new(41, 40);
+        let r: u64x2 = transmute(vqsubq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsub_s8() {
+        let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i8x8 = i8x8::new(41, 40, 39, 38, 37, 36, 35, 34);
+        let r: i8x8 = transmute(vqsub_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsubq_s8() {
+        let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e: i8x16 = i8x16::new(41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26);
+        let r: i8x16 = transmute(vqsubq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsub_s16() {
+        let a: i16x4 = i16x4::new(42, 42, 42, 42);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let e: i16x4 = i16x4::new(41, 40, 39, 38);
+        let r: i16x4 = transmute(vqsub_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsubq_s16() {
+        let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i16x8 = i16x8::new(41, 40, 39, 38, 37, 36, 35, 34);
+        let r: i16x8 = transmute(vqsubq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsub_s32() {
+        let a: i32x2 = i32x2::new(42, 42);
+        let b: i32x2 = i32x2::new(1, 2);
+        let e: i32x2 = i32x2::new(41, 40);
+        let r: i32x2 = transmute(vqsub_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsubq_s32() {
+        let a: i32x4 = i32x4::new(42, 42, 42, 42);
+        let b: i32x4 = i32x4::new(1, 2, 3, 4);
+        let e: i32x4 = i32x4::new(41, 40, 39, 38);
+        let r: i32x4 = transmute(vqsubq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsub_s64() {
+        let a: i64x1 = i64x1::new(42);
+        let b: i64x1 = i64x1::new(1);
+        let e: i64x1 = i64x1::new(41);
+        let r: i64x1 = transmute(vqsub_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqsubq_s64() {
+        let a: i64x2 = i64x2::new(42, 42);
+        let b: i64x2 = i64x2::new(1, 2);
+        let e: i64x2 = i64x2::new(41, 40);
+        let r: i64x2 = transmute(vqsubq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
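+    // vhadd_* compute a halving add, (a + b) >> 1, without intermediate overflow;
+    // the sum is truncated, so 42 + 1 halves to 21 while 42 + 2 halves to 22.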
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhadd_u8() {
+        let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u8x8 = u8x8::new(21, 22, 22, 23, 23, 24, 24, 25);
+        let r: u8x8 = transmute(vhadd_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhaddq_u8() {
+        let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e: u8x16 = u8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29);
+        let r: u8x16 = transmute(vhaddq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhadd_u16() {
+        let a: u16x4 = u16x4::new(42, 42, 42, 42);
+        let b: u16x4 = u16x4::new(1, 2, 3, 4);
+        let e: u16x4 = u16x4::new(21, 22, 22, 23);
+        let r: u16x4 = transmute(vhadd_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhaddq_u16() {
+        let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u16x8 = u16x8::new(21, 22, 22, 23, 23, 24, 24, 25);
+        let r: u16x8 = transmute(vhaddq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhadd_u32() {
+        let a: u32x2 = u32x2::new(42, 42);
+        let b: u32x2 = u32x2::new(1, 2);
+        let e: u32x2 = u32x2::new(21, 22);
+        let r: u32x2 = transmute(vhadd_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhaddq_u32() {
+        let a: u32x4 = u32x4::new(42, 42, 42, 42);
+        let b: u32x4 = u32x4::new(1, 2, 3, 4);
+        let e: u32x4 = u32x4::new(21, 22, 22, 23);
+        let r: u32x4 = transmute(vhaddq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhadd_s8() {
+        let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i8x8 = i8x8::new(21, 22, 22, 23, 23, 24, 24, 25);
+        let r: i8x8 = transmute(vhadd_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhaddq_s8() {
+        let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e: i8x16 = i8x16::new(21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29);
+        let r: i8x16 = transmute(vhaddq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhadd_s16() {
+        let a: i16x4 = i16x4::new(42, 42, 42, 42);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let e: i16x4 = i16x4::new(21, 22, 22, 23);
+        let r: i16x4 = transmute(vhadd_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhaddq_s16() {
+        let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i16x8 = i16x8::new(21, 22, 22, 23, 23, 24, 24, 25);
+        let r: i16x8 = transmute(vhaddq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhadd_s32() {
+        let a: i32x2 = i32x2::new(42, 42);
+        let b: i32x2 = i32x2::new(1, 2);
+        let e: i32x2 = i32x2::new(21, 22);
+        let r: i32x2 = transmute(vhadd_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhaddq_s32() {
+        let a: i32x4 = i32x4::new(42, 42, 42, 42);
+        let b: i32x4 = i32x4::new(1, 2, 3, 4);
+        let e: i32x4 = i32x4::new(21, 22, 22, 23);
+        let r: i32x4 = transmute(vhaddq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
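+    // vrhadd_* are the rounding variant, (a + b + 1) >> 1, so the same inputs
+    // that truncated to 21 above round up to 22 here.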
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhadd_u8() {
+        let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u8x8 = u8x8::new(22, 22, 23, 23, 24, 24, 25, 25);
+        let r: u8x8 = transmute(vrhadd_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhaddq_u8() {
+        let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e: u8x16 = u8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29);
+        let r: u8x16 = transmute(vrhaddq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhadd_u16() {
+        let a: u16x4 = u16x4::new(42, 42, 42, 42);
+        let b: u16x4 = u16x4::new(1, 2, 3, 4);
+        let e: u16x4 = u16x4::new(22, 22, 23, 23);
+        let r: u16x4 = transmute(vrhadd_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhaddq_u16() {
+        let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u16x8 = u16x8::new(22, 22, 23, 23, 24, 24, 25, 25);
+        let r: u16x8 = transmute(vrhaddq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhadd_u32() {
+        let a: u32x2 = u32x2::new(42, 42);
+        let b: u32x2 = u32x2::new(1, 2);
+        let e: u32x2 = u32x2::new(22, 22);
+        let r: u32x2 = transmute(vrhadd_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhaddq_u32() {
+        let a: u32x4 = u32x4::new(42, 42, 42, 42);
+        let b: u32x4 = u32x4::new(1, 2, 3, 4);
+        let e: u32x4 = u32x4::new(22, 22, 23, 23);
+        let r: u32x4 = transmute(vrhaddq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhadd_s8() {
+        let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i8x8 = i8x8::new(22, 22, 23, 23, 24, 24, 25, 25);
+        let r: i8x8 = transmute(vrhadd_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhaddq_s8() {
+        let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e: i8x16 = i8x16::new(22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29);
+        let r: i8x16 = transmute(vrhaddq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhadd_s16() {
+        let a: i16x4 = i16x4::new(42, 42, 42, 42);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let e: i16x4 = i16x4::new(22, 22, 23, 23);
+        let r: i16x4 = transmute(vrhadd_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhaddq_s16() {
+        let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i16x8 = i16x8::new(22, 22, 23, 23, 24, 24, 25, 25);
+        let r: i16x8 = transmute(vrhaddq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhadd_s32() {
+        let a: i32x2 = i32x2::new(42, 42);
+        let b: i32x2 = i32x2::new(1, 2);
+        let e: i32x2 = i32x2::new(22, 22);
+        let r: i32x2 = transmute(vrhadd_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrhaddq_s32() {
+        let a: i32x4 = i32x4::new(42, 42, 42, 42);
+        let b: i32x4 = i32x4::new(1, 2, 3, 4);
+        let e: i32x4 = i32x4::new(22, 22, 23, 23);
+        let r: i32x4 = transmute(vrhaddq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
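+    // vrndn_* round to nearest with ties to even: -1.5 -> -2.0, but 0.5 -> 0.0
+    // and 2.5 -> 2.0.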
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndn_f32() {
+        let a: f32x2 = f32x2::new(-1.5, 0.5);
+        let e: f32x2 = f32x2::new(-2.0, 0.0);
+        let r: f32x2 = transmute(vrndn_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrndnq_f32() {
+        let a: f32x4 = f32x4::new(-1.5, 0.5, 1.5, 2.5);
+        let e: f32x4 = f32x4::new(-2.0, 0.0, 2.0, 2.0);
+        let r: f32x4 = transmute(vrndnq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
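+    // vqadd_* add with saturation at the type's maximum/minimum, mirroring the
+    // vqsub_* tests above.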
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqadd_u8() {
+        let a: u8x8 = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u8x8 = u8x8::new(43, 44, 45, 46, 47, 48, 49, 50);
+        let r: u8x8 = transmute(vqadd_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqaddq_u8() {
+        let a: u8x16 = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e: u8x16 = u8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58);
+        let r: u8x16 = transmute(vqaddq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqadd_u16() {
+        let a: u16x4 = u16x4::new(42, 42, 42, 42);
+        let b: u16x4 = u16x4::new(1, 2, 3, 4);
+        let e: u16x4 = u16x4::new(43, 44, 45, 46);
+        let r: u16x4 = transmute(vqadd_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqaddq_u16() {
+        let a: u16x8 = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: u16x8 = u16x8::new(43, 44, 45, 46, 47, 48, 49, 50);
+        let r: u16x8 = transmute(vqaddq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqadd_u32() {
+        let a: u32x2 = u32x2::new(42, 42);
+        let b: u32x2 = u32x2::new(1, 2);
+        let e: u32x2 = u32x2::new(43, 44);
+        let r: u32x2 = transmute(vqadd_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqaddq_u32() {
+        let a: u32x4 = u32x4::new(42, 42, 42, 42);
+        let b: u32x4 = u32x4::new(1, 2, 3, 4);
+        let e: u32x4 = u32x4::new(43, 44, 45, 46);
+        let r: u32x4 = transmute(vqaddq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqadd_u64() {
+        let a: u64x1 = u64x1::new(42);
+        let b: u64x1 = u64x1::new(1);
+        let e: u64x1 = u64x1::new(43);
+        let r: u64x1 = transmute(vqadd_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqaddq_u64() {
+        let a: u64x2 = u64x2::new(42, 42);
+        let b: u64x2 = u64x2::new(1, 2);
+        let e: u64x2 = u64x2::new(43, 44);
+        let r: u64x2 = transmute(vqaddq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqadd_s8() {
+        let a: i8x8 = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i8x8 = i8x8::new(43, 44, 45, 46, 47, 48, 49, 50);
+        let r: i8x8 = transmute(vqadd_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqaddq_s8() {
+        let a: i8x16 = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let e: i8x16 = i8x16::new(43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58);
+        let r: i8x16 = transmute(vqaddq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqadd_s16() {
+        let a: i16x4 = i16x4::new(42, 42, 42, 42);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let e: i16x4 = i16x4::new(43, 44, 45, 46);
+        let r: i16x4 = transmute(vqadd_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqaddq_s16() {
+        let a: i16x8 = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42);
+        let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let e: i16x8 = i16x8::new(43, 44, 45, 46, 47, 48, 49, 50);
+        let r: i16x8 = transmute(vqaddq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqadd_s32() {
+        let a: i32x2 = i32x2::new(42, 42);
+        let b: i32x2 = i32x2::new(1, 2);
+        let e: i32x2 = i32x2::new(43, 44);
+        let r: i32x2 = transmute(vqadd_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqaddq_s32() {
+        let a: i32x4 = i32x4::new(42, 42, 42, 42);
+        let b: i32x4 = i32x4::new(1, 2, 3, 4);
+        let e: i32x4 = i32x4::new(43, 44, 45, 46);
+        let r: i32x4 = transmute(vqaddq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqadd_s64() {
+        let a: i64x1 = i64x1::new(42);
+        let b: i64x1 = i64x1::new(1);
+        let e: i64x1 = i64x1::new(43);
+        let r: i64x1 = transmute(vqadd_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqaddq_s64() {
+        let a: i64x2 = i64x2::new(42, 42);
+        let b: i64x2 = i64x2::new(1, 2);
+        let e: i64x2 = i64x2::new(43, 44);
+        let r: i64x2 = transmute(vqaddq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
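+    // vld1*_xN load N consecutive vectors from memory. Reading from a[1..] gives
+    // a pointer that is only element-aligned, not vector-aligned, which these
+    // intrinsics must tolerate.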
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1_s8_x2() {
+        let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+        let e: [i8x8; 2] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16)];
+        let r: [i8x8; 2] = transmute(vld1_s8_x2(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1_s16_x2() {
+        let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+        let e: [i16x4; 2] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8)];
+        let r: [i16x4; 2] = transmute(vld1_s16_x2(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1_s32_x2() {
+        let a: [i32; 5] = [0, 1, 2, 3, 4];
+        let e: [i32x2; 2] = [i32x2::new(1, 2), i32x2::new(3, 4)];
+        let r: [i32x2; 2] = transmute(vld1_s32_x2(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1_s64_x2() {
+        let a: [i64; 3] = [0, 1, 2];
+        let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)];
+        let r: [i64x1; 2] = transmute(vld1_s64_x2(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1q_s8_x2() {
+        let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32];
+        let e: [i8x16; 2] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)];
+        let r: [i8x16; 2] = transmute(vld1q_s8_x2(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1q_s16_x2() {
+        let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+        let e: [i16x8; 2] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16)];
+        let r: [i16x8; 2] = transmute(vld1q_s16_x2(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1q_s32_x2() {
+        let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8];
+        let e: [i32x4; 2] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8)];
+        let r: [i32x4; 2] = transmute(vld1q_s32_x2(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1q_s64_x2() {
+        let a: [i64; 5] = [0, 1, 2, 3, 4];
+        let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(3, 4)];
+        let r: [i64x2; 2] = transmute(vld1q_s64_x2(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1_s8_x3() {
+        let a: [i8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
+        let e: [i8x8; 3] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24)];
+        let r: [i8x8; 3] = transmute(vld1_s8_x3(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1_s16_x3() {
+        let a: [i16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
+        let e: [i16x4; 3] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12)];
+        let r: [i16x4; 3] = transmute(vld1_s16_x3(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1_s32_x3() {
+        let a: [i32; 7] = [0, 1, 2, 3, 4, 5, 6];
+        let e: [i32x2; 3] = [i32x2::new(1, 2), i32x2::new(3, 4), i32x2::new(5, 6)];
+        let r: [i32x2; 3] = transmute(vld1_s32_x3(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1_s64_x3() {
+        let a: [i64; 4] = [0, 1, 2, 3];
+        let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(3)];
+        let r: [i64x1; 3] = transmute(vld1_s64_x3(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1q_s8_x3() {
+        let a: [i8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16];
+        let e: [i8x16; 3] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)];
+        let r: [i8x16; 3] = transmute(vld1q_s8_x3(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1q_s16_x3() {
+        let a: [i16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24];
+        let e: [i16x8; 3] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24)];
+        let r: [i16x8; 3] = transmute(vld1q_s16_x3(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1q_s32_x3() {
+        let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12];
+        let e: [i32x4; 3] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8), i32x4::new(9, 10, 11, 12)];
+        let r: [i32x4; 3] = transmute(vld1q_s32_x3(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vld1q_s64_x3() {
+        let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6];
+        let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6)];
+        let r: [i64x2; 3] = transmute(vld1q_s64_x3(a[1..].as_ptr()));
+        assert_eq!(r, e);
+    }
+
5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [i16x8; 3] = transmute(vld1q_s16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s32_x3() { + let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i32x4; 3] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8), i32x4::new(9, 10, 11, 12)]; + let r: [i32x4; 3] = transmute(vld1q_s32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s64_x3() { + let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6)]; + let r: [i64x2; 3] = transmute(vld1q_s64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s8_x4() { + let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x8; 4] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24), i8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x8; 4] = transmute(vld1_s8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s16_x4() { + let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16x4; 4] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12), i16x4::new(13, 14, 15, 16)]; + let r: [i16x4; 4] = transmute(vld1_s16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s32_x4() { + let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i32x2; 4] = [i32x2::new(1, 2), i32x2::new(3, 4), i32x2::new(5, 6), i32x2::new(7, 8)]; + let r: [i32x2; 4] = transmute(vld1_s32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_s64_x4() { + let a: [i64; 5] = [0, 1, 2, 3, 4]; + let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(3), i64x1::new(4)]; + let r: [i64x1; 4] = transmute(vld1_s64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s8_x4() { + let a: [i8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x16; 4] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x16; 4] = transmute(vld1q_s8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s16_x4() { + let a: [i16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i16x8; 4] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24), i16x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i16x8; 4] = transmute(vld1q_s16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s32_x4() { + let a: [i32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 
13, 14, 15, 16]; + let e: [i32x4; 4] = [i32x4::new(1, 2, 3, 4), i32x4::new(5, 6, 7, 8), i32x4::new(9, 10, 11, 12), i32x4::new(13, 14, 15, 16)]; + let r: [i32x4; 4] = transmute(vld1q_s32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_s64_x4() { + let a: [i64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6), i64x2::new(7, 8)]; + let r: [i64x2; 4] = transmute(vld1q_s64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u8_x2() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8x8; 2] = [u8x8::new(1, 2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [u8x8; 2] = transmute(vld1_u8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u16_x2() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u16x4; 2] = [u16x4::new(1, 2, 3, 4), u16x4::new(5, 6, 7, 8)]; + let r: [u16x4; 2] = transmute(vld1_u16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u32_x2() { + let a: [u32; 5] = [0, 1, 2, 3, 4]; + let e: [u32x2; 2] = [u32x2::new(1, 2), u32x2::new(3, 4)]; + let r: [u32x2; 2] = transmute(vld1_u32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u64_x2() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)]; + let r: [u64x1; 2] = transmute(vld1_u64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u8_x2() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8x16; 2] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [u8x16; 2] = transmute(vld1q_u8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u16_x2() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16x8; 2] = [u16x8::new(1, 2, 3, 4, 5, 6, 7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [u16x8; 2] = transmute(vld1q_u16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u32_x2() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u32x4; 2] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8)]; + let r: [u32x4; 2] = transmute(vld1q_u32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u64_x2() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64x2; 2] = [u64x2::new(1, 2), u64x2::new(3, 4)]; + let r: [u64x2; 2] = transmute(vld1q_u64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u8_x3() { + let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u8x8; 3] = [u8x8::new(1, 2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16), u8x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [u8x8; 3] = transmute(vld1_u8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u16_x3() { + let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u16x4; 3] = [u16x4::new(1, 2, 3, 4), 
u16x4::new(5, 6, 7, 8), u16x4::new(9, 10, 11, 12)]; + let r: [u16x4; 3] = transmute(vld1_u16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u32_x3() { + let a: [u32; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u32x2; 3] = [u32x2::new(1, 2), u32x2::new(3, 4), u32x2::new(5, 6)]; + let r: [u32x2; 3] = transmute(vld1_u32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u64_x3() { + let a: [u64; 4] = [0, 1, 2, 3]; + let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(3)]; + let r: [u64x1; 3] = transmute(vld1_u64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u8_x3() { + let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8x16; 3] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [u8x16; 3] = transmute(vld1q_u8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u16_x3() { + let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u16x8; 3] = [u16x8::new(1, 2, 3, 4, 5, 6, 7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16), u16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [u16x8; 3] = transmute(vld1q_u16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u32_x3() { + let a: [u32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u32x4; 3] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8), u32x4::new(9, 10, 11, 12)]; + let r: [u32x4; 3] = transmute(vld1q_u32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u64_x3() { + let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u64x2; 3] = [u64x2::new(1, 2), u64x2::new(3, 4), u64x2::new(5, 6)]; + let r: [u64x2; 3] = transmute(vld1q_u64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u8_x4() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8x8; 4] = [u8x8::new(1, 2, 3, 4, 5, 6, 7, 8), u8x8::new(9, 10, 11, 12, 13, 14, 15, 16), u8x8::new(17, 18, 19, 20, 21, 22, 23, 24), u8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [u8x8; 4] = transmute(vld1_u8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u16_x4() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16x4; 4] = [u16x4::new(1, 2, 3, 4), u16x4::new(5, 6, 7, 8), u16x4::new(9, 10, 11, 12), u16x4::new(13, 14, 15, 16)]; + let r: [u16x4; 4] = transmute(vld1_u16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u32_x4() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u32x2; 4] = [u32x2::new(1, 2), u32x2::new(3, 4), u32x2::new(5, 6), u32x2::new(7, 8)]; + let r: [u32x2; 4] = transmute(vld1_u32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_u64_x4() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64x1; 4] = [u64x1::new(1), 
u64x1::new(2), u64x1::new(3), u64x1::new(4)]; + let r: [u64x1; 4] = transmute(vld1_u64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u8_x4() { + let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8x16; 4] = [u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), u8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [u8x16; 4] = transmute(vld1q_u8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u16_x4() { + let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u16x8; 4] = [u16x8::new(1, 2, 3, 4, 5, 6, 7, 8), u16x8::new(9, 10, 11, 12, 13, 14, 15, 16), u16x8::new(17, 18, 19, 20, 21, 22, 23, 24), u16x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [u16x8; 4] = transmute(vld1q_u16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u32_x4() { + let a: [u32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u32x4; 4] = [u32x4::new(1, 2, 3, 4), u32x4::new(5, 6, 7, 8), u32x4::new(9, 10, 11, 12), u32x4::new(13, 14, 15, 16)]; + let r: [u32x4; 4] = transmute(vld1q_u32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_u64_x4() { + let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u64x2; 4] = [u64x2::new(1, 2), u64x2::new(3, 4), u64x2::new(5, 6), u64x2::new(7, 8)]; + let r: [u64x2; 4] = transmute(vld1q_u64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p8_x2() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8x8; 2] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i8x8; 2] = transmute(vld1_p8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p8_x3() { + let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i8x8; 3] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [i8x8; 3] = transmute(vld1_p8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p8_x4() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x8; 4] = [i8x8::new(1, 2, 3, 4, 5, 6, 7, 8), i8x8::new(9, 10, 11, 12, 13, 14, 15, 16), i8x8::new(17, 18, 19, 20, 21, 22, 23, 24), i8x8::new(25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x8; 4] = transmute(vld1_p8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p8_x2() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x16; 2] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 
19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x16; 2] = transmute(vld1q_p8_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p8_x3() { + let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8x16; 3] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i8x16; 3] = transmute(vld1q_p8_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p8_x4() { + let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8x16; 4] = [i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16), i8x16::new(17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32)]; + let r: [i8x16; 4] = transmute(vld1q_p8_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p16_x2() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i16x4; 2] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8)]; + let r: [i16x4; 2] = transmute(vld1_p16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p16_x3() { + let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i16x4; 3] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12)]; + let r: [i16x4; 3] = transmute(vld1_p16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p16_x4() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16x4; 4] = [i16x4::new(1, 2, 3, 4), i16x4::new(5, 6, 7, 8), i16x4::new(9, 10, 11, 12), i16x4::new(13, 14, 15, 16)]; + let r: [i16x4; 4] = transmute(vld1_p16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p16_x2() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16x8; 2] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16)]; + let r: [i16x8; 2] = transmute(vld1q_p16_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p16_x3() { + let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i16x8; 3] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24)]; + let r: [i16x8; 3] = transmute(vld1q_p16_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p16_x4() { + let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i16x8; 4] = [i16x8::new(1, 2, 3, 4, 5, 6, 7, 8), i16x8::new(9, 10, 11, 12, 13, 14, 15, 16), i16x8::new(17, 18, 19, 20, 21, 22, 23, 24), i16x8::new(25, 26, 27, 
28, 29, 30, 31, 32)]; + let r: [i16x8; 4] = transmute(vld1q_p16_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p64_x2() { + let a: [u64; 3] = [0, 1, 2]; + let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; + let r: [i64x1; 2] = transmute(vld1_p64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p64_x3() { + let a: [u64; 4] = [0, 1, 2, 3]; + let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(3)]; + let r: [i64x1; 3] = transmute(vld1_p64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_p64_x4() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(3), i64x1::new(4)]; + let r: [i64x1; 4] = transmute(vld1_p64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p64_x2() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [i64x2; 2] = [i64x2::new(1, 2), i64x2::new(3, 4)]; + let r: [i64x2; 2] = transmute(vld1q_p64_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p64_x3() { + let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [i64x2; 3] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6)]; + let r: [i64x2; 3] = transmute(vld1q_p64_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_p64_x4() { + let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i64x2; 4] = [i64x2::new(1, 2), i64x2::new(3, 4), i64x2::new(5, 6), i64x2::new(7, 8)]; + let r: [i64x2; 4] = transmute(vld1q_p64_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f32_x2() { + let a: [f32; 5] = [0., 1., 2., 3., 4.]; + let e: [f32x2; 2] = [f32x2::new(1., 2.), f32x2::new(3., 4.)]; + let r: [f32x2; 2] = transmute(vld1_f32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f32_x2() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f32x4; 2] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.)]; + let r: [f32x4; 2] = transmute(vld1q_f32_x2(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f32_x3() { + let a: [f32; 7] = [0., 1., 2., 3., 4., 5., 6.]; + let e: [f32x2; 3] = [f32x2::new(1., 2.), f32x2::new(3., 4.), f32x2::new(5., 6.)]; + let r: [f32x2; 3] = transmute(vld1_f32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f32_x3() { + let a: [f32; 13] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]; + let e: [f32x4; 3] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.), f32x4::new(9., 10., 11., 12.)]; + let r: [f32x4; 3] = transmute(vld1q_f32_x3(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_f32_x4() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f32x2; 4] = [f32x2::new(1., 2.), f32x2::new(3., 4.), f32x2::new(5., 6.), f32x2::new(7., 8.)]; + let r: [f32x2; 4] = transmute(vld1_f32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_f32_x4() { + let a: [f32; 17] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]; + let e: [f32x4; 4] = [f32x4::new(1., 2., 3., 4.), f32x4::new(5., 6., 7., 8.), f32x4::new(9., 10., 11., 12.), f32x4::new(13., 14., 15., 16.)]; + let r: [f32x4; 4] = 
transmute(vld1q_f32_x4(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_s8() { + let a: [i8; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let e: [i8x8; 2] = [i8x8::new(1, 2, 2, 3, 2, 3, 4, 5), i8x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; + let r: [i8x8; 2] = transmute(vld2_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_s16() { + let a: [i16; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; + let e: [i16x4; 2] = [i16x4::new(1, 2, 2, 3), i16x4::new(2, 3, 4, 5)]; + let r: [i16x4; 2] = transmute(vld2_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_s32() { + let a: [i32; 5] = [0, 1, 2, 2, 3]; + let e: [i32x2; 2] = [i32x2::new(1, 2), i32x2::new(2, 3)]; + let r: [i32x2; 2] = transmute(vld2_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_s8() { + let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9), i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)]; + let r: [i8x16; 2] = transmute(vld2q_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_s16() { + let a: [i16; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let e: [i16x8; 2] = [i16x8::new(1, 2, 2, 3, 2, 3, 4, 5), i16x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; + let r: [i16x8; 2] = transmute(vld2q_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_s32() { + let a: [i32; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; + let e: [i32x4; 2] = [i32x4::new(1, 2, 2, 3), i32x4::new(2, 3, 4, 5)]; + let r: [i32x4; 2] = transmute(vld2q_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_s64() { + let a: [i64; 3] = [0, 1, 2]; + let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; + let r: [i64x1; 2] = transmute(vld2_s64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_u8() { + let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let e: [u8x8; 2] = [u8x8::new(1, 2, 2, 3, 2, 3, 4, 5), u8x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; + let r: [u8x8; 2] = transmute(vld2_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_u16() { + let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; + let e: [u16x4; 2] = [u16x4::new(1, 2, 2, 3), u16x4::new(2, 3, 4, 5)]; + let r: [u16x4; 2] = transmute(vld2_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_u32() { + let a: [u32; 5] = [0, 1, 2, 2, 3]; + let e: [u32x2; 2] = [u32x2::new(1, 2), u32x2::new(2, 3)]; + let r: [u32x2; 2] = transmute(vld2_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_u8() { + let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let e: [u8x16; 2] = [u8x16::new(1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9), u8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)]; + let r: [u8x16; 2] = transmute(vld2q_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_u16() { + let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let e: [u16x8; 
2] = [u16x8::new(1, 2, 2, 3, 2, 3, 4, 5), u16x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; + let r: [u16x8; 2] = transmute(vld2q_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_u32() { + let a: [u32; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; + let e: [u32x4; 2] = [u32x4::new(1, 2, 2, 3), u32x4::new(2, 3, 4, 5)]; + let r: [u32x4; 2] = transmute(vld2q_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_p8() { + let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let e: [i8x8; 2] = [i8x8::new(1, 2, 2, 3, 2, 3, 4, 5), i8x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; + let r: [i8x8; 2] = transmute(vld2_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_p16() { + let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 4, 3, 5]; + let e: [i16x4; 2] = [i16x4::new(1, 2, 2, 3), i16x4::new(2, 3, 4, 5)]; + let r: [i16x4; 2] = transmute(vld2_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_p8() { + let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let e: [i8x16; 2] = [i8x16::new(1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9), i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)]; + let r: [i8x16; 2] = transmute(vld2q_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_p16() { + let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let e: [i16x8; 2] = [i16x8::new(1, 2, 2, 3, 2, 3, 4, 5), i16x8::new(2, 3, 4, 5, 6, 7, 8, 9)]; + let r: [i16x8; 2] = transmute(vld2q_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_u64() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(2)]; + let r: [u64x1; 2] = transmute(vld2_u64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_p64() { + let a: [u64; 3] = [0, 1, 2]; + let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(2)]; + let r: [i64x1; 2] = transmute(vld2_p64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_f32() { + let a: [f32; 5] = [0., 1., 2., 2., 3.]; + let e: [f32x2; 2] = [f32x2::new(1., 2.), f32x2::new(2., 3.)]; + let r: [f32x2; 2] = transmute(vld2_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_f32() { + let a: [f32; 9] = [0., 1., 2., 2., 3., 2., 4., 3., 5.]; + let e: [f32x4; 2] = [f32x4::new(1., 2., 2., 3.), f32x4::new(2., 3., 4., 5.)]; + let r: [f32x4; 2] = transmute(vld2q_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_s8() { + let a: [i8; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; + let e: [i8x8; 2] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x8; 2] = transmute(vld2_dup_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_s16() { + let a: [i16; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; + let e: [i16x4; 2] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; + let r: [i16x4; 2] = transmute(vld2_dup_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_s32() { + let a: [i32; 5] = [0, 1, 1, 2, 3]; + let e: [i32x2; 2] = [i32x2::new(1, 1), i32x2::new(1, 1)]; + let r: [i32x2; 2] = 
transmute(vld2_dup_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_s8() { + let a: [i8; 33] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let e: [i8x16; 2] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x16; 2] = transmute(vld2q_dup_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_s16() { + let a: [i16; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; + let e: [i16x8; 2] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i16x8; 2] = transmute(vld2q_dup_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_s32() { + let a: [i32; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; + let e: [i32x4; 2] = [i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1)]; + let r: [i32x4; 2] = transmute(vld2q_dup_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_s64() { + let a: [i64; 3] = [0, 1, 1]; + let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(1)]; + let r: [i64x1; 2] = transmute(vld2_dup_s64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_u8() { + let a: [u8; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; + let e: [u8x8; 2] = [u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u8x8; 2] = transmute(vld2_dup_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_u16() { + let a: [u16; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; + let e: [u16x4; 2] = [u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1)]; + let r: [u16x4; 2] = transmute(vld2_dup_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_u32() { + let a: [u32; 5] = [0, 1, 1, 2, 3]; + let e: [u32x2; 2] = [u32x2::new(1, 1), u32x2::new(1, 1)]; + let r: [u32x2; 2] = transmute(vld2_dup_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_u8() { + let a: [u8; 33] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let e: [u8x16; 2] = [u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u8x16; 2] = transmute(vld2q_dup_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_u16() { + let a: [u16; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; + let e: [u16x8; 2] = [u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u16x8; 2] = transmute(vld2q_dup_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_u32() { + let a: [u32; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; + let e: [u32x4; 2] = [u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1)]; + let r: [u32x4; 2] = transmute(vld2q_dup_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_p8() { + let a: [u8; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; + let e: [i8x8; 2] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x8; 2] = transmute(vld2_dup_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + 
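+    // Note on the vld2_dup tests: vld2_dup loads a single two-element
+    // structure from memory and replicates each element across every lane
+    // of its output vector. Since a[1..] begins [1, 1, ...], both expected
+    // vectors are all-ones in each of these cases.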
#[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_p16() { + let a: [u16; 9] = [0, 1, 1, 2, 3, 1, 4, 3, 5]; + let e: [i16x4; 2] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; + let r: [i16x4; 2] = transmute(vld2_dup_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_p8() { + let a: [u8; 33] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let e: [i8x16; 2] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x16; 2] = transmute(vld2q_dup_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_p16() { + let a: [u16; 17] = [0, 1, 1, 2, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9]; + let e: [i16x8; 2] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i16x8; 2] = transmute(vld2q_dup_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_u64() { + let a: [u64; 3] = [0, 1, 1]; + let e: [u64x1; 2] = [u64x1::new(1), u64x1::new(1)]; + let r: [u64x1; 2] = transmute(vld2_dup_u64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_p64() { + let a: [u64; 3] = [0, 1, 1]; + let e: [i64x1; 2] = [i64x1::new(1), i64x1::new(1)]; + let r: [i64x1; 2] = transmute(vld2_dup_p64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_dup_f32() { + let a: [f32; 5] = [0., 1., 1., 2., 3.]; + let e: [f32x2; 2] = [f32x2::new(1., 1.), f32x2::new(1., 1.)]; + let r: [f32x2; 2] = transmute(vld2_dup_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_dup_f32() { + let a: [f32; 9] = [0., 1., 1., 2., 3., 1., 4., 3., 5.]; + let e: [f32x4; 2] = [f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.)]; + let r: [f32x4; 2] = transmute(vld2q_dup_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_s8() { + let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i8x8; 2] = [i8x8::new(0, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [i8x8; 2] = [i8x8::new(1, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [i8x8; 2] = transmute(vld2_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_s16() { + let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x4; 2] = [i16x4::new(0, 2, 2, 14), i16x4::new(2, 16, 17, 18)]; + let e: [i16x4; 2] = [i16x4::new(1, 2, 2, 14), i16x4::new(2, 16, 17, 18)]; + let r: [i16x4; 2] = transmute(vld2_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_s32() { + let a: [i32; 5] = [0, 1, 2, 3, 4]; + let b: [i32x2; 2] = [i32x2::new(0, 2), i32x2::new(2, 14)]; + let e: [i32x2; 2] = [i32x2::new(1, 2), i32x2::new(2, 14)]; + let r: [i32x2; 2] = transmute(vld2_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_lane_s16() { + let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x8; 2] = [i16x8::new(0, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [i16x8; 2] = [i16x8::new(1, 2, 2, 14, 2, 16, 17, 18), 
i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [i16x8; 2] = transmute(vld2q_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_lane_s32() { + let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i32x4; 2] = [i32x4::new(0, 2, 2, 14), i32x4::new(2, 16, 17, 18)]; + let e: [i32x4; 2] = [i32x4::new(1, 2, 2, 14), i32x4::new(2, 16, 17, 18)]; + let r: [i32x4; 2] = transmute(vld2q_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_u8() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u8x8; 2] = [u8x8::new(0, 2, 2, 14, 2, 16, 17, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [u8x8; 2] = [u8x8::new(1, 2, 2, 14, 2, 16, 17, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [u8x8; 2] = transmute(vld2_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_u16() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u16x4; 2] = [u16x4::new(0, 2, 2, 14), u16x4::new(2, 16, 17, 18)]; + let e: [u16x4; 2] = [u16x4::new(1, 2, 2, 14), u16x4::new(2, 16, 17, 18)]; + let r: [u16x4; 2] = transmute(vld2_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_u32() { + let a: [u32; 5] = [0, 1, 2, 3, 4]; + let b: [u32x2; 2] = [u32x2::new(0, 2), u32x2::new(2, 14)]; + let e: [u32x2; 2] = [u32x2::new(1, 2), u32x2::new(2, 14)]; + let r: [u32x2; 2] = transmute(vld2_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_lane_u16() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u16x8; 2] = [u16x8::new(0, 2, 2, 14, 2, 16, 17, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [u16x8; 2] = [u16x8::new(1, 2, 2, 14, 2, 16, 17, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [u16x8; 2] = transmute(vld2q_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_lane_u32() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u32x4; 2] = [u32x4::new(0, 2, 2, 14), u32x4::new(2, 16, 17, 18)]; + let e: [u32x4; 2] = [u32x4::new(1, 2, 2, 14), u32x4::new(2, 16, 17, 18)]; + let r: [u32x4; 2] = transmute(vld2q_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_p8() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i8x8; 2] = [i8x8::new(0, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [i8x8; 2] = [i8x8::new(1, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [i8x8; 2] = transmute(vld2_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_p16() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x4; 2] = [i16x4::new(0, 2, 2, 14), i16x4::new(2, 16, 17, 18)]; + let e: [i16x4; 2] = [i16x4::new(1, 2, 2, 14), i16x4::new(2, 16, 17, 18)]; + let r: [i16x4; 2] = transmute(vld2_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_lane_p16() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x8; 2] = 
[i16x8::new(0, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [i16x8; 2] = [i16x8::new(1, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [i16x8; 2] = transmute(vld2q_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2_lane_f32() { + let a: [f32; 5] = [0., 1., 2., 3., 4.]; + let b: [f32x2; 2] = [f32x2::new(0., 2.), f32x2::new(2., 14.)]; + let e: [f32x2; 2] = [f32x2::new(1., 2.), f32x2::new(2., 14.)]; + let r: [f32x2; 2] = transmute(vld2_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld2q_lane_f32() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let b: [f32x4; 2] = [f32x4::new(0., 2., 2., 14.), f32x4::new(2., 16., 17., 18.)]; + let e: [f32x4; 2] = [f32x4::new(1., 2., 2., 14.), f32x4::new(2., 16., 17., 18.)]; + let r: [f32x4; 2] = transmute(vld2q_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_s8() { + let a: [i8; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let e: [i8x8; 3] = [i8x8::new(1, 2, 2, 4, 2, 4, 7, 8), i8x8::new(2, 4, 7, 8, 13, 14, 15, 16), i8x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; + let r: [i8x8; 3] = transmute(vld3_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_s16() { + let a: [i16; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let e: [i16x4; 3] = [i16x4::new(1, 2, 2, 4), i16x4::new(2, 4, 7, 8), i16x4::new(2, 4, 7, 8)]; + let r: [i16x4; 3] = transmute(vld3_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_s32() { + let a: [i32; 7] = [0, 1, 2, 2, 2, 4, 4]; + let e: [i32x2; 3] = [i32x2::new(1, 2), i32x2::new(2, 4), i32x2::new(2, 4)]; + let r: [i32x2; 3] = transmute(vld3_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_s8() { + let a: [i8; 49] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; + let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16), i8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48)]; + let r: [i8x16; 3] = transmute(vld3q_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_s16() { + let a: [i16; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let e: [i16x8; 3] = [i16x8::new(1, 2, 2, 4, 2, 4, 7, 8), i16x8::new(2, 4, 7, 8, 13, 14, 15, 16), i16x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; + let r: [i16x8; 3] = transmute(vld3q_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_s32() { + let a: [i32; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let e: [i32x4; 3] = [i32x4::new(1, 2, 2, 4), i32x4::new(2, 4, 7, 8), i32x4::new(2, 4, 7, 8)]; + let r: [i32x4; 3] = transmute(vld3q_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_s64() { + let a: [i64; 4] = [0, 1, 2, 2]; + let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)]; + let r: [i64x1; 3] = transmute(vld3_s64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + 
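+    // Note on the vld3 tests: vld3 deinterleaves three-element structures,
+    // so source element 3*i+k lands in lane i of output vector k. The
+    // expected vectors below are derived from the interleaved input arrays
+    // by that rule; the unsigned and polynomial variants reuse the same data.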
#[simd_test(enable = "neon")] + unsafe fn test_vld3_u8() { + let a: [u8; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let e: [u8x8; 3] = [u8x8::new(1, 2, 2, 4, 2, 4, 7, 8), u8x8::new(2, 4, 7, 8, 13, 14, 15, 16), u8x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; + let r: [u8x8; 3] = transmute(vld3_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_u16() { + let a: [u16; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let e: [u16x4; 3] = [u16x4::new(1, 2, 2, 4), u16x4::new(2, 4, 7, 8), u16x4::new(2, 4, 7, 8)]; + let r: [u16x4; 3] = transmute(vld3_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_u32() { + let a: [u32; 7] = [0, 1, 2, 2, 2, 4, 4]; + let e: [u32x2; 3] = [u32x2::new(1, 2), u32x2::new(2, 4), u32x2::new(2, 4)]; + let r: [u32x2; 3] = transmute(vld3_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_u8() { + let a: [u8; 49] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; + let e: [u8x16; 3] = [u8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16), u8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32), u8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48)]; + let r: [u8x16; 3] = transmute(vld3q_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_u16() { + let a: [u16; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let e: [u16x8; 3] = [u16x8::new(1, 2, 2, 4, 2, 4, 7, 8), u16x8::new(2, 4, 7, 8, 13, 14, 15, 16), u16x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; + let r: [u16x8; 3] = transmute(vld3q_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_u32() { + let a: [u32; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let e: [u32x4; 3] = [u32x4::new(1, 2, 2, 4), u32x4::new(2, 4, 7, 8), u32x4::new(2, 4, 7, 8)]; + let r: [u32x4; 3] = transmute(vld3q_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_p8() { + let a: [u8; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let e: [i8x8; 3] = [i8x8::new(1, 2, 2, 4, 2, 4, 7, 8), i8x8::new(2, 4, 7, 8, 13, 14, 15, 16), i8x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; + let r: [i8x8; 3] = transmute(vld3_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_p16() { + let a: [u16; 13] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let e: [i16x4; 3] = [i16x4::new(1, 2, 2, 4), i16x4::new(2, 4, 7, 8), i16x4::new(2, 4, 7, 8)]; + let r: [i16x4; 3] = transmute(vld3_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_p8() { + let a: [u8; 49] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; + let e: [i8x16; 3] = [i8x16::new(1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16), i8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32), i8x16::new(2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48)]; + let r: [i8x16; 3] = transmute(vld3q_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vld3q_p16() { + let a: [u16; 25] = [0, 1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let e: [i16x8; 3] = [i16x8::new(1, 2, 2, 4, 2, 4, 7, 8), i16x8::new(2, 4, 7, 8, 13, 14, 15, 16), i16x8::new(2, 4, 7, 8, 13, 14, 15, 16)]; + let r: [i16x8; 3] = transmute(vld3q_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_u64() { + let a: [u64; 4] = [0, 1, 2, 2]; + let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(2), u64x1::new(2)]; + let r: [u64x1; 3] = transmute(vld3_u64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_p64() { + let a: [u64; 4] = [0, 1, 2, 2]; + let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(2), i64x1::new(2)]; + let r: [i64x1; 3] = transmute(vld3_p64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_f32() { + let a: [f32; 7] = [0., 1., 2., 2., 2., 4., 4.]; + let e: [f32x2; 3] = [f32x2::new(1., 2.), f32x2::new(2., 4.), f32x2::new(2., 4.)]; + let r: [f32x2; 3] = transmute(vld3_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_f32() { + let a: [f32; 13] = [0., 1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.]; + let e: [f32x4; 3] = [f32x4::new(1., 2., 2., 4.), f32x4::new(2., 4., 7., 8.), f32x4::new(2., 4., 7., 8.)]; + let r: [f32x4; 3] = transmute(vld3q_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_s8() { + let a: [i8; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; + let e: [i8x8; 3] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x8; 3] = transmute(vld3_dup_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_s16() { + let a: [i16; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; + let e: [i16x4; 3] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; + let r: [i16x4; 3] = transmute(vld3_dup_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_s32() { + let a: [i32; 7] = [0, 1, 1, 1, 3, 1, 4]; + let e: [i32x2; 3] = [i32x2::new(1, 1), i32x2::new(1, 1), i32x2::new(1, 1)]; + let r: [i32x2; 3] = transmute(vld3_dup_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_s8() { + let a: [i8; 49] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17]; + let e: [i8x16; 3] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x16; 3] = transmute(vld3q_dup_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_s16() { + let a: [i16; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; + let e: [i16x8; 3] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i16x8; 3] = transmute(vld3q_dup_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_s32() { + let a: [i32; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; + let e: [i32x4; 3] = [i32x4::new(1, 1, 1, 1), 
i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1)]; + let r: [i32x4; 3] = transmute(vld3q_dup_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_s64() { + let a: [i64; 4] = [0, 1, 1, 1]; + let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(1), i64x1::new(1)]; + let r: [i64x1; 3] = transmute(vld3_dup_s64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_u8() { + let a: [u8; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; + let e: [u8x8; 3] = [u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u8x8; 3] = transmute(vld3_dup_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_u16() { + let a: [u16; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; + let e: [u16x4; 3] = [u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1)]; + let r: [u16x4; 3] = transmute(vld3_dup_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_u32() { + let a: [u32; 7] = [0, 1, 1, 1, 3, 1, 4]; + let e: [u32x2; 3] = [u32x2::new(1, 1), u32x2::new(1, 1), u32x2::new(1, 1)]; + let r: [u32x2; 3] = transmute(vld3_dup_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_u8() { + let a: [u8; 49] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17]; + let e: [u8x16; 3] = [u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u8x16; 3] = transmute(vld3q_dup_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_u16() { + let a: [u16; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; + let e: [u16x8; 3] = [u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u16x8; 3] = transmute(vld3q_dup_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_u32() { + let a: [u32; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; + let e: [u32x4; 3] = [u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1)]; + let r: [u32x4; 3] = transmute(vld3q_dup_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_p8() { + let a: [u8; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; + let e: [i8x8; 3] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x8; 3] = transmute(vld3_dup_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_p16() { + let a: [u16; 13] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7]; + let e: [i16x4; 3] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; + let r: [i16x4; 3] = transmute(vld3_dup_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_p8() { + let a: [u8; 49] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 15, 8, 16, 9, 17, 6, 14, 7, 
15, 8, 16, 9, 17]; + let e: [i8x16; 3] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x16; 3] = transmute(vld3q_dup_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_p16() { + let a: [u16; 25] = [0, 1, 1, 1, 3, 1, 4, 3, 5, 1, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13]; + let e: [i16x8; 3] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i16x8; 3] = transmute(vld3q_dup_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_u64() { + let a: [u64; 4] = [0, 1, 1, 1]; + let e: [u64x1; 3] = [u64x1::new(1), u64x1::new(1), u64x1::new(1)]; + let r: [u64x1; 3] = transmute(vld3_dup_u64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_p64() { + let a: [u64; 4] = [0, 1, 1, 1]; + let e: [i64x1; 3] = [i64x1::new(1), i64x1::new(1), i64x1::new(1)]; + let r: [i64x1; 3] = transmute(vld3_dup_p64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_dup_f32() { + let a: [f32; 7] = [0., 1., 1., 1., 3., 1., 4.]; + let e: [f32x2; 3] = [f32x2::new(1., 1.), f32x2::new(1., 1.), f32x2::new(1., 1.)]; + let r: [f32x2; 3] = transmute(vld3_dup_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_dup_f32() { + let a: [f32; 13] = [0., 1., 1., 1., 3., 1., 4., 3., 5., 1., 4., 3., 5.]; + let e: [f32x4; 3] = [f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.)]; + let r: [f32x4; 3] = transmute(vld3q_dup_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_s8() { + let a: [i8; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i8x8; 3] = [i8x8::new(0, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26), i8x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; + let e: [i8x8; 3] = [i8x8::new(1, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26), i8x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; + let r: [i8x8; 3] = transmute(vld3_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_s16() { + let a: [i16; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; + let b: [i16x4; 3] = [i16x4::new(0, 2, 2, 14), i16x4::new(2, 16, 17, 18), i16x4::new(2, 20, 21, 22)]; + let e: [i16x4; 3] = [i16x4::new(1, 2, 2, 14), i16x4::new(2, 16, 17, 18), i16x4::new(2, 20, 21, 22)]; + let r: [i16x4; 3] = transmute(vld3_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_s32() { + let a: [i32; 7] = [0, 1, 2, 2, 4, 5, 6]; + let b: [i32x2; 3] = [i32x2::new(0, 2), i32x2::new(2, 14), i32x2::new(2, 16)]; + let e: [i32x2; 3] = [i32x2::new(1, 2), i32x2::new(2, 14), i32x2::new(2, 16)]; + let r: [i32x2; 3] = transmute(vld3_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_lane_s16() { + let a: [i16; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x8; 3] = [i16x8::new(0, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26), i16x8::new(11, 12, 13, 14, 15, 16, 17, 
18)]; + let e: [i16x8; 3] = [i16x8::new(1, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26), i16x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; + let r: [i16x8; 3] = transmute(vld3q_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_lane_s32() { + let a: [i32; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; + let b: [i32x4; 3] = [i32x4::new(0, 2, 2, 14), i32x4::new(2, 16, 17, 18), i32x4::new(2, 20, 21, 22)]; + let e: [i32x4; 3] = [i32x4::new(1, 2, 2, 14), i32x4::new(2, 16, 17, 18), i32x4::new(2, 20, 21, 22)]; + let r: [i32x4; 3] = transmute(vld3q_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_u8() { + let a: [u8; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u8x8; 3] = [u8x8::new(0, 2, 2, 14, 2, 16, 17, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26), u8x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; + let e: [u8x8; 3] = [u8x8::new(1, 2, 2, 14, 2, 16, 17, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26), u8x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; + let r: [u8x8; 3] = transmute(vld3_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_u16() { + let a: [u16; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; + let b: [u16x4; 3] = [u16x4::new(0, 2, 2, 14), u16x4::new(2, 16, 17, 18), u16x4::new(2, 20, 21, 22)]; + let e: [u16x4; 3] = [u16x4::new(1, 2, 2, 14), u16x4::new(2, 16, 17, 18), u16x4::new(2, 20, 21, 22)]; + let r: [u16x4; 3] = transmute(vld3_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_u32() { + let a: [u32; 7] = [0, 1, 2, 2, 4, 5, 6]; + let b: [u32x2; 3] = [u32x2::new(0, 2), u32x2::new(2, 14), u32x2::new(2, 16)]; + let e: [u32x2; 3] = [u32x2::new(1, 2), u32x2::new(2, 14), u32x2::new(2, 16)]; + let r: [u32x2; 3] = transmute(vld3_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_lane_u16() { + let a: [u16; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u16x8; 3] = [u16x8::new(0, 2, 2, 14, 2, 16, 17, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26), u16x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; + let e: [u16x8; 3] = [u16x8::new(1, 2, 2, 14, 2, 16, 17, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26), u16x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; + let r: [u16x8; 3] = transmute(vld3q_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_lane_u32() { + let a: [u32; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; + let b: [u32x4; 3] = [u32x4::new(0, 2, 2, 14), u32x4::new(2, 16, 17, 18), u32x4::new(2, 20, 21, 22)]; + let e: [u32x4; 3] = [u32x4::new(1, 2, 2, 14), u32x4::new(2, 16, 17, 18), u32x4::new(2, 20, 21, 22)]; + let r: [u32x4; 3] = transmute(vld3q_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_p8() { + let a: [u8; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i8x8; 3] = [i8x8::new(0, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26), i8x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; + let e: [i8x8; 3] = [i8x8::new(1, 2, 2, 14, 2, 16, 17, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26), i8x8::new(2, 
12, 13, 14, 15, 16, 17, 18)]; + let r: [i8x8; 3] = transmute(vld3_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_p16() { + let a: [u16; 13] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4]; + let b: [i16x4; 3] = [i16x4::new(0, 2, 2, 14), i16x4::new(2, 16, 17, 18), i16x4::new(2, 20, 21, 22)]; + let e: [i16x4; 3] = [i16x4::new(1, 2, 2, 14), i16x4::new(2, 16, 17, 18), i16x4::new(2, 20, 21, 22)]; + let r: [i16x4; 3] = transmute(vld3_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_lane_p16() { + let a: [u16; 25] = [0, 1, 2, 2, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x8; 3] = [i16x8::new(0, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26), i16x8::new(11, 12, 13, 14, 15, 16, 17, 18)]; + let e: [i16x8; 3] = [i16x8::new(1, 2, 2, 14, 2, 16, 17, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26), i16x8::new(2, 12, 13, 14, 15, 16, 17, 18)]; + let r: [i16x8; 3] = transmute(vld3q_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3_lane_f32() { + let a: [f32; 7] = [0., 1., 2., 2., 4., 5., 6.]; + let b: [f32x2; 3] = [f32x2::new(0., 2.), f32x2::new(2., 14.), f32x2::new(9., 16.)]; + let e: [f32x2; 3] = [f32x2::new(1., 2.), f32x2::new(2., 14.), f32x2::new(2., 16.)]; + let r: [f32x2; 3] = transmute(vld3_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld3q_lane_f32() { + let a: [f32; 13] = [0., 1., 2., 2., 4., 5., 6., 7., 8., 5., 6., 7., 8.]; + let b: [f32x4; 3] = [f32x4::new(0., 2., 2., 14.), f32x4::new(9., 16., 17., 18.), f32x4::new(5., 6., 7., 8.)]; + let e: [f32x4; 3] = [f32x4::new(1., 2., 2., 14.), f32x4::new(2., 16., 17., 18.), f32x4::new(2., 6., 7., 8.)]; + let r: [f32x4; 3] = transmute(vld3q_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_s8() { + let a: [i8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [i8x8; 4] = [i8x8::new(1, 2, 2, 6, 2, 6, 6, 8), i8x8::new(2, 6, 6, 8, 6, 8, 8, 16), i8x8::new(2, 6, 6, 8, 6, 8, 8, 16), i8x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; + let r: [i8x8; 4] = transmute(vld4_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_s16() { + let a: [i16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [i16x4; 4] = [i16x4::new(1, 2, 2, 6), i16x4::new(2, 6, 6, 8), i16x4::new(2, 6, 6, 8), i16x4::new(6, 8, 8, 16)]; + let r: [i16x4; 4] = transmute(vld4_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_s32() { + let a: [i32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [i32x2; 4] = [i32x2::new(1, 2), i32x2::new(2, 6), i32x2::new(2, 6), i32x2::new(6, 8)]; + let r: [i32x2; 4] = transmute(vld4_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_s8() { + let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16), i8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 
16, 16, 32), i8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48), i8x16::new(6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64)]; + let r: [i8x16; 4] = transmute(vld4q_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_s16() { + let a: [i16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [i16x8; 4] = [i16x8::new(1, 2, 2, 6, 2, 6, 6, 8), i16x8::new(2, 6, 6, 8, 6, 8, 8, 16), i16x8::new(2, 6, 6, 8, 6, 8, 8, 16), i16x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; + let r: [i16x8; 4] = transmute(vld4q_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_s32() { + let a: [i32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [i32x4; 4] = [i32x4::new(1, 2, 2, 6), i32x4::new(2, 6, 6, 8), i32x4::new(2, 6, 6, 8), i32x4::new(6, 8, 8, 16)]; + let r: [i32x4; 4] = transmute(vld4q_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_s64() { + let a: [i64; 5] = [0, 1, 2, 2, 6]; + let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(6)]; + let r: [i64x1; 4] = transmute(vld4_s64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_u8() { + let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u8x8; 4] = [u8x8::new(1, 2, 2, 6, 2, 6, 6, 8), u8x8::new(2, 6, 6, 8, 6, 8, 8, 16), u8x8::new(2, 6, 6, 8, 6, 8, 8, 16), u8x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; + let r: [u8x8; 4] = transmute(vld4_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_u16() { + let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [u16x4; 4] = [u16x4::new(1, 2, 2, 6), u16x4::new(2, 6, 6, 8), u16x4::new(2, 6, 6, 8), u16x4::new(6, 8, 8, 16)]; + let r: [u16x4; 4] = transmute(vld4_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_u32() { + let a: [u32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [u32x2; 4] = [u32x2::new(1, 2), u32x2::new(2, 6), u32x2::new(2, 6), u32x2::new(6, 8)]; + let r: [u32x2; 4] = transmute(vld4_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_u8() { + let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [u8x16; 4] = [u8x16::new(1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16), u8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32), u8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48), u8x16::new(6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64)]; + let r: [u8x16; 4] = transmute(vld4q_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_u16() { + let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u16x8; 4] = [u16x8::new(1, 2, 2, 6, 2, 6, 6, 8), u16x8::new(2, 6, 6, 8, 6, 8, 8, 16), u16x8::new(2, 6, 6, 8, 6, 8, 8, 16), u16x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; + let r: [u16x8; 4] = transmute(vld4q_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vld4q_u32() { + let a: [u32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [u32x4; 4] = [u32x4::new(1, 2, 2, 6), u32x4::new(2, 6, 6, 8), u32x4::new(2, 6, 6, 8), u32x4::new(6, 8, 8, 16)]; + let r: [u32x4; 4] = transmute(vld4q_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_p8() { + let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [i8x8; 4] = [i8x8::new(1, 2, 2, 6, 2, 6, 6, 8), i8x8::new(2, 6, 6, 8, 6, 8, 8, 16), i8x8::new(2, 6, 6, 8, 6, 8, 8, 16), i8x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; + let r: [i8x8; 4] = transmute(vld4_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_p16() { + let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [i16x4; 4] = [i16x4::new(1, 2, 2, 6), i16x4::new(2, 6, 6, 8), i16x4::new(2, 6, 6, 8), i16x4::new(6, 8, 8, 16)]; + let r: [i16x4; 4] = transmute(vld4_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_p8() { + let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [i8x16; 4] = [i8x16::new(1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16), i8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32), i8x16::new(2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48), i8x16::new(6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64)]; + let r: [i8x16; 4] = transmute(vld4q_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_p16() { + let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [i16x8; 4] = [i16x8::new(1, 2, 2, 6, 2, 6, 6, 8), i16x8::new(2, 6, 6, 8, 6, 8, 8, 16), i16x8::new(2, 6, 6, 8, 6, 8, 8, 16), i16x8::new(6, 8, 8, 16, 8, 16, 16, 32)]; + let r: [i16x8; 4] = transmute(vld4q_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_u64() { + let a: [u64; 5] = [0, 1, 2, 2, 6]; + let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(2), u64x1::new(2), u64x1::new(6)]; + let r: [u64x1; 4] = transmute(vld4_u64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_p64() { + let a: [u64; 5] = [0, 1, 2, 2, 6]; + let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(2), i64x1::new(2), i64x1::new(6)]; + let r: [i64x1; 4] = transmute(vld4_p64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_f32() { + let a: [f32; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; + let e: [f32x2; 4] = [f32x2::new(1., 2.), f32x2::new(2., 6.), f32x2::new(2., 6.), f32x2::new(6., 8.)]; + let r: [f32x2; 4] = transmute(vld4_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_f32() { + let a: [f32; 17] = [0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 15., 16.]; + let e: [f32x4; 4] = [f32x4::new(1., 2., 2., 6.), f32x4::new(2., 6., 6., 8.), f32x4::new(2., 6., 6., 15.), f32x4::new(6., 8., 8., 16.)]; + let r: [f32x4; 4] = transmute(vld4q_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_s8() { + let a: [i8; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 
5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i8x8; 4] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x8; 4] = transmute(vld4_dup_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_s16() { + let a: [i16; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i16x4; 4] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; + let r: [i16x4; 4] = transmute(vld4_dup_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_s32() { + let a: [i32; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; + let e: [i32x2; 4] = [i32x2::new(1, 1), i32x2::new(1, 1), i32x2::new(1, 1), i32x2::new(1, 1)]; + let r: [i32x2; 4] = transmute(vld4_dup_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_s8() { + let a: [i8; 65] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i8x16; 4] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x16; 4] = transmute(vld4q_dup_s8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_s16() { + let a: [i16; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i16x8; 4] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i16x8; 4] = transmute(vld4q_dup_s16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_s32() { + let a: [i32; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i32x4; 4] = [i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1), i32x4::new(1, 1, 1, 1)]; + let r: [i32x4; 4] = transmute(vld4q_dup_s32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_s64() { + let a: [i64; 5] = [0, 1, 1, 1, 1]; + let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(1), i64x1::new(1), i64x1::new(1)]; + let r: [i64x1; 4] = transmute(vld4_dup_s64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_u8() { + let a: [u8; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [u8x8; 4] = [u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1), u8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u8x8; 4] = transmute(vld4_dup_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_u16() { + let a: [u16; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [u16x4; 4] = [u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1), u16x4::new(1, 1, 1, 1)]; + let r: [u16x4; 4] = transmute(vld4_dup_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_u32() { + let a: 
[u32; 9] = [0, 1, 1, 1, 1, 2, 4, 3, 5]; + let e: [u32x2; 4] = [u32x2::new(1, 1), u32x2::new(1, 1), u32x2::new(1, 1), u32x2::new(1, 1)]; + let r: [u32x2; 4] = transmute(vld4_dup_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_u8() { + let a: [u8; 65] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [u8x16; 4] = [u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u8x16; 4] = transmute(vld4q_dup_u8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_u16() { + let a: [u16; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [u16x8; 4] = [u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1), u16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [u16x8; 4] = transmute(vld4q_dup_u16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_u32() { + let a: [u32; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [u32x4; 4] = [u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1), u32x4::new(1, 1, 1, 1)]; + let r: [u32x4; 4] = transmute(vld4q_dup_u32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_p8() { + let a: [u8; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i8x8; 4] = [i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1), i8x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x8; 4] = transmute(vld4_dup_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_p16() { + let a: [u16; 17] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i16x4; 4] = [i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1), i16x4::new(1, 1, 1, 1)]; + let r: [i16x4; 4] = transmute(vld4_dup_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_p8() { + let a: [u8; 65] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i8x16; 4] = [i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i8x16; 4] = transmute(vld4q_dup_p8(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_p16() { + let a: [u16; 33] = [0, 1, 1, 1, 1, 2, 4, 3, 5, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9, 8, 6, 3, 7, 4, 8, 5, 9]; + let e: [i16x8; 4] = [i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1), i16x8::new(1, 1, 1, 1, 1, 1, 1, 1)]; + let r: [i16x8; 4] = transmute(vld4q_dup_p16(a[1..].as_ptr())); + assert_eq!(r, e); + } + + 
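// Note (descriptive comment, not part of the generated spec): the vld4*_dup tests read one + // 4-element structure from a[1..] and broadcast each element across every lane of the + // corresponding output vector, so a[1..5] == [1, 1, 1, 1] yields four all-ones vectors. +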
#[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_u64() { + let a: [u64; 5] = [0, 1, 1, 1, 1]; + let e: [u64x1; 4] = [u64x1::new(1), u64x1::new(1), u64x1::new(1), u64x1::new(1)]; + let r: [u64x1; 4] = transmute(vld4_dup_u64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_p64() { + let a: [u64; 5] = [0, 1, 1, 1, 1]; + let e: [i64x1; 4] = [i64x1::new(1), i64x1::new(1), i64x1::new(1), i64x1::new(1)]; + let r: [i64x1; 4] = transmute(vld4_dup_p64(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_dup_f32() { + let a: [f32; 9] = [0., 1., 1., 1., 1., 6., 4., 3., 5.]; + let e: [f32x2; 4] = [f32x2::new(1., 1.), f32x2::new(1., 1.), f32x2::new(1., 1.), f32x2::new(1., 1.)]; + let r: [f32x2; 4] = transmute(vld4_dup_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_dup_f32() { + let a: [f32; 17] = [0., 1., 1., 1., 1., 6., 4., 3., 5., 7., 4., 3., 5., 8., 4., 3., 5.]; + let e: [f32x4; 4] = [f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.), f32x4::new(1., 1., 1., 1.)]; + let r: [f32x4; 4] = transmute(vld4q_dup_f32(a[1..].as_ptr())); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_s8() { + let a: [i8; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i8x8; 4] = [i8x8::new(0, 2, 2, 2, 2, 16, 2, 18), i8x8::new(2, 20, 21, 22, 2, 24, 25, 26), i8x8::new(11, 12, 13, 14, 15, 16, 2, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [i8x8; 4] = [i8x8::new(1, 2, 2, 2, 2, 16, 2, 18), i8x8::new(2, 20, 21, 22, 2, 24, 25, 26), i8x8::new(2, 12, 13, 14, 15, 16, 2, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [i8x8; 4] = transmute(vld4_lane_s8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_s16() { + let a: [i16; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x4; 4] = [i16x4::new(0, 2, 2, 2), i16x4::new(2, 16, 2, 18), i16x4::new(2, 20, 21, 22), i16x4::new(2, 24, 25, 26)]; + let e: [i16x4; 4] = [i16x4::new(1, 2, 2, 2), i16x4::new(2, 16, 2, 18), i16x4::new(2, 20, 21, 22), i16x4::new(2, 24, 25, 26)]; + let r: [i16x4; 4] = transmute(vld4_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_s32() { + let a: [i32; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8]; + let b: [i32x2; 4] = [i32x2::new(0, 2), i32x2::new(2, 2), i32x2::new(2, 16), i32x2::new(2, 18)]; + let e: [i32x2; 4] = [i32x2::new(1, 2), i32x2::new(2, 2), i32x2::new(2, 16), i32x2::new(2, 18)]; + let r: [i32x2; 4] = transmute(vld4_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_lane_s16() { + let a: [i16; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x8; 4] = [i16x8::new(0, 2, 2, 2, 2, 16, 2, 18), i16x8::new(2, 20, 21, 22, 2, 24, 25, 26), i16x8::new(11, 12, 13, 14, 15, 16, 2, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [i16x8; 4] = [i16x8::new(1, 2, 2, 2, 2, 16, 2, 18), i16x8::new(2, 20, 21, 22, 2, 24, 25, 26), i16x8::new(2, 12, 13, 14, 15, 16, 2, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [i16x8; 4] = transmute(vld4q_lane_s16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = 
"neon")] + unsafe fn test_vld4q_lane_s32() { + let a: [i32; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i32x4; 4] = [i32x4::new(0, 2, 2, 2), i32x4::new(2, 16, 2, 18), i32x4::new(2, 20, 21, 22), i32x4::new(2, 24, 25, 26)]; + let e: [i32x4; 4] = [i32x4::new(1, 2, 2, 2), i32x4::new(2, 16, 2, 18), i32x4::new(2, 20, 21, 22), i32x4::new(2, 24, 25, 26)]; + let r: [i32x4; 4] = transmute(vld4q_lane_s32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_u8() { + let a: [u8; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u8x8; 4] = [u8x8::new(0, 2, 2, 2, 2, 16, 2, 18), u8x8::new(2, 20, 21, 22, 2, 24, 25, 26), u8x8::new(11, 12, 13, 14, 15, 16, 2, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [u8x8; 4] = [u8x8::new(1, 2, 2, 2, 2, 16, 2, 18), u8x8::new(2, 20, 21, 22, 2, 24, 25, 26), u8x8::new(2, 12, 13, 14, 15, 16, 2, 18), u8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [u8x8; 4] = transmute(vld4_lane_u8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_u16() { + let a: [u16; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u16x4; 4] = [u16x4::new(0, 2, 2, 2), u16x4::new(2, 16, 2, 18), u16x4::new(2, 20, 21, 22), u16x4::new(2, 24, 25, 26)]; + let e: [u16x4; 4] = [u16x4::new(1, 2, 2, 2), u16x4::new(2, 16, 2, 18), u16x4::new(2, 20, 21, 22), u16x4::new(2, 24, 25, 26)]; + let r: [u16x4; 4] = transmute(vld4_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_u32() { + let a: [u32; 9] = [0, 1, 2, 2, 2, 5, 6, 7, 8]; + let b: [u32x2; 4] = [u32x2::new(0, 2), u32x2::new(2, 2), u32x2::new(2, 16), u32x2::new(2, 18)]; + let e: [u32x2; 4] = [u32x2::new(1, 2), u32x2::new(2, 2), u32x2::new(2, 16), u32x2::new(2, 18)]; + let r: [u32x2; 4] = transmute(vld4_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_lane_u16() { + let a: [u16; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u16x8; 4] = [u16x8::new(0, 2, 2, 2, 2, 16, 2, 18), u16x8::new(2, 20, 21, 22, 2, 24, 25, 26), u16x8::new(11, 12, 13, 14, 15, 16, 2, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [u16x8; 4] = [u16x8::new(1, 2, 2, 2, 2, 16, 2, 18), u16x8::new(2, 20, 21, 22, 2, 24, 25, 26), u16x8::new(2, 12, 13, 14, 15, 16, 2, 18), u16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [u16x8; 4] = transmute(vld4q_lane_u16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_lane_u32() { + let a: [u32; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [u32x4; 4] = [u32x4::new(0, 2, 2, 2), u32x4::new(2, 16, 2, 18), u32x4::new(2, 20, 21, 22), u32x4::new(2, 24, 25, 26)]; + let e: [u32x4; 4] = [u32x4::new(1, 2, 2, 2), u32x4::new(2, 16, 2, 18), u32x4::new(2, 20, 21, 22), u32x4::new(2, 24, 25, 26)]; + let r: [u32x4; 4] = transmute(vld4q_lane_u32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_p8() { + let a: [u8; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i8x8; 4] = [i8x8::new(0, 2, 2, 2, 2, 16, 2, 18), i8x8::new(2, 20, 21, 22, 2, 24, 25, 26), 
i8x8::new(11, 12, 13, 14, 15, 16, 2, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [i8x8; 4] = [i8x8::new(1, 2, 2, 2, 2, 16, 2, 18), i8x8::new(2, 20, 21, 22, 2, 24, 25, 26), i8x8::new(2, 12, 13, 14, 15, 16, 2, 18), i8x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [i8x8; 4] = transmute(vld4_lane_p8::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_p16() { + let a: [u16; 17] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x4; 4] = [i16x4::new(0, 2, 2, 2), i16x4::new(2, 16, 2, 18), i16x4::new(2, 20, 21, 22), i16x4::new(2, 24, 25, 26)]; + let e: [i16x4; 4] = [i16x4::new(1, 2, 2, 2), i16x4::new(2, 16, 2, 18), i16x4::new(2, 20, 21, 22), i16x4::new(2, 24, 25, 26)]; + let r: [i16x4; 4] = transmute(vld4_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_lane_p16() { + let a: [u16; 33] = [0, 1, 2, 2, 2, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8]; + let b: [i16x8; 4] = [i16x8::new(0, 2, 2, 2, 2, 16, 2, 18), i16x8::new(2, 20, 21, 22, 2, 24, 25, 26), i16x8::new(11, 12, 13, 14, 15, 16, 2, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let e: [i16x8; 4] = [i16x8::new(1, 2, 2, 2, 2, 16, 2, 18), i16x8::new(2, 20, 21, 22, 2, 24, 25, 26), i16x8::new(2, 12, 13, 14, 15, 16, 2, 18), i16x8::new(2, 20, 21, 22, 23, 24, 25, 26)]; + let r: [i16x8; 4] = transmute(vld4q_lane_p16::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4_lane_f32() { + let a: [f32; 9] = [0., 1., 2., 2., 2., 5., 6., 7., 8.]; + let b: [f32x2; 4] = [f32x2::new(0., 2.), f32x2::new(2., 2.), f32x2::new(2., 16.), f32x2::new(2., 18.)]; + let e: [f32x2; 4] = [f32x2::new(1., 2.), f32x2::new(2., 2.), f32x2::new(2., 16.), f32x2::new(2., 18.)]; + let r: [f32x2; 4] = transmute(vld4_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld4q_lane_f32() { + let a: [f32; 17] = [0., 1., 2., 2., 2., 5., 6., 7., 8., 5., 6., 7., 8., 1., 4., 3., 5.]; + let b: [f32x4; 4] = [f32x4::new(0., 2., 2., 2.), f32x4::new(2., 16., 2., 18.), f32x4::new(5., 6., 7., 8.), f32x4::new(1., 4., 3., 5.)]; + let e: [f32x4; 4] = [f32x4::new(1., 2., 2., 2.), f32x4::new(2., 16., 2., 18.), f32x4::new(2., 6., 7., 8.), f32x4::new(2., 4., 3., 5.)]; + let r: [f32x4; 4] = transmute(vld4q_lane_f32::<0>(a[1..].as_ptr(), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_s8() { + let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i8; 8] = [1, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i8; 8] = [0i8; 8]; + vst1_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_s16() { + let a: [i16; 5] = [0, 1, 2, 3, 4]; + let e: [i16; 4] = [1, 0, 0, 0]; + let mut r: [i16; 4] = [0i16; 4]; + vst1_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_s32() { + let a: [i32; 3] = [0, 1, 2]; + let e: [i32; 2] = [1, 0]; + let mut r: [i32; 2] = [0i32; 2]; + vst1_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_s64() { + let a: [i64; 2] = [0, 1]; + let e: [i64; 1] = [1]; + let mut r: 
[i64; 1] = [0i64; 1]; + vst1_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_s8() { + let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8; 16] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i8; 16] = [0i8; 16]; + vst1q_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_s16() { + let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i16; 8] = [1, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i16; 8] = [0i16; 8]; + vst1q_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_s32() { + let a: [i32; 5] = [0, 1, 2, 3, 4]; + let e: [i32; 4] = [1, 0, 0, 0]; + let mut r: [i32; 4] = [0i32; 4]; + vst1q_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_s64() { + let a: [i64; 3] = [0, 1, 2]; + let e: [i64; 2] = [1, 0]; + let mut r: [i64; 2] = [0i64; 2]; + vst1q_lane_s64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_u8() { + let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u8; 8] = [1, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 8] = [0u8; 8]; + vst1_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_u16() { + let a: [u16; 5] = [0, 1, 2, 3, 4]; + let e: [u16; 4] = [1, 0, 0, 0]; + let mut r: [u16; 4] = [0u16; 4]; + vst1_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_u32() { + let a: [u32; 3] = [0, 1, 2]; + let e: [u32; 2] = [1, 0]; + let mut r: [u32; 2] = [0u32; 2]; + vst1_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_u64() { + let a: [u64; 2] = [0, 1]; + let e: [u64; 1] = [1]; + let mut r: [u64; 1] = [0u64; 1]; + vst1_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_u8() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 16] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 16] = [0u8; 16]; + vst1q_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_u16() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u16; 8] = [1, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 8] = [0u16; 8]; + vst1q_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_u32() { + let a: [u32; 5] = [0, 1, 2, 3, 4]; + let e: [u32; 4] = [1, 0, 0, 0]; + let mut r: [u32; 4] = [0u32; 4]; + vst1q_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_u64() { + let a: 
[u64; 3] = [0, 1, 2]; + let e: [u64; 2] = [1, 0]; + let mut r: [u64; 2] = [0u64; 2]; + vst1q_lane_u64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_p8() { + let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u8; 8] = [1, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 8] = [0u8; 8]; + vst1_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_p16() { + let a: [u16; 5] = [0, 1, 2, 3, 4]; + let e: [u16; 4] = [1, 0, 0, 0]; + let mut r: [u16; 4] = [0u16; 4]; + vst1_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_p8() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 16] = [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 16] = [0u8; 16]; + vst1q_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_p16() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u16; 8] = [1, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 8] = [0u16; 8]; + vst1q_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_p64() { + let a: [u64; 2] = [0, 1]; + let e: [u64; 1] = [1]; + let mut r: [u64; 1] = [0u64; 1]; + vst1_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_p64() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64; 2] = [1, 0]; + let mut r: [u64; 2] = [0u64; 2]; + vst1q_lane_p64::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_lane_f32() { + let a: [f32; 3] = [0., 1., 2.]; + let e: [f32; 2] = [1., 0.]; + let mut r: [f32; 2] = [0f32; 2]; + vst1_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_lane_f32() { + let a: [f32; 5] = [0., 1., 2., 3., 4.]; + let e: [f32; 4] = [1., 0., 0., 0.]; + let mut r: [f32; 4] = [0f32; 4]; + vst1q_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s8_x2() { + let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i8; 16] = [0i8; 16]; + vst1_s8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s16_x2() { + let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [i16; 8] = [0i16; 8]; + vst1_s16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s32_x2() { + let a: [i32; 5] = [0, 1, 2, 3, 4]; + let e: [i32; 4] = [1, 2, 3, 4]; + let mut r: [i32; 4] = [0i32; 4]; + vst1_s32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = 
"neon")] + unsafe fn test_vst1_s64_x2() { + let a: [i64; 3] = [0, 1, 2]; + let e: [i64; 2] = [1, 2]; + let mut r: [i64; 2] = [0i64; 2]; + vst1_s64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s8_x2() { + let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [i8; 32] = [0i8; 32]; + vst1q_s8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s16_x2() { + let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i16; 16] = [0i16; 16]; + vst1q_s16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s32_x2() { + let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [i32; 8] = [0i32; 8]; + vst1q_s32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s64_x2() { + let a: [i64; 5] = [0, 1, 2, 3, 4]; + let e: [i64; 4] = [1, 2, 3, 4]; + let mut r: [i64; 4] = [0i64; 4]; + vst1q_s64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s8_x3() { + let a: [i8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [i8; 24] = [0i8; 24]; + vst1_s8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s16_x3() { + let a: [i16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [i16; 12] = [0i16; 12]; + vst1_s16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s32_x3() { + let a: [i32; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [i32; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [i32; 6] = [0i32; 6]; + vst1_s32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s64_x3() { + let a: [i64; 4] = [0, 1, 2, 3]; + let e: [i64; 3] = [1, 2, 3]; + let mut r: [i64; 3] = [0i64; 3]; + vst1_s64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s8_x3() { + let a: [i8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i8; 48] = [0i8; 48]; + vst1q_s8_x3(r.as_mut_ptr(), 
core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s16_x3() { + let a: [i16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [i16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [i16; 24] = [0i16; 24]; + vst1q_s16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s32_x3() { + let a: [i32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [i32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [i32; 12] = [0i32; 12]; + vst1q_s32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s64_x3() { + let a: [i64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [i64; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [i64; 6] = [0i64; 6]; + vst1q_s64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s8_x4() { + let a: [i8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [i8; 32] = [0i8; 32]; + vst1_s8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s16_x4() { + let a: [i16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i16; 16] = [0i16; 16]; + vst1_s16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s32_x4() { + let a: [i32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [i32; 8] = [0i32; 8]; + vst1_s32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_s64_x4() { + let a: [i64; 5] = [0, 1, 2, 3, 4]; + let e: [i64; 4] = [1, 2, 3, 4]; + let mut r: [i64; 4] = [0i64; 4]; + vst1_s64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s8_x4() { + let a: [i8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [i8; 64] = [0i8; 64]; + vst1q_s8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s16_x4() { + let a: [i16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [i16; 32] = [1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [i16; 32] = [0i16; 32]; + vst1q_s16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s32_x4() { + let a: [i32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [i32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [i32; 16] = [0i32; 16]; + vst1q_s32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_s64_x4() { + let a: [i64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [i64; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [i64; 8] = [0i64; 8]; + vst1q_s64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u8_x2() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u8; 16] = [0u8; 16]; + vst1_u8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u16_x2() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u16; 8] = [0u16; 8]; + vst1_u16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u32_x2() { + let a: [u32; 5] = [0, 1, 2, 3, 4]; + let e: [u32; 4] = [1, 2, 3, 4]; + let mut r: [u32; 4] = [0u32; 4]; + vst1_u32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u64_x2() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64; 2] = [1, 2]; + let mut r: [u64; 2] = [0u64; 2]; + vst1_u64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u8_x2() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst1q_u8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u16_x2() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst1q_u16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u32_x2() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u32; 8] = [0u32; 8]; + vst1q_u32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u64_x2() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64; 4] = [1, 2, 3, 4]; + let mut r: [u64; 4] = [0u64; 4]; + vst1q_u64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); 
+ } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u8_x3() { + let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [u8; 24] = [0u8; 24]; + vst1_u8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u16_x3() { + let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [u16; 12] = [0u16; 12]; + vst1_u16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u32_x3() { + let a: [u32; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u32; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [u32; 6] = [0u32; 6]; + vst1_u32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u64_x3() { + let a: [u64; 4] = [0, 1, 2, 3]; + let e: [u64; 3] = [1, 2, 3]; + let mut r: [u64; 3] = [0u64; 3]; + vst1_u64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u8_x3() { + let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u8; 48] = [0u8; 48]; + vst1q_u8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u16_x3() { + let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [u16; 24] = [0u16; 24]; + vst1q_u16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u32_x3() { + let a: [u32; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u32; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [u32; 12] = [0u32; 12]; + vst1q_u32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u64_x3() { + let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u64; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [u64; 6] = [0u64; 6]; + vst1q_u64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u8_x4() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst1_u8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u16_x4() { + let a: 
[u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst1_u16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u32_x4() { + let a: [u32; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u32; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u32; 8] = [0u32; 8]; + vst1_u32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_u64_x4() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64; 4] = [1, 2, 3, 4]; + let mut r: [u64; 4] = [0u64; 4]; + vst1_u64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u8_x4() { + let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 64] = [0u8; 64]; + vst1q_u8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u16_x4() { + let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u16; 32] = [0u16; 32]; + vst1q_u16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u32_x4() { + let a: [u32; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u32; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u32; 16] = [0u32; 16]; + vst1q_u32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_u64_x4() { + let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u64; 8] = [0u64; 8]; + vst1q_u64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p8_x2() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u8; 16] = [0u8; 16]; + vst1_p8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p8_x3() { + let a: [u8; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u8; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [u8; 24] = [0u8; 24]; + vst1_p8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + 
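+ // Note (descriptive comment, not part of the generated spec): each vst1*_xN test stores N + // registers of contiguous data; reading the source from a[1..] offsets the pointer one + // element from the array start, exercising unaligned vector accesses.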
+ #[simd_test(enable = "neon")] + unsafe fn test_vst1_p8_x4() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst1_p8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p8_x2() { + let a: [u8; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst1q_p8_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p8_x3() { + let a: [u8; 49] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u8; 48] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u8; 48] = [0u8; 48]; + vst1q_p8_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p8_x4() { + let a: [u8; 65] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u8; 64] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u8; 64] = [0u8; 64]; + vst1q_p8_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p16_x2() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u16; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u16; 8] = [0u16; 8]; + vst1_p16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p16_x3() { + let a: [u16; 13] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let e: [u16; 12] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]; + let mut r: [u16; 12] = [0u16; 12]; + vst1_p16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p16_x4() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst1_p16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p16_x2() { + let a: [u16; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e: [u16; 16] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let mut r: 
[u16; 16] = [0u16; 16]; + vst1q_p16_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p16_x3() { + let a: [u16; 25] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let e: [u16; 24] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24]; + let mut r: [u16; 24] = [0u16; 24]; + vst1q_p16_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p16_x4() { + let a: [u16; 33] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let e: [u16; 32] = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]; + let mut r: [u16; 32] = [0u16; 32]; + vst1q_p16_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p64_x2() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64; 2] = [1, 2]; + let mut r: [u64; 2] = [0u64; 2]; + vst1_p64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p64_x3() { + let a: [u64; 4] = [0, 1, 2, 3]; + let e: [u64; 3] = [1, 2, 3]; + let mut r: [u64; 3] = [0u64; 3]; + vst1_p64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_p64_x4() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64; 4] = [1, 2, 3, 4]; + let mut r: [u64; 4] = [0u64; 4]; + vst1_p64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p64_x2() { + let a: [u64; 5] = [0, 1, 2, 3, 4]; + let e: [u64; 4] = [1, 2, 3, 4]; + let mut r: [u64; 4] = [0u64; 4]; + vst1q_p64_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p64_x3() { + let a: [u64; 7] = [0, 1, 2, 3, 4, 5, 6]; + let e: [u64; 6] = [1, 2, 3, 4, 5, 6]; + let mut r: [u64; 6] = [0u64; 6]; + vst1q_p64_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_p64_x4() { + let a: [u64; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e: [u64; 8] = [1, 2, 3, 4, 5, 6, 7, 8]; + let mut r: [u64; 8] = [0u64; 8]; + vst1q_p64_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f32_x2() { + let a: [f32; 5] = [0., 1., 2., 3., 4.]; + let e: [f32; 4] = [1., 2., 3., 4.]; + let mut r: [f32; 4] = [0f32; 4]; + vst1_f32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f32_x2() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; + let mut r: [f32; 8] = [0f32; 8]; + vst1q_f32_x2(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f32_x3() { + let a: [f32; 7] = [0., 1., 2., 3., 4., 5., 6.]; + let e: [f32; 6] = [1., 2., 3., 4., 5., 6.]; + let mut r: [f32; 6] = [0f32; 6]; + 
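// vst1_f32_x3 stores three f32x2 registers (six f32 values) contiguously into r. +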
vst1_f32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f32_x3() { + let a: [f32; 13] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]; + let e: [f32; 12] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.]; + let mut r: [f32; 12] = [0f32; 12]; + vst1q_f32_x3(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1_f32_x4() { + let a: [f32; 9] = [0., 1., 2., 3., 4., 5., 6., 7., 8.]; + let e: [f32; 8] = [1., 2., 3., 4., 5., 6., 7., 8.]; + let mut r: [f32; 8] = [0f32; 8]; + vst1_f32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst1q_f32_x4() { + let a: [f32; 17] = [0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]; + let e: [f32; 16] = [1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.]; + let mut r: [f32; 16] = [0f32; 16]; + vst1q_f32_x4(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_s8() { + let a: [i8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [i8; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let mut r: [i8; 16] = [0i8; 16]; + vst2_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_s16() { + let a: [i16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [i16; 8] = [1, 2, 2, 3, 2, 4, 3, 5]; + let mut r: [i16; 8] = [0i16; 8]; + vst2_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_s32() { + let a: [i32; 5] = [0, 1, 2, 2, 3]; + let e: [i32; 4] = [1, 2, 2, 3]; + let mut r: [i32; 4] = [0i32; 4]; + vst2_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_s8() { + let a: [i8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; + let e: [i8; 32] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let mut r: [i8; 32] = [0i8; 32]; + vst2q_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_s16() { + let a: [i16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [i16; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let mut r: [i16; 16] = [0i16; 16]; + vst2q_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_s32() { + let a: [i32; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [i32; 8] = [1, 2, 2, 3, 2, 4, 3, 5]; + let mut r: [i32; 8] = [0i32; 8]; + vst2q_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_s64() { + let a: [i64; 3] = [0, 1, 2]; + let e: [i64; 2] = [1, 2]; + let mut r: [i64; 2] = [0i64; 2]; + vst2_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_u8() { + let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 
3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [u8; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let mut r: [u8; 16] = [0u8; 16]; + vst2_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_u16() { + let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [u16; 8] = [1, 2, 2, 3, 2, 4, 3, 5]; + let mut r: [u16; 8] = [0u16; 8]; + vst2_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_u32() { + let a: [u32; 5] = [0, 1, 2, 2, 3]; + let e: [u32; 4] = [1, 2, 2, 3]; + let mut r: [u32; 4] = [0u32; 4]; + vst2_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_u8() { + let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; + let e: [u8; 32] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let mut r: [u8; 32] = [0u8; 32]; + vst2q_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_u16() { + let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [u16; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let mut r: [u16; 16] = [0u16; 16]; + vst2q_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_u32() { + let a: [u32; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [u32; 8] = [1, 2, 2, 3, 2, 4, 3, 5]; + let mut r: [u32; 8] = [0u32; 8]; + vst2q_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_p8() { + let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [u8; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let mut r: [u8; 16] = [0u8; 16]; + vst2_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_p16() { + let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [u16; 8] = [1, 2, 2, 3, 2, 4, 3, 5]; + let mut r: [u16; 8] = [0u16; 8]; + vst2_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_p8() { + let a: [u8; 33] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17]; + let e: [u8; 32] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15, 8, 16, 9, 17]; + let mut r: [u8; 32] = [0u8; 32]; + vst2q_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_p16() { + let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [u16; 16] = [1, 2, 2, 3, 2, 4, 3, 5, 2, 6, 3, 7, 4, 8, 5, 9]; + let mut r: [u16; 16] = [0u16; 16]; + vst2q_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_u64() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64; 2] = [1, 2]; + let mut r: [u64; 2] = [0u64; 2]; + vst2_u64(r.as_mut_ptr(), 
core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_p64() { + let a: [u64; 3] = [0, 1, 2]; + let e: [u64; 2] = [1, 2]; + let mut r: [u64; 2] = [0u64; 2]; + vst2_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_f32() { + let a: [f32; 5] = [0., 1., 2., 2., 3.]; + let e: [f32; 4] = [1., 2., 2., 3.]; + let mut r: [f32; 4] = [0f32; 4]; + vst2_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_f32() { + let a: [f32; 9] = [0., 1., 2., 2., 3., 2., 3., 4., 5.]; + let e: [f32; 8] = [1., 2., 2., 3., 2., 4., 3., 5.]; + let mut r: [f32; 8] = [0f32; 8]; + vst2q_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_s8() { + let a: [i8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [i8; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i8; 16] = [0i8; 16]; + vst2_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_s16() { + let a: [i16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [i16; 8] = [1, 2, 0, 0, 0, 0, 0, 0]; + let mut r: [i16; 8] = [0i16; 8]; + vst2_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_s32() { + let a: [i32; 5] = [0, 1, 2, 2, 3]; + let e: [i32; 4] = [1, 2, 0, 0]; + let mut r: [i32; 4] = [0i32; 4]; + vst2_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_lane_s16() { + let a: [i16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [i16; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i16; 16] = [0i16; 16]; + vst2q_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_lane_s32() { + let a: [i32; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [i32; 8] = [1, 2, 0, 0, 0, 0, 0, 0]; + let mut r: [i32; 8] = [0i32; 8]; + vst2q_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_u8() { + let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [u8; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 16] = [0u8; 16]; + vst2_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_u16() { + let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [u16; 8] = [1, 2, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 8] = [0u16; 8]; + vst2_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_u32() { + let a: [u32; 5] = [0, 1, 2, 2, 3]; + let e: [u32; 4] = [1, 2, 0, 0]; + let mut r: [u32; 4] = [0u32; 4]; + vst2_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vst2q_lane_u16() { + let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [u16; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 16] = [0u16; 16]; + vst2q_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_lane_u32() { + let a: [u32; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [u32; 8] = [1, 2, 0, 0, 0, 0, 0, 0]; + let mut r: [u32; 8] = [0u32; 8]; + vst2q_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_p8() { + let a: [u8; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [u8; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 16] = [0u8; 16]; + vst2_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_p16() { + let a: [u16; 9] = [0, 1, 2, 2, 3, 2, 3, 4, 5]; + let e: [u16; 8] = [1, 2, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 8] = [0u16; 8]; + vst2_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_lane_p16() { + let a: [u16; 17] = [0, 1, 2, 2, 3, 2, 3, 4, 5, 2, 3, 4, 5, 6, 7, 8, 9]; + let e: [u16; 16] = [1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 16] = [0u16; 16]; + vst2q_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2_lane_f32() { + let a: [f32; 5] = [0., 1., 2., 2., 3.]; + let e: [f32; 4] = [1., 2., 0., 0.]; + let mut r: [f32; 4] = [0f32; 4]; + vst2_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst2q_lane_f32() { + let a: [f32; 9] = [0., 1., 2., 2., 3., 2., 3., 4., 5.]; + let e: [f32; 8] = [1., 2., 0., 0., 0., 0., 0., 0.]; + let mut r: [f32; 8] = [0f32; 8]; + vst2q_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_s8() { + let a: [i8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [i8; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let mut r: [i8; 24] = [0i8; 24]; + vst3_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_s16() { + let a: [i16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [i16; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let mut r: [i16; 12] = [0i16; 12]; + vst3_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_s32() { + let a: [i32; 7] = [0, 1, 2, 2, 4, 2, 4]; + let e: [i32; 6] = [1, 2, 2, 2, 4, 4]; + let mut r: [i32; 6] = [0i32; 6]; + vst3_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_s8() { + let a: [i8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 
48]; + let e: [i8; 48] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; + let mut r: [i8; 48] = [0i8; 48]; + vst3q_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_s16() { + let a: [i16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [i16; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let mut r: [i16; 24] = [0i16; 24]; + vst3q_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_s32() { + let a: [i32; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [i32; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let mut r: [i32; 12] = [0i32; 12]; + vst3q_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_s64() { + let a: [i64; 4] = [0, 1, 2, 2]; + let e: [i64; 3] = [1, 2, 2]; + let mut r: [i64; 3] = [0i64; 3]; + vst3_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_u8() { + let a: [u8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [u8; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let mut r: [u8; 24] = [0u8; 24]; + vst3_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_u16() { + let a: [u16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [u16; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let mut r: [u16; 12] = [0u16; 12]; + vst3_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_u32() { + let a: [u32; 7] = [0, 1, 2, 2, 4, 2, 4]; + let e: [u32; 6] = [1, 2, 2, 2, 4, 4]; + let mut r: [u32; 6] = [0u32; 6]; + vst3_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_u8() { + let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48]; + let e: [u8; 48] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; + let mut r: [u8; 48] = [0u8; 48]; + vst3q_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_u16() { + let a: [u16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [u16; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let mut r: [u16; 24] = [0u16; 24]; + vst3q_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_u32() { + let a: [u32; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [u32; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let mut r: 
[u32; 12] = [0u32; 12]; + vst3q_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_p8() { + let a: [u8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [u8; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let mut r: [u8; 24] = [0u8; 24]; + vst3_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_p16() { + let a: [u16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [u16; 12] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8]; + let mut r: [u16; 12] = [0u16; 12]; + vst3_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_p8() { + let a: [u8; 49] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16, 25, 26, 27, 28, 29, 30, 31, 32, 2, 4, 7, 8, 13, 14, 15, 16, 41, 42, 43, 44, 45, 46, 47, 48]; + let e: [u8; 48] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16, 2, 25, 41, 4, 26, 42, 7, 27, 43, 8, 28, 44, 13, 29, 45, 14, 30, 46, 15, 31, 47, 16, 32, 48]; + let mut r: [u8; 48] = [0u8; 48]; + vst3q_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_p16() { + let a: [u16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [u16; 24] = [1, 2, 2, 2, 4, 4, 2, 7, 7, 4, 8, 8, 2, 13, 13, 4, 14, 14, 7, 15, 15, 8, 16, 16]; + let mut r: [u16; 24] = [0u16; 24]; + vst3q_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_u64() { + let a: [u64; 4] = [0, 1, 2, 2]; + let e: [u64; 3] = [1, 2, 2]; + let mut r: [u64; 3] = [0u64; 3]; + vst3_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_p64() { + let a: [u64; 4] = [0, 1, 2, 2]; + let e: [u64; 3] = [1, 2, 2]; + let mut r: [u64; 3] = [0u64; 3]; + vst3_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_f32() { + let a: [f32; 7] = [0., 1., 2., 2., 4., 2., 4.]; + let e: [f32; 6] = [1., 2., 2., 2., 4., 4.]; + let mut r: [f32; 6] = [0f32; 6]; + vst3_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_f32() { + let a: [f32; 13] = [0., 1., 2., 2., 4., 2., 4., 7., 8., 2., 4., 7., 8.]; + let e: [f32; 12] = [1., 2., 2., 2., 4., 4., 2., 7., 7., 4., 8., 8.]; + let mut r: [f32; 12] = [0f32; 12]; + vst3q_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_lane_s8() { + let a: [i8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [i8; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i8; 24] = [0i8; 24]; + vst3_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_lane_s16() { + let a: [i16; 13] = [0, 1, 2, 2, 4, 2, 
4, 7, 8, 2, 4, 7, 8]; + let e: [i16; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i16; 12] = [0i16; 12]; + vst3_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_lane_s32() { + let a: [i32; 7] = [0, 1, 2, 2, 4, 2, 4]; + let e: [i32; 6] = [1, 2, 2, 0, 0, 0]; + let mut r: [i32; 6] = [0i32; 6]; + vst3_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_lane_s16() { + let a: [i16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [i16; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i16; 24] = [0i16; 24]; + vst3q_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_lane_s32() { + let a: [i32; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [i32; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i32; 12] = [0i32; 12]; + vst3q_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_lane_u8() { + let a: [u8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [u8; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 24] = [0u8; 24]; + vst3_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_lane_u16() { + let a: [u16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [u16; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 12] = [0u16; 12]; + vst3_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_lane_u32() { + let a: [u32; 7] = [0, 1, 2, 2, 4, 2, 4]; + let e: [u32; 6] = [1, 2, 2, 0, 0, 0]; + let mut r: [u32; 6] = [0u32; 6]; + vst3_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_lane_u16() { + let a: [u16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [u16; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 24] = [0u16; 24]; + vst3q_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_lane_u32() { + let a: [u32; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [u32; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u32; 12] = [0u32; 12]; + vst3q_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_lane_p8() { + let a: [u8; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [u8; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 24] = [0u8; 24]; + vst3_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vst3_lane_p16() { + let a: [u16; 13] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8]; + let e: [u16; 12] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 12] = [0u16; 12]; + vst3_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_lane_p16() { + let a: [u16; 25] = [0, 1, 2, 2, 4, 2, 4, 7, 8, 2, 4, 7, 8, 13, 14, 15, 16, 2, 4, 7, 8, 13, 14, 15, 16]; + let e: [u16; 24] = [1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 24] = [0u16; 24]; + vst3q_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3_lane_f32() { + let a: [f32; 7] = [0., 1., 2., 2., 3., 2., 3.]; + let e: [f32; 6] = [1., 2., 2., 0., 0., 0.]; + let mut r: [f32; 6] = [0f32; 6]; + vst3_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst3q_lane_f32() { + let a: [f32; 13] = [0., 1., 2., 2., 3., 2., 3., 4., 5., 2., 3., 4., 5.]; + let e: [f32; 12] = [1., 2., 2., 0., 0., 0., 0., 0., 0., 0., 0., 0.]; + let mut r: [f32; 12] = [0f32; 12]; + vst3q_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_s8() { + let a: [i8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [i8; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let mut r: [i8; 32] = [0i8; 32]; + vst4_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_s16() { + let a: [i16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [i16; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let mut r: [i16; 16] = [0i16; 16]; + vst4_s16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_s32() { + let a: [i32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [i32; 8] = [1, 2, 2, 6, 2, 6, 6, 8]; + let mut r: [i32; 8] = [0i32; 8]; + vst4_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_s8() { + let a: [i8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [i8; 64] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let mut r: [i8; 64] = [0i8; 64]; + vst4q_s8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_s16() { + let a: [i16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [i16; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let mut r: [i16; 32] = [0i16; 32]; + vst4q_s16(r.as_mut_ptr(), 
core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_s32() { + let a: [i32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [i32; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let mut r: [i32; 16] = [0i32; 16]; + vst4q_s32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_s64() { + let a: [i64; 5] = [0, 1, 2, 2, 6]; + let e: [i64; 4] = [1, 2, 2, 6]; + let mut r: [i64; 4] = [0i64; 4]; + vst4_s64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_u8() { + let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u8; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst4_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_u16() { + let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [u16; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst4_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_u32() { + let a: [u32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [u32; 8] = [1, 2, 2, 6, 2, 6, 6, 8]; + let mut r: [u32; 8] = [0u32; 8]; + vst4_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_u8() { + let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [u8; 64] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let mut r: [u8; 64] = [0u8; 64]; + vst4q_u8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_u16() { + let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u16; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let mut r: [u16; 32] = [0u16; 32]; + vst4q_u16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_u32() { + let a: [u32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [u32; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let mut r: [u32; 16] = [0u32; 16]; + vst4q_u32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_p8() { + let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u8; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 
16, 6, 8, 8, 16, 8, 16, 16, 32]; + let mut r: [u8; 32] = [0u8; 32]; + vst4_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_p16() { + let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [u16; 16] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let mut r: [u16; 16] = [0u16; 16]; + vst4_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_p8() { + let a: [u8; 65] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let e: [u8; 64] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 43, 44, 8, 16, 44, 48, 6, 8, 8, 16, 8, 16, 16, 32, 8, 16, 44, 48, 16, 32, 48, 64]; + let mut r: [u8; 64] = [0u8; 64]; + vst4q_p8(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_p16() { + let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u16; 32] = [1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let mut r: [u16; 32] = [0u16; 32]; + vst4q_p16(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_u64() { + let a: [u64; 5] = [0, 1, 2, 2, 6]; + let e: [u64; 4] = [1, 2, 2, 6]; + let mut r: [u64; 4] = [0u64; 4]; + vst4_u64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_p64() { + let a: [u64; 5] = [0, 1, 2, 2, 6]; + let e: [u64; 4] = [1, 2, 2, 6]; + let mut r: [u64; 4] = [0u64; 4]; + vst4_p64(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_f32() { + let a: [f32; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; + let e: [f32; 8] = [1., 2., 2., 6., 2., 6., 6., 8.]; + let mut r: [f32; 8] = [0f32; 8]; + vst4_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_f32() { + let a: [f32; 17] = [0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.]; + let e: [f32; 16] = [1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.]; + let mut r: [f32; 16] = [0f32; 16]; + vst4q_f32(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_s8() { + let a: [i8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [i8; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i8; 32] = [0i8; 32]; + vst4_lane_s8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_s16() { + let a: [i16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [i16; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0]; + let mut r: [i16; 16] = [0i16; 16]; + vst4_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_s32() { + let a: [i32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [i32; 8] = [1, 2, 2, 6, 0, 0, 0, 0]; + let mut r: [i32; 8] = [0i32; 8]; + vst4_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_lane_s16() { + let a: [i16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [i16; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i16; 32] = [0i16; 32]; + vst4q_lane_s16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_lane_s32() { + let a: [i32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [i32; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [i32; 16] = [0i32; 16]; + vst4q_lane_s32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_u8() { + let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u8; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u8; 32] = [0u8; 32]; + vst4_lane_u8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_u16() { + let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [u16; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 16] = [0u16; 16]; + vst4_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_u32() { + let a: [u32; 9] = [0, 1, 2, 2, 6, 2, 6, 6, 8]; + let e: [u32; 8] = [1, 2, 2, 6, 0, 0, 0, 0]; + let mut r: [u32; 8] = [0u32; 8]; + vst4_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_lane_u16() { + let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u16; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 32] = [0u16; 32]; + vst4q_lane_u16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_lane_u32() { + let a: [u32; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [u32; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u32; 16] = [0u32; 16]; + vst4q_lane_u32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_p8() { + let a: [u8; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u8; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0]; + let mut r: [u8; 32] = [0u8; 32]; + vst4_lane_p8::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_p16() { + let a: [u16; 17] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16]; + let e: [u16; 16] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 16] = [0u16; 16]; + vst4_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_lane_p16() { + let a: [u16; 33] = [0, 1, 2, 2, 6, 2, 6, 6, 8, 2, 6, 6, 8, 6, 8, 8, 16, 2, 6, 6, 8, 6, 8, 8, 16, 6, 8, 8, 16, 8, 16, 16, 32]; + let e: [u16; 32] = [1, 2, 2, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]; + let mut r: [u16; 32] = [0u16; 32]; + vst4q_lane_p16::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4_lane_f32() { + let a: [f32; 9] = [0., 1., 2., 2., 6., 2., 6., 6., 8.]; + let e: [f32; 8] = [1., 2., 2., 6., 0., 0., 0., 0.]; + let mut r: [f32; 8] = [0f32; 8]; + vst4_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vst4q_lane_f32() { + let a: [f32; 17] = [0., 1., 2., 2., 6., 2., 6., 6., 8., 2., 6., 6., 8., 6., 8., 8., 16.]; + let e: [f32; 16] = [1., 2., 2., 6., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]; + let mut r: [f32; 16] = [0f32; 16]; + vst4q_lane_f32::<0>(r.as_mut_ptr(), core::ptr::read_unaligned(a[1..].as_ptr() as _)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s8() { + let a: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: i8x8 = transmute(vmul_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s8() { + let a: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32); + let r: i8x16 = transmute(vmulq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s16() { + let a: i16x4 = i16x4::new(1, 2, 1, 2); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(1, 4, 3, 8); + let r: i16x4 = transmute(vmul_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s16() { + let a: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: i16x8 = transmute(vmulq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(1, 4); + let r: i32x2 = transmute(vmul_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s32() { + let a: i32x4 = i32x4::new(1, 2, 1, 2); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(1, 4, 3, 8); + let r: i32x4 = transmute(vmulq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } 
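+ + // The polynomial tests below (`test_vmul_p8`, `test_vmulq_p8`) exercise carry-less multiplication over GF(2): partial products are combined with XOR rather than integer addition, so 3 * 6 is 0b110 ^ 0b1100 = 10 (not 18), matching the expected vectors.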
+ + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u8() { + let a: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: u8x8 = transmute(vmul_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u8() { + let a: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2); + let b: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(1, 4, 3, 8, 5, 12, 7, 16, 9, 20, 11, 24, 13, 28, 15, 32); + let r: u8x16 = transmute(vmulq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u16() { + let a: u16x4 = u16x4::new(1, 2, 1, 2); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(1, 4, 3, 8); + let r: u16x4 = transmute(vmul_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u16() { + let a: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: u16x8 = transmute(vmulq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(1, 4); + let r: u32x2 = transmute(vmul_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u32() { + let a: u32x4 = u32x4::new(1, 2, 1, 2); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(1, 4, 3, 8); + let r: u32x4 = transmute(vmulq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_p8() { + let a: i8x8 = i8x8::new(1, 3, 1, 3, 1, 3, 1, 3); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 6, 3, 12, 5, 10, 7, 24); + let r: i8x8 = transmute(vmul_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_p8() { + let a: i8x16 = i8x16::new(1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3, 1, 3); + let b: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 6, 3, 12, 5, 10, 7, 24, 9, 30, 11, 20, 13, 18, 15, 48); + let r: i8x16 = transmute(vmulq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_f32() { + let a: f32x2 = f32x2::new(1.0, 2.0); + let b: f32x2 = f32x2::new(2.0, 3.0); + let e: f32x2 = f32x2::new(2.0, 6.0); + let r: f32x2 = transmute(vmul_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_f32() { + let a: f32x4 = f32x4::new(1.0, 2.0, 1.0, 2.0); + let b: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0); + let e: f32x4 = f32x4::new(2.0, 6.0, 4.0, 10.0); + let r: f32x4 = transmute(vmulq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_n_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16 = 2; + let e: i16x4 = i16x4::new(2, 4, 6, 8); + let r: i16x4 = transmute(vmul_n_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_n_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16 = 2; + let e: i16x8 = i16x8::new(2, 4, 
6, 8, 10, 12, 14, 16); + let r: i16x8 = transmute(vmulq_n_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_n_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32 = 2; + let e: i32x2 = i32x2::new(2, 4); + let r: i32x2 = transmute(vmul_n_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_n_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32 = 2; + let e: i32x4 = i32x4::new(2, 4, 6, 8); + let r: i32x4 = transmute(vmulq_n_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_n_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16 = 2; + let e: u16x4 = u16x4::new(2, 4, 6, 8); + let r: u16x4 = transmute(vmul_n_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_n_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16 = 2; + let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16); + let r: u16x8 = transmute(vmulq_n_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_n_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32 = 2; + let e: u32x2 = u32x2::new(2, 4); + let r: u32x2 = transmute(vmul_n_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_n_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32 = 2; + let e: u32x4 = u32x4::new(2, 4, 6, 8); + let r: u32x4 = transmute(vmulq_n_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_n_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32 = 2.; + let e: f32x2 = f32x2::new(2., 4.); + let r: f32x2 = transmute(vmul_n_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_n_f32() { + let a: f32x4 = f32x4::new(1., 2., 3., 4.); + let b: f32 = 2.; + let e: f32x4 = f32x4::new(2., 4., 6., 8.); + let r: f32x4 = transmute(vmulq_n_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_lane_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 2, 0, 0); + let e: i16x4 = i16x4::new(2, 4, 6, 8); + let r: i16x4 = transmute(vmul_lane_s16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_laneq_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: i16x4 = i16x4::new(2, 4, 6, 8); + let r: i16x4 = transmute(vmul_laneq_s16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_lane_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x4 = i16x4::new(0, 2, 0, 0); + let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16); + let r: i16x8 = transmute(vmulq_lane_s16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_laneq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: i16x8 = i16x8::new(2, 4, 6, 8, 10, 12, 14, 16); + let r: i16x8 = transmute(vmulq_laneq_s16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_lane_s32() { + let a: i32x2 = i32x2::new(1, 2); 
+ let b: i32x2 = i32x2::new(0, 2); + let e: i32x2 = i32x2::new(2, 4); + let r: i32x2 = transmute(vmul_lane_s32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_laneq_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x4 = i32x4::new(0, 2, 0, 0); + let e: i32x2 = i32x2::new(2, 4); + let r: i32x2 = transmute(vmul_laneq_s32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_lane_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x2 = i32x2::new(0, 2); + let e: i32x4 = i32x4::new(2, 4, 6, 8); + let r: i32x4 = transmute(vmulq_lane_s32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_laneq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(0, 2, 0, 0); + let e: i32x4 = i32x4::new(2, 4, 6, 8); + let r: i32x4 = transmute(vmulq_laneq_s32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_lane_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 2, 0, 0); + let e: u16x4 = u16x4::new(2, 4, 6, 8); + let r: u16x4 = transmute(vmul_lane_u16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_laneq_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: u16x4 = u16x4::new(2, 4, 6, 8); + let r: u16x4 = transmute(vmul_laneq_u16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_lane_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x4 = u16x4::new(0, 2, 0, 0); + let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16); + let r: u16x8 = transmute(vmulq_lane_u16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_laneq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: u16x8 = u16x8::new(2, 4, 6, 8, 10, 12, 14, 16); + let r: u16x8 = transmute(vmulq_laneq_u16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_lane_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 2); + let e: u32x2 = u32x2::new(2, 4); + let r: u32x2 = transmute(vmul_lane_u32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_laneq_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x4 = u32x4::new(0, 2, 0, 0); + let e: u32x2 = u32x2::new(2, 4); + let r: u32x2 = transmute(vmul_laneq_u32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_lane_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x2 = u32x2::new(0, 2); + let e: u32x4 = u32x4::new(2, 4, 6, 8); + let r: u32x4 = transmute(vmulq_lane_u32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_laneq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u32x4 = u32x4::new(0, 2, 0, 0); + let e: u32x4 = u32x4::new(2, 4, 6, 8); + let r: u32x4 = transmute(vmulq_laneq_u32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_lane_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x2 = 
f32x2::new(2., 0.); + let e: f32x2 = f32x2::new(2., 4.); + let r: f32x2 = transmute(vmul_lane_f32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_laneq_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x4 = f32x4::new(2., 0., 0., 0.); + let e: f32x2 = f32x2::new(2., 4.); + let r: f32x2 = transmute(vmul_laneq_f32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_lane_f32() { + let a: f32x4 = f32x4::new(1., 2., 3., 4.); + let b: f32x2 = f32x2::new(2., 0.); + let e: f32x4 = f32x4::new(2., 4., 6., 8.); + let r: f32x4 = transmute(vmulq_lane_f32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_laneq_f32() { + let a: f32x4 = f32x4::new(1., 2., 3., 4.); + let b: f32x4 = f32x4::new(2., 0., 0., 0.); + let e: f32x4 = f32x4::new(2., 4., 6., 8.); + let r: f32x4 = transmute(vmulq_laneq_f32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: i16x8 = i16x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: i16x8 = transmute(vmull_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(1, 2, 1, 2); + let e: i32x4 = i32x4::new(1, 4, 3, 8); + let r: i32x4 = transmute(vmull_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(1, 2); + let e: i64x2 = i64x2::new(1, 4); + let r: i64x2 = transmute(vmull_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2); + let e: u16x8 = u16x8::new(1, 4, 3, 8, 5, 12, 7, 16); + let r: u16x8 = transmute(vmull_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(1, 2, 1, 2); + let e: u32x4 = u32x4::new(1, 4, 3, 8); + let r: u32x4 = transmute(vmull_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(1, 2); + let e: u64x2 = u64x2::new(1, 4); + let r: u64x2 = transmute(vmull_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_p8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(1, 3, 1, 3, 1, 3, 1, 3); + let e: i16x8 = i16x8::new(1, 6, 3, 12, 5, 10, 7, 24); + let r: i16x8 = transmute(vmull_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_n_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16 = 2; + let e: i32x4 = i32x4::new(2, 4, 6, 8); + let r: i32x4 = transmute(vmull_n_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_n_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32 = 2; + let e: i64x2 = i64x2::new(2, 4); + let r: i64x2 = transmute(vmull_n_s32(transmute(a), transmute(b))); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_n_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16 = 2; + let e: u32x4 = u32x4::new(2, 4, 6, 8); + let r: u32x4 = transmute(vmull_n_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_n_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32 = 2; + let e: u64x2 = u64x2::new(2, 4); + let r: u64x2 = transmute(vmull_n_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_lane_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(0, 2, 0, 0); + let e: i32x4 = i32x4::new(2, 4, 6, 8); + let r: i32x4 = transmute(vmull_lane_s16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_laneq_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: i32x4 = i32x4::new(2, 4, 6, 8); + let r: i32x4 = transmute(vmull_laneq_s16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_lane_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(0, 2); + let e: i64x2 = i64x2::new(2, 4); + let r: i64x2 = transmute(vmull_lane_s32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_laneq_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x4 = i32x4::new(0, 2, 0, 0); + let e: i64x2 = i64x2::new(2, 4); + let r: i64x2 = transmute(vmull_laneq_s32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_lane_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(0, 2, 0, 0); + let e: u32x4 = u32x4::new(2, 4, 6, 8); + let r: u32x4 = transmute(vmull_lane_u16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_laneq_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: u16x8 = u16x8::new(0, 2, 0, 0, 0, 0, 0, 0); + let e: u32x4 = u32x4::new(2, 4, 6, 8); + let r: u32x4 = transmute(vmull_laneq_u16::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_lane_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(0, 2); + let e: u64x2 = u64x2::new(2, 4); + let r: u64x2 = transmute(vmull_lane_u32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmull_laneq_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x4 = u32x4::new(0, 2, 0, 0); + let e: u64x2 = u64x2::new(2, 4); + let r: u64x2 = transmute(vmull_laneq_u32::<1>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vfma_f32() { + let a: f32x2 = f32x2::new(8.0, 18.0); + let b: f32x2 = f32x2::new(6.0, 4.0); + let c: f32x2 = f32x2::new(2.0, 3.0); + let e: f32x2 = f32x2::new(20.0, 30.0); + let r: f32x2 = transmute(vfma_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vfmaq_f32() { + let a: f32x4 = f32x4::new(8.0, 18.0, 12.0, 10.0); + let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0); + let c: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0); + let e: f32x4 = f32x4::new(20.0, 30.0, 40.0, 50.0); + let r: f32x4 = transmute(vfmaq_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } 
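+ + // Fused multiply-add: `vfma_f32(a, b, c)` computes a + b * c with a single rounding step (e.g. 8.0 + 6.0 * 2.0 = 20.0 above); the `vfms` tests below check the fused a - b * c.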
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vfma_n_f32() {
+        let a: f32x2 = f32x2::new(2.0, 3.0);
+        let b: f32x2 = f32x2::new(6.0, 4.0);
+        let c: f32 = 8.0;
+        let e: f32x2 = f32x2::new(50.0, 35.0);
+        let r: f32x2 = transmute(vfma_n_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vfmaq_n_f32() {
+        let a: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
+        let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0);
+        let c: f32 = 8.0;
+        let e: f32x4 = f32x4::new(50.0, 35.0, 60.0, 69.0);
+        let r: f32x4 = transmute(vfmaq_n_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vfms_f32() {
+        let a: f32x2 = f32x2::new(20.0, 30.0);
+        let b: f32x2 = f32x2::new(6.0, 4.0);
+        let c: f32x2 = f32x2::new(2.0, 3.0);
+        let e: f32x2 = f32x2::new(8.0, 18.0);
+        let r: f32x2 = transmute(vfms_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vfmsq_f32() {
+        let a: f32x4 = f32x4::new(20.0, 30.0, 40.0, 50.0);
+        let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0);
+        let c: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
+        let e: f32x4 = f32x4::new(8.0, 18.0, 12.0, 10.0);
+        let r: f32x4 = transmute(vfmsq_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vfms_n_f32() {
+        let a: f32x2 = f32x2::new(50.0, 35.0);
+        let b: f32x2 = f32x2::new(6.0, 4.0);
+        let c: f32 = 8.0;
+        let e: f32x2 = f32x2::new(2.0, 3.0);
+        let r: f32x2 = transmute(vfms_n_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vfmsq_n_f32() {
+        let a: f32x4 = f32x4::new(50.0, 35.0, 60.0, 69.0);
+        let b: f32x4 = f32x4::new(6.0, 4.0, 7.0, 8.0);
+        let c: f32 = 8.0;
+        let e: f32x4 = f32x4::new(2.0, 3.0, 4.0, 5.0);
+        let r: f32x4 = transmute(vfmsq_n_f32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_s8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+        let e: i8x8 = i8x8::new(0, 0, 2, 2, 4, 4, 6, 6);
+        let r: i8x8 = transmute(vsub_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_s8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
+        let e: i8x16 = i8x16::new(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
+        let r: i8x16 = transmute(vsubq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_s16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(1, 2, 1, 2);
+        let e: i16x4 = i16x4::new(0, 0, 2, 2);
+        let r: i16x4 = transmute(vsub_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+        let e: i16x8 = i16x8::new(0, 0, 2, 2, 4, 4, 6, 6);
+        let r: i16x8 = transmute(vsubq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i32x2 = i32x2::new(1, 2);
+        let e: i32x2 = i32x2::new(0, 0);
+        let r: i32x2 = transmute(vsub_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let b: i32x4 = i32x4::new(1, 2, 1, 2);
+        let e: i32x4 = i32x4::new(0, 0, 2, 2);
+        let r: i32x4 = transmute(vsubq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_u8() {
+        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+        let e: u8x8 = u8x8::new(0, 0, 2, 2, 4, 4, 6, 6);
+        let r: u8x8 = transmute(vsub_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_u8() {
+        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
+        let e: u8x16 = u8x16::new(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14);
+        let r: u8x16 = transmute(vsubq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_u16() {
+        let a: u16x4 = u16x4::new(1, 2, 3, 4);
+        let b: u16x4 = u16x4::new(1, 2, 1, 2);
+        let e: u16x4 = u16x4::new(0, 0, 2, 2);
+        let r: u16x4 = transmute(vsub_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+        let e: u16x8 = u16x8::new(0, 0, 2, 2, 4, 4, 6, 6);
+        let r: u16x8 = transmute(vsubq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u32x2 = u32x2::new(1, 2);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vsub_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let b: u32x4 = u32x4::new(1, 2, 1, 2);
+        let e: u32x4 = u32x4::new(0, 0, 2, 2);
+        let r: u32x4 = transmute(vsubq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_s64() {
+        let a: i64x1 = i64x1::new(1);
+        let b: i64x1 = i64x1::new(1);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vsub_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_s64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i64x2 = i64x2::new(1, 2);
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vsubq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_u64() {
+        let a: u64x1 = u64x1::new(1);
+        let b: u64x1 = u64x1::new(1);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vsub_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_u64() {
+        let a: u64x2 = u64x2::new(1, 2);
+        let b: u64x2 = u64x2::new(1, 2);
+        let e: u64x2 = u64x2::new(0, 0);
+        let r: u64x2 = transmute(vsubq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsub_f32() {
+        let a: f32x2 = f32x2::new(1.0, 4.0);
+        let b: f32x2 = f32x2::new(1.0, 2.0);
+        let e: f32x2 = f32x2::new(0.0, 2.0);
+        let r: f32x2 = transmute(vsub_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubq_f32() {
+        let a: f32x4 = f32x4::new(1.0, 4.0, 3.0, 8.0);
+        let b: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0);
+        let e: f32x4 = f32x4::new(0.0, 2.0, 0.0, 4.0);
+        let r: f32x4 = transmute(vsubq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
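+
+    // Informal note for the vadd_p* tests below: polynomial addition is carry-less,
+    // i.e. a bitwise XOR of the operands, so 1 + 1 == 0 and 8 + 1 == 9.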
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vadd_p8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let e: i8x8 = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9);
+        let r: i8x8 = transmute(vadd_p8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vadd_p16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(1, 1, 1, 1);
+        let e: i16x4 = i16x4::new(0, 3, 2, 5);
+        let r: i16x4 = transmute(vadd_p16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_p8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
+        let e: i8x16 = i8x16::new(0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17);
+        let r: i8x16 = transmute(vaddq_p8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_p16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let e: i16x8 = i16x8::new(0, 3, 2, 5, 4, 7, 6, 9);
+        let r: i16x8 = transmute(vaddq_p16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vadd_p64() {
+        let a: i64x1 = i64x1::new(1);
+        let b: i64x1 = i64x1::new(1);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vadd_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_p64() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i64x2 = i64x2::new(1, 1);
+        let e: i64x2 = i64x2::new(0, 3);
+        let r: i64x2 = transmute(vaddq_p64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vaddq_p128() {
+        let a: p128 = 16;
+        let b: p128 = 1;
+        let e: p128 = 17;
+        let r: p128 = transmute(vaddq_p128(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, -32768, 1, 1, 0x7F_FF, -32768, 1, 1);
+        let b: i16x8 = i16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
+        let e: i8x8 = i8x8::new(0x7F, -128, 0, 0, 0x7F, -128, 0, 0);
+        let r: i8x8 = transmute(vsubhn_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 1, 1);
+        let b: i32x4 = i32x4::new(1, 0, 0, 0);
+        let e: i16x4 = i16x4::new(0x7F_FF, -32768, 0, 0);
+        let r: i16x4 = transmute(vsubhn_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_s64() {
+        let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808);
+        let b: i64x2 = i64x2::new(1, 0);
+        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648);
+        let r: i32x2 = transmute(vsubhn_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_u16() {
+        let a: u16x8 = u16x8::new(0xFF_FF, 0, 1, 1, 0xFF_FF, 0, 1, 1);
+        let b: u16x8 = u16x8::new(1, 0, 0, 0, 1, 0, 0, 0);
+        let e: u8x8 = u8x8::new(0xFF, 0, 0, 0, 0xFF, 0, 0, 0);
+        let r: u8x8 = transmute(vsubhn_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
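+
+    // Informal sketch for the vsubhn* tests here: subtract-high-narrow computes
+    // a - b and keeps the high half of each element, e.g. (0x7F_FF - 1) >> 8 == 0x7F.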
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_u32() {
+        let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 1, 1);
+        let b: u32x4 = u32x4::new(1, 0, 0, 0);
+        let e: u16x4 = u16x4::new(0xFF_FF, 0, 0, 0);
+        let r: u16x4 = transmute(vsubhn_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_u64() {
+        let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0);
+        let b: u64x2 = u64x2::new(1, 0);
+        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
+        let r: u32x2 = transmute(vsubhn_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_s16() {
+        let a: i8x8 = i8x8::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
+        let b: i16x8 = i16x8::new(0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1, 0x7F_FF, 1);
+        let c: i16x8 = i16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
+        let e: i8x16 = i8x16::new(0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0, 0x7F, 0);
+        let r: i8x16 = transmute(vsubhn_high_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_s32() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0, 0x7F_FF, 0);
+        let b: i32x4 = i32x4::new(0x7F_FF_FF_FF, 1, 0x7F_FF_FF_FF, 1);
+        let c: i32x4 = i32x4::new(1, 0, 1, 0);
+        let e: i16x8 = i16x8::new(0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0, 0x7F_FF, 0);
+        let r: i16x8 = transmute(vsubhn_high_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_s64() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0);
+        let b: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 1);
+        let c: i64x2 = i64x2::new(1, 0);
+        let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0, 0x7F_FF_FF_FF, 0);
+        let r: i32x4 = transmute(vsubhn_high_s64(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_u16() {
+        let a: u8x8 = u8x8::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
+        let b: u16x8 = u16x8::new(0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1, 0xFF_FF, 1);
+        let c: u16x8 = u16x8::new(1, 0, 1, 0, 1, 0, 1, 0);
+        let e: u8x16 = u8x16::new(0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0, 0xFF, 0);
+        let r: u8x16 = transmute(vsubhn_high_u16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_u32() {
+        let a: u16x4 = u16x4::new(0xFF_FF, 0, 0xFF_FF, 0);
+        let b: u32x4 = u32x4::new(0xFF_FF_FF_FF, 1, 0xFF_FF_FF_FF, 1);
+        let c: u32x4 = u32x4::new(1, 0, 1, 0);
+        let e: u16x8 = u16x8::new(0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0, 0xFF_FF, 0);
+        let r: u16x8 = transmute(vsubhn_high_u32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubhn_high_u64() {
+        let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
+        let b: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 1);
+        let c: u64x2 = u64x2::new(1, 0);
+        let e: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 0xFF_FF_FF_FF, 0);
+        let r: u32x4 = transmute(vsubhn_high_u64(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
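+
+    // Informal note for the vhsub* tests below: halving subtract computes
+    // (a - b) >> 1 without intermediate overflow, e.g. (3 - 1) >> 1 == 1.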
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsub_u8() {
+        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u8x8 = u8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+        let e: u8x8 = u8x8::new(0, 0, 1, 1, 2, 2, 3, 3);
+        let r: u8x8 = transmute(vhsub_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsubq_u8() {
+        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: u8x16 = u8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
+        let e: u8x16 = u8x16::new(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7);
+        let r: u8x16 = transmute(vhsubq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsub_u16() {
+        let a: u16x4 = u16x4::new(1, 2, 3, 4);
+        let b: u16x4 = u16x4::new(1, 2, 1, 2);
+        let e: u16x4 = u16x4::new(0, 0, 1, 1);
+        let r: u16x4 = transmute(vhsub_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsubq_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u16x8 = u16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+        let e: u16x8 = u16x8::new(0, 0, 1, 1, 2, 2, 3, 3);
+        let r: u16x8 = transmute(vhsubq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsub_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u32x2 = u32x2::new(1, 2);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vhsub_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsubq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let b: u32x4 = u32x4::new(1, 2, 1, 2);
+        let e: u32x4 = u32x4::new(0, 0, 1, 1);
+        let r: u32x4 = transmute(vhsubq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsub_s8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x8 = i8x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+        let e: i8x8 = i8x8::new(0, 0, 1, 1, 2, 2, 3, 3);
+        let r: i8x8 = transmute(vhsub_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsubq_s8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2, 1, 2);
+        let e: i8x16 = i8x16::new(0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7);
+        let r: i8x16 = transmute(vhsubq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsub_s16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(1, 2, 1, 2);
+        let e: i16x4 = i16x4::new(0, 0, 1, 1);
+        let r: i16x4 = transmute(vhsub_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsubq_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x8 = i16x8::new(1, 2, 1, 2, 1, 2, 1, 2);
+        let e: i16x8 = i16x8::new(0, 0, 1, 1, 2, 2, 3, 3);
+        let r: i16x8 = transmute(vhsubq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsub_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i32x2 = i32x2::new(1, 2);
+        let e: i32x2 = i32x2::new(0, 0);
+        let r: i32x2 = transmute(vhsub_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vhsubq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let b: i32x4 = i32x4::new(1, 2, 1, 2);
+        let e: i32x4 = i32x4::new(0, 0, 1, 1);
+        let r: i32x4 = transmute(vhsubq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
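+
+    // Informal note for the vsubw*/vsubl* tests below: vsubw subtracts a narrow
+    // vector from a wide one, and vsubl widens both operands before subtracting,
+    // so identical inputs always give a zero vector of the wider type.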
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubw_s8() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i16x8 = transmute(vsubw_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubw_s16() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: i32x4 = i32x4::new(0, 0, 0, 0);
+        let r: i32x4 = transmute(vsubw_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubw_s32() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let b: i32x2 = i32x2::new(0, 1);
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vsubw_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubw_u8() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u16x8 = transmute(vsubw_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubw_u16() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: u16x4 = u16x4::new(0, 1, 2, 3);
+        let e: u32x4 = u32x4::new(0, 0, 0, 0);
+        let r: u32x4 = transmute(vsubw_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubw_u32() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let b: u32x2 = u32x2::new(0, 1);
+        let e: u64x2 = u64x2::new(0, 0);
+        let r: u64x2 = transmute(vsubw_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubl_s8() {
+        let a: i8x8 = i8x8::new(0x7F, -128, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(0x7F, -128, 2, 3, 4, 5, 6, 7);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i16x8 = transmute(vsubl_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubl_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, -32768, 2, 3);
+        let b: i16x4 = i16x4::new(0x7F_FF, -32768, 2, 3);
+        let e: i32x4 = i32x4::new(0, 0, 0, 0);
+        let r: i32x4 = transmute(vsubl_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubl_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648);
+        let b: i32x2 = i32x2::new(0x7F_FF_FF_FF, -2147483648);
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vsubl_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubl_u8() {
+        let a: u8x8 = u8x8::new(0xFF, 0, 2, 3, 4, 5, 6, 7);
+        let b: u8x8 = u8x8::new(0xFF, 0, 2, 3, 4, 5, 6, 7);
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u16x8 = transmute(vsubl_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubl_u16() {
+        let a: u16x4 = u16x4::new(0xFF_FF, 0, 2, 3);
+        let b: u16x4 = u16x4::new(0xFF_FF, 0, 2, 3);
+        let e: u32x4 = u32x4::new(0, 0, 0, 0);
+        let r: u32x4 = transmute(vsubl_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vsubl_u32() {
+        let a: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
+        let b: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0);
+        let e: u64x2 = u64x2::new(0, 0);
+        let r: u64x2 = transmute(vsubl_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmax_s8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let e: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let r: i8x8 = transmute(vmax_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
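+
+    // Informal note for the vmax*/vmin* family tested here: results are element-wise
+    // maxima/minima; the vmaxnm*/vminnm* variants below additionally follow IEEE 754
+    // maxNum/minNum semantics (preferring a number over a NaN), which these
+    // all-numeric vectors do not exercise.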
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxq_s8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+        let e: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: i8x16 = transmute(vmaxq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmax_s16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(16, 15, 14, 13);
+        let e: i16x4 = i16x4::new(16, 15, 14, 13);
+        let r: i16x4 = transmute(vmax_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxq_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let e: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let r: i16x8 = transmute(vmaxq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmax_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i32x2 = i32x2::new(16, 15);
+        let e: i32x2 = i32x2::new(16, 15);
+        let r: i32x2 = transmute(vmax_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let b: i32x4 = i32x4::new(16, 15, 14, 13);
+        let e: i32x4 = i32x4::new(16, 15, 14, 13);
+        let r: i32x4 = transmute(vmaxq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmax_u8() {
+        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let e: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let r: u8x8 = transmute(vmax_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxq_u8() {
+        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+        let e: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 9, 10, 11, 12, 13, 14, 15, 16);
+        let r: u8x16 = transmute(vmaxq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmax_u16() {
+        let a: u16x4 = u16x4::new(1, 2, 3, 4);
+        let b: u16x4 = u16x4::new(16, 15, 14, 13);
+        let e: u16x4 = u16x4::new(16, 15, 14, 13);
+        let r: u16x4 = transmute(vmax_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxq_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let e: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let r: u16x8 = transmute(vmaxq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmax_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u32x2 = u32x2::new(16, 15);
+        let e: u32x2 = u32x2::new(16, 15);
+        let r: u32x2 = transmute(vmax_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let b: u32x4 = u32x4::new(16, 15, 14, 13);
+        let e: u32x4 = u32x4::new(16, 15, 14, 13);
+        let r: u32x4 = transmute(vmaxq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmax_f32() {
+        let a: f32x2 = f32x2::new(1.0, -2.0);
+        let b: f32x2 = f32x2::new(0.0, 3.0);
+        let e: f32x2 = f32x2::new(1.0, 3.0);
+        let r: f32x2 = transmute(vmax_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxq_f32() {
+        let a: f32x4 = f32x4::new(1.0, -2.0, 3.0, -4.0);
+        let b: f32x4 = f32x4::new(0.0, 3.0, 2.0, 8.0);
+        let e: f32x4 = f32x4::new(1.0, 3.0, 3.0, 8.0);
+        let r: f32x4 = transmute(vmaxq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxnm_f32() {
+        let a: f32x2 = f32x2::new(1.0, 2.0);
+        let b: f32x2 = f32x2::new(8.0, 16.0);
+        let e: f32x2 = f32x2::new(8.0, 16.0);
+        let r: f32x2 = transmute(vmaxnm_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmaxnmq_f32() {
+        let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
+        let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
+        let e: f32x4 = f32x4::new(8.0, 16.0, 3.0, 6.0);
+        let r: f32x4 = transmute(vmaxnmq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmin_s8() {
+        let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i8x8 = i8x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: i8x8 = transmute(vmin_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminq_s8() {
+        let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: i8x16 = i8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+        let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1);
+        let r: i8x16 = transmute(vminq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmin_s16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(16, 15, 14, 13);
+        let e: i16x4 = i16x4::new(1, 2, 3, 4);
+        let r: i16x4 = transmute(vmin_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminq_s16() {
+        let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: i16x8 = i16x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: i16x8 = transmute(vminq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmin_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i32x2 = i32x2::new(16, 15);
+        let e: i32x2 = i32x2::new(1, 2);
+        let r: i32x2 = transmute(vmin_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminq_s32() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let b: i32x4 = i32x4::new(16, 15, 14, 13);
+        let e: i32x4 = i32x4::new(1, 2, 3, 4);
+        let r: i32x4 = transmute(vminq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmin_u8() {
+        let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u8x8 = u8x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: u8x8 = transmute(vmin_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminq_u8() {
+        let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b: u8x16 = u8x16::new(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
+        let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 8, 7, 6, 5, 4, 3, 2, 1);
+        let r: u8x16 = transmute(vminq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmin_u16() {
+        let a: u16x4 = u16x4::new(1, 2, 3, 4);
+        let b: u16x4 = u16x4::new(16, 15, 14, 13);
+        let e: u16x4 = u16x4::new(1, 2, 3, 4);
+        let r: u16x4 = transmute(vmin_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminq_u16() {
+        let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b: u16x8 = u16x8::new(16, 15, 14, 13, 12, 11, 10, 9);
+        let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let r: u16x8 = transmute(vminq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmin_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let b: u32x2 = u32x2::new(16, 15);
+        let e: u32x2 = u32x2::new(1, 2);
+        let r: u32x2 = transmute(vmin_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let b: u32x4 = u32x4::new(16, 15, 14, 13);
+        let e: u32x4 = u32x4::new(1, 2, 3, 4);
+        let r: u32x4 = transmute(vminq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vmin_f32() {
+        let a: f32x2 = f32x2::new(1.0, -2.0);
+        let b: f32x2 = f32x2::new(0.0, 3.0);
+        let e: f32x2 = f32x2::new(0.0, -2.0);
+        let r: f32x2 = transmute(vmin_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminq_f32() {
+        let a: f32x4 = f32x4::new(1.0, -2.0, 3.0, -4.0);
+        let b: f32x4 = f32x4::new(0.0, 3.0, 2.0, 8.0);
+        let e: f32x4 = f32x4::new(0.0, -2.0, 2.0, -4.0);
+        let r: f32x4 = transmute(vminq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminnm_f32() {
+        let a: f32x2 = f32x2::new(1.0, 2.0);
+        let b: f32x2 = f32x2::new(8.0, 16.0);
+        let e: f32x2 = f32x2::new(1.0, 2.0);
+        let r: f32x2 = transmute(vminnm_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vminnmq_f32() {
+        let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, -4.0);
+        let b: f32x4 = f32x4::new(8.0, 16.0, -1.0, 6.0);
+        let e: f32x4 = f32x4::new(1.0, 2.0, -1.0, -4.0);
+        let r: f32x4 = transmute(vminnmq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vpadd_f32() {
+        let a: f32x2 = f32x2::new(1., 2.);
+        let b: f32x2 = f32x2::new(3., 4.);
+        let e: f32x2 = f32x2::new(3., 7.);
+        let r: f32x2 = transmute(vpadd_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmull_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let e: i32x4 = i32x4::new(0, 4, 12, 24);
+        let r: i32x4 = transmute(vqdmull_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmull_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(1, 2);
+        let e: i64x2 = i64x2::new(0, 4);
+        let r: i64x2 = transmute(vqdmull_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmull_n_s16() {
+        let a: i16x4 = i16x4::new(2, 4, 6, 8);
+        let b: i16 = 2;
+        let e: i32x4 = i32x4::new(8, 16, 24, 32);
+        let r: i32x4 = transmute(vqdmull_n_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmull_n_s32() {
+        let a: i32x2 = i32x2::new(2, 4);
+        let b: i32 = 2;
+        let e: i64x2 = i64x2::new(8, 16);
+        let r: i64x2 = transmute(vqdmull_n_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmull_lane_s16() {
+        let a: i16x4 = i16x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(0, 2, 2, 0);
+        let e: i32x4 = i32x4::new(4, 8, 12, 16);
+        let r: i32x4 = transmute(vqdmull_lane_s16::<2>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmull_lane_s32() {
+        let a: i32x2 = i32x2::new(1, 2);
+        let b: i32x2 = i32x2::new(0, 2);
+        let e: i64x2 = i64x2::new(4, 8);
+        let r: i64x2 = transmute(vqdmull_lane_s32::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlal_s16() {
+        let a: i32x4 = i32x4::new(1, 1, 1, 1);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let c: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: i32x4 = i32x4::new(5, 9, 13, 17);
+        let r: i32x4 = transmute(vqdmlal_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlal_s32() {
+        let a: i64x2 = i64x2::new(1, 1);
+        let b: i32x2 = i32x2::new(1, 2);
+        let c: i32x2 = i32x2::new(2, 2);
+        let e: i64x2 = i64x2::new(5, 9);
+        let r: i64x2 = transmute(vqdmlal_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlal_n_s16() {
+        let a: i32x4 = i32x4::new(1, 1, 1, 1);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let c: i16 = 2;
+        let e: i32x4 = i32x4::new(5, 9, 13, 17);
+        let r: i32x4 = transmute(vqdmlal_n_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlal_n_s32() {
+        let a: i64x2 = i64x2::new(1, 1);
+        let b: i32x2 = i32x2::new(1, 2);
+        let c: i32 = 2;
+        let e: i64x2 = i64x2::new(5, 9);
+        let r: i64x2 = transmute(vqdmlal_n_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlal_lane_s16() {
+        let a: i32x4 = i32x4::new(1, 2, 3, 4);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let c: i16x4 = i16x4::new(0, 2, 2, 0);
+        let e: i32x4 = i32x4::new(5, 10, 15, 20);
+        let r: i32x4 = transmute(vqdmlal_lane_s16::<2>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlal_lane_s32() {
+        let a: i64x2 = i64x2::new(1, 2);
+        let b: i32x2 = i32x2::new(1, 2);
+        let c: i32x2 = i32x2::new(0, 2);
+        let e: i64x2 = i64x2::new(5, 10);
+        let r: i64x2 = transmute(vqdmlal_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsl_s16() {
+        let a: i32x4 = i32x4::new(3, 7, 11, 15);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let c: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: i32x4 = i32x4::new(-1, -1, -1, -1);
+        let r: i32x4 = transmute(vqdmlsl_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsl_s32() {
+        let a: i64x2 = i64x2::new(3, 7);
+        let b: i32x2 = i32x2::new(1, 2);
+        let c: i32x2 = i32x2::new(2, 2);
+        let e: i64x2 = i64x2::new(-1, -1);
+        let r: i64x2 = transmute(vqdmlsl_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
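+
+    // Informal sketch for the vqdmull/vqdmlal/vqdmlsl tests in this block: the core
+    // product is sat(2 * a * b) widened to the double-width type; vqdmlal adds it to
+    // the accumulator and vqdmlsl subtracts it, e.g. 1 + 2*2*2 == 9 and
+    // 3 - 2*1*2 == -1 in the expected vectors above.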
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsl_n_s16() {
+        let a: i32x4 = i32x4::new(3, 7, 11, 15);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let c: i16 = 2;
+        let e: i32x4 = i32x4::new(-1, -1, -1, -1);
+        let r: i32x4 = transmute(vqdmlsl_n_s16(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsl_n_s32() {
+        let a: i64x2 = i64x2::new(3, 7);
+        let b: i32x2 = i32x2::new(1, 2);
+        let c: i32 = 2;
+        let e: i64x2 = i64x2::new(-1, -1);
+        let r: i64x2 = transmute(vqdmlsl_n_s32(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsl_lane_s16() {
+        let a: i32x4 = i32x4::new(3, 6, 9, 12);
+        let b: i16x4 = i16x4::new(1, 2, 3, 4);
+        let c: i16x4 = i16x4::new(0, 2, 2, 0);
+        let e: i32x4 = i32x4::new(-1, -2, -3, -4);
+        let r: i32x4 = transmute(vqdmlsl_lane_s16::<2>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmlsl_lane_s32() {
+        let a: i64x2 = i64x2::new(3, 6);
+        let b: i32x2 = i32x2::new(1, 2);
+        let c: i32x2 = i32x2::new(0, 2);
+        let e: i64x2 = i64x2::new(-1, -2);
+        let r: i64x2 = transmute(vqdmlsl_lane_s32::<1>(transmute(a), transmute(b), transmute(c)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulh_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: i16x4 = i16x4::new(1, 1, 1, 1);
+        let r: i16x4 = transmute(vqdmulh_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulhq_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i16x8 = transmute(vqdmulhq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulh_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x2 = i32x2::new(2, 2);
+        let e: i32x2 = i32x2::new(1, 1);
+        let r: i32x2 = transmute(vqdmulh_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulhq_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let e: i32x4 = i32x4::new(1, 1, 1, 1);
+        let r: i32x4 = transmute(vqdmulhq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulh_n_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16 = 2;
+        let e: i16x4 = i16x4::new(1, 1, 1, 1);
+        let r: i16x4 = transmute(vqdmulh_n_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulh_n_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32 = 2;
+        let e: i32x2 = i32x2::new(1, 1);
+        let r: i32x2 = transmute(vqdmulh_n_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulhq_n_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16 = 2;
+        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i16x8 = transmute(vqdmulhq_n_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulhq_n_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32 = 2;
+        let e: i32x4 = i32x4::new(1, 1, 1, 1);
+        let r: i32x4 = transmute(vqdmulhq_n_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulhq_laneq_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1);
+        let e: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1);
+        let r: i16x8 = transmute(vqdmulhq_laneq_s16::<0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulh_laneq_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x8 = i16x8::new(2, 1, 1, 1, 1, 1, 1, 1);
+        let e: i16x4 = i16x4::new(1, 1, 1, 1);
+        let r: i16x4 = transmute(vqdmulh_laneq_s16::<0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulhq_laneq_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x4 = i32x4::new(2, 1, 1, 1);
+        let e: i32x4 = i32x4::new(1, 1, 1, 1);
+        let r: i32x4 = transmute(vqdmulhq_laneq_s32::<0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqdmulh_laneq_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x4 = i32x4::new(2, 1, 1, 1);
+        let e: i32x2 = i32x2::new(1, 1);
+        let r: i32x2 = transmute(vqdmulh_laneq_s32::<0>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovn_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let e: i8x8 = i8x8::new(0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F, 0x7F);
+        let r: i8x8 = transmute(vqmovn_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovn_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let e: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let r: i16x4 = transmute(vqmovn_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovn_s64() {
+        let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, 0x7F_FF_FF_FF_FF_FF_FF_FF);
+        let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let r: i32x2 = transmute(vqmovn_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovn_u16() {
+        let a: u16x8 = u16x8::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
+        let e: u8x8 = u8x8::new(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
+        let r: u8x8 = transmute(vqmovn_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovn_u32() {
+        let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+        let e: u16x4 = u16x4::new(0xFF_FF, 0xFF_FF, 0xFF_FF, 0xFF_FF);
+        let r: u16x4 = transmute(vqmovn_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovn_u64() {
+        let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0xFF_FF_FF_FF_FF_FF_FF_FF);
+        let e: u32x2 = u32x2::new(0xFF_FF_FF_FF, 0xFF_FF_FF_FF);
+        let r: u32x2 = transmute(vqmovn_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
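+
+    // Informal note for the vqmovn*/vqmovun* tests here: both narrow each element
+    // with saturation; vqmovun narrows signed input to an unsigned result, so the
+    // all -1 inputs below saturate to 0.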
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovun_s16() {
+        let a: i16x8 = i16x8::new(-1, -1, -1, -1, -1, -1, -1, -1);
+        let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x8 = transmute(vqmovun_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovun_s32() {
+        let a: i32x4 = i32x4::new(-1, -1, -1, -1);
+        let e: u16x4 = u16x4::new(0, 0, 0, 0);
+        let r: u16x4 = transmute(vqmovun_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqmovun_s64() {
+        let a: i64x2 = i64x2::new(-1, -1);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vqmovun_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulh_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: i16x4 = i16x4::new(2, 2, 2, 2);
+        let r: i16x4 = transmute(vqrdmulh_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulhq_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let r: i16x8 = transmute(vqrdmulhq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulh_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x2 = i32x2::new(2, 2);
+        let e: i32x2 = i32x2::new(2, 2);
+        let r: i32x2 = transmute(vqrdmulh_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulhq_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let e: i32x4 = i32x4::new(2, 2, 2, 2);
+        let r: i32x4 = transmute(vqrdmulhq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulh_n_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16 = 2;
+        let e: i16x4 = i16x4::new(2, 2, 2, 2);
+        let r: i16x4 = transmute(vqrdmulh_n_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulhq_n_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16 = 2;
+        let e: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let r: i16x8 = transmute(vqrdmulhq_n_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulh_n_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32 = 2;
+        let e: i32x2 = i32x2::new(2, 2);
+        let r: i32x2 = transmute(vqrdmulh_n_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulhq_n_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32 = 2;
+        let e: i32x4 = i32x4::new(2, 2, 2, 2);
+        let r: i32x4 = transmute(vqrdmulhq_n_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulh_lane_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x4 = i16x4::new(0, 2, 0, 0);
+        let e: i16x4 = i16x4::new(2, 2, 2, 2);
+        let r: i16x4 = transmute(vqrdmulh_lane_s16::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulh_laneq_s16() {
+        let a: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+        let e: i16x4 = i16x4::new(2, 2, 2, 2);
+        let r: i16x4 = transmute(vqrdmulh_laneq_s16::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulhq_lane_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x4 = i16x4::new(0, 2, 0, 0);
+        let e: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let r: i16x8 = transmute(vqrdmulhq_lane_s16::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulhq_laneq_s16() {
+        let a: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF, 0x7F_FF);
+        let b: i16x8 = i16x8::new(0, 2, 0, 0, 0, 0, 0, 0);
+        let e: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let r: i16x8 = transmute(vqrdmulhq_laneq_s16::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulh_lane_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x2 = i32x2::new(0, 2);
+        let e: i32x2 = i32x2::new(2, 2);
+        let r: i32x2 = transmute(vqrdmulh_lane_s32::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulh_laneq_s32() {
+        let a: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x4 = i32x4::new(0, 2, 0, 0);
+        let e: i32x2 = i32x2::new(2, 2);
+        let r: i32x2 = transmute(vqrdmulh_laneq_s32::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulhq_lane_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x2 = i32x2::new(0, 2);
+        let e: i32x4 = i32x4::new(2, 2, 2, 2);
+        let r: i32x4 = transmute(vqrdmulhq_lane_s32::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrdmulhq_laneq_s32() {
+        let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 0x7F_FF_FF_FF);
+        let b: i32x4 = i32x4::new(0, 2, 0, 0);
+        let e: i32x4 = i32x4::new(2, 2, 2, 2);
+        let r: i32x4 = transmute(vqrdmulhq_laneq_s32::<1>(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_s8() {
+        let a: i8x8 = i8x8::new(2, -128, 0x7F, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i8x8 = i8x8::new(8, -128, 0x7F, 12, 16, 20, 24, 28);
+        let r: i8x8 = transmute(vqrshl_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_s8() {
+        let a: i8x16 = i8x16::new(2, -128, 0x7F, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i8x16 = i8x16::new(8, -128, 0x7F, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: i8x16 = transmute(vqrshlq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_s16() {
+        let a: i16x4 = i16x4::new(2, -32768, 0x7F_FF, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: i16x4 = i16x4::new(8, -32768, 0x7F_FF, 12);
+        let r: i16x4 = transmute(vqrshl_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_s16() {
+        let a: i16x8 = i16x8::new(2, -32768, 0x7F_FF, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i16x8 = i16x8::new(8, -32768, 0x7F_FF, 12, 16, 20, 24, 28);
+        let r: i16x8 = transmute(vqrshlq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_s32() {
+        let a: i32x2 = i32x2::new(2, -2147483648);
+        let b: i32x2 = i32x2::new(2, 2);
+        let e: i32x2 = i32x2::new(8, -2147483648);
+        let r: i32x2 = transmute(vqrshl_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_s32() {
+        let a: i32x4 = i32x4::new(2, -2147483648, 0x7F_FF_FF_FF, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let e: i32x4 = i32x4::new(8, -2147483648, 0x7F_FF_FF_FF, 12);
+        let r: i32x4 = transmute(vqrshlq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_s64() {
+        let a: i64x1 = i64x1::new(2);
+        let b: i64x1 = i64x1::new(2);
+        let e: i64x1 = i64x1::new(8);
+        let r: i64x1 = transmute(vqrshl_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_s64() {
+        let a: i64x2 = i64x2::new(2, -9223372036854775808);
+        let b: i64x2 = i64x2::new(2, 2);
+        let e: i64x2 = i64x2::new(8, -9223372036854775808);
+        let r: i64x2 = transmute(vqrshlq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_u8() {
+        let a: u8x8 = u8x8::new(2, 0, 0xFF, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u8x8 = u8x8::new(8, 0, 0xFF, 12, 16, 20, 24, 28);
+        let r: u8x8 = transmute(vqrshl_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_u8() {
+        let a: u8x16 = u8x16::new(2, 0, 0xFF, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u8x16 = u8x16::new(8, 0, 0xFF, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: u8x16 = transmute(vqrshlq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_u16() {
+        let a: u16x4 = u16x4::new(2, 0, 0xFF_FF, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: u16x4 = u16x4::new(8, 0, 0xFF_FF, 12);
+        let r: u16x4 = transmute(vqrshl_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_u16() {
+        let a: u16x8 = u16x8::new(2, 0, 0xFF_FF, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u16x8 = u16x8::new(8, 0, 0xFF_FF, 12, 16, 20, 24, 28);
+        let r: u16x8 = transmute(vqrshlq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_u32() {
+        let a: u32x2 = u32x2::new(2, 0);
+        let b: i32x2 = i32x2::new(2, 2);
+        let e: u32x2 = u32x2::new(8, 0);
+        let r: u32x2 = transmute(vqrshl_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_u32() {
+        let a: u32x4 = u32x4::new(2, 0, 0xFF_FF_FF_FF, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let e: u32x4 = u32x4::new(8, 0, 0xFF_FF_FF_FF, 12);
+        let r: u32x4 = transmute(vqrshlq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshl_u64() {
+        let a: u64x1 = u64x1::new(2);
+        let b: i64x1 = i64x1::new(2);
+        let e: u64x1 = u64x1::new(8);
+        let r: u64x1 = transmute(vqrshl_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshlq_u64() {
+        let a: u64x2 = u64x2::new(2, 0);
+        let b: i64x2 = i64x2::new(2, 2);
+        let e: u64x2 = u64x2::new(8, 0);
+        let r: u64x2 = transmute(vqrshlq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_s16() {
+        let a: i16x8 = i16x8::new(-32768, 4, 8, 12, 16, 20, 24, 28);
+        let e: i8x8 = i8x8::new(-128, 1, 2, 3, 4, 5, 6, 7);
+        let r: i8x8 = transmute(vqrshrn_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_s32() {
+        let a: i32x4 = i32x4::new(-2147483648, 4, 8, 12);
+        let e: i16x4 = i16x4::new(-32768, 1, 2, 3);
+        let r: i16x4 = transmute(vqrshrn_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_s64() {
+        let a: i64x2 = i64x2::new(-9223372036854775808, 4);
+        let e: i32x2 = i32x2::new(-2147483648, 1);
+        let r: i32x2 = transmute(vqrshrn_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_u16() {
+        let a: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vqrshrn_n_u16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_u32() {
+        let a: u32x4 = u32x4::new(0, 4, 8, 12);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vqrshrn_n_u32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrn_n_u64() {
+        let a: u64x2 = u64x2::new(0, 4);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vqrshrn_n_u64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_n_s16() {
+        let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vqrshrun_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_n_s32() {
+        let a: i32x4 = i32x4::new(0, 4, 8, 12);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vqrshrun_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqrshrun_n_s64() {
+        let a: i64x2 = i64x2::new(0, 4);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vqrshrun_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_s8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i8x8 = i8x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: i8x8 = transmute(vqshl_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i8x16 = i8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: i8x16 = transmute(vqshlq_s8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: i16x4 = i16x4::new(0, 4, 8, 12);
+        let r: i16x4 = transmute(vqshl_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: i16x8 = transmute(vqshlq_s16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let e: i32x2 = i32x2::new(0, 4);
+        let r: i32x2 = transmute(vqshl_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let e: i32x4 = i32x4::new(0, 4, 8, 12);
+        let r: i32x4 = transmute(vqshlq_s32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let b: i64x1 = i64x1::new(2);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vqshl_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let b: i64x2 = i64x2::new(2, 2);
+        let e: i64x2 = i64x2::new(0, 4);
+        let r: i64x2 = transmute(vqshlq_s64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_u8() {
+        let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u8x8 = transmute(vqshl_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_u8() {
+        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: u8x16 = transmute(vqshlq_u8(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let b: i16x4 = i16x4::new(2, 2, 2, 2);
+        let e: u16x4 = u16x4::new(0, 4, 8, 12);
+        let r: u16x4 = transmute(vqshl_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2);
+        let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u16x8 = transmute(vqshlq_u16(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let b: i32x2 = i32x2::new(2, 2);
+        let e: u32x2 = u32x2::new(0, 4);
+        let r: u32x2 = transmute(vqshl_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let b: i32x4 = i32x4::new(2, 2, 2, 2);
+        let e: u32x4 = u32x4::new(0, 4, 8, 12);
+        let r: u32x4 = transmute(vqshlq_u32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let b: i64x1 = i64x1::new(2);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vqshl_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let b: i64x2 = i64x2::new(2, 2);
+        let e: u64x2 = u64x2::new(0, 4);
+        let r: u64x2 = transmute(vqshlq_u64(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
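+
+    // Informal note for the vqshl*_n/vqshlu*_n tests below: the shift amount is a
+    // const generic rather than a second vector, and vqshlu produces an unsigned
+    // result from signed input, saturating negative lanes to 0.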
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_n_s8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x8 = i8x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: i8x8 = transmute(vqshl_n_s8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_n_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: i8x16 = i8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: i8x16 = transmute(vqshlq_n_s8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_n_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: i16x4 = i16x4::new(0, 4, 8, 12);
+        let r: i16x4 = transmute(vqshl_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_n_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: i16x8 = transmute(vqshlq_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_n_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: i32x2 = i32x2::new(0, 4);
+        let r: i32x2 = transmute(vqshl_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_n_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: i32x4 = i32x4::new(0, 4, 8, 12);
+        let r: i32x4 = transmute(vqshlq_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_n_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vqshl_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_n_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i64x2 = i64x2::new(0, 4);
+        let r: i64x2 = transmute(vqshlq_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_n_u8() {
+        let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u8x8 = transmute(vqshl_n_u8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_n_u8() {
+        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: u8x16 = transmute(vqshlq_n_u8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_n_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let e: u16x4 = u16x4::new(0, 4, 8, 12);
+        let r: u16x4 = transmute(vqshl_n_u16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_n_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u16x8 = transmute(vqshlq_n_u16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_n_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let e: u32x2 = u32x2::new(0, 4);
+        let r: u32x2 = transmute(vqshl_n_u32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_n_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let e: u32x4 = u32x4::new(0, 4, 8, 12);
+        let r: u32x4 = transmute(vqshlq_n_u32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
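+    // In the `_n` forms the shift amount is a const generic (the `::<2>`
+    // turbofish) rather than a runtime vector, mirroring the immediate operand
+    // of the underlying instruction.
+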
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshl_n_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vqshl_n_u64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlq_n_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let e: u64x2 = u64x2::new(0, 4);
+        let r: u64x2 = transmute(vqshlq_n_u64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x8 = u8x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u8x8 = transmute(vqshlu_n_s8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: u16x4 = u16x4::new(0, 4, 8, 12);
+        let r: u16x4 = transmute(vqshlu_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: u32x2 = u32x2::new(0, 4);
+        let r: u32x2 = transmute(vqshlu_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshlu_n_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vqshlu_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: u8x16 = u8x16::new(0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60);
+        let r: u8x16 = transmute(vqshluq_n_s8::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let r: u16x8 = transmute(vqshluq_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: u32x4 = u32x4::new(0, 4, 8, 12);
+        let r: u32x4 = transmute(vqshluq_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshluq_n_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u64x2 = u64x2::new(0, 4);
+        let r: u64x2 = transmute(vqshluq_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
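+    // `vqshlu` is the signed-in, unsigned-out variant: lanes are shifted left
+    // and saturated to the unsigned range, so a negative input lane would clamp
+    // to 0 (the inputs above are all non-negative, hence no clamping here).
+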
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrn_n_s16() {
+        let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i8x8 = transmute(vqshrn_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrn_n_s32() {
+        let a: i32x4 = i32x4::new(0, 4, 8, 12);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vqshrn_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrn_n_s64() {
+        let a: i64x2 = i64x2::new(0, 4);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vqshrn_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrn_n_u16() {
+        let a: u16x8 = u16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vqshrn_n_u16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrn_n_u32() {
+        let a: u32x4 = u32x4::new(0, 4, 8, 12);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vqshrn_n_u32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrn_n_u64() {
+        let a: u64x2 = u64x2::new(0, 4);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vqshrn_n_u64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrun_n_s16() {
+        let a: i16x8 = i16x8::new(0, 4, 8, 12, 16, 20, 24, 28);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vqshrun_n_s16::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrun_n_s32() {
+        let a: i32x4 = i32x4::new(0, 4, 8, 12);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vqshrun_n_s32::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vqshrun_n_s64() {
+        let a: i64x2 = i64x2::new(0, 4);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vqshrun_n_s64::<2>(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsqrte_f32() {
+        let a: f32x2 = f32x2::new(1.0, 2.0);
+        let e: f32x2 = f32x2::new(0.998046875, 0.705078125);
+        let r: f32x2 = transmute(vrsqrte_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsqrteq_f32() {
+        let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0);
+        let e: f32x4 = f32x4::new(0.998046875, 0.705078125, 0.576171875, 0.4990234375);
+        let r: f32x4 = transmute(vrsqrteq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsqrte_u32() {
+        let a: u32x2 = u32x2::new(1, 2);
+        let e: u32x2 = u32x2::new(4294967295, 4294967295);
+        let r: u32x2 = transmute(vrsqrte_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsqrteq_u32() {
+        let a: u32x4 = u32x4::new(1, 2, 3, 4);
+        let e: u32x4 = u32x4::new(4294967295, 4294967295, 4294967295, 4294967295);
+        let r: u32x4 = transmute(vrsqrteq_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
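+    // `vrsqrte` returns only a table-based estimate, hence 0.705078125 above
+    // rather than 1/sqrt(2) ~= 0.70711. The `u32` variant treats the operand as
+    // a fixed-point fraction; for operands as small as these the estimate
+    // overflows and saturates to u32::MAX (4294967295).
+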
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsqrts_f32() {
+        let a: f32x2 = f32x2::new(1.0, 2.0);
+        let b: f32x2 = f32x2::new(1.0, 2.0);
+        let e: f32x2 = f32x2::new(1., -0.5);
+        let r: f32x2 = transmute(vrsqrts_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrsqrtsq_f32() {
+        let a: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0);
+        let b: f32x4 = f32x4::new(1.0, 2.0, 3.0, 4.0);
+        let e: f32x4 = f32x4::new(1., -0.5, -3.0, -6.5);
+        let r: f32x4 = transmute(vrsqrtsq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpe_f32() {
+        let a: f32x2 = f32x2::new(4.0, 3.0);
+        let e: f32x2 = f32x2::new(0.24951171875, 0.3330078125);
+        let r: f32x2 = transmute(vrecpe_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpeq_f32() {
+        let a: f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0);
+        let e: f32x4 = f32x4::new(0.24951171875, 0.3330078125, 0.4990234375, 0.998046875);
+        let r: f32x4 = transmute(vrecpeq_f32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpe_u32() {
+        let a: u32x2 = u32x2::new(4, 3);
+        let e: u32x2 = u32x2::new(4294967295, 4294967295);
+        let r: u32x2 = transmute(vrecpe_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpeq_u32() {
+        let a: u32x4 = u32x4::new(4, 3, 2, 1);
+        let e: u32x4 = u32x4::new(4294967295, 4294967295, 4294967295, 4294967295);
+        let r: u32x4 = transmute(vrecpeq_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecps_f32() {
+        let a: f32x2 = f32x2::new(4.0, 3.0);
+        let b: f32x2 = f32x2::new(4.0, 3.0);
+        let e: f32x2 = f32x2::new(-14., -7.);
+        let r: f32x2 = transmute(vrecps_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vrecpsq_f32() {
+        let a: f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0);
+        let b: f32x4 = f32x4::new(4.0, 3.0, 2.0, 1.0);
+        let e: f32x4 = f32x4::new(-14., -7., -2., 1.);
+        let r: f32x4 = transmute(vrecpsq_f32(transmute(a), transmute(b)));
+        assert_eq!(r, e);
+    }
+
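+    // `vrsqrts` and `vrecps` compute the Newton-Raphson step factors:
+    // vrsqrts(a, b) = (3 - a * b) / 2 and vrecps(a, b) = 2 - a * b, which is
+    // exactly what the expected lanes above encode (e.g. (3 - 4.0 * 4.0) / 2
+    // = -6.5 and 2 - 4.0 * 4.0 = -14.0).
+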
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_u8() {
+        let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i8x8 = transmute(vreinterpret_s8_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_p8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i8x8 = transmute(vreinterpret_s8_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_p16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_s16_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_s16_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vreinterpret_s32_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s64_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_s64_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_u8() {
+        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: i8x16 = transmute(vreinterpretq_s8_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_p8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: i8x16 = transmute(vreinterpretq_s8_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_p16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_s16_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_s16_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vreinterpretq_s32_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_s64_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_p8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vreinterpret_u8_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_s8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u8x8 = transmute(vreinterpret_u8_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_p16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vreinterpret_u16_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vreinterpret_u16_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vreinterpret_u32_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u64_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vreinterpret_u64_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_p8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: u8x16 = transmute(vreinterpretq_u8_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: u8x16 = transmute(vreinterpretq_u8_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_p16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vreinterpretq_u16_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vreinterpretq_u16_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vreinterpretq_u32_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vreinterpretq_u64_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
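+    // All `vreinterpret` intrinsics are pure bit casts: between types of the
+    // same lane width the lane values round-trip unchanged, as the tests
+    // around this point show.
+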
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_s8() {
+        let a: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i8x8 = transmute(vreinterpret_p8_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_u8() {
+        let a: u8x8 = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x8 = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i8x8 = transmute(vreinterpret_p8_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_p16_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_p16_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_s8() {
+        let a: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: i8x16 = transmute(vreinterpretq_p8_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_u8() {
+        let a: u8x16 = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let e: i8x16 = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+        let r: i8x16 = transmute(vreinterpretq_p8_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_p16_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_p16_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
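+    // The widening reinterprets below expose the little-endian lane layout
+    // these tests assume: reading i16x4(0, 1, 2, 3) as bytes yields
+    // (0, 0, 1, 0, 2, 0, 3, 0), i.e. each 16-bit lane's low byte comes first.
+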
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_p16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: i16x4 = i16x4::new(0, 0, 1, 0);
+        let r: i16x4 = transmute(vreinterpret_s16_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let e: i16x4 = i16x4::new(0, 0, 1, 0);
+        let r: i16x4 = transmute(vreinterpret_s16_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i32x2 = i32x2::new(0, 0);
+        let r: i32x2 = transmute(vreinterpret_s32_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: i32x2 = i32x2::new(0, 0);
+        let r: i32x2 = transmute(vreinterpret_s32_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_p16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let e: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i32x4 = i32x4::new(0, 0, 1, 0);
+        let r: i32x4 = transmute(vreinterpretq_s32_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let e: i32x4 = i32x4::new(0, 0, 1, 0);
+        let r: i32x4 = transmute(vreinterpretq_s32_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_p16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let e: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: u16x4 = u16x4::new(0, 0, 1, 0);
+        let r: u16x4 = transmute(vreinterpret_u16_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let e: u16x4 = u16x4::new(0, 0, 1, 0);
+        let r: u16x4 = transmute(vreinterpret_u16_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vreinterpret_u32_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vreinterpret_u32_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_p16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: u16x8 = u16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let e: u16x8 = u16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u32x4 = u32x4::new(0, 0, 1, 0);
+        let r: u32x4 = transmute(vreinterpretq_u32_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let e: u32x4 = u32x4::new(0, 0, 1, 0);
+        let r: u32x4 = transmute(vreinterpretq_u32_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_p16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_s16() {
+        let a: i16x4 = i16x4::new(0, 1, 2, 3);
+        let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_u16() {
+        let a: u16x4 = u16x4::new(0, 1, 2, 3);
+        let e: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: i16x4 = i16x4::new(0, 0, 1, 0);
+        let r: i16x4 = transmute(vreinterpret_p16_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let e: i16x4 = i16x4::new(0, 0, 1, 0);
+        let r: i16x4 = transmute(vreinterpret_p16_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_p16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_s16() {
+        let a: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_u16() {
+        let a: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let e: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let e: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i32x2 = i32x2::new(0, 0);
+        let r: i32x2 = transmute(vreinterpret_s32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u32x2 = u32x2::new(0, 0);
+        let r: u32x2 = transmute(vreinterpret_u32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i32x4 = i32x4::new(0, 0, 1, 0);
+        let r: i32x4 = transmute(vreinterpretq_s32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u32x4 = u32x4::new(0, 0, 1, 0);
+        let r: u32x4 = transmute(vreinterpretq_u32_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_p128() {
+        let a: p128 = 0;
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vreinterpretq_s64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_p128() {
+        let a: p128 = 0;
+        let e: u64x2 = u64x2::new(0, 0);
+        let r: u64x2 = transmute(vreinterpretq_u64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_p128() {
+        let a: p128 = 0;
+        let e: i64x2 = i64x2::new(0, 0);
+        let r: i64x2 = transmute(vreinterpretq_p64_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
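+    // `p64`/`p128` are the polynomial scalar types (64- and 128-bit); a
+    // reinterpret to or from `p128` is likewise just a transmute of all
+    // 128 bits of the vector.
+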
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_p8() {
+        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_s16_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_s8() {
+        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_s16_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_u8() {
+        let a: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_s16_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_p16() {
+        let a: i16x4 = i16x4::new(0, 0, 1, 0);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vreinterpret_s32_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_s16() {
+        let a: i16x4 = i16x4::new(0, 0, 1, 0);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vreinterpret_s32_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_u16() {
+        let a: u16x4 = u16x4::new(0, 0, 1, 0);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vreinterpret_s32_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s64_s32() {
+        let a: i32x2 = i32x2::new(0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_s64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s64_u32() {
+        let a: u32x2 = u32x2::new(0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_s64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_p8() {
+        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_s16_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_s8() {
+        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_s16_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_u8() {
+        let a: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_s16_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vreinterpretq_s32_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vreinterpretq_s32_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vreinterpretq_s32_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_s32() {
+        let a: i32x4 = i32x4::new(0, 0, 1, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_s64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_u32() {
+        let a: u32x4 = u32x4::new(0, 0, 1, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_s64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_p8() {
+        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vreinterpret_u16_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_s8() {
+        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vreinterpret_u16_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_u8() {
+        let a: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: u16x4 = u16x4::new(0, 1, 2, 3);
+        let r: u16x4 = transmute(vreinterpret_u16_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_p16() {
+        let a: i16x4 = i16x4::new(0, 0, 1, 0);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vreinterpret_u32_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_s16() {
+        let a: i16x4 = i16x4::new(0, 0, 1, 0);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vreinterpret_u32_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_u16() {
+        let a: u16x4 = u16x4::new(0, 0, 1, 0);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vreinterpret_u32_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u64_s32() {
+        let a: i32x2 = i32x2::new(0, 0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vreinterpret_u64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u64_u32() {
+        let a: u32x2 = u32x2::new(0, 0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vreinterpret_u64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_p8() {
+        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vreinterpretq_u16_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_s8() {
+        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vreinterpretq_u16_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_u8() {
+        let a: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: u16x8 = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: u16x8 = transmute(vreinterpretq_u16_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vreinterpretq_u32_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vreinterpretq_u32_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vreinterpretq_u32_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_s32() {
+        let a: i32x4 = i32x4::new(0, 0, 1, 0);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vreinterpretq_u64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_u32() {
+        let a: u32x4 = u32x4::new(0, 0, 1, 0);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vreinterpretq_u64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_p8() {
+        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_p16_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_s8() {
+        let a: i8x8 = i8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_p16_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_u8() {
+        let a: u8x8 = u8x8::new(0, 0, 1, 0, 2, 0, 3, 0);
+        let e: i16x4 = i16x4::new(0, 1, 2, 3);
+        let r: i16x4 = transmute(vreinterpret_p16_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_p8() {
+        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_p16_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_s8() {
+        let a: i8x16 = i8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_p16_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_u8() {
+        let a: u8x16 = u8x16::new(0, 0, 1, 0, 2, 0, 3, 0, 4, 0, 5, 0, 6, 0, 7, 0);
+        let e: i16x8 = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
+        let r: i16x8 = transmute(vreinterpretq_p16_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_s32() {
+        let a: i32x2 = i32x2::new(0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_u32() {
+        let a: u32x2 = u32x2::new(0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_s32() {
+        let a: i32x4 = i32x4::new(0, 0, 1, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_u32() {
+        let a: u32x4 = u32x4::new(0, 0, 1, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s64() {
+        let a: i64x2 = i64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u64() {
+        let a: u64x2 = u64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_p64() {
+        let a: i64x2 = i64x2::new(0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_s16_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_s16_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s8_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let e: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_s8_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: u8x8 = u8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let e: u8x8 = u8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u16x4 = u16x4::new(0, 0, 0, 0);
+        let r: u16x4 = transmute(vreinterpret_u16_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: u16x4 = u16x4::new(0, 0, 0, 0);
+        let r: u16x4 = transmute(vreinterpret_u16_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: u8x16 = u8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
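+    // Reinterpreting across a 4x width difference spreads each lane over four
+    // narrower lanes; on little-endian the low-order part comes first, e.g.
+    // the i32 value 1 becomes the bytes (1, 0, 0, 0).
+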
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u8_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let e: u8x16 = u8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let r: u8x16 = transmute(vreinterpretq_u8_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_s32() {
+        let a: i32x2 = i32x2::new(0, 1);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_u32() {
+        let a: u32x2 = u32x2::new(0, 1);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_p16_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_p16_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_s32() {
+        let a: i32x4 = i32x4::new(0, 1, 2, 3);
+        let e: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p8_u32() {
+        let a: u32x4 = u32x4::new(0, 1, 2, 3);
+        let e: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let r: i8x16 = transmute(vreinterpretq_p8_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_s64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_u64() {
+        let a: u64x2 = u64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_s16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u16x4 = u16x4::new(0, 0, 0, 0);
+        let r: u16x4 = transmute(vreinterpret_u16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p16_p64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i16x4 = i16x4::new(0, 0, 0, 0);
+        let r: i16x4 = transmute(vreinterpret_p16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_s16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: u16x8 = transmute(vreinterpretq_u16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p16_p64() {
+        let a: i64x2 = i64x2::new(0, 1);
+        let e: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let r: i16x8 = transmute(vreinterpretq_p16_p64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_p128() {
+        let a: p128 = 0;
+        let e: i32x4 = i32x4::new(0, 0, 0, 0);
+        let r: i32x4 = transmute(vreinterpretq_s32_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_p128() {
+        let a: p128 = 0;
+        let e: u32x4 = u32x4::new(0, 0, 0, 0);
+        let r: u32x4 = transmute(vreinterpretq_u32_p128(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_p8() {
+        let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vreinterpret_s32_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_s8() {
+        let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vreinterpret_s32_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s32_u8() {
+        let a: u8x8 = u8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i32x2 = i32x2::new(0, 1);
+        let r: i32x2 = transmute(vreinterpret_s32_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s64_p16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_s64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s64_s16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_s64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s64_u16() {
+        let a: u16x4 = u16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_s64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_p8() {
+        let a: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vreinterpretq_s32_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_s8() {
+        let a: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vreinterpretq_s32_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s32_u8() {
+        let a: u8x16 = u8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let e: i32x4 = i32x4::new(0, 1, 2, 3);
+        let r: i32x4 = transmute(vreinterpretq_s32_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_s64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_s64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_s64_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_s64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_p8() {
+        let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vreinterpret_u32_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_s8() {
+        let a: i8x8 = i8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vreinterpret_u32_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u32_u8() {
+        let a: u8x8 = u8x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: u32x2 = u32x2::new(0, 1);
+        let r: u32x2 = transmute(vreinterpret_u32_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u64_p16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vreinterpret_u64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u64_s16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vreinterpret_u64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u64_u16() {
+        let a: u16x4 = u16x4::new(0, 0, 0, 0);
+        let e: u64x1 = u64x1::new(0);
+        let r: u64x1 = transmute(vreinterpret_u64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_p8() {
+        let a: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vreinterpretq_u32_p8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_s8() {
+        let a: i8x16 = i8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vreinterpretq_u32_s8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u32_u8() {
+        let a: u8x16 = u8x16::new(0, 0, 0, 0, 1, 0, 0, 0, 2, 0, 0, 0, 3, 0, 0, 0);
+        let e: u32x4 = u32x4::new(0, 1, 2, 3);
+        let r: u32x4 = transmute(vreinterpretq_u32_u8(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vreinterpretq_u64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vreinterpretq_u64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_u64_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: u64x2 = u64x2::new(0, 1);
+        let r: u64x2 = transmute(vreinterpretq_u64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_p16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_s16() {
+        let a: i16x4 = i16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p64_u16() {
+        let a: u16x4 = u16x4::new(0, 0, 0, 0);
+        let e: i64x1 = i64x1::new(0);
+        let r: i64x1 = transmute(vreinterpret_p64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_p16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_p16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_s16() {
+        let a: i16x8 = i16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_s16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p64_u16() {
+        let a: u16x8 = u16x8::new(0, 0, 0, 0, 1, 0, 0, 0);
+        let e: i64x2 = i64x2::new(0, 1);
+        let r: i64x2 = transmute(vreinterpretq_p64_u16(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_s32() {
+        let a: i32x4 = i32x4::new(0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_s32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpretq_p128_u32() {
+        let a: u32x4 = u32x4::new(0, 0, 0, 0);
+        let e: p128 = 0;
+        let r: p128 = transmute(vreinterpretq_p128_u32(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_s8_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_s8_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_u8_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: u8x8 = transmute(vreinterpret_u8_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_s64() {
+        let a: i64x1 = i64x1::new(0);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_s64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
+    #[simd_test(enable = "neon")]
+    unsafe fn test_vreinterpret_p8_u64() {
+        let a: u64x1 = u64x1::new(0);
+        let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0);
+        let r: i8x8 = transmute(vreinterpret_p8_u64(transmute(a)));
+        assert_eq!(r, e);
+    }
+
= "neon")] + unsafe fn test_vreinterpretq_s8_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_s8_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s8_u64() { + let a: u64x2 = u64x2::new(0, 1); + let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_s8_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u8_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vreinterpretq_u8_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u8_u64() { + let a: u64x2 = u64x2::new(0, 1); + let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vreinterpretq_u8_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p8_s64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_p8_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p8_u64() { + let a: u64x2 = u64x2::new(0, 1); + let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_p8_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_s8_p64() { + let a: i64x1 = i64x1::new(0); + let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vreinterpret_s8_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u8_p64() { + let a: i64x1 = i64x1::new(0); + let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vreinterpret_u8_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_p8_p64() { + let a: i64x1 = i64x1::new(0); + let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vreinterpret_p8_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s8_p64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_s8_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u8_p64() { + let a: i64x2 = i64x2::new(0, 1); + let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vreinterpretq_u8_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p8_p64() { + let a: i64x2 = i64x2::new(0, 1); + let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_p8_p64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s16_p128() { + let a: p128 = 0; + let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i16x8 = transmute(vreinterpretq_s16_p128(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u16_p128() { + let a: p128 = 0; + let e: u16x8 = 
u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vreinterpretq_u16_p128(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p16_p128() { + let a: p128 = 0; + let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i16x8 = transmute(vreinterpretq_p16_p128(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_s64_p8() { + let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vreinterpret_s64_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_s64_s8() { + let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vreinterpret_s64_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_s64_u8() { + let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vreinterpret_s64_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u64_p8() { + let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vreinterpret_u64_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u64_s8() { + let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vreinterpret_u64_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u64_u8() { + let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vreinterpret_u64_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s64_p8() { + let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vreinterpretq_s64_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s64_s8() { + let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vreinterpretq_s64_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s64_u8() { + let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vreinterpretq_s64_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u64_p8() { + let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vreinterpretq_u64_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u64_s8() { + let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vreinterpretq_u64_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u64_u8() { + let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: u64x2 = u64x2::new(0, 1); + let r: u64x2 = transmute(vreinterpretq_u64_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vreinterpret_p64_p8() { + let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vreinterpret_p64_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_p64_s8() { + let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vreinterpret_p64_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_p64_u8() { + let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vreinterpret_p64_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p64_p8() { + let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vreinterpretq_p64_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p64_s8() { + let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vreinterpretq_p64_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p64_u8() { + let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0); + let e: i64x2 = i64x2::new(0, 1); + let r: i64x2 = transmute(vreinterpretq_p64_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_s16() { + let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: p128 = 0; + let r: p128 = transmute(vreinterpretq_p128_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_u16() { + let a: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: p128 = 0; + let r: p128 = transmute(vreinterpretq_p128_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_p16() { + let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: p128 = 0; + let r: p128 = transmute(vreinterpretq_p128_p16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_s8() { + let a: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: p128 = 1; + let r: p128 = transmute(vreinterpretq_p128_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_u8() { + let a: u8x16 = u8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: p128 = 1; + let r: p128 = transmute(vreinterpretq_p128_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_p8() { + let a: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: p128 = 1; + let r: p128 = transmute(vreinterpretq_p128_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s8_p128() { + let a: p128 = 1; + let e: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_s8_p128(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u8_p128() { + let a: p128 = 1; + let e: u8x16 = u8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vreinterpretq_u8_p128(transmute(a))); + assert_eq!(r, e); + } + + 
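// --- Editorial sketch; an assumption for illustration, not part of the
// generated file (which must not be modified by hand). The vreinterpret*
// intrinsics exercised above are zero-cost bit reinterpretations: no lanes
// move, the same 64 or 128 bits are simply viewed as a different element
// type. On the little-endian targets these expected vectors assume, the
// low byte of a p128 lands in lane 0 of the u8x16. A minimal hypothetical
// round-trip making that explicit, using only intrinsics tested above:
#[cfg(test)]
unsafe fn demo_p128_reinterpret_roundtrip() {
    let a: p128 = 1;
    // View the 128-bit polynomial as 16 byte lanes; the bits are unchanged.
    let v = vreinterpretq_u8_p128(a);
    // Reinterpreting back recovers the original bit pattern exactly.
    let back: p128 = vreinterpretq_p128_u8(v);
    assert_eq!(back, 1);
}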
#[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p8_p128() { + let a: p128 = 1; + let e: i8x16 = i8x16::new(1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_p8_p128(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_s8_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vreinterpret_s8_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_s16_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: i16x4 = i16x4::new(0, 0, 0, 0); + let r: i16x4 = transmute(vreinterpret_s16_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_s32_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: i32x2 = i32x2::new(0, 0); + let r: i32x2 = transmute(vreinterpret_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_s64_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vreinterpret_s64_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s8_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_s8_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s16_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i16x8 = transmute(vreinterpretq_s16_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s32_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: i32x4 = i32x4::new(0, 0, 0, 0); + let r: i32x4 = transmute(vreinterpretq_s32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_s64_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: i64x2 = i64x2::new(0, 0); + let r: i64x2 = transmute(vreinterpretq_s64_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u8_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vreinterpret_u8_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u16_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: u16x4 = u16x4::new(0, 0, 0, 0); + let r: u16x4 = transmute(vreinterpret_u16_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u32_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vreinterpret_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_u64_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vreinterpret_u64_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u8_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x16 = transmute(vreinterpretq_u8_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe 
fn test_vreinterpretq_u16_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u16x8 = transmute(vreinterpretq_u16_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u32_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: u32x4 = u32x4::new(0, 0, 0, 0); + let r: u32x4 = transmute(vreinterpretq_u32_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_u64_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: u64x2 = u64x2::new(0, 0); + let r: u64x2 = transmute(vreinterpretq_u64_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_p8_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vreinterpret_p8_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_p16_f32() { + let a: f32x2 = f32x2::new(0., 0.); + let e: i16x4 = i16x4::new(0, 0, 0, 0); + let r: i16x4 = transmute(vreinterpret_p16_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p8_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let r: i8x16 = transmute(vreinterpretq_p8_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p16_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: i16x8 = transmute(vreinterpretq_p16_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_p128_f32() { + let a: f32x4 = f32x4::new(0., 0., 0., 0.); + let e: p128 = 0; + let r: p128 = transmute(vreinterpretq_p128_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_s8() { + let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_s16() { + let a: i16x4 = i16x4::new(0, 0, 0, 0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_s32() { + let a: i32x2 = i32x2::new(0, 0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_s64() { + let a: i64x1 = i64x1::new(0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_s8() { + let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_s16() { + let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vreinterpretq_f32_s32() { + let a: i32x4 = i32x4::new(0, 0, 0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_s64() { + let a: i64x2 = i64x2::new(0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_u8() { + let a: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_u16() { + let a: u16x4 = u16x4::new(0, 0, 0, 0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_u32() { + let a: u32x2 = u32x2::new(0, 0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_u64() { + let a: u64x1 = u64x1::new(0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_u8() { + let a: u8x16 = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_u16() { + let a: u16x8 = u16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_u32() { + let a: u32x4 = u32x4::new(0, 0, 0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_u64() { + let a: u64x2 = u64x2::new(0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_p8() { + let a: i8x8 = i8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpret_f32_p16() { + let a: i16x4 = i16x4::new(0, 0, 0, 0); + let e: f32x2 = f32x2::new(0., 0.); + let r: f32x2 = transmute(vreinterpret_f32_p16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_p8() { + let a: i8x16 = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vreinterpretq_f32_p16() { + let a: i16x8 = i16x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_p16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vreinterpretq_f32_p128() { + let a: p128 = 0; + let e: f32x4 = f32x4::new(0., 0., 0., 0.); + let r: f32x4 = transmute(vreinterpretq_f32_p128(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i8x8 = transmute(vrshl_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: i8x16 = transmute(vrshlq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: i16x4 = i16x4::new(4, 8, 12, 16); + let r: i16x4 = transmute(vrshl_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vrshlq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 2); + let e: i32x2 = i32x2::new(4, 8); + let r: i32x2 = transmute(vrshl_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vrshlq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(2); + let e: i64x1 = i64x1::new(4); + let r: i64x1 = transmute(vrshl_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(2, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vrshlq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u8x8 = transmute(vrshl_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: u8x16 = transmute(vrshlq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: u16x4 = u16x4::new(4, 8, 12, 16); + let r: u16x4 = transmute(vrshl_u16(transmute(a), transmute(b))); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vrshlq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 2); + let e: u32x2 = u32x2::new(4, 8); + let r: u32x2 = transmute(vrshl_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vrshlq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshl_u64() { + let a: u64x1 = u64x1::new(1); + let b: i64x1 = i64x1::new(2); + let e: u64x1 = u64x1::new(4); + let r: u64x1 = transmute(vrshl_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshlq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: i64x2 = i64x2::new(2, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vrshlq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_s8() { + let a: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vrshr_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_s8() { + let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vrshrq_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_s16() { + let a: i16x4 = i16x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vrshr_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_s16() { + let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vrshrq_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_s32() { + let a: i32x2 = i32x2::new(4, 8); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vrshr_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_s32() { + let a: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i32x4 = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vrshrq_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_s64() { + let a: i64x1 = i64x1::new(4); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vrshr_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_s64() { + let a: i64x2 = i64x2::new(4, 8); + let e: i64x2 = i64x2::new(1, 2); + let r: i64x2 = transmute(vrshrq_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_u8() { + let a: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 
8); + let r: u8x8 = transmute(vrshr_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_u8() { + let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vrshrq_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_u16() { + let a: u16x4 = u16x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vrshr_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_u16() { + let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vrshrq_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_u32() { + let a: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vrshr_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_u32() { + let a: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vrshrq_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshr_n_u64() { + let a: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vrshr_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrq_n_u64() { + let a: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(1, 2); + let r: u64x2 = transmute(vrshrq_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_s16() { + let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vrshrn_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_s32() { + let a: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vrshrn_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_s64() { + let a: i64x2 = i64x2::new(4, 8); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vrshrn_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_u16() { + let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vrshrn_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_u32() { + let a: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vrshrn_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrshrn_n_u64() { + let a: u64x2 = u64x2::new(4, 8); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vrshrn_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: i8x8 = transmute(vrsra_n_s8::<2>(transmute(a), transmute(b))); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + let r: i8x16 = transmute(vrsraq_n_s8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 1); + let b: i16x4 = i16x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(2, 3, 4, 5); + let r: i16x4 = transmute(vrsra_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i16x8 = i16x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: i16x8 = transmute(vrsraq_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_s32() { + let a: i32x2 = i32x2::new(1, 1); + let b: i32x2 = i32x2::new(4, 8); + let e: i32x2 = i32x2::new(2, 3); + let r: i32x2 = transmute(vrsra_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 1); + let b: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i32x4 = i32x4::new(2, 3, 4, 5); + let r: i32x4 = transmute(vrsraq_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(4); + let e: i64x1 = i64x1::new(2); + let r: i64x1 = transmute(vrsra_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_s64() { + let a: i64x2 = i64x2::new(1, 1); + let b: i64x2 = i64x2::new(4, 8); + let e: i64x2 = i64x2::new(2, 3); + let r: i64x2 = transmute(vrsraq_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: u8x8 = transmute(vrsra_n_u8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + let r: u8x16 = transmute(vrsraq_n_u8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_u16() { + let a: u16x4 = u16x4::new(1, 1, 1, 1); + let b: u16x4 = u16x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(2, 3, 4, 5); + let r: u16x4 = transmute(vrsra_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u16x8 = u16x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: u16x8 = transmute(vrsraq_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + 
unsafe fn test_vrsra_n_u32() { + let a: u32x2 = u32x2::new(1, 1); + let b: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(2, 3); + let r: u32x2 = transmute(vrsra_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 1); + let b: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(2, 3, 4, 5); + let r: u32x4 = transmute(vrsraq_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsra_n_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(2); + let r: u64x1 = transmute(vrsra_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsraq_n_u64() { + let a: u64x2 = u64x2::new(1, 1); + let b: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(2, 3); + let r: u64x2 = transmute(vrsraq_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_s16() { + let a: i16x8 = i16x8::new(0x7F_FF, -32768, 0, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(-128, -128, 0, 0, 0, 0, 0, 0); + let r: i8x8 = transmute(vrsubhn_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_s32() { + let a: i32x4 = i32x4::new(0x7F_FF_FF_FF, -2147483648, 0, 4); + let b: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(-32768, -32768, 0, 0); + let r: i16x4 = transmute(vrsubhn_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_s64() { + let a: i64x2 = i64x2::new(0x7F_FF_FF_FF_FF_FF_FF_FF, -9223372036854775808); + let b: i64x2 = i64x2::new(1, 2); + let e: i32x2 = i32x2::new(-2147483648, -2147483648); + let r: i32x2 = transmute(vrsubhn_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_u16() { + let a: u16x8 = u16x8::new(0xFF_FF, 0, 3, 4, 5, 6, 7, 8); + let b: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(0, 0, 0, 0, 0, 0, 0, 0); + let r: u8x8 = transmute(vrsubhn_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_u32() { + let a: u32x4 = u32x4::new(0xFF_FF_FF_FF, 0, 3, 4); + let b: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(0, 0, 0, 0); + let r: u16x4 = transmute(vrsubhn_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrsubhn_u64() { + let a: u64x2 = u64x2::new(0xFF_FF_FF_FF_FF_FF_FF_FF, 0); + let b: u64x2 = u64x2::new(1, 2); + let e: u32x2 = u32x2::new(0, 0); + let r: u32x2 = transmute(vrsubhn_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_s8() { + let a: i8 = 1; + let b: i8x8 = i8x8::new(0, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vset_lane_s8::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_s16() { + let a: i16 = 1; + let b: i16x4 = i16x4::new(0, 2, 3, 4); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vset_lane_s16::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vset_lane_s32() { + let a: i32 = 1; + let b: i32x2 = i32x2::new(0, 2); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vset_lane_s32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_s64() { + let a: i64 = 1; + let b: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vset_lane_s64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_u8() { + let a: u8 = 1; + let b: u8x8 = u8x8::new(0, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vset_lane_u8::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_u16() { + let a: u16 = 1; + let b: u16x4 = u16x4::new(0, 2, 3, 4); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vset_lane_u16::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_u32() { + let a: u32 = 1; + let b: u32x2 = u32x2::new(0, 2); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vset_lane_u32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_u64() { + let a: u64 = 1; + let b: u64x1 = u64x1::new(0); + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vset_lane_u64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_p8() { + let a: p8 = 1; + let b: i8x8 = i8x8::new(0, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vset_lane_p8::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_p16() { + let a: p16 = 1; + let b: i16x4 = i16x4::new(0, 2, 3, 4); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vset_lane_p16::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_p64() { + let a: p64 = 1; + let b: i64x1 = i64x1::new(0); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vset_lane_p64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_s8() { + let a: i8 = 1; + let b: i8x16 = i8x16::new(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vsetq_lane_s8::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_s16() { + let a: i16 = 1; + let b: i16x8 = i16x8::new(0, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vsetq_lane_s16::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_s32() { + let a: i32 = 1; + let b: i32x4 = i32x4::new(0, 2, 3, 4); + let e: i32x4 = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vsetq_lane_s32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_s64() { + let a: i64 = 1; + let b: i64x2 = i64x2::new(0, 2); + let e: i64x2 = i64x2::new(1, 2); + let r: i64x2 = transmute(vsetq_lane_s64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vsetq_lane_u8() { + let a: u8 = 1; + let b: u8x16 = u8x16::new(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vsetq_lane_u8::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_u16() { + let a: u16 = 1; + let b: u16x8 = u16x8::new(0, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vsetq_lane_u16::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_u32() { + let a: u32 = 1; + let b: u32x4 = u32x4::new(0, 2, 3, 4); + let e: u32x4 = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vsetq_lane_u32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_u64() { + let a: u64 = 1; + let b: u64x2 = u64x2::new(0, 2); + let e: u64x2 = u64x2::new(1, 2); + let r: u64x2 = transmute(vsetq_lane_u64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_p8() { + let a: p8 = 1; + let b: i8x16 = i8x16::new(0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vsetq_lane_p8::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_p16() { + let a: p16 = 1; + let b: i16x8 = i16x8::new(0, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vsetq_lane_p16::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_p64() { + let a: p64 = 1; + let b: i64x2 = i64x2::new(0, 2); + let e: i64x2 = i64x2::new(1, 2); + let r: i64x2 = transmute(vsetq_lane_p64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vset_lane_f32() { + let a: f32 = 1.; + let b: f32x2 = f32x2::new(0., 2.); + let e: f32x2 = f32x2::new(1., 2.); + let r: f32x2 = transmute(vset_lane_f32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsetq_lane_f32() { + let a: f32 = 1.; + let b: f32x4 = f32x4::new(0., 2., 3., 4.); + let e: f32x4 = f32x4::new(1., 2., 3., 4.); + let r: f32x4 = transmute(vsetq_lane_f32::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i8x8 = transmute(vshl_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: i8x16 = transmute(vshlq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: i16x4 = i16x4::new(4, 8, 12, 16); + let r: i16x4 = transmute(vshl_s16(transmute(a), transmute(b))); 
+ assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vshlq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 2); + let e: i32x2 = i32x2::new(4, 8); + let r: i32x2 = transmute(vshl_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vshlq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(2); + let e: i64x1 = i64x1::new(4); + let r: i64x1 = transmute(vshl_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_s64() { + let a: i64x2 = i64x2::new(1, 2); + let b: i64x2 = i64x2::new(2, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vshlq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u8x8 = transmute(vshl_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let b: i8x16 = i8x16::new(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2); + let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: u8x16 = transmute(vshlq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(2, 2, 2, 2); + let e: u16x4 = u16x4::new(4, 8, 12, 16); + let r: u16x4 = transmute(vshl_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i16x8 = i16x8::new(2, 2, 2, 2, 2, 2, 2, 2); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vshlq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 2); + let e: u32x2 = u32x2::new(4, 8); + let r: u32x2 = transmute(vshl_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: i32x4 = i32x4::new(2, 2, 2, 2); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vshlq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_u64() { + let a: u64x1 = u64x1::new(1); + let b: i64x1 = i64x1::new(2); + let e: u64x1 = u64x1::new(4); + let r: u64x1 = transmute(vshl_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vshlq_u64() { + let a: u64x2 = u64x2::new(1, 2); + let b: i64x2 = i64x2::new(2, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vshlq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i8x8 = transmute(vshl_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_s8() { + let a: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: i8x16 = transmute(vshlq_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i16x4 = i16x4::new(4, 8, 12, 16); + let r: i16x4 = transmute(vshl_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_s16() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vshlq_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_s32() { + let a: i32x2 = i32x2::new(1, 2); + let e: i32x2 = i32x2::new(4, 8); + let r: i32x2 = transmute(vshl_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_s32() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vshlq_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u8x8 = transmute(vshl_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u8() { + let a: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let r: u8x16 = transmute(vshlq_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u16x4 = u16x4::new(4, 8, 12, 16); + let r: u16x4 = transmute(vshl_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u16() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vshlq_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_u32() { + let a: u32x2 = u32x2::new(1, 2); + let e: u32x2 = u32x2::new(4, 8); + let r: u32x2 = transmute(vshl_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u32() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vshlq_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_s64() { + let a: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(4); + let r: i64x1 = transmute(vshl_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_s64() { + 
let a: i64x2 = i64x2::new(1, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vshlq_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshl_n_u64() { + let a: u64x1 = u64x1::new(1); + let e: u64x1 = u64x1::new(4); + let r: u64x1 = transmute(vshl_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshlq_n_u64() { + let a: u64x2 = u64x2::new(1, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vshlq_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_s8() { + let a: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: i16x8 = transmute(vshll_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_s16() { + let a: i16x4 = i16x4::new(1, 2, 3, 4); + let e: i32x4 = i32x4::new(4, 8, 12, 16); + let r: i32x4 = transmute(vshll_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_s32() { + let a: i32x2 = i32x2::new(1, 2); + let e: i64x2 = i64x2::new(4, 8); + let r: i64x2 = transmute(vshll_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_u8() { + let a: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let r: u16x8 = transmute(vshll_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_u16() { + let a: u16x4 = u16x4::new(1, 2, 3, 4); + let e: u32x4 = u32x4::new(4, 8, 12, 16); + let r: u32x4 = transmute(vshll_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshll_n_u32() { + let a: u32x2 = u32x2::new(1, 2); + let e: u64x2 = u64x2::new(4, 8); + let r: u64x2 = transmute(vshll_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_s8() { + let a: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vshr_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_s8() { + let a: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vshrq_n_s8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_s16() { + let a: i16x4 = i16x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vshr_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_s16() { + let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vshrq_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_s32() { + let a: i32x2 = i32x2::new(4, 8); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vshr_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_s32() { + let a: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i32x4 = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vshrq_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + 
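// --- Editorial sketch; an assumption for illustration, not part of the
// generated file. The shift tests above pair each truncating shift with a
// rounding counterpart: for a right shift by N, vshr_n simply discards the
// low bits, while vrshr_n first adds the rounding constant 1 << (N - 1).
// The two therefore differ whenever the discarded bits are at least half
// of the shifted-out range; with N = 2 and input 6, 6 >> 2 = 1 but
// (6 + 2) >> 2 = 2. A hypothetical demo of that difference:
#[cfg(test)]
unsafe fn demo_truncating_vs_rounding_shift() {
    let a = vdup_n_s8(6); // every lane = 6
    let truncated: i8x8 = transmute(vshr_n_s8::<2>(a)); // 6 >> 2 = 1
    let rounded: i8x8 = transmute(vrshr_n_s8::<2>(a)); // (6 + 2) >> 2 = 2
    assert_eq!(truncated, i8x8::new(1, 1, 1, 1, 1, 1, 1, 1));
    assert_eq!(rounded, i8x8::new(2, 2, 2, 2, 2, 2, 2, 2));
}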
#[simd_test(enable = "neon")] + unsafe fn test_vshr_n_s64() { + let a: i64x1 = i64x1::new(4); + let e: i64x1 = i64x1::new(1); + let r: i64x1 = transmute(vshr_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_s64() { + let a: i64x2 = i64x2::new(4, 8); + let e: i64x2 = i64x2::new(1, 2); + let r: i64x2 = transmute(vshrq_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_u8() { + let a: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vshr_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u8() { + let a: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vshrq_n_u8::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_u16() { + let a: u16x4 = u16x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vshr_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u16() { + let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vshrq_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_u32() { + let a: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vshr_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u32() { + let a: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vshrq_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshr_n_u64() { + let a: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(1); + let r: u64x1 = transmute(vshr_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrq_n_u64() { + let a: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(1, 2); + let r: u64x2 = transmute(vshrq_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_s16() { + let a: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vshrn_n_s16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_s32() { + let a: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vshrn_n_s32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_s64() { + let a: i64x2 = i64x2::new(4, 8); + let e: i32x2 = i32x2::new(1, 2); + let r: i32x2 = transmute(vshrn_n_s64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_u16() { + let a: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vshrn_n_u16::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_u32() { + let a: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(1, 2, 3, 4); + let r: u16x4 = 
transmute(vshrn_n_u32::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vshrn_n_u64() { + let a: u64x2 = u64x2::new(4, 8); + let e: u32x2 = u32x2::new(1, 2); + let r: u32x2 = transmute(vshrn_n_u64::<2>(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_s8() { + let a: i8x8 = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: i8x8 = i8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i8x8 = i8x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: i8x8 = transmute(vsra_n_s8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_s8() { + let a: i8x16 = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b: i8x16 = i8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: i8x16 = i8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + let r: i8x16 = transmute(vsraq_n_s8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_s16() { + let a: i16x4 = i16x4::new(1, 1, 1, 1); + let b: i16x4 = i16x4::new(4, 8, 12, 16); + let e: i16x4 = i16x4::new(2, 3, 4, 5); + let r: i16x4 = transmute(vsra_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_s16() { + let a: i16x8 = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: i16x8 = i16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: i16x8 = i16x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: i16x8 = transmute(vsraq_n_s16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_s32() { + let a: i32x2 = i32x2::new(1, 1); + let b: i32x2 = i32x2::new(4, 8); + let e: i32x2 = i32x2::new(2, 3); + let r: i32x2 = transmute(vsra_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_s32() { + let a: i32x4 = i32x4::new(1, 1, 1, 1); + let b: i32x4 = i32x4::new(4, 8, 12, 16); + let e: i32x4 = i32x4::new(2, 3, 4, 5); + let r: i32x4 = transmute(vsraq_n_s32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_s64() { + let a: i64x1 = i64x1::new(1); + let b: i64x1 = i64x1::new(4); + let e: i64x1 = i64x1::new(2); + let r: i64x1 = transmute(vsra_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_s64() { + let a: i64x2 = i64x2::new(1, 1); + let b: i64x2 = i64x2::new(4, 8); + let e: i64x2 = i64x2::new(2, 3); + let r: i64x2 = transmute(vsraq_n_s64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_u8() { + let a: u8x8 = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: u8x8 = u8x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u8x8 = u8x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: u8x8 = transmute(vsra_n_u8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_u8() { + let a: u8x16 = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let b: u8x16 = u8x16::new(4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64); + let e: u8x16 = u8x16::new(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17); + let r: u8x16 = transmute(vsraq_n_u8::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_u16() { + let a: u16x4 = 
u16x4::new(1, 1, 1, 1); + let b: u16x4 = u16x4::new(4, 8, 12, 16); + let e: u16x4 = u16x4::new(2, 3, 4, 5); + let r: u16x4 = transmute(vsra_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_u16() { + let a: u16x8 = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let b: u16x8 = u16x8::new(4, 8, 12, 16, 20, 24, 28, 32); + let e: u16x8 = u16x8::new(2, 3, 4, 5, 6, 7, 8, 9); + let r: u16x8 = transmute(vsraq_n_u16::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_u32() { + let a: u32x2 = u32x2::new(1, 1); + let b: u32x2 = u32x2::new(4, 8); + let e: u32x2 = u32x2::new(2, 3); + let r: u32x2 = transmute(vsra_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_u32() { + let a: u32x4 = u32x4::new(1, 1, 1, 1); + let b: u32x4 = u32x4::new(4, 8, 12, 16); + let e: u32x4 = u32x4::new(2, 3, 4, 5); + let r: u32x4 = transmute(vsraq_n_u32::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsra_n_u64() { + let a: u64x1 = u64x1::new(1); + let b: u64x1 = u64x1::new(4); + let e: u64x1 = u64x1::new(2); + let r: u64x1 = transmute(vsra_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsraq_n_u64() { + let a: u64x2 = u64x2::new(1, 1); + let b: u64x2 = u64x2::new(4, 8); + let e: u64x2 = u64x2::new(2, 3); + let r: u64x2 = transmute(vsraq_n_u64::<2>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_s8() { + let a: i8x8 = i8x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: i8x8 = i8x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [i8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [i8; 16] = transmute(vtrn_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_s16() { + let a: i16x4 = i16x4::new(0, 2, 2, 6); + let b: i16x4 = i16x4::new(1, 3, 3, 7); + let e: [i16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [i16; 8] = transmute(vtrn_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_s8() { + let a: i8x16 = i8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); + let b: i8x16 = i8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); + let e: [i8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; + let r: [i8; 32] = transmute(vtrnq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_s16() { + let a: i16x8 = i16x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: i16x8 = i16x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [i16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [i16; 16] = transmute(vtrnq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_s32() { + let a: i32x4 = i32x4::new(0, 2, 2, 6); + let b: i32x4 = i32x4::new(1, 3, 3, 7); + let e: [i32; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [i32; 8] = transmute(vtrnq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_u8() { + let a: u8x8 = u8x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: u8x8 = u8x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [u8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 
10, 1, 14, 15]; + let r: [u8; 16] = transmute(vtrn_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_u16() { + let a: u16x4 = u16x4::new(0, 2, 2, 6); + let b: u16x4 = u16x4::new(1, 3, 3, 7); + let e: [u16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [u16; 8] = transmute(vtrn_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_u8() { + let a: u8x16 = u8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); + let b: u8x16 = u8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); + let e: [u8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; + let r: [u8; 32] = transmute(vtrnq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_u16() { + let a: u16x8 = u16x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: u16x8 = u16x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [u16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [u16; 16] = transmute(vtrnq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_u32() { + let a: u32x4 = u32x4::new(0, 2, 2, 6); + let b: u32x4 = u32x4::new(1, 3, 3, 7); + let e: [u32; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [u32; 8] = transmute(vtrnq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_p8() { + let a: i8x8 = i8x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: i8x8 = i8x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [u8; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [u8; 16] = transmute(vtrn_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_p16() { + let a: i16x4 = i16x4::new(0, 2, 2, 6); + let b: i16x4 = i16x4::new(1, 3, 3, 7); + let e: [u16; 8] = [0, 1, 2, 3, 2, 3, 6, 7]; + let r: [u16; 8] = transmute(vtrn_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_p8() { + let a: i8x16 = i8x16::new(0, 2, 2, 6, 2, 10, 6, 14, 2, 18, 6, 22, 10, 26, 14, 30); + let b: i8x16 = i8x16::new(1, 3, 3, 7, 3, 1, 7, 15, 3, 19, 7, 23, 1, 27, 15, 31); + let e: [u8; 32] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15, 2, 3, 6, 7, 10, 1, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31]; + let r: [u8; 32] = transmute(vtrnq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_p16() { + let a: i16x8 = i16x8::new(0, 2, 2, 6, 2, 10, 6, 14); + let b: i16x8 = i16x8::new(1, 3, 3, 7, 3, 1, 7, 15); + let e: [u16; 16] = [0, 1, 2, 3, 2, 3, 6, 7, 2, 3, 6, 7, 10, 1, 14, 15]; + let r: [u16; 16] = transmute(vtrnq_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_s32() { + let a: i32x2 = i32x2::new(0, 2); + let b: i32x2 = i32x2::new(1, 3); + let e: [i32; 4] = [0, 1, 2, 3]; + let r: [i32; 4] = transmute(vtrn_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_u32() { + let a: u32x2 = u32x2::new(0, 2); + let b: u32x2 = u32x2::new(1, 3); + let e: [u32; 4] = [0, 1, 2, 3]; + let r: [u32; 4] = transmute(vtrn_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrn_f32() { + let a: f32x2 = f32x2::new(0., 2.); + let b: f32x2 = f32x2::new(1., 
3.); + let e: [f32; 4] = [0., 1., 2., 3.]; + let r: [f32; 4] = transmute(vtrn_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vtrnq_f32() { + let a: f32x4 = f32x4::new(0., 2., 2., 6.); + let b: f32x4 = f32x4::new(1., 3., 3., 7.); + let e: [f32; 8] = [0., 1., 2., 3., 2., 3., 6., 7.]; + let r: [f32; 8] = transmute(vtrnq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_s8() { + let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [i8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [i8; 16] = transmute(vzip_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_s16() { + let a: i16x4 = i16x4::new(0, 2, 4, 6); + let b: i16x4 = i16x4::new(1, 3, 5, 7); + let e: [i16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [i16; 8] = transmute(vzip_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u8() { + let a: u8x8 = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: u8x8 = u8x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u8; 16] = transmute(vzip_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u16() { + let a: u16x4 = u16x4::new(0, 2, 4, 6); + let b: u16x4 = u16x4::new(1, 3, 5, 7); + let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [u16; 8] = transmute(vzip_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_p8() { + let a: i8x8 = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i8x8 = i8x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u8; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u8; 16] = transmute(vzip_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_p16() { + let a: i16x4 = i16x4::new(0, 2, 4, 6); + let b: i16x4 = i16x4::new(1, 3, 5, 7); + let e: [u16; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [u16; 8] = transmute(vzip_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_s32() { + let a: i32x2 = i32x2::new(0, 2); + let b: i32x2 = i32x2::new(1, 3); + let e: [i32; 4] = [0, 1, 2, 3]; + let r: [i32; 4] = transmute(vzip_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_u32() { + let a: u32x2 = u32x2::new(0, 2); + let b: u32x2 = u32x2::new(1, 3); + let e: [u32; 4] = [0, 1, 2, 3]; + let r: [u32; 4] = transmute(vzip_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_s8() { + let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + let e: [i8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + let r: [i8; 32] = transmute(vzipq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_s16() { + let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [i16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15]; + let r: [i16; 16] = transmute(vzipq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_s32() { + let a: i32x4 = i32x4::new(0, 2, 4, 6); + let b: i32x4 = i32x4::new(1, 3, 5, 7); + let e: [i32; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [i32; 8] = transmute(vzipq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_u8() { + let a: u8x16 = u8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let b: u8x16 = u8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + let e: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + let r: [u8; 32] = transmute(vzipq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_u16() { + let a: u16x8 = u16x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: u16x8 = u16x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u16; 16] = transmute(vzipq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_u32() { + let a: u32x4 = u32x4::new(0, 2, 4, 6); + let b: u32x4 = u32x4::new(1, 3, 5, 7); + let e: [u32; 8] = [0, 1, 2, 3, 4, 5, 6, 7]; + let r: [u32; 8] = transmute(vzipq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_p8() { + let a: i8x16 = i8x16::new(0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30); + let b: i8x16 = i8x16::new(1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31); + let e: [u8; 32] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31]; + let r: [u8; 32] = transmute(vzipq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_p16() { + let a: i16x8 = i16x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let b: i16x8 = i16x8::new(1, 3, 5, 7, 9, 11, 13, 15); + let e: [u16; 16] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]; + let r: [u16; 16] = transmute(vzipq_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzip_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x2 = f32x2::new(5., 6.); + let e: [f32; 4] = [1., 5., 2., 6.]; + let r: [f32; 4] = transmute(vzip_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vzipq_f32() { + let a: f32x4 = f32x4::new(1., 2., 3., 4.); + let b: f32x4 = f32x4::new(5., 6., 7., 8.); + let e: [f32; 8] = [1., 5., 2., 6., 3., 7., 4., 8.]; + let r: [f32; 8] = transmute(vzipq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_s8() { + let a: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: i8x8 = i8x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [i8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [i8; 16] = transmute(vuzp_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_s16() { + let a: i16x4 = i16x4::new(1, 2, 2, 3); + let b: i16x4 = i16x4::new(2, 3, 3, 8); + let e: [i16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [i16; 8] = transmute(vuzp_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vuzpq_s8() { + let a: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); + let b: i8x16 = i8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32); + let e: [i8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; + let r: [i8; 32] = transmute(vuzpq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_s16() { + let a: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: i16x8 = i16x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [i16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [i16; 16] = transmute(vuzpq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_s32() { + let a: i32x4 = i32x4::new(1, 2, 2, 3); + let b: i32x4 = i32x4::new(2, 3, 3, 8); + let e: [i32; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [i32; 8] = transmute(vuzpq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_u8() { + let a: u8x8 = u8x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: u8x8 = u8x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [u8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [u8; 16] = transmute(vuzp_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_u16() { + let a: u16x4 = u16x4::new(1, 2, 2, 3); + let b: u16x4 = u16x4::new(2, 3, 3, 8); + let e: [u16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [u16; 8] = transmute(vuzp_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_u8() { + let a: u8x16 = u8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); + let b: u8x16 = u8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 31, 16, 32); + let e: [u8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; + let r: [u8; 32] = transmute(vuzpq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_u16() { + let a: u16x8 = u16x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: u16x8 = u16x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [u16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [u16; 16] = transmute(vuzpq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_u32() { + let a: u32x4 = u32x4::new(1, 2, 2, 3); + let b: u32x4 = u32x4::new(2, 3, 3, 8); + let e: [u32; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [u32; 8] = transmute(vuzpq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_p8() { + let a: i8x8 = i8x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: i8x8 = i8x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [u8; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [u8; 16] = transmute(vuzp_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_p16() { + let a: i16x4 = i16x4::new(1, 2, 2, 3); + let b: i16x4 = i16x4::new(2, 3, 3, 8); + let e: [u16; 8] = [1, 2, 2, 3, 2, 3, 3, 8]; + let r: [u16; 8] = transmute(vuzp_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_p8() { + let a: i8x16 = i8x16::new(1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 15, 8, 16); + let b: i8x16 = i8x16::new(2, 3, 3, 8, 3, 15, 8, 16, 3, 29, 8, 30, 15, 
31, 16, 32); + let e: [u8; 32] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16, 2, 3, 3, 8, 3, 8, 15, 16, 3, 8, 15, 16, 29, 30, 31, 32]; + let r: [u8; 32] = transmute(vuzpq_p8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_p16() { + let a: i16x8 = i16x8::new(1, 2, 2, 3, 2, 3, 3, 8); + let b: i16x8 = i16x8::new(2, 3, 3, 8, 3, 15, 8, 16); + let e: [u16; 16] = [1, 2, 2, 3, 2, 3, 3, 8, 2, 3, 3, 8, 3, 8, 15, 16]; + let r: [u16; 16] = transmute(vuzpq_p16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_s32() { + let a: i32x2 = i32x2::new(1, 2); + let b: i32x2 = i32x2::new(2, 3); + let e: [i32; 4] = [1, 2, 2, 3]; + let r: [i32; 4] = transmute(vuzp_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_u32() { + let a: u32x2 = u32x2::new(1, 2); + let b: u32x2 = u32x2::new(2, 3); + let e: [u32; 4] = [1, 2, 2, 3]; + let r: [u32; 4] = transmute(vuzp_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzp_f32() { + let a: f32x2 = f32x2::new(1., 2.); + let b: f32x2 = f32x2::new(2., 6.); + let e: [f32; 4] = [1., 2., 2., 6.]; + let r: [f32; 4] = transmute(vuzp_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vuzpq_f32() { + let a: f32x4 = f32x4::new(1., 2., 2., 4.); + let b: f32x4 = f32x4::new(2., 6., 6., 8.); + let e: [f32; 8] = [1., 2., 2., 6., 2., 4., 6., 8.]; + let r: [f32; 8] = transmute(vuzpq_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabal_u8() { + let a: u16x8 = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: u8x8 = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let c: u8x8 = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let e: u16x8 = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let r: u16x8 = transmute(vabal_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabal_u16() { + let a: u32x4 = u32x4::new(1, 2, 3, 4); + let b: u16x4 = u16x4::new(1, 2, 3, 4); + let c: u16x4 = u16x4::new(10, 10, 10, 10); + let e: u32x4 = u32x4::new(10, 10, 10, 10); + let r: u32x4 = transmute(vabal_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabal_u32() { + let a: u64x2 = u64x2::new(1, 2); + let b: u32x2 = u32x2::new(1, 2); + let c: u32x2 = u32x2::new(10, 10); + let e: u64x2 = u64x2::new(10, 10); + let r: u64x2 = transmute(vabal_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabal_s8() { + let a: i16x8 = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b: i8x8 = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let c: i8x8 = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let e: i16x8 = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10); + let r: i16x8 = transmute(vabal_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabal_s16() { + let a: i32x4 = i32x4::new(1, 2, 3, 4); + let b: i16x4 = i16x4::new(1, 2, 3, 4); + let c: i16x4 = i16x4::new(10, 10, 10, 10); + let e: i32x4 = i32x4::new(10, 10, 10, 10); + let r: i32x4 = transmute(vabal_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabal_s32() { + let a: i64x2 = i64x2::new(1, 2); + 
let b: i32x2 = i32x2::new(1, 2); + let c: i32x2 = i32x2::new(10, 10); + let e: i64x2 = i64x2::new(10, 10); + let r: i64x2 = transmute(vabal_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqabs_s8() { + let a: i8x8 = i8x8::new(-128, 0x7F, -6, -5, -4, -3, -2, -1); + let e: i8x8 = i8x8::new(0x7F, 0x7F, 6, 5, 4, 3, 2, 1); + let r: i8x8 = transmute(vqabs_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqabsq_s8() { + let a: i8x16 = i8x16::new(-128, 0x7F, -6, -5, -4, -3, -2, -1, 0, -127, 127, 1, 2, 3, 4, 5); + let e: i8x16 = i8x16::new(0x7F, 0x7F, 6, 5, 4, 3, 2, 1, 0, 127, 127, 1, 2, 3, 4, 5); + let r: i8x16 = transmute(vqabsq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqabs_s16() { + let a: i16x4 = i16x4::new(-32768, 0x7F_FF, -6, -5); + let e: i16x4 = i16x4::new(0x7F_FF, 0x7F_FF, 6, 5); + let r: i16x4 = transmute(vqabs_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqabsq_s16() { + let a: i16x8 = i16x8::new(-32768, 0x7F_FF, -6, -5, -4, -3, -2, -1); + let e: i16x8 = i16x8::new(0x7F_FF, 0x7F_FF, 6, 5, 4, 3, 2, 1); + let r: i16x8 = transmute(vqabsq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqabs_s32() { + let a: i32x2 = i32x2::new(-2147483648, 0x7F_FF_FF_FF); + let e: i32x2 = i32x2::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF); + let r: i32x2 = transmute(vqabs_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqabsq_s32() { + let a: i32x4 = i32x4::new(-2147483648, 0x7F_FF_FF_FF, -6, -5); + let e: i32x4 = i32x4::new(0x7F_FF_FF_FF, 0x7F_FF_FF_FF, 6, 5); + let r: i32x4 = transmute(vqabsq_s32(transmute(a))); + assert_eq!(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs new file mode 100644 index 000000000..bbee29ae7 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/load_tests.rs @@ -0,0 +1,206 @@ +//! Tests for ARM+v7+neon load (vld1) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. 
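+//
+// Each test stores one extra leading element and loads through
+// `a[1..].as_ptr()`, so an implementation that ignored the given pointer or
+// read the wrong number of lanes would produce a visible mismatch. A minimal
+// sketch of the pattern being exercised (hypothetical snippet, not one of
+// the tests below):
+//
+//     let a: [i32; 3] = [0, 1, 2];
+//     let v: int32x2_t = vld1_s32(a[1..].as_ptr()); // loads lanes (1, 2)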
+ +use super::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +#[cfg(target_arch = "aarch64")] +use crate::core_arch::aarch64::*; + +use crate::core_arch::simd::*; +use std::mem; +use stdarch_test::simd_test; +#[simd_test(enable = "neon")] +unsafe fn test_vld1_s8() { + let a: [i8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vld1_s8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_s8() { + let a: [i8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x16 = transmute(vld1q_s8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_s16() { + let a: [i16; 5] = [0, 1, 2, 3, 4]; + let e = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vld1_s16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_s16() { + let a: [i16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vld1q_s16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_s32() { + let a: [i32; 3] = [0, 1, 2]; + let e = i32x2::new(1, 2); + let r: i32x2 = transmute(vld1_s32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_s32() { + let a: [i32; 5] = [0, 1, 2, 3, 4]; + let e = i32x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vld1q_s32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_s64() { + let a: [i64; 2] = [0, 1]; + let e = i64x1::new(1); + let r: i64x1 = transmute(vld1_s64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_s64() { + let a: [i64; 3] = [0, 1, 2]; + let e = i64x2::new(1, 2); + let r: i64x2 = transmute(vld1q_s64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_u8() { + let a: [u8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vld1_u8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_u8() { + let a: [u8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vld1q_u8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_u16() { + let a: [u16; 5] = [0, 1, 2, 3, 4]; + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vld1_u16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_u16() { + let a: [u16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vld1q_u16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_u32() { + let a: [u32; 3] = [0, 1, 2]; + let e = u32x2::new(1, 2); + let r: u32x2 = transmute(vld1_u32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_u32() { + let a: [u32; 5] = [0, 1, 2, 3, 4]; + let e = u32x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vld1q_u32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_u64() { + let a: [u64; 2] = [0, 1]; + let e = u64x1::new(1); + let r: u64x1 = transmute(vld1_u64(a[1..].as_ptr())); + 
assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_u64() { + let a: [u64; 3] = [0, 1, 2]; + let e = u64x2::new(1, 2); + let r: u64x2 = transmute(vld1q_u64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_p8() { + let a: [p8; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vld1_p8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_p8() { + let a: [p8; 17] = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16]; + let e = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x16 = transmute(vld1q_p8(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_p16() { + let a: [p16; 5] = [0, 1, 2, 3, 4]; + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vld1_p16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_p16() { + let a: [p16; 9] = [0, 1, 2, 3, 4, 5, 6, 7, 8]; + let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vld1q_p16(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon,aes")] +unsafe fn test_vld1_p64() { + let a: [p64; 2] = [0, 1]; + let e = u64x1::new(1); + let r: u64x1 = transmute(vld1_p64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon,aes")] +unsafe fn test_vld1q_p64() { + let a: [p64; 3] = [0, 1, 2]; + let e = u64x2::new(1, 2); + let r: u64x2 = transmute(vld1q_p64(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1_f32() { + let a: [f32; 3] = [0., 1., 2.]; + let e = f32x2::new(1., 2.); + let r: f32x2 = transmute(vld1_f32(a[1..].as_ptr())); + assert_eq!(r, e) +} + +#[simd_test(enable = "neon")] +unsafe fn test_vld1q_f32() { + let a: [f32; 5] = [0., 1., 2., 3., 4.]; + let e = f32x4::new(1., 2., 3., 4.); + let r: f32x4 = transmute(vld1q_f32(a[1..].as_ptr())); + assert_eq!(r, e) +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs new file mode 100644 index 000000000..952d1ca2e --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/mod.rs @@ -0,0 +1,12347 @@ +//! ARMv7 NEON intrinsics + +#[rustfmt::skip] +mod generated; +#[rustfmt::skip] +pub use self::generated::*; + +use crate::{ + core_arch::simd::*, core_arch::simd_llvm::*, hint::unreachable_unchecked, mem::transmute, +}; +#[cfg(test)] +use stdarch_test::assert_instr; + +pub(crate) type p8 = u8; +pub(crate) type p16 = u16; +pub(crate) type p64 = u64; +pub(crate) type p128 = u128; + +types! { + /// ARM-specific 64-bit wide vector of eight packed `i8`. + #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] + pub struct int8x8_t(pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8); + /// ARM-specific 64-bit wide vector of eight packed `u8`. + #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))] + pub struct uint8x8_t(pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8); + /// ARM-specific 64-bit wide polynomial vector of eight packed `p8`. 
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct poly8x8_t(pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8);
+    /// ARM-specific 64-bit wide vector of four packed `i16`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct int16x4_t(pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16);
+    /// ARM-specific 64-bit wide vector of four packed `u16`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct uint16x4_t(pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16);
+    // FIXME: ARM-specific 64-bit wide vector of four packed `f16`.
+    // pub struct float16x4_t(f16, f16, f16, f16);
+    /// ARM-specific 64-bit wide vector of four packed `p16`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct poly16x4_t(pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16);
+    /// ARM-specific 64-bit wide vector of two packed `i32`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct int32x2_t(pub(crate) i32, pub(crate) i32);
+    /// ARM-specific 64-bit wide vector of two packed `u32`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct uint32x2_t(pub(crate) u32, pub(crate) u32);
+    /// ARM-specific 64-bit wide vector of two packed `f32`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct float32x2_t(pub(crate) f32, pub(crate) f32);
+    /// ARM-specific 64-bit wide vector of one packed `i64`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct int64x1_t(pub(crate) i64);
+    /// ARM-specific 64-bit wide vector of one packed `u64`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct uint64x1_t(pub(crate) u64);
+    /// ARM-specific 64-bit wide vector of one packed `p64`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct poly64x1_t(pub(crate) p64);
+
+    /// ARM-specific 128-bit wide vector of sixteen packed `i8`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct int8x16_t(
+        pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8,
+        pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8, pub(crate) i8,
+    );
+    /// ARM-specific 128-bit wide vector of sixteen packed `u8`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct uint8x16_t(
+        pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8,
+        pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8, pub(crate) u8,
+    );
+    /// ARM-specific 128-bit wide vector of sixteen packed `p8`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct poly8x16_t(
+        pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8,
+        pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8, pub(crate) p8,
+    );
+    /// ARM-specific 128-bit wide vector of eight packed `i16`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct int16x8_t(pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16, pub(crate) i16);
+    /// ARM-specific 128-bit wide vector of eight packed `u16`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct uint16x8_t(pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16, pub(crate) u16);
+    // FIXME: ARM-specific 128-bit wide vector of eight packed `f16`.
+    // pub struct float16x8_t(f16, f16, f16, f16, f16, f16, f16, f16);
+    /// ARM-specific 128-bit wide vector of eight packed `p16`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct poly16x8_t(pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16, pub(crate) p16);
+    /// ARM-specific 128-bit wide vector of four packed `i32`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct int32x4_t(pub(crate) i32, pub(crate) i32, pub(crate) i32, pub(crate) i32);
+    /// ARM-specific 128-bit wide vector of four packed `u32`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct uint32x4_t(pub(crate) u32, pub(crate) u32, pub(crate) u32, pub(crate) u32);
+    /// ARM-specific 128-bit wide vector of four packed `f32`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct float32x4_t(pub(crate) f32, pub(crate) f32, pub(crate) f32, pub(crate) f32);
+    /// ARM-specific 128-bit wide vector of two packed `i64`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct int64x2_t(pub(crate) i64, pub(crate) i64);
+    /// ARM-specific 128-bit wide vector of two packed `u64`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct uint64x2_t(pub(crate) u64, pub(crate) u64);
+    /// ARM-specific 128-bit wide vector of two packed `p64`.
+    #[cfg_attr(target_arch = "aarch64", stable(feature = "neon_intrinsics", since = "1.59.0"))]
+    pub struct poly64x2_t(pub(crate) p64, pub(crate) p64);
+}
+
+/// ARM-specific type containing two `int8x8_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct int8x8x2_t(pub int8x8_t, pub int8x8_t);
+/// ARM-specific type containing three `int8x8_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct int8x8x3_t(pub int8x8_t, pub int8x8_t, pub int8x8_t);
+/// ARM-specific type containing four `int8x8_t` vectors.
+#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int8x8x4_t(pub int8x8_t, pub int8x8_t, pub int8x8_t, pub int8x8_t); + +/// ARM-specific type containing two `int8x16_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int8x16x2_t(pub int8x16_t, pub int8x16_t); +/// ARM-specific type containing three `int8x16_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int8x16x3_t(pub int8x16_t, pub int8x16_t, pub int8x16_t); +/// ARM-specific type containing four `int8x16_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int8x16x4_t(pub int8x16_t, pub int8x16_t, pub int8x16_t, pub int8x16_t); + +/// ARM-specific type containing two `uint8x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint8x8x2_t(pub uint8x8_t, pub uint8x8_t); +/// ARM-specific type containing three `uint8x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint8x8x3_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); +/// ARM-specific type containing four `uint8x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint8x8x4_t(pub uint8x8_t, pub uint8x8_t, pub uint8x8_t, pub uint8x8_t); + +/// ARM-specific type containing two `uint8x16_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint8x16x2_t(pub uint8x16_t, pub uint8x16_t); +/// ARM-specific type containing three `uint8x16_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint8x16x3_t(pub uint8x16_t, pub uint8x16_t, pub uint8x16_t); +/// ARM-specific type containing four `uint8x16_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint8x16x4_t( + pub uint8x16_t, + pub uint8x16_t, + pub uint8x16_t, + pub uint8x16_t, +); + +/// ARM-specific type containing two `poly8x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly8x8x2_t(pub poly8x8_t, pub poly8x8_t); +/// ARM-specific type containing three `poly8x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly8x8x3_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); +/// ARM-specific type containing four `poly8x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly8x8x4_t(pub poly8x8_t, pub poly8x8_t, pub poly8x8_t, pub poly8x8_t); + +/// ARM-specific type containing two `poly8x16_t` vectors. 
+#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly8x16x2_t(pub poly8x16_t, pub poly8x16_t); +/// ARM-specific type containing three `poly8x16_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly8x16x3_t(pub poly8x16_t, pub poly8x16_t, pub poly8x16_t); +/// ARM-specific type containing four `poly8x16_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly8x16x4_t( + pub poly8x16_t, + pub poly8x16_t, + pub poly8x16_t, + pub poly8x16_t, +); + +/// ARM-specific type containing two `int16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int16x4x2_t(pub int16x4_t, pub int16x4_t); +/// ARM-specific type containing three `int16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int16x4x3_t(pub int16x4_t, pub int16x4_t, pub int16x4_t); +/// ARM-specific type containing four `int16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int16x4x4_t(pub int16x4_t, pub int16x4_t, pub int16x4_t, pub int16x4_t); + +/// ARM-specific type containing two `int16x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int16x8x2_t(pub int16x8_t, pub int16x8_t); +/// ARM-specific type containing three `int16x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int16x8x3_t(pub int16x8_t, pub int16x8_t, pub int16x8_t); +/// ARM-specific type containing four `int16x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int16x8x4_t(pub int16x8_t, pub int16x8_t, pub int16x8_t, pub int16x8_t); + +/// ARM-specific type containing two `uint16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint16x4x2_t(pub uint16x4_t, pub uint16x4_t); +/// ARM-specific type containing three `uint16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint16x4x3_t(pub uint16x4_t, pub uint16x4_t, pub uint16x4_t); +/// ARM-specific type containing four `uint16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint16x4x4_t( + pub uint16x4_t, + pub uint16x4_t, + pub uint16x4_t, + pub uint16x4_t, +); + +/// ARM-specific type containing two `uint16x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint16x8x2_t(pub uint16x8_t, pub uint16x8_t); +/// ARM-specific type containing three `uint16x8_t` vectors. 
+#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint16x8x3_t(pub uint16x8_t, pub uint16x8_t, pub uint16x8_t); +/// ARM-specific type containing four `uint16x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint16x8x4_t( + pub uint16x8_t, + pub uint16x8_t, + pub uint16x8_t, + pub uint16x8_t, +); + +/// ARM-specific type containing two `poly16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly16x4x2_t(pub poly16x4_t, pub poly16x4_t); +/// ARM-specific type containing three `poly16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly16x4x3_t(pub poly16x4_t, pub poly16x4_t, pub poly16x4_t); +/// ARM-specific type containing four `poly16x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly16x4x4_t( + pub poly16x4_t, + pub poly16x4_t, + pub poly16x4_t, + pub poly16x4_t, +); + +/// ARM-specific type containing two `poly16x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly16x8x2_t(pub poly16x8_t, pub poly16x8_t); +/// ARM-specific type containing three `poly16x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly16x8x3_t(pub poly16x8_t, pub poly16x8_t, pub poly16x8_t); +/// ARM-specific type containing four `poly16x8_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct poly16x8x4_t( + pub poly16x8_t, + pub poly16x8_t, + pub poly16x8_t, + pub poly16x8_t, +); + +/// ARM-specific type containing two `int32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int32x2x2_t(pub int32x2_t, pub int32x2_t); +/// ARM-specific type containing three `int32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int32x2x3_t(pub int32x2_t, pub int32x2_t, pub int32x2_t); +/// ARM-specific type containing four `int32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int32x2x4_t(pub int32x2_t, pub int32x2_t, pub int32x2_t, pub int32x2_t); + +/// ARM-specific type containing two `int32x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int32x4x2_t(pub int32x4_t, pub int32x4_t); +/// ARM-specific type containing three `int32x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int32x4x3_t(pub int32x4_t, pub int32x4_t, pub int32x4_t); +/// ARM-specific type containing four `int32x4_t` vectors. 
+#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct int32x4x4_t(pub int32x4_t, pub int32x4_t, pub int32x4_t, pub int32x4_t); + +/// ARM-specific type containing two `uint32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint32x2x2_t(pub uint32x2_t, pub uint32x2_t); +/// ARM-specific type containing three `uint32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint32x2x3_t(pub uint32x2_t, pub uint32x2_t, pub uint32x2_t); +/// ARM-specific type containing four `uint32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint32x2x4_t( + pub uint32x2_t, + pub uint32x2_t, + pub uint32x2_t, + pub uint32x2_t, +); + +/// ARM-specific type containing two `uint32x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint32x4x2_t(pub uint32x4_t, pub uint32x4_t); +/// ARM-specific type containing three `uint32x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint32x4x3_t(pub uint32x4_t, pub uint32x4_t, pub uint32x4_t); +/// ARM-specific type containing four `uint32x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct uint32x4x4_t( + pub uint32x4_t, + pub uint32x4_t, + pub uint32x4_t, + pub uint32x4_t, +); + +/// ARM-specific type containing two `float32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct float32x2x2_t(pub float32x2_t, pub float32x2_t); +/// ARM-specific type containing three `float32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct float32x2x3_t(pub float32x2_t, pub float32x2_t, pub float32x2_t); +/// ARM-specific type containing four `float32x2_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct float32x2x4_t( + pub float32x2_t, + pub float32x2_t, + pub float32x2_t, + pub float32x2_t, +); + +/// ARM-specific type containing two `float32x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct float32x4x2_t(pub float32x4_t, pub float32x4_t); +/// ARM-specific type containing three `float32x4_t` vectors. +#[derive(Copy, Clone, Debug)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub struct float32x4x3_t(pub float32x4_t, pub float32x4_t, pub float32x4_t); +/// ARM-specific type containing four `float32x4_t` vectors. 
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct float32x4x4_t(
+    pub float32x4_t,
+    pub float32x4_t,
+    pub float32x4_t,
+    pub float32x4_t,
+);
+
+/// ARM-specific type containing two `int64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct int64x1x2_t(pub int64x1_t, pub int64x1_t);
+/// ARM-specific type containing three `int64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct int64x1x3_t(pub int64x1_t, pub int64x1_t, pub int64x1_t);
+/// ARM-specific type containing four `int64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct int64x1x4_t(pub int64x1_t, pub int64x1_t, pub int64x1_t, pub int64x1_t);
+
+/// ARM-specific type containing two `int64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct int64x2x2_t(pub int64x2_t, pub int64x2_t);
+/// ARM-specific type containing three `int64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct int64x2x3_t(pub int64x2_t, pub int64x2_t, pub int64x2_t);
+/// ARM-specific type containing four `int64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct int64x2x4_t(pub int64x2_t, pub int64x2_t, pub int64x2_t, pub int64x2_t);
+
+/// ARM-specific type containing two `uint64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct uint64x1x2_t(pub uint64x1_t, pub uint64x1_t);
+/// ARM-specific type containing three `uint64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct uint64x1x3_t(pub uint64x1_t, pub uint64x1_t, pub uint64x1_t);
+/// ARM-specific type containing four `uint64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct uint64x1x4_t(
+    pub uint64x1_t,
+    pub uint64x1_t,
+    pub uint64x1_t,
+    pub uint64x1_t,
+);
+
+/// ARM-specific type containing two `uint64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct uint64x2x2_t(pub uint64x2_t, pub uint64x2_t);
+/// ARM-specific type containing three `uint64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct uint64x2x3_t(pub uint64x2_t, pub uint64x2_t, pub uint64x2_t);
+/// ARM-specific type containing four `uint64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct uint64x2x4_t(
+    pub uint64x2_t,
+    pub uint64x2_t,
+    pub uint64x2_t,
+    pub uint64x2_t,
+);
+
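+// The `..x2_t`, `..x3_t`, and `..x4_t` tuples above and below are the
+// aggregate types taken and returned by the multi-register intrinsics:
+// `vtrn`, `vzip`, and `vuzp` return the `x2` forms, and the multi-register
+// loads and stores, where available, use the `x2` through `x4` forms. A
+// minimal sketch of field access (hypothetical snippet, assuming a
+// NEON-enabled caller):
+//
+//     let parts: int32x2x2_t = vtrn_s32(a, b); // unsafe intrinsic call
+//     let even = parts.0; // int32x2_t holding lanes (a0, b0)
+//     let odd = parts.1;  // int32x2_t holding lanes (a1, b1)
+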
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct poly64x1x2_t(pub poly64x1_t, pub poly64x1_t);
+/// ARM-specific type containing three `poly64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct poly64x1x3_t(pub poly64x1_t, pub poly64x1_t, pub poly64x1_t);
+/// ARM-specific type containing four `poly64x1_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct poly64x1x4_t(
+    pub poly64x1_t,
+    pub poly64x1_t,
+    pub poly64x1_t,
+    pub poly64x1_t,
+);
+
+/// ARM-specific type containing two `poly64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct poly64x2x2_t(pub poly64x2_t, pub poly64x2_t);
+/// ARM-specific type containing three `poly64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct poly64x2x3_t(pub poly64x2_t, pub poly64x2_t, pub poly64x2_t);
+/// ARM-specific type containing four `poly64x2_t` vectors.
+#[derive(Copy, Clone, Debug)]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub struct poly64x2x4_t(
+    pub poly64x2_t,
+    pub poly64x2_t,
+    pub poly64x2_t,
+    pub poly64x2_t,
+);
+
+#[allow(improper_ctypes)]
+extern "unadjusted" {
+    // absolute value (64-bit)
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i8")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v8i8")]
+    fn vabs_s8_(a: int8x8_t) -> int8x8_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i16")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v4i16")]
+    fn vabs_s16_(a: int16x4_t) -> int16x4_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v2i32")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v2i32")]
+    fn vabs_s32_(a: int32x2_t) -> int32x2_t;
+    // absolute value (128-bit)
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v16i8")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v16i8")]
+    fn vabsq_s8_(a: int8x16_t) -> int8x16_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v8i16")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v8i16")]
+    fn vabsq_s16_(a: int16x8_t) -> int16x8_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vabs.v4i32")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.abs.v4i32")]
+    fn vabsq_s32_(a: int32x4_t) -> int32x4_t;
+
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v8i8")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v8i8")]
+    fn vpmins_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v4i16")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v4i16")]
+    fn vpmins_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t;
+    #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2i32")]
+    #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.sminp.v2i32")]
+    fn vpmins_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t;
+    #[cfg_attr(target_arch =
"arm", link_name = "llvm.arm.neon.vpminu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v8i8")] + fn vpminu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v4i16")] + fn vpminu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpminu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.uminp.v2i32")] + fn vpminu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmins.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fminp.v2f32")] + fn vpminf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v8i8")] + fn vpmaxs_v8i8(a: int8x8_t, b: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v4i16")] + fn vpmaxs_v4i16(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.smaxp.v2i32")] + fn vpmaxs_v2i32(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v8i8")] + fn vpmaxu_v8i8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v4i16")] + fn vpmaxu_v4i16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxu.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.umaxp.v2i32")] + fn vpmaxu_v2i32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpmaxs.v2f32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.fmaxp.v2f32")] + fn vpmaxf_v2f32(a: float32x2_t, b: float32x2_t) -> float32x2_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.raddhn.v8i8")] + fn vraddhn_s16_(a: int16x8_t, b: int16x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.raddhn.v4i16")] + fn vraddhn_s32_(a: int32x4_t, b: int32x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vraddhn.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.raddhn.v2i32")] + fn vraddhn_s64_(a: int64x2_t, b: int64x2_t) -> int32x2_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v4i16")] + fn vpadd_s16_(a: int16x4_t, b: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpadd.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v2i32")] + fn vpadd_s32_(a: int32x2_t, b: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = 
"arm", link_name = "llvm.arm.neon.vpadd.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.neon.addp.v8i8")] + fn vpadd_s8_(a: int8x8_t, b: int8x8_t) -> int8x8_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i16.v8i8")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.saddlp.v4i16.v8i8" + )] + pub(crate) fn vpaddl_s8_(a: int8x8_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i32.v4i16")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.saddlp.v2i32.v4i16" + )] + pub(crate) fn vpaddl_s16_(a: int16x4_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v1i64.v2i32")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.saddlp.v1i64.v2i32" + )] + pub(crate) fn vpaddl_s32_(a: int32x2_t) -> int64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v8i16.v16i8")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.saddlp.v8i16.v16i8" + )] + pub(crate) fn vpaddlq_s8_(a: int8x16_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v4i32.v8i16")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.saddlp.v4i32.v8i16" + )] + pub(crate) fn vpaddlq_s16_(a: int16x8_t) -> int32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddls.v2i64.v4i32")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.saddlp.v2i64.v4i32" + )] + pub(crate) fn vpaddlq_s32_(a: int32x4_t) -> int64x2_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i16.v8i8")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.uaddlp.v4i16.v8i8" + )] + pub(crate) fn vpaddl_u8_(a: uint8x8_t) -> uint16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i32.v4i16")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.uaddlp.v2i32.v4i16" + )] + pub(crate) fn vpaddl_u16_(a: uint16x4_t) -> uint32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v1i64.v2i32")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.uaddlp.v1i64.v2i32" + )] + pub(crate) fn vpaddl_u32_(a: uint32x2_t) -> uint64x1_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v8i16.v16i8")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.uaddlp.v8i16.v16i8" + )] + pub(crate) fn vpaddlq_u8_(a: uint8x16_t) -> uint16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v4i32.v8i16")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.uaddlp.v4i32.v8i16" + )] + pub(crate) fn vpaddlq_u16_(a: uint16x8_t) -> uint32x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.vpaddlu.v2i64.v4i32")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.uaddlp.v2i64.v4i32" + )] + pub(crate) fn vpaddlq_u32_(a: uint32x4_t) -> uint64x2_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctpop.v8i8")] + fn vcnt_s8_(a: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctpop.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctpop.v16i8")] + fn vcntq_s8_(a: int8x16_t) -> int8x16_t; + + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i8")] + #[cfg_attr(target_arch = "aarch64", link_name = 
"llvm.ctlz.v8i8")] + fn vclz_s8_(a: int8x8_t) -> int8x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v16i8")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v16i8")] + fn vclzq_s8_(a: int8x16_t) -> int8x16_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v4i16")] + fn vclz_s16_(a: int16x4_t) -> int16x4_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v8i16")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v8i16")] + fn vclzq_s16_(a: int16x8_t) -> int16x8_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v2i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v2i32")] + fn vclz_s32_(a: int32x2_t) -> int32x2_t; + #[cfg_attr(target_arch = "arm", link_name = "llvm.ctlz.v4i32")] + #[cfg_attr(target_arch = "aarch64", link_name = "llvm.ctlz.v4i32")] + fn vclzq_s32_(a: int32x4_t) -> int32x4_t; +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 7))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_s8(ptr: *const i8, src: int8x8_t) -> int8x8_t { + static_assert_imm3!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 15))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_s8(ptr: *const i8, src: int8x16_t) -> int8x16_t { + static_assert_imm4!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 3))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_s16(ptr: *const i16, src: int16x4_t) -> int16x4_t { + static_assert_imm2!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 7))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_s16(ptr: *const i16, src: int16x8_t) -> int16x8_t { + static_assert_imm3!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_s32(ptr: *const i32, src: int32x2_t) -> int32x2_t { + static_assert_imm1!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 3))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_s32(ptr: *const i32, src: int32x4_t) -> int32x4_t { + static_assert_imm2!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr", LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr, LANE = 0))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_s64(ptr: *const i64, src: int64x1_t) -> int64x1_t { + static_assert!(LANE : i32 where LANE == 0); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_s64(ptr: *const i64, src: int64x2_t) -> int64x2_t { + static_assert_imm1!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 7))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_u8(ptr: *const u8, src: uint8x8_t) -> uint8x8_t { + static_assert_imm3!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 15))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_u8(ptr: *const u8, src: uint8x16_t) -> uint8x16_t { + static_assert_imm4!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 3))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_u16(ptr: *const u16, src: uint16x4_t) -> uint16x4_t { + static_assert_imm2!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 7))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_u16(ptr: *const u16, src: uint16x8_t) -> uint16x8_t { + static_assert_imm3!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_u32(ptr: *const u32, src: uint32x2_t) -> uint32x2_t { + static_assert_imm1!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 3))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_u32(ptr: *const u32, src: uint32x4_t) -> uint32x4_t { + static_assert_imm2!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. 
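+///
+/// # Examples
+///
+/// A sketch of the single-lane case, added for illustration only: `uint64x1_t`
+/// has one lane, so `LANE` must be `0`, which the `static_assert!` enforces at
+/// compile time. Assumes an AArch64 target:
+///
+/// ```
+/// use core::arch::aarch64::*;
+///
+/// unsafe {
+///     let x = 42u64;
+///     let v = vld1_dup_u64(&x);           // single 64-bit lane
+///     let v = vld1_lane_u64::<0>(&x, v);  // only LANE == 0 compiles
+///     assert_eq!(vget_lane_u64::<0>(v), 42);
+/// }
+/// ```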
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr", LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr, LANE = 0))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_u64(ptr: *const u64, src: uint64x1_t) -> uint64x1_t { + static_assert!(LANE : i32 where LANE == 0); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_u64(ptr: *const u64, src: uint64x2_t) -> uint64x2_t { + static_assert_imm1!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 7))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 7))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_p8(ptr: *const p8, src: poly8x8_t) -> poly8x8_t { + static_assert_imm3!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8", LANE = 15))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 15))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_p8(ptr: *const p8, src: poly8x16_t) -> poly8x16_t { + static_assert_imm4!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 3))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_p16(ptr: *const p16, src: poly16x4_t) -> poly16x4_t { + static_assert_imm2!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16", LANE = 7))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 7))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_p16(ptr: *const p16, src: poly16x8_t) -> poly16x8_t { + static_assert_imm3!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr", LANE = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr, LANE = 0))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_p64(ptr: *const p64, src: poly64x1_t) -> poly64x1_t { + static_assert!(LANE : i32 where LANE == 0); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_p64(ptr: *const p64, src: poly64x2_t) -> poly64x2_t { + static_assert_imm1!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 1))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_lane_f32(ptr: *const f32, src: float32x2_t) -> float32x2_t { + static_assert_imm1!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure to one lane of one register. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32", LANE = 3))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1, LANE = 3))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_lane_f32(ptr: *const f32, src: float32x4_t) -> float32x4_t { + static_assert_imm2!(LANE); + simd_insert(src, LANE as u32, *ptr) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). 
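+///
+/// # Examples
+///
+/// A sketch of the broadcast behaviour, added for illustration only (the byte
+/// value is arbitrary): one element is read through the pointer and replicated
+/// into every lane. Assumes an AArch64 target:
+///
+/// ```
+/// use core::arch::aarch64::*;
+///
+/// unsafe {
+///     let b: i8 = 0x2a;
+///     let v = vld1_dup_s8(&b);                // all eight lanes become 0x2a
+///     assert_eq!(vget_lane_s8::<7>(v), 0x2a);
+/// }
+/// ```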
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_s8(ptr: *const i8) -> int8x8_t { + let x = vld1_lane_s8::<0>(ptr, transmute(i8x8::splat(0))); + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_s8(ptr: *const i8) -> int8x16_t { + let x = vld1q_lane_s8::<0>(ptr, transmute(i8x16::splat(0))); + simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_s16(ptr: *const i16) -> int16x4_t { + let x = vld1_lane_s16::<0>(ptr, transmute(i16x4::splat(0))); + simd_shuffle4!(x, x, [0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_s16(ptr: *const i16) -> int16x8_t { + let x = vld1q_lane_s16::<0>(ptr, transmute(i16x8::splat(0))); + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_s32(ptr: *const i32) -> int32x2_t { + let x = vld1_lane_s32::<0>(ptr, transmute(i32x2::splat(0))); + simd_shuffle2!(x, x, [0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_s32(ptr: *const i32) -> int32x4_t { + let x = vld1q_lane_s32::<0>(ptr, transmute(i32x4::splat(0))); + simd_shuffle4!(x, x, [0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_s64(ptr: *const i64) -> int64x1_t { + #[cfg(target_arch = "aarch64")] + { + crate::core_arch::aarch64::vld1_s64(ptr) + } + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::vld1_s64(ptr) + } +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_s64(ptr: *const i64) -> int64x2_t { + let x = vld1q_lane_s64::<0>(ptr, transmute(i64x2::splat(0))); + simd_shuffle2!(x, x, [0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_u8(ptr: *const u8) -> uint8x8_t { + let x = vld1_lane_u8::<0>(ptr, transmute(u8x8::splat(0))); + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_u8(ptr: *const u8) -> uint8x16_t { + let x = vld1q_lane_u8::<0>(ptr, transmute(u8x16::splat(0))); + simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_u16(ptr: *const u16) -> uint16x4_t { + let x = vld1_lane_u16::<0>(ptr, transmute(u16x4::splat(0))); + simd_shuffle4!(x, x, [0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_u16(ptr: *const u16) -> uint16x8_t { + let x = vld1q_lane_u16::<0>(ptr, transmute(u16x8::splat(0))); + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_u32(ptr: *const u32) -> uint32x2_t { + let x = vld1_lane_u32::<0>(ptr, transmute(u32x2::splat(0))); + simd_shuffle2!(x, x, [0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_u32(ptr: *const u32) -> uint32x4_t { + let x = vld1q_lane_u32::<0>(ptr, transmute(u32x4::splat(0))); + simd_shuffle4!(x, x, [0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_u64(ptr: *const u64) -> uint64x1_t { + #[cfg(target_arch = "aarch64")] + { + crate::core_arch::aarch64::vld1_u64(ptr) + } + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::vld1_u64(ptr) + } +} + +/// Load one single-element structure and Replicate to all lanes (of one register). 
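+///
+/// # Examples
+///
+/// An illustrative sketch (editorial addition) showing that both 64-bit lanes
+/// of the 128-bit result receive the loaded value. Assumes an AArch64 target:
+///
+/// ```
+/// use core::arch::aarch64::*;
+///
+/// unsafe {
+///     let x = 7u64;
+///     let v = vld1q_dup_u64(&x);
+///     assert_eq!(vgetq_lane_u64::<0>(v), 7);
+///     assert_eq!(vgetq_lane_u64::<1>(v), 7);
+/// }
+/// ```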
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_u64(ptr: *const u64) -> uint64x2_t { + let x = vld1q_lane_u64::<0>(ptr, transmute(u64x2::splat(0))); + simd_shuffle2!(x, x, [0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_p8(ptr: *const p8) -> poly8x8_t { + let x = vld1_lane_p8::<0>(ptr, transmute(u8x8::splat(0))); + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_p8(ptr: *const p8) -> poly8x16_t { + let x = vld1q_lane_p8::<0>(ptr, transmute(u8x16::splat(0))); + simd_shuffle16!(x, x, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_p16(ptr: *const p16) -> poly16x4_t { + let x = vld1_lane_p16::<0>(ptr, transmute(u16x4::splat(0))); + simd_shuffle4!(x, x, [0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_p16(ptr: *const p16) -> poly16x8_t { + let x = vld1q_lane_p16::<0>(ptr, transmute(u16x8::splat(0))); + simd_shuffle8!(x, x, [0, 0, 0, 0, 0, 0, 0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_f32(ptr: *const f32) -> float32x2_t { + let x = vld1_lane_f32::<0>(ptr, transmute(f32x2::splat(0.))); + simd_shuffle2!(x, x, [0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ldr))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1_dup_p64(ptr: *const p64) -> poly64x1_t { + #[cfg(target_arch = "aarch64")] + { + crate::core_arch::aarch64::vld1_p64(ptr) + } + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::vld1_p64(ptr) + } +} + +/// Load one single-element structure and Replicate to all lanes (of one register). +#[inline] +#[target_feature(enable = "neon,aes")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vldr"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_p64(ptr: *const p64) -> poly64x2_t { + let x = vld1q_lane_p64::<0>(ptr, transmute(u64x2::splat(0))); + simd_shuffle2!(x, x, [0, 0]) +} + +/// Load one single-element structure and Replicate to all lanes (of one register). 
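+///
+/// # Examples
+///
+/// An illustrative sketch (editorial addition) of the float broadcast: all
+/// four `f32` lanes take the value read through the pointer. Assumes an
+/// AArch64 target:
+///
+/// ```
+/// use core::arch::aarch64::*;
+///
+/// unsafe {
+///     let x = 1.5f32;
+///     let v = vld1q_dup_f32(&x);
+///     assert_eq!(vgetq_lane_f32::<3>(v), 1.5);
+/// }
+/// ```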
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vld1.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ld1r))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vld1q_dup_f32(ptr: *const f32) -> float32x4_t { + let x = vld1q_lane_f32::<0>(ptr, transmute(f32x4::splat(0.))); + simd_shuffle4!(x, x, [0, 0, 0, 0]) +} + +// signed absolute difference and accumulate (64-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("saba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaba_s8(a: int8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + simd_add(a, vabd_s8(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("saba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaba_s16(a: int16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_add(a, vabd_s16(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("saba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaba_s32(a: int32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + simd_add(a, vabd_s32(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("uaba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaba_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + simd_add(a, vabd_u8(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("uaba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaba_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + simd_add(a, vabd_u16(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("uaba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaba_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + simd_add(a, vabd_u32(b, c)) +} +// signed absolute difference and accumulate (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] 
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("saba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabaq_s8(a: int8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_add(a, vabdq_s8(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("saba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabaq_s16(a: int16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_add(a, vabdq_s16(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.s32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("saba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabaq_s32(a: int32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_add(a, vabdq_s32(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("uaba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabaq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_add(a, vabdq_u8(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("uaba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabaq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_add(a, vabdq_u16(b, c)) +} +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vaba.u32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("uaba"))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabaq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_add(a, vabdq_u32(b, c)) +} + +/// Absolute value (wrapping). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabs_s8(a: int8x8_t) -> int8x8_t { + vabs_s8_(a) +} +/// Absolute value (wrapping). 
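+///
+/// # Examples
+///
+/// A sketch of the wrapping behaviour, added for illustration only: the
+/// absolute value is taken lane-wise, and `i16::MIN`, which has no positive
+/// counterpart, wraps to itself. Assumes an AArch64 target:
+///
+/// ```
+/// use core::arch::aarch64::*;
+///
+/// unsafe {
+///     let x = -5i16;
+///     let r = vabs_s16(vld1_dup_s16(&x));
+///     assert_eq!(vget_lane_s16::<0>(r), 5);
+///
+///     let min = i16::MIN;
+///     let r = vabs_s16(vld1_dup_s16(&min));
+///     assert_eq!(vget_lane_s16::<0>(r), i16::MIN); // wraps
+/// }
+/// ```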
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabs_s16(a: int16x4_t) -> int16x4_t { + vabs_s16_(a) +} +/// Absolute value (wrapping). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabs_s32(a: int32x2_t) -> int32x2_t { + vabs_s32_(a) +} +/// Absolute value (wrapping). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabsq_s8(a: int8x16_t) -> int8x16_t { + vabsq_s8_(a) +} +/// Absolute value (wrapping). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabsq_s16(a: int16x8_t) -> int16x8_t { + vabsq_s16_(a) +} +/// Absolute value (wrapping). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vabs))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(abs))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vabsq_s32(a: int32x4_t) -> int32x4_t { + vabsq_s32_(a) +} + +/// Add pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + vpadd_s16_(a, b) +} +/// Add pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + vpadd_s32_(a, b) +} +/// Add pairwise. 
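+///
+/// # Examples
+///
+/// A sketch of the pairwise semantics, added for illustration only: adjacent
+/// pairs of `a` fill the low half of the result and adjacent pairs of `b` the
+/// high half. Assumes an AArch64 target:
+///
+/// ```
+/// use core::arch::aarch64::*;
+///
+/// unsafe {
+///     let data: [i8; 8] = [1, 2, 3, 4, 5, 6, 7, 8];
+///     let v = vld1_s8(data.as_ptr());
+///     let r = vpadd_s8(v, v);
+///     // lanes: [1+2, 3+4, 5+6, 7+8, 1+2, 3+4, 5+6, 7+8]
+///     assert_eq!(vget_lane_s8::<0>(r), 3);
+///     assert_eq!(vget_lane_s8::<4>(r), 3);
+/// }
+/// ```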
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vpadd_s8_(a, b) +} +/// Add pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + transmute(vpadd_s16_(transmute(a), transmute(b))) +} +/// Add pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + transmute(vpadd_s32_(transmute(a), transmute(b))) +} +/// Add pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + transmute(vpadd_s8_(transmute(a), transmute(b))) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vadd_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vadd_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + simd_add(a, b) +} + +/// Vector add. 
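+///
+/// # Examples
+///
+/// An illustrative sketch (editorial addition): the addition is lane-wise and
+/// wrapping, so overflow does not panic even in debug builds. Assumes an
+/// AArch64 target:
+///
+/// ```
+/// use core::arch::aarch64::*;
+///
+/// unsafe {
+///     let x = i16::MAX;
+///     let a = vld1q_dup_s16(&x);
+///     let r = vaddq_s16(a, a); // i16::MAX + i16::MAX wraps to -2
+///     assert_eq!(vgetq_lane_s16::<0>(r), -2);
+/// }
+/// ```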
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vadd_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vadd_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vadd_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + simd_add(a, b) +} + +/// Vector add. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vadd_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(add))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vadd_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + simd_add(a, b) +} + +/// Vector add. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vadd))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fadd))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t { + simd_add(a, b) +} + +/// Signed Add Long (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_s8(a: int8x8_t, b: int8x8_t) -> int16x8_t { + let a: int16x8_t = simd_cast(a); + let b: int16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Long (vector). 
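+
+// Usage sketch (illustrative): the "long" forms widen each lane before adding,
+// so the sum cannot wrap in the narrow element type:
+//
+//     let a = int8x8_t(127, 127, 127, 127, 127, 127, 127, 127); // i8::MAX
+//     // vaddl_s8(a, a) == [254i16; 8], while vadd_s8(a, a) would wrap to -2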
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_s16(a: int16x4_t, b: int16x4_t) -> int32x4_t { + let a: int32x4_t = simd_cast(a); + let b: int32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Long (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_s32(a: int32x2_t, b: int32x2_t) -> int64x2_t { + let a: int64x2_t = simd_cast(a); + let b: int64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Long (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_u8(a: uint8x8_t, b: uint8x8_t) -> uint16x8_t { + let a: uint16x8_t = simd_cast(a); + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Long (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_u16(a: uint16x4_t, b: uint16x4_t) -> uint32x4_t { + let a: uint32x4_t = simd_cast(a); + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Long (vector). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_u32(a: uint32x2_t, b: uint32x2_t) -> uint64x2_t { + let a: uint64x2_t = simd_cast(a); + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Long (vector, high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_high_s8(a: int8x16_t, b: int8x16_t) -> int16x8_t { + let a: int8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: int16x8_t = simd_cast(a); + let b: int16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Long (vector, high half). 
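+
+// Usage sketch (illustrative, `upper_half` is pseudocode): the `_high`
+// variants read only the upper half of their 128-bit inputs, i.e. lanes 8..16
+// selected by the simd_shuffle8! above:
+//
+//     // vaddl_high_s8(a, b) == vaddl_s8(upper_half(a), upper_half(b))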
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t { + let a: int16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let a: int32x4_t = simd_cast(a); + let b: int32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Long (vector, high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddl2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t { + let a: int32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); + let a: int64x2_t = simd_cast(a); + let b: int64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Long (vector, high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_high_u8(a: uint8x16_t, b: uint8x16_t) -> uint16x8_t { + let a: uint8x8_t = simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let a: uint16x8_t = simd_cast(a); + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Long (vector, high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_high_u16(a: uint16x8_t, b: uint16x8_t) -> uint32x4_t { + let a: uint16x4_t = simd_shuffle4!(a, a, [4, 5, 6, 7]); + let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let a: uint32x4_t = simd_cast(a); + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Long (vector, high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddl2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddl_high_u32(a: uint32x4_t, b: uint32x4_t) -> uint64x2_t { + let a: uint32x2_t = simd_shuffle2!(a, a, [2, 3]); + let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); + let a: uint64x2_t = simd_cast(a); + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Wide. 
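+
+// Usage sketch (illustrative): the "wide" forms take an already-wide `a` and
+// widen only `b`, e.g. folding a stream of bytes into 16-bit accumulators:
+//
+//     let mut acc = int16x8_t(0, 0, 0, 0, 0, 0, 0, 0);
+//     // acc = vaddw_s8(acc, bytes); // adds 8 sign-extended bytes per call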
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_s8(a: int16x8_t, b: int8x8_t) -> int16x8_t { + let b: int16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Wide. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_s16(a: int32x4_t, b: int16x4_t) -> int32x4_t { + let b: int32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Wide. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_s32(a: int64x2_t, b: int32x2_t) -> int64x2_t { + let b: int64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Wide. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_u8(a: uint16x8_t, b: uint8x8_t) -> uint16x8_t { + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Wide. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_u16(a: uint32x4_t, b: uint16x4_t) -> uint32x4_t { + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Wide. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t { + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Wide (high half). 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_high_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + let b: int8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: int16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Wide (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_high_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + let b: int16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let b: int32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Signed Add Wide (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddw2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_high_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + let b: int32x2_t = simd_shuffle2!(b, b, [2, 3]); + let b: int64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Wide (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_high_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + let b: uint8x8_t = simd_shuffle8!(b, b, [8, 9, 10, 11, 12, 13, 14, 15]); + let b: uint16x8_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Wide (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_high_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + let b: uint16x4_t = simd_shuffle4!(b, b, [4, 5, 6, 7]); + let b: uint32x4_t = simd_cast(b); + simd_add(a, b) +} + +/// Unsigned Add Wide (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddw))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddw2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddw_high_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + let b: uint32x2_t = simd_shuffle2!(b, b, [2, 3]); + let b: uint64x2_t = simd_cast(b); + simd_add(a, b) +} + +/// Add returning High Narrow. 
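+
+// Usage sketch (illustrative): `vaddhn_*` keeps only the high half of each
+// sum, i.e. (a + b) >> 8 truncated to i8 in the 16-bit case:
+//
+//     // one lane: a = 0x1234, b = 0x0101 -> sum = 0x1335 -> result = 0x13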
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + simd_cast(simd_shr(simd_add(a, b), int16x8_t(8, 8, 8, 8, 8, 8, 8, 8))) +} + +/// Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + simd_cast(simd_shr(simd_add(a, b), int32x4_t(16, 16, 16, 16))) +} + +/// Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + simd_cast(simd_shr(simd_add(a, b), int64x2_t(32, 32))) +} + +/// Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + simd_cast(simd_shr(simd_add(a, b), uint16x8_t(8, 8, 8, 8, 8, 8, 8, 8))) +} + +/// Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + simd_cast(simd_shr(simd_add(a, b), uint32x4_t(16, 16, 16, 16))) +} + +/// Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + simd_cast(simd_shr(simd_add(a, b), uint64x2_t(32, 32))) +} + +/// Add returning High Narrow (high half). 
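+
+// Usage sketch (illustrative): the `_high` narrowing forms return `r` in the
+// lower half and the narrowed sums in the upper half, so two calls can fill
+// one 128-bit register:
+//
+//     // let lo = vaddhn_s16(a0, b0);
+//     // let all = vaddhn_high_s16(lo, a1, b1); // == [lo, addhn(a1, b1)]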
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t { + let x = simd_cast(simd_shr(simd_add(a, b), int16x8_t(8, 8, 8, 8, 8, 8, 8, 8))); + simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t { + let x = simd_cast(simd_shr(simd_add(a, b), int32x4_t(16, 16, 16, 16))); + simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t { + let x = simd_cast(simd_shr(simd_add(a, b), int64x2_t(32, 32))); + simd_shuffle4!(r, x, [0, 1, 2, 3]) +} + +/// Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t { + let x = simd_cast(simd_shr(simd_add(a, b), uint16x8_t(8, 8, 8, 8, 8, 8, 8, 8))); + simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t { + let x = simd_cast(simd_shr(simd_add(a, b), uint32x4_t(16, 16, 16, 16))); + simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Add returning High Narrow (high half). 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vaddhn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(addhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vaddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t { + let x = simd_cast(simd_shr(simd_add(a, b), uint64x2_t(32, 32))); + simd_shuffle4!(r, x, [0, 1, 2, 3]) +} + +/// Rounding Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_s16(a: int16x8_t, b: int16x8_t) -> int8x8_t { + vraddhn_s16_(a, b) +} + +/// Rounding Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_s32(a: int32x4_t, b: int32x4_t) -> int16x4_t { + vraddhn_s32_(a, b) +} + +/// Rounding Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i64))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_s64(a: int64x2_t, b: int64x2_t) -> int32x2_t { + vraddhn_s64_(a, b) +} + +/// Rounding Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_u16(a: uint16x8_t, b: uint16x8_t) -> uint8x8_t { + transmute(vraddhn_s16_(transmute(a), transmute(b))) +} + +/// Rounding Add returning High Narrow. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_u32(a: uint32x4_t, b: uint32x4_t) -> uint16x4_t { + transmute(vraddhn_s32_(transmute(a), transmute(b))) +} + +/// Rounding Add returning High Narrow. 
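+
+// Usage sketch (illustrative): the rounding forms add the rounding constant
+// (1 << 7 when narrowing 16 -> 8 bits) before taking the high half, so half
+// values round up instead of truncating:
+//
+//     // one lane: a = 0x0040, b = 0x0040 -> sum = 0x0080
+//     // vaddhn -> 0x00 (truncated), vraddhn -> 0x01 (rounded)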
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i64))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_u64(a: uint64x2_t, b: uint64x2_t) -> uint32x2_t { + transmute(vraddhn_s64_(transmute(a), transmute(b))) +} + +/// Rounding Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_high_s16(r: int8x8_t, a: int16x8_t, b: int16x8_t) -> int8x16_t { + let x = vraddhn_s16_(a, b); + simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Rounding Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_high_s32(r: int16x4_t, a: int32x4_t, b: int32x4_t) -> int16x8_t { + let x = vraddhn_s32_(a, b); + simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Rounding Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i64))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_high_s64(r: int32x2_t, a: int64x2_t, b: int64x2_t) -> int32x4_t { + let x = vraddhn_s64_(a, b); + simd_shuffle4!(r, x, [0, 1, 2, 3]) +} + +/// Rounding Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_high_u16(r: uint8x8_t, a: uint16x8_t, b: uint16x8_t) -> uint8x16_t { + let x: uint8x8_t = transmute(vraddhn_s16_(transmute(a), transmute(b))); + simd_shuffle16!(r, x, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Rounding Add returning High Narrow (high half). 
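+
+// Usage sketch (illustrative): `vpaddl_*` fuses pairwise add with widening:
+// adjacent lanes are summed into elements twice as wide, halving the lane
+// count:
+//
+//     let a = int8x8_t(1, 2, 3, 4, 5, 6, 7, 8);
+//     // vpaddl_s8(a) == [3i16, 7, 11, 15]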
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_high_u32(r: uint16x4_t, a: uint32x4_t, b: uint32x4_t) -> uint16x8_t { + let x: uint16x4_t = transmute(vraddhn_s32_(transmute(a), transmute(b))); + simd_shuffle8!(r, x, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Rounding Add returning High Narrow (high half). +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vraddhn.i64))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(raddhn2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vraddhn_high_u64(r: uint32x2_t, a: uint64x2_t, b: uint64x2_t) -> uint32x4_t { + let x: uint32x2_t = transmute(vraddhn_s64_(transmute(a), transmute(b))); + simd_shuffle4!(r, x, [0, 1, 2, 3]) +} + +/// Signed Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddl_s8(a: int8x8_t) -> int16x4_t { + vpaddl_s8_(a) +} + +/// Signed Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddl_s16(a: int16x4_t) -> int32x2_t { + vpaddl_s16_(a) +} + +/// Signed Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddl_s32(a: int32x2_t) -> int64x1_t { + vpaddl_s32_(a) +} + +/// Signed Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddlq_s8(a: int8x16_t) -> int16x8_t { + vpaddlq_s8_(a) +} + +/// Signed Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddlq_s16(a: int16x8_t) -> int32x4_t { + vpaddlq_s16_(a) +} + +/// Signed Add Long Pairwise. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(saddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddlq_s32(a: int32x4_t) -> int64x2_t { + vpaddlq_s32_(a) +} + +/// Unsigned Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddl_u8(a: uint8x8_t) -> uint16x4_t { + vpaddl_u8_(a) +} + +/// Unsigned Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddl_u16(a: uint16x4_t) -> uint32x2_t { + vpaddl_u16_(a) +} + +/// Unsigned Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddl_u32(a: uint32x2_t) -> uint64x1_t { + vpaddl_u32_(a) +} + +/// Unsigned Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddlq_u8(a: uint8x16_t) -> uint16x8_t { + vpaddlq_u8_(a) +} + +/// Unsigned Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddlq_u16(a: uint16x8_t) -> uint32x4_t { + vpaddlq_u16_(a) +} + +/// Unsigned Add Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpaddl.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uaddlp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpaddlq_u32(a: uint32x4_t) -> uint64x2_t { + vpaddlq_u32_(a) +} + +/// Vector narrow integer. 
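+
+// Usage sketch (illustrative): `vmovn_*` narrows by plain truncation of each
+// lane, with no saturation (the vqmovn family saturates instead):
+//
+//     // one lane: vmovn_s16 maps 0x1234i16 to 0x34i8 (= 52)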
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovn_s16(a: int16x8_t) -> int8x8_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovn_s32(a: int32x4_t) -> int16x4_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovn_s64(a: int64x2_t) -> int32x2_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovn_u16(a: uint16x8_t) -> uint8x8_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovn_u32(a: uint32x4_t) -> uint16x4_t { + simd_cast(a) +} + +/// Vector narrow integer. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(xtn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovn_u64(a: uint64x2_t) -> uint32x2_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovl_s8(a: int8x8_t) -> int16x8_t { + simd_cast(a) +} + +/// Vector long move. 
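+
+// Usage sketch (illustrative): `vmovl_*` widens each lane, sign-extending the
+// signed forms and zero-extending the unsigned ones:
+//
+//     // vmovl_s8: -1i8 -> -1i16 (bits 0xFFFF); vmovl_u8: 255u8 -> 255u16 (0x00FF)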
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovl_s16(a: int16x4_t) -> int32x4_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sxtl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovl_s32(a: int32x2_t) -> int64x2_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovl_u8(a: uint8x8_t) -> uint16x8_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovl_u16(a: uint16x4_t) -> uint32x4_t { + simd_cast(a) +} + +/// Vector long move. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmovl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uxtl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovl_u32(a: uint32x2_t) -> uint64x2_t { + simd_cast(a) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvn_s8(a: int8x8_t) -> int8x8_t { + let b = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvnq_s8(a: int8x16_t) -> int8x16_t { + let b = int8x16_t( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + ); + simd_xor(a, b) +} + +/// Vector bitwise not. 
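+
+// Usage sketch (illustrative): the complement is expressed as XOR with an
+// all-ones vector, which is exactly what the mvn/vmvn instruction computes:
+//
+//     // vmvn_u8: 0b0000_0101 -> 0b1111_1010 (5 -> 250)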
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvn_s16(a: int16x4_t) -> int16x4_t { + let b = int16x4_t(-1, -1, -1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvnq_s16(a: int16x8_t) -> int16x8_t { + let b = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvn_s32(a: int32x2_t) -> int32x2_t { + let b = int32x2_t(-1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvnq_s32(a: int32x4_t) -> int32x4_t { + let b = int32x4_t(-1, -1, -1, -1); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvn_u8(a: uint8x8_t) -> uint8x8_t { + let b = uint8x8_t(255, 255, 255, 255, 255, 255, 255, 255); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvnq_u8(a: uint8x16_t) -> uint8x16_t { + let b = uint8x16_t( + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvn_u16(a: uint16x4_t) -> uint16x4_t { + let b = uint16x4_t(65_535, 65_535, 65_535, 65_535); + simd_xor(a, b) +} + +/// Vector bitwise not. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvnq_u16(a: uint16x8_t) -> uint16x8_t { + let b = uint16x8_t( + 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, 65_535, + ); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvn_u32(a: uint32x2_t) -> uint32x2_t { + let b = uint32x2_t(4_294_967_295, 4_294_967_295); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvnq_u32(a: uint32x4_t) -> uint32x4_t { + let b = uint32x4_t(4_294_967_295, 4_294_967_295, 4_294_967_295, 4_294_967_295); + simd_xor(a, b) +} + +/// Vector bitwise not. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvn_p8(a: poly8x8_t) -> poly8x8_t { + let b = poly8x8_t(255, 255, 255, 255, 255, 255, 255, 255); + simd_xor(a, b) +} + +/// Vector bitwise not. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vmvn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(mvn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmvnq_p8(a: poly8x16_t) -> poly8x16_t { + let b = poly8x16_t( + 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, + ); + simd_xor(a, b) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbic_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + let c = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_and(simd_xor(b, c), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbicq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + let c = int8x16_t( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + ); + simd_and(simd_xor(b, c), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbic_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + let c = int16x4_t(-1, -1, -1, -1); + simd_and(simd_xor(b, c), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbicq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + let c = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_and(simd_xor(b, c), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbic_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + let c = int32x2_t(-1, -1); + simd_and(simd_xor(b, c), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] 
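+// Usage sketch (illustrative): "bit clear" computes `a & !b`, clearing in `a`
+// every bit that is set in `b` (note the operand order: `b` is the mask):
+//
+//     // vbic_u8: a = 0b1111_0000, b = 0b1010_1010 -> 0b0101_0000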
+pub unsafe fn vbicq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + let c = int32x4_t(-1, -1, -1, -1); + simd_and(simd_xor(b, c), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbic_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + let c = int64x1_t(-1); + simd_and(simd_xor(b, c), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbicq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + let c = int64x2_t(-1, -1); + simd_and(simd_xor(b, c), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbic_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let c = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_and(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbicq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + let c = int8x16_t( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + ); + simd_and(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbic_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let c = int16x4_t(-1, -1, -1, -1); + simd_and(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbicq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + let c = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_and(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] 
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbic_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let c = int32x2_t(-1, -1); + simd_and(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbicq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + let c = int32x4_t(-1, -1, -1, -1); + simd_and(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbic_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + let c = int64x1_t(-1); + simd_and(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise bit clear +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbic))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bic))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbicq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + let c = int64x2_t(-1, -1); + simd_and(simd_xor(b, transmute(c)), a) +} + +/// Bitwise Select instructions. This instruction sets each bit in the destination SIMD&FP register +/// to the corresponding bit from the first source SIMD&FP register when the original +/// destination bit was 1, otherwise from the second source SIMD&FP register. + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_s8(a: uint8x8_t, b: int8x8_t, c: int8x8_t) -> int8x8_t { + simd_select(transmute::<_, int8x8_t>(a), b, c) +} + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_s16(a: uint16x4_t, b: int16x4_t, c: int16x4_t) -> int16x4_t { + simd_select(transmute::<_, int16x4_t>(a), b, c) +} + +/// Bitwise Select. 
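+
+// Usage sketch (illustrative): the mask `a` selects from `b` where set and
+// from `c` where clear. This fallback is a lane-wise `simd_select`, so each
+// mask lane should be all-ones or all-zeros, as produced by the comparison
+// intrinsics:
+//
+//     let mask = vceq_s8(x, y); // 0xFF where x == y, else 0x00
+//     // vbsl_s8(mask, b, c): b-lane where equal, c-lane otherwise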
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_s32(a: uint32x2_t, b: int32x2_t, c: int32x2_t) -> int32x2_t { + simd_select(transmute::<_, int32x2_t>(a), b, c) +} + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_s64(a: uint64x1_t, b: int64x1_t, c: int64x1_t) -> int64x1_t { + simd_select(transmute::<_, int64x1_t>(a), b, c) +} + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_u8(a: uint8x8_t, b: uint8x8_t, c: uint8x8_t) -> uint8x8_t { + simd_select(transmute::<_, int8x8_t>(a), b, c) +} + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_u16(a: uint16x4_t, b: uint16x4_t, c: uint16x4_t) -> uint16x4_t { + simd_select(transmute::<_, int16x4_t>(a), b, c) +} + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_u32(a: uint32x2_t, b: uint32x2_t, c: uint32x2_t) -> uint32x2_t { + simd_select(transmute::<_, int32x2_t>(a), b, c) +} + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_u64(a: uint64x1_t, b: uint64x1_t, c: uint64x1_t) -> uint64x1_t { + simd_select(transmute::<_, int64x1_t>(a), b, c) +} + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_f32(a: uint32x2_t, b: float32x2_t, c: float32x2_t) -> float32x2_t { + simd_select(transmute::<_, int32x2_t>(a), b, c) +} + +/// Bitwise Select. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_p8(a: uint8x8_t, b: poly8x8_t, c: poly8x8_t) -> poly8x8_t { + simd_select(transmute::<_, int8x8_t>(a), b, c) +} + +/// Bitwise Select. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbsl_p16(a: uint16x4_t, b: poly16x4_t, c: poly16x4_t) -> poly16x4_t { + simd_select(transmute::<_, int16x4_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_s8(a: uint8x16_t, b: int8x16_t, c: int8x16_t) -> int8x16_t { + simd_select(transmute::<_, int8x16_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_s16(a: uint16x8_t, b: int16x8_t, c: int16x8_t) -> int16x8_t { + simd_select(transmute::<_, int16x8_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_s32(a: uint32x4_t, b: int32x4_t, c: int32x4_t) -> int32x4_t { + simd_select(transmute::<_, int32x4_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_s64(a: uint64x2_t, b: int64x2_t, c: int64x2_t) -> int64x2_t { + simd_select(transmute::<_, int64x2_t>(a), b, c) +} + +/// Bitwise Select. 
(128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_u8(a: uint8x16_t, b: uint8x16_t, c: uint8x16_t) -> uint8x16_t { + simd_select(transmute::<_, int8x16_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_u16(a: uint16x8_t, b: uint16x8_t, c: uint16x8_t) -> uint16x8_t { + simd_select(transmute::<_, int16x8_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_u32(a: uint32x4_t, b: uint32x4_t, c: uint32x4_t) -> uint32x4_t { + simd_select(transmute::<_, int32x4_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_u64(a: uint64x2_t, b: uint64x2_t, c: uint64x2_t) -> uint64x2_t { + simd_select(transmute::<_, int64x2_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_p8(a: uint8x16_t, b: poly8x16_t, c: poly8x16_t) -> poly8x16_t { + simd_select(transmute::<_, int8x16_t>(a), b, c) +} + +/// Bitwise Select. (128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_p16(a: uint16x8_t, b: poly16x8_t, c: poly16x8_t) -> poly16x8_t { + simd_select(transmute::<_, int16x8_t>(a), b, c) +} + +/// Bitwise Select. 
(128-bit) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vbsl))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(bsl))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vbslq_f32(a: uint32x4_t, b: float32x4_t, c: float32x4_t) -> float32x4_t { + simd_select(transmute::<_, int32x4_t>(a), b, c) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vorn_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + let c = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_or(simd_xor(b, c), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vornq_s8(a: int8x16_t, b: int8x16_t) -> int8x16_t { + let c = int8x16_t( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + ); + simd_or(simd_xor(b, c), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vorn_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + let c = int16x4_t(-1, -1, -1, -1); + simd_or(simd_xor(b, c), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vornq_s16(a: int16x8_t, b: int16x8_t) -> int16x8_t { + let c = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_or(simd_xor(b, c), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vorn_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + let c = int32x2_t(-1, -1); + simd_or(simd_xor(b, c), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") 
+)] +pub unsafe fn vornq_s32(a: int32x4_t, b: int32x4_t) -> int32x4_t { + let c = int32x4_t(-1, -1, -1, -1); + simd_or(simd_xor(b, c), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vorn_s64(a: int64x1_t, b: int64x1_t) -> int64x1_t { + let c = int64x1_t(-1); + simd_or(simd_xor(b, c), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vornq_s64(a: int64x2_t, b: int64x2_t) -> int64x2_t { + let c = int64x2_t(-1, -1); + simd_or(simd_xor(b, c), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vorn_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + let c = int8x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_or(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vornq_u8(a: uint8x16_t, b: uint8x16_t) -> uint8x16_t { + let c = int8x16_t( + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, + ); + simd_or(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vorn_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + let c = int16x4_t(-1, -1, -1, -1); + simd_or(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vornq_u16(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t { + let c = int16x8_t(-1, -1, -1, -1, -1, -1, -1, -1); + simd_or(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vorn_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + let c = int32x2_t(-1, -1); + simd_or(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vornq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t { + let c = int32x4_t(-1, -1, -1, -1); + simd_or(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vorn_u64(a: uint64x1_t, b: uint64x1_t) -> uint64x1_t { + let c = int64x1_t(-1); + simd_or(simd_xor(b, transmute(c)), a) +} + +/// Vector bitwise inclusive OR NOT +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vorn))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(orn))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vornq_u64(a: uint64x2_t, b: uint64x2_t) -> uint64x2_t { + let c = int64x2_t(-1, -1); + simd_or(simd_xor(b, transmute(c)), a) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmin_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vpmins_v8i8(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmin_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + vpmins_v4i16(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sminp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmin_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + vpmins_v2i32(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, 
target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmin_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + vpminu_v8i8(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmin_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + vpminu_v4i16(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uminp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmin_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + vpminu_v2i32(a, b) +} + +/// Folding minimum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmin))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fminp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmin_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + vpminf_v2f32(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmax_s8(a: int8x8_t, b: int8x8_t) -> int8x8_t { + vpmaxs_v8i8(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmax_s16(a: int16x4_t, b: int16x4_t) -> int16x4_t { + vpmaxs_v4i16(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smaxp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmax_s32(a: int32x2_t, b: int32x2_t) -> int32x2_t { + vpmaxs_v2i32(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmax_u8(a: uint8x8_t, b: uint8x8_t) -> uint8x8_t { + vpmaxu_v8i8(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmax_u16(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t { + vpmaxu_v4i16(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(umaxp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmax_u32(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t { + vpmaxu_v2i32(a, b) +} + +/// Folding maximum of adjacent pairs +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpmax))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmaxp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpmax_f32(a: float32x2_t, b: float32x2_t) -> float32x2_t { + vpmaxf_v2f32(a, b) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vgetq_lane_u64(v: uint64x2_t) -> u64 { + static_assert_imm1!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_lane_u64(v: uint64x1_t) -> u64 { + static_assert!(IMM5 : i32 where IMM5 == 0); + simd_extract(v, 0) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_lane_u16(v: uint16x4_t) -> u16 { + static_assert_imm2!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_lane_s16(v: int16x4_t) -> i16 { + static_assert_imm2!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = 
"neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 2))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_lane_p16(v: poly16x4_t) -> p16 { + static_assert_imm2!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_lane_u32(v: uint32x2_t) -> u32 { + static_assert_imm1!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_lane_s32(v: int32x2_t) -> i32 { + static_assert_imm1!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_lane_f32(v: float32x2_t) -> f32 { + static_assert_imm1!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 1))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vgetq_lane_f32(v: float32x4_t) -> f32 { + static_assert_imm2!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_lane_p64(v: poly64x1_t) -> p64 { + static_assert!(IMM5 : i32 where IMM5 == 0); + simd_extract(v, IMM5 as u32) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vgetq_lane_p64(v: poly64x2_t) -> p64 { + static_assert_imm1!(IMM5); + simd_extract(v, IMM5 as u32) +} + +/// Move vector element to general-purpose register +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[rustc_legacy_const_generics(1)] +#[cfg_attr(test, assert_instr(nop, IMM5 = 0))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn 
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vget_lane_s64<const IMM5: i32>(v: int64x1_t) -> i64 {
+    static_assert!(IMM5 : i32 where IMM5 == 0);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 0))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_s64<const IMM5: i32>(v: int64x2_t) -> i64 {
+    static_assert_imm1!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_u16<const IMM5: i32>(v: uint16x8_t) -> u16 {
+    static_assert_imm3!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_u32<const IMM5: i32>(v: uint32x4_t) -> u32 {
+    static_assert_imm2!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_s16<const IMM5: i32>(v: int16x8_t) -> i16 {
+    static_assert_imm3!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_p16<const IMM5: i32>(v: poly16x8_t) -> p16 {
+    static_assert_imm3!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_s32<const IMM5: i32>(v: int32x4_t) -> i32 {
+    static_assert_imm2!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vget_lane_u8<const IMM5: i32>(v: uint8x8_t) -> u8 {
+    static_assert_imm3!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
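+// The lane bound is enforced at compile time (illustrative only; assumes a
+// NEON-capable target and `vdup_n_s64`, defined further below in this file):
+//
+// ```
+// unsafe {
+//     let v = vdup_n_s64(5);
+//     let x = vget_lane_s64::<0>(v); // the only valid lane of a 1-lane vector
+//     // `vget_lane_s64::<1>(v)` would be rejected by the static_assert! above.
+//     assert_eq!(x, 5);
+// }
+// ```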
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vget_lane_s8<const IMM5: i32>(v: int8x8_t) -> i8 {
+    static_assert_imm3!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vget_lane_p8<const IMM5: i32>(v: poly8x8_t) -> p8 {
+    static_assert_imm3!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_u8<const IMM5: i32>(v: uint8x16_t) -> u8 {
+    static_assert_imm4!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_s8<const IMM5: i32>(v: int8x16_t) -> i8 {
+    static_assert_imm4!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Move vector element to general-purpose register
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[rustc_legacy_const_generics(1)]
+#[cfg_attr(test, assert_instr(nop, IMM5 = 2))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vgetq_lane_p8<const IMM5: i32>(v: poly8x16_t) -> p8 {
+    static_assert_imm4!(IMM5);
+    simd_extract(v, IMM5 as u32)
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vget_high_s8(a: int8x16_t) -> int8x8_t {
+    simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15])
+}
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = "1.59.0")
+)]
+pub unsafe fn vget_high_s16(a: int16x8_t) -> int16x4_t {
+    simd_shuffle4!(a, a, [4, 5, 6, 7])
+}
+
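+// A small sketch of splitting a 128-bit vector into its 64-bit halves
+// (illustrative only; `vdupq_n_s16` and `vget_low_s16` are defined further
+// below in this file):
+//
+// ```
+// unsafe {
+//     let v = vdupq_n_s16(3);
+//     let hi = vget_high_s16(v); // lanes 4..=7
+//     let lo = vget_low_s16(v);  // lanes 0..=3
+//     assert_eq!(vget_lane_s16::<0>(hi), 3);
+//     assert_eq!(vget_lane_s16::<0>(lo), 3);
+// }
+// ```
+
+/// Duplicate vector element to vector or scalar
+#[inline]
+#[target_feature(enable = "neon")]
+#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))]
+#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))]
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))]
+#[cfg_attr(
+    target_arch = "aarch64",
+    stable(feature = "neon_intrinsics", since = 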
"1.59.0") +)] +pub unsafe fn vget_high_s32(a: int32x4_t) -> int32x2_t { + simd_shuffle2!(a, a, [2, 3]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_high_s64(a: int64x2_t) -> int64x1_t { + int64x1_t(simd_extract(a, 1)) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_high_u8(a: uint8x16_t) -> uint8x8_t { + simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_high_u16(a: uint16x8_t) -> uint16x4_t { + simd_shuffle4!(a, a, [4, 5, 6, 7]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_high_u32(a: uint32x4_t) -> uint32x2_t { + simd_shuffle2!(a, a, [2, 3]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_high_u64(a: uint64x2_t) -> uint64x1_t { + uint64x1_t(simd_extract(a, 1)) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_high_p8(a: poly8x16_t) -> poly8x8_t { + simd_shuffle8!(a, a, [8, 9, 10, 11, 12, 13, 14, 15]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_high_p16(a: 
poly16x8_t) -> poly16x4_t { + simd_shuffle4!(a, a, [4, 5, 6, 7]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ext))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_high_f32(a: float32x4_t) -> float32x2_t { + simd_shuffle2!(a, a, [2, 3]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "vget_low_s8", since = "1.60.0") +)] +pub unsafe fn vget_low_s8(a: int8x16_t) -> int8x8_t { + simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_s16(a: int16x8_t) -> int16x4_t { + simd_shuffle4!(a, a, [0, 1, 2, 3]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_s32(a: int32x4_t) -> int32x2_t { + simd_shuffle2!(a, a, [0, 1]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_s64(a: int64x2_t) -> int64x1_t { + int64x1_t(simd_extract(a, 0)) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_u8(a: uint8x16_t) -> uint8x8_t { + simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_u16(a: uint16x8_t) -> uint16x4_t { + simd_shuffle4!(a, a, [0, 1, 2, 3]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_u32(a: uint32x4_t) -> uint32x2_t { + simd_shuffle2!(a, a, [0, 1]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, 
assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_u64(a: uint64x2_t) -> uint64x1_t { + uint64x1_t(simd_extract(a, 0)) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_p8(a: poly8x16_t) -> poly8x8_t { + simd_shuffle8!(a, a, [0, 1, 2, 3, 4, 5, 6, 7]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_p16(a: poly16x8_t) -> poly16x4_t { + simd_shuffle4!(a, a, [0, 1, 2, 3]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(test, assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vget_low_f32(a: float32x4_t) -> float32x2_t { + simd_shuffle2!(a, a, [0, 1]) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_s8(value: i8) -> int8x16_t { + int8x16_t( + value, value, value, value, value, value, value, value, value, value, value, value, value, + value, value, value, + ) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_s16(value: i16) -> int16x8_t { + int16x8_t(value, value, value, value, value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_s32(value: i32) -> int32x4_t { + int32x4_t(value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_s64(value: i64) -> int64x2_t { + int64x2_t(value, value) +} + +/// Duplicate vector element 
to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_u8(value: u8) -> uint8x16_t { + uint8x16_t( + value, value, value, value, value, value, value, value, value, value, value, value, value, + value, value, value, + ) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_u16(value: u16) -> uint16x8_t { + uint16x8_t(value, value, value, value, value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_u32(value: u32) -> uint32x4_t { + uint32x4_t(value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_u64(value: u64) -> uint64x2_t { + uint64x2_t(value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_p8(value: p8) -> poly8x16_t { + poly8x16_t( + value, value, value, value, value, value, value, value, value, value, value, value, value, + value, value, value, + ) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_p16(value: p16) -> poly16x8_t { + poly16x8_t(value, value, value, value, value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = 
"neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdupq_n_f32(value: f32) -> float32x4_t { + float32x4_t(value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +/// +/// Private vfp4 version used by FMA intriniscs because LLVM does +/// not inline the non-vfp4 version in vfp4 functions. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +unsafe fn vdupq_n_f32_vfp4(value: f32) -> float32x4_t { + float32x4_t(value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_s8(value: i8) -> int8x8_t { + int8x8_t(value, value, value, value, value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_s16(value: i16) -> int16x4_t { + int16x4_t(value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_s32(value: i32) -> int32x2_t { + int32x2_t(value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_s64(value: i64) -> int64x1_t { + int64x1_t(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_u8(value: u8) -> uint8x8_t { + uint8x8_t(value, value, value, value, value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = 
"aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_u16(value: u16) -> uint16x4_t { + uint16x4_t(value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_u32(value: u32) -> uint32x2_t { + uint32x2_t(value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_u64(value: u64) -> uint64x1_t { + uint64x1_t(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_p8(value: p8) -> poly8x8_t { + poly8x8_t(value, value, value, value, value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_p16(value: p16) -> poly16x4_t { + poly16x4_t(value, value, value, value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vdup_n_f32(value: f32) -> float32x2_t { + float32x2_t(value, value) +} + +/// Duplicate vector element to vector or scalar +/// +/// Private vfp4 version used by FMA intriniscs because LLVM does +/// not inline the non-vfp4 version in vfp4 functions. 
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "vfp4"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +unsafe fn vdup_n_f32_vfp4(value: f32) -> float32x2_t { + float32x2_t(value, value) +} + +/// Load SIMD&FP register (immediate offset) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vldrq_p128(a: *const p128) -> p128 { + *a +} + +/// Store SIMD&FP register (immediate offset) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(nop))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vstrq_p128(a: *mut p128, b: p128) { + *a = b; +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_s8(value: i8) -> int8x8_t { + vdup_n_s8(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_s16(value: i16) -> int16x4_t { + vdup_n_s16(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_s32(value: i32) -> int32x2_t { + vdup_n_s32(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_s64(value: i64) -> int64x1_t { + vdup_n_s64(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] 
+#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_u8(value: u8) -> uint8x8_t { + vdup_n_u8(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_u16(value: u16) -> uint16x4_t { + vdup_n_u16(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_u32(value: u32) -> uint32x2_t { + vdup_n_u32(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(fmov))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_u64(value: u64) -> uint64x1_t { + vdup_n_u64(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_p8(value: p8) -> poly8x8_t { + vdup_n_p8(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_p16(value: p16) -> poly16x4_t { + vdup_n_p16(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmov_n_f32(value: f32) -> float32x2_t { + vdup_n_f32(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_s8(value: i8) -> int8x16_t { + vdupq_n_s8(value) +} + +/// Duplicate vector element to 
vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_s16(value: i16) -> int16x8_t { + vdupq_n_s16(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_s32(value: i32) -> int32x4_t { + vdupq_n_s32(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_s64(value: i64) -> int64x2_t { + vdupq_n_s64(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_u8(value: u8) -> uint8x16_t { + vdupq_n_u8(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_u16(value: u16) -> uint16x8_t { + vdupq_n_u16(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_u32(value: u32) -> uint32x4_t { + vdupq_n_u32(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vmov"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_u64(value: u64) -> uint64x2_t { + vdupq_n_u64(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.8"))] 
+#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_p8(value: p8) -> poly8x16_t { + vdupq_n_p8(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_p16(value: p16) -> poly16x8_t { + vdupq_n_p16(value) +} + +/// Duplicate vector element to vector or scalar +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vdup.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(dup))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vmovq_n_f32(value: f32) -> float32x4_t { + vdupq_n_f32(value) +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", N = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("nop", N = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vext_s64<const N: i32>(a: int64x1_t, _b: int64x1_t) -> int64x1_t { + static_assert!(N : i32 where N == 0); + a +} + +/// Extract vector from pair of vectors +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("nop", N = 0))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr("nop", N = 0))] +#[rustc_legacy_const_generics(2)] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vext_u64<const N: i32>(a: uint64x1_t, _b: uint64x1_t) -> uint64x1_t { + static_assert!(N : i32 where N == 0); + a +} + +/// Population count per byte. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vcnt_s8(a: int8x8_t) -> int8x8_t { + vcnt_s8_(a) +} +/// Population count per byte. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vcntq_s8(a: int8x16_t) -> int8x16_t { + vcntq_s8_(a) +} +/// Population count per byte. 
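+/// +/// An illustrative sketch (an added example, assuming an AArch64 target; `0b0110_1001` has four set bits, so every lane of the result is 4): +/// +/// ``` +/// #[cfg(target_arch = "aarch64")] +/// { +///     use std::arch::aarch64::*; +///     // Broadcast one byte to all eight lanes, then count the set bits per lane. +///     let r = unsafe { vcnt_u8(vdup_n_u8(0b0110_1001)) }; +///     assert_eq!(unsafe { vget_lane_u8::<0>(r) }, 4); +/// } +/// ```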
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vcnt_u8(a: uint8x8_t) -> uint8x8_t { + transmute(vcnt_s8_(transmute(a))) +} +/// Population count per byte. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vcntq_u8(a: uint8x16_t) -> uint8x16_t { + transmute(vcntq_s8_(transmute(a))) +} +/// Population count per byte. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vcnt_p8(a: poly8x8_t) -> poly8x8_t { + transmute(vcnt_s8_(transmute(a))) +} +/// Population count per byte. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vcnt))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(cnt))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vcntq_p8(a: poly8x16_t) -> poly8x16_t { + transmute(vcntq_s8_(transmute(a))) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev16_s8(a: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev16q_s8(a: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev16_u8(a: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", 
target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev16q_u8(a: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev16_p8(a: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev16.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev16))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev16q_p8(a: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, a, [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32_s8(a: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32q_s8(a: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32_u8(a: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32q_u8(a: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 
14, 13, 12]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32_s16(a: int16x4_t) -> int16x4_t { + simd_shuffle4!(a, a, [1, 0, 3, 2]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32q_s16(a: int16x8_t) -> int16x8_t { + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32_p16(a: poly16x4_t) -> poly16x4_t { + simd_shuffle4!(a, a, [1, 0, 3, 2]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32q_p16(a: poly16x8_t) -> poly16x8_t { + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32_u16(a: uint16x4_t) -> uint16x4_t { + simd_shuffle4!(a, a, [1, 0, 3, 2]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32q_u16(a: uint16x8_t) -> uint16x8_t { + simd_shuffle8!(a, a, [1, 0, 3, 2, 5, 4, 7, 6]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32_p8(a: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, a, [3, 2, 1, 
0, 7, 6, 5, 4]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev32.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev32))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev32q_p8(a: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, a, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64_s8(a: int8x8_t) -> int8x8_t { + simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_s8(a: int8x16_t) -> int8x16_t { + simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64_s16(a: int16x4_t) -> int16x4_t { + simd_shuffle4!(a, a, [3, 2, 1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_s16(a: int16x8_t) -> int16x8_t { + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64_s32(a: int32x2_t) -> int32x2_t { + simd_shuffle2!(a, a, [1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_s32(a: 
int32x4_t) -> int32x4_t { + simd_shuffle4!(a, a, [1, 0, 3, 2]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64_u8(a: uint8x8_t) -> uint8x8_t { + simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_u8(a: uint8x16_t) -> uint8x16_t { + simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64_u16(a: uint16x4_t) -> uint16x4_t { + simd_shuffle4!(a, a, [3, 2, 1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_u16(a: uint16x8_t) -> uint16x8_t { + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64_u32(a: uint32x2_t) -> uint32x2_t { + simd_shuffle2!(a, a, [1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_u32(a: uint32x4_t) -> uint32x4_t { + simd_shuffle4!(a, a, [1, 0, 3, 2]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe 
fn vrev64_f32(a: float32x2_t) -> float32x2_t { + simd_shuffle2!(a, a, [1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.32"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_f32(a: float32x4_t) -> float32x4_t { + simd_shuffle4!(a, a, [1, 0, 3, 2]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64_p8(a: poly8x8_t) -> poly8x8_t { + simd_shuffle8!(a, a, [7, 6, 5, 4, 3, 2, 1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.8"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_p8(a: poly8x16_t) -> poly8x16_t { + simd_shuffle16!(a, a, [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64_p16(a: poly16x4_t) -> poly16x4_t { + simd_shuffle4!(a, a, [3, 2, 1, 0]) +} + +/// Reversing vector elements (swap endianness) +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr("vrev64.16"))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(rev64))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vrev64q_p16(a: poly16x8_t) -> poly16x8_t { + simd_shuffle8!(a, a, [3, 2, 1, 0, 7, 6, 5, 4]) +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadal_s8(a: int16x4_t, b: int8x8_t) -> int16x4_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_s8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s8_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. 
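+/// +/// An illustrative sketch (an added example, assuming an AArch64 target): each lane of the accumulator `a` gains the sum of one adjacent pair of lanes of `b`. +/// +/// ``` +/// #[cfg(target_arch = "aarch64")] +/// { +///     use std::arch::aarch64::*; +///     let acc = unsafe { vdup_n_s32(1) };    // [1, 1] +///     let b = unsafe { vdup_n_s16(2) };      // [2, 2, 2, 2] +///     let r = unsafe { vpadal_s16(acc, b) }; // each lane: 1 + (2 + 2) = 5 +///     assert_eq!(unsafe { vget_lane_s32::<0>(r) }, 5); +/// } +/// ```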
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadal_s16(a: int32x2_t, b: int16x4_t) -> int32x2_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_s16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s16_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadal_s32(a: int64x1_t, b: int32x2_t) -> int64x1_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_s32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_s32_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadalq_s8(a: int16x8_t, b: int8x16_t) -> int16x8_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_s8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s8_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadalq_s16(a: int32x4_t, b: int16x8_t) -> int32x4_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_s16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s16_(b), a) + } +} + +/// Signed Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.s32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(sadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadalq_s32(a: int64x2_t, b: int32x4_t) -> int64x2_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_s32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_s32_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. 
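+/// +/// An illustrative sketch (an added example, assuming an AArch64 target): +/// +/// ``` +/// #[cfg(target_arch = "aarch64")] +/// { +///     use std::arch::aarch64::*; +///     let acc = unsafe { vdup_n_u16(10) };  // [10, 10, 10, 10] +///     let b = unsafe { vdup_n_u8(3) };      // eight lanes of 3 +///     let r = unsafe { vpadal_u8(acc, b) }; // each lane: 10 + (3 + 3) = 16 +///     assert_eq!(unsafe { vget_lane_u16::<0>(r) }, 16); +/// } +/// ```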
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadal_u8(a: uint16x4_t, b: uint8x8_t) -> uint16x4_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_u8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u8_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadal_u16(a: uint32x2_t, b: uint16x4_t) -> uint32x2_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_u16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u16_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadal_u32(a: uint64x1_t, b: uint32x2_t) -> uint64x1_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadal_u32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddl_u32_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u8))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadalq_u8(a: uint16x8_t, b: uint8x16_t) -> uint16x8_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_u8_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u8_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. +#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u16))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadalq_u16(a: uint32x4_t, b: uint16x8_t) -> uint32x4_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_u16_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u16_(b), a) + } +} + +/// Unsigned Add and Accumulate Long Pairwise. 
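+/// +/// An illustrative sketch (an added example, assuming an AArch64 target): +/// +/// ``` +/// #[cfg(target_arch = "aarch64")] +/// { +///     use std::arch::aarch64::*; +///     let acc = unsafe { vdupq_n_u64(1) };    // [1, 1] +///     let b = unsafe { vdupq_n_u32(2) };      // [2, 2, 2, 2] +///     let r = unsafe { vpadalq_u32(acc, b) }; // each lane: 1 + (2 + 2) = 5 +///     assert_eq!(unsafe { vgetq_lane_u64::<0>(r) }, 5); +/// } +/// ```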
+#[inline] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v7"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(vpadal.u32))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(uadalp))] +#[cfg_attr( + target_arch = "aarch64", + stable(feature = "neon_intrinsics", since = "1.59.0") +)] +pub unsafe fn vpadalq_u32(a: uint64x2_t, b: uint32x4_t) -> uint64x2_t { + #[cfg(target_arch = "arm")] + { + crate::core_arch::arm::neon::vpadalq_u32_(a, b) + } + #[cfg(target_arch = "aarch64")] + { + simd_add(vpaddlq_u32_(b), a) + } +} + +/// 8-bit integer matrix multiply-accumulate +#[inline] +#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(smmla))] +pub unsafe fn vmmlaq_s32(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.smmla.v4i32.v16i8")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.smmla.v4i32.v16i8" + )] + fn vmmlaq_s32_(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t; + } + vmmlaq_s32_(a, b, c) +} + +/// 8-bit integer matrix multiply-accumulate +#[inline] +#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(ummla))] +pub unsafe fn vmmlaq_u32(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.ummla.v4i32.v16i8")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.ummla.v4i32.v16i8" + )] + fn vmmlaq_u32_(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t; + } + vmmlaq_u32_(a, b, c) +} + +/// Unsigned and signed 8-bit integer matrix multiply-accumulate +#[inline] +#[cfg_attr(not(bootstrap), target_feature(enable = "i8mm"))] +#[target_feature(enable = "neon")] +#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))] +#[cfg_attr(all(test, target_arch = "arm"), assert_instr(nop))] +#[cfg_attr(all(test, target_arch = "aarch64"), assert_instr(usmmla))] +pub unsafe fn vusmmlaq_s32(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t { + #[allow(improper_ctypes)] + extern "unadjusted" { + #[cfg_attr(target_arch = "arm", link_name = "llvm.arm.neon.usmmla.v4i32.v16i8")] + #[cfg_attr( + target_arch = "aarch64", + link_name = "llvm.aarch64.neon.usmmla.v4i32.v16i8" + )] + fn vusmmlaq_s32_(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t; + } + vusmmlaq_s32_(a, b, c) +} + +#[cfg(test)] +mod tests { + use super::*; + #[cfg(target_arch = "aarch64")] + use crate::core_arch::aarch64::*; + #[cfg(target_arch = "arm")] + use crate::core_arch::arm::*; + use crate::core_arch::arm_shared::test_support::*; + use crate::core_arch::simd::*; + use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec}; + use stdarch_test::simd_test; + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: i8 = 42; + let e = i8x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: i8x8 = transmute(vld1_lane_s8::<7>(&elem, 
transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let elem: i8 = 42; + let e = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42); + let r: i8x16 = transmute(vld1q_lane_s8::<15>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_s16() { + let a = i16x4::new(0, 1, 2, 3); + let elem: i16 = 42; + let e = i16x4::new(0, 1, 2, 42); + let r: i16x4 = transmute(vld1_lane_s16::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: i16 = 42; + let e = i16x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: i16x8 = transmute(vld1q_lane_s16::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_s32() { + let a = i32x2::new(0, 1); + let elem: i32 = 42; + let e = i32x2::new(0, 42); + let r: i32x2 = transmute(vld1_lane_s32::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_s32() { + let a = i32x4::new(0, 1, 2, 3); + let elem: i32 = 42; + let e = i32x4::new(0, 1, 2, 42); + let r: i32x4 = transmute(vld1q_lane_s32::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_s64() { + let a = i64x1::new(0); + let elem: i64 = 42; + let e = i64x1::new(42); + let r: i64x1 = transmute(vld1_lane_s64::<0>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_s64() { + let a = i64x2::new(0, 1); + let elem: i64 = 42; + let e = i64x2::new(0, 42); + let r: i64x2 = transmute(vld1q_lane_s64::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: u8 = 42; + let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: u8x8 = transmute(vld1_lane_u8::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let elem: u8 = 42; + let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42); + let r: u8x16 = transmute(vld1q_lane_u8::<15>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_u16() { + let a = u16x4::new(0, 1, 2, 3); + let elem: u16 = 42; + let e = u16x4::new(0, 1, 2, 42); + let r: u16x4 = transmute(vld1_lane_u16::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: u16 = 42; + let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: u16x8 = transmute(vld1q_lane_u16::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_u32() { + let a = u32x2::new(0, 1); + let elem: u32 = 42; + let e = u32x2::new(0, 42); + let r: u32x2 = transmute(vld1_lane_u32::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_u32() { + let a = u32x4::new(0, 1, 2, 3); + let elem: u32 = 42; + let e = u32x4::new(0, 1, 2, 42); + let r: u32x4 = transmute(vld1q_lane_u32::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn 
test_vld1_lane_u64() { + let a = u64x1::new(0); + let elem: u64 = 42; + let e = u64x1::new(42); + let r: u64x1 = transmute(vld1_lane_u64::<0>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_u64() { + let a = u64x2::new(0, 1); + let elem: u64 = 42; + let e = u64x2::new(0, 42); + let r: u64x2 = transmute(vld1q_lane_u64::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: p8 = 42; + let e = u8x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: u8x8 = transmute(vld1_lane_p8::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let elem: p8 = 42; + let e = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 42); + let r: u8x16 = transmute(vld1q_lane_p8::<15>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_p16() { + let a = u16x4::new(0, 1, 2, 3); + let elem: p16 = 42; + let e = u16x4::new(0, 1, 2, 42); + let r: u16x4 = transmute(vld1_lane_p16::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_p16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let elem: p16 = 42; + let e = u16x8::new(0, 1, 2, 3, 4, 5, 6, 42); + let r: u16x8 = transmute(vld1q_lane_p16::<7>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1_lane_p64() { + let a = u64x1::new(0); + let elem: u64 = 42; + let e = u64x1::new(42); + let r: u64x1 = transmute(vld1_lane_p64::<0>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1q_lane_p64() { + let a = u64x2::new(0, 1); + let elem: u64 = 42; + let e = u64x2::new(0, 42); + let r: u64x2 = transmute(vld1q_lane_p64::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_lane_f32() { + let a = f32x2::new(0., 1.); + let elem: f32 = 42.; + let e = f32x2::new(0., 42.); + let r: f32x2 = transmute(vld1_lane_f32::<1>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_lane_f32() { + let a = f32x4::new(0., 1., 2., 3.); + let elem: f32 = 42.; + let e = f32x4::new(0., 1., 2., 42.); + let r: f32x4 = transmute(vld1q_lane_f32::<3>(&elem, transmute(a))); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_s8() { + let elem: i8 = 42; + let e = i8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: i8x8 = transmute(vld1_dup_s8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_s8() { + let elem: i8 = 42; + let e = i8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: i8x16 = transmute(vld1q_dup_s8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_s16() { + let elem: i16 = 42; + let e = i16x4::new(42, 42, 42, 42); + let r: i16x4 = transmute(vld1_dup_s16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_s16() { + let elem: i16 = 42; + let e = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: i16x8 = transmute(vld1q_dup_s16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_s32() { + let elem: i32 = 42; + let e = i32x2::new(42, 
42); + let r: i32x2 = transmute(vld1_dup_s32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_s32() { + let elem: i32 = 42; + let e = i32x4::new(42, 42, 42, 42); + let r: i32x4 = transmute(vld1q_dup_s32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_s64() { + let elem: i64 = 42; + let e = i64x1::new(42); + let r: i64x1 = transmute(vld1_dup_s64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_s64() { + let elem: i64 = 42; + let e = i64x2::new(42, 42); + let r: i64x2 = transmute(vld1q_dup_s64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_u8() { + let elem: u8 = 42; + let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: u8x8 = transmute(vld1_dup_u8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_u8() { + let elem: u8 = 42; + let e = u8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: u8x16 = transmute(vld1q_dup_u8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_u16() { + let elem: u16 = 42; + let e = u16x4::new(42, 42, 42, 42); + let r: u16x4 = transmute(vld1_dup_u16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_u16() { + let elem: u16 = 42; + let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: u16x8 = transmute(vld1q_dup_u16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_u32() { + let elem: u32 = 42; + let e = u32x2::new(42, 42); + let r: u32x2 = transmute(vld1_dup_u32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_u32() { + let elem: u32 = 42; + let e = u32x4::new(42, 42, 42, 42); + let r: u32x4 = transmute(vld1q_dup_u32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_u64() { + let elem: u64 = 42; + let e = u64x1::new(42); + let r: u64x1 = transmute(vld1_dup_u64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_u64() { + let elem: u64 = 42; + let e = u64x2::new(42, 42); + let r: u64x2 = transmute(vld1q_dup_u64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_p8() { + let elem: p8 = 42; + let e = u8x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: u8x8 = transmute(vld1_dup_p8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_p8() { + let elem: p8 = 42; + let e = u8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: u8x16 = transmute(vld1q_dup_p8(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_p16() { + let elem: p16 = 42; + let e = u16x4::new(42, 42, 42, 42); + let r: u16x4 = transmute(vld1_dup_p16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_p16() { + let elem: p16 = 42; + let e = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let r: u16x8 = transmute(vld1q_dup_p16(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1_dup_p64() { + let elem: u64 = 42; + let e = u64x1::new(42); + let r: u64x1 = transmute(vld1_dup_p64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon,aes")] + unsafe fn test_vld1q_dup_p64() { + let elem: u64 = 42; + let e = u64x2::new(42, 42); + let r: u64x2 = 
transmute(vld1q_dup_p64(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1_dup_f32() { + let elem: f32 = 42.; + let e = f32x2::new(42., 42.); + let r: f32x2 = transmute(vld1_dup_f32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vld1q_dup_f32() { + let elem: f32 = 42.; + let e = f32x4::new(42., 42., 42., 42.); + let r: f32x4 = transmute(vld1q_dup_f32(&elem)); + assert_eq!(r, e) + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u8() { + let v = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = vget_lane_u8::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u32() { + let v = i32x4::new(1, 2, 3, 4); + let r = vgetq_lane_u32::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_s32() { + let v = i32x4::new(1, 2, 3, 4); + let r = vgetq_lane_s32::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u64() { + let v: u64 = 1; + let r = vget_lane_u64::<0>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u16() { + let v = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r = vgetq_lane_u16::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_s8() { + let v = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = vget_lane_s8::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_s8::<4>(transmute(v)); + assert_eq!(r, 4); + let r = vget_lane_s8::<5>(transmute(v)); + assert_eq!(r, 5); + } + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_p8() { + let v = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = vget_lane_p8::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_p8::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vget_lane_p8::<5>(transmute(v)); + assert_eq!(r, 5); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_p16() { + let v = u16x4::new(0, 1, 2, 3); + let r = vget_lane_p16::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_p16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vget_lane_p16::<0>(transmute(v)); + assert_eq!(r, 0); + let r = vget_lane_p16::<1>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_s16() { + let v = i16x4::new(0, 1, 2, 3); + let r = vget_lane_s16::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_s16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vget_lane_s16::<0>(transmute(v)); + assert_eq!(r, 0); + let r = vget_lane_s16::<1>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u16() { + let v = u16x4::new(0, 1, 2, 3); + let r = vget_lane_u16::<2>(transmute(v)); + assert_eq!(r, 2); + let r = vget_lane_u16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vget_lane_u16::<0>(transmute(v)); + assert_eq!(r, 0); + let r = vget_lane_u16::<1>(transmute(v)); + assert_eq!(r, 1); + } + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_f32() { + let v = f32x2::new(0.0, 1.0); + let r = vget_lane_f32::<1>(transmute(v)); + assert_eq!(r, 1.0); + let r = vget_lane_f32::<0>(transmute(v)); + assert_eq!(r, 0.0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_s32() { + let v = i32x2::new(0, 1); + let r = vget_lane_s32::<1>(transmute(v)); + assert_eq!(r, 1); + let r = vget_lane_s32::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_u32() 
{ + let v = u32x2::new(0, 1); + let r = vget_lane_u32::<1>(transmute(v)); + assert_eq!(r, 1); + let r = vget_lane_u32::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_s64() { + let v = i64x1::new(1); + let r = vget_lane_s64::<0>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_lane_p64() { + let v = u64x1::new(1); + let r = vget_lane_p64::<0>(transmute(v)); + assert_eq!(r, 1); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_s8() { + let v = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = vgetq_lane_s8::<7>(transmute(v)); + assert_eq!(r, 7); + let r = vgetq_lane_s8::<13>(transmute(v)); + assert_eq!(r, 13); + let r = vgetq_lane_s8::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_s8::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_p8() { + let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = vgetq_lane_p8::<7>(transmute(v)); + assert_eq!(r, 7); + let r = vgetq_lane_p8::<13>(transmute(v)); + assert_eq!(r, 13); + let r = vgetq_lane_p8::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_p8::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u8() { + let v = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = vgetq_lane_u8::<7>(transmute(v)); + assert_eq!(r, 7); + let r = vgetq_lane_u8::<13>(transmute(v)); + assert_eq!(r, 13); + let r = vgetq_lane_u8::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_u8::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_s16() { + let v = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = vgetq_lane_s16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_s16::<6>(transmute(v)); + assert_eq!(r, 6); + let r = vgetq_lane_s16::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_p16() { + let v = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = vgetq_lane_p16::<3>(transmute(v)); + assert_eq!(r, 3); + let r = vgetq_lane_p16::<7>(transmute(v)); + assert_eq!(r, 7); + let r = vgetq_lane_p16::<1>(transmute(v)); + assert_eq!(r, 1); + } + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_f32() { + let v = f32x4::new(0.0, 1.0, 2.0, 3.0); + let r = vgetq_lane_f32::<3>(transmute(v)); + assert_eq!(r, 3.0); + let r = vgetq_lane_f32::<0>(transmute(v)); + assert_eq!(r, 0.0); + let r = vgetq_lane_f32::<2>(transmute(v)); + assert_eq!(r, 2.0); + let r = vgetq_lane_f32::<1>(transmute(v)); + assert_eq!(r, 1.0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_s64() { + let v = i64x2::new(0, 1); + let r = vgetq_lane_s64::<1>(transmute(v)); + assert_eq!(r, 1); + let r = vgetq_lane_s64::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_p64() { + let v = u64x2::new(0, 1); + let r = vgetq_lane_p64::<1>(transmute(v)); + assert_eq!(r, 1); + let r = vgetq_lane_p64::<0>(transmute(v)); + assert_eq!(r, 0); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_s64() { + let a: i64x1 = i64x1::new(0); + let b: i64x1 = i64x1::new(1); + let e: i64x1 = i64x1::new(0); + let r: i64x1 = transmute(vext_s64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vext_u64() { + let a: u64x1 = u64x1::new(0); + let b: u64x1 = 
u64x1::new(1); + let e: u64x1 = u64x1::new(0); + let r: u64x1 = transmute(vext_u64::<0>(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_s8() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = i8x8::new(9, 10, 11, 12, 13, 14, 15, 16); + let r: i8x8 = transmute(vget_high_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = i16x4::new(5, 6, 7, 8); + let r: i16x4 = transmute(vget_high_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_s32() { + let a = i32x4::new(1, 2, 3, 4); + let e = i32x2::new(3, 4); + let r: i32x2 = transmute(vget_high_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_s64() { + let a = i64x2::new(1, 2); + let e = i64x1::new(2); + let r: i64x1 = transmute(vget_high_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x8 = transmute(vget_high_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u16x4::new(5, 6, 7, 8); + let r: u16x4 = transmute(vget_high_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_u32() { + let a = u32x4::new(1, 2, 3, 4); + let e = u32x2::new(3, 4); + let r: u32x2 = transmute(vget_high_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_u64() { + let a = u64x2::new(1, 2); + let e = u64x1::new(2); + let r: u64x1 = transmute(vget_high_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_p8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x8::new(9, 10, 11, 12, 13, 14, 15, 16); + let r: u8x8 = transmute(vget_high_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_p16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u16x4::new(5, 6, 7, 8); + let r: u16x4 = transmute(vget_high_p16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_high_f32() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let e = f32x2::new(3.0, 4.0); + let r: f32x2 = transmute(vget_high_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_s8() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vget_low_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vget_low_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_s32() { + let a = i32x4::new(1, 2, 3, 4); + let e = i32x2::new(1, 2); + let r: i32x2 = transmute(vget_low_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_s64() { + let a = i64x2::new(1, 2); + let e = i64x1::new(1); + let r: i64x1 
= transmute(vget_low_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vget_low_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vget_low_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_u32() { + let a = u32x4::new(1, 2, 3, 4); + let e = u32x2::new(1, 2); + let r: u32x2 = transmute(vget_low_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_u64() { + let a = u64x2::new(1, 2); + let e = u64x1::new(1); + let r: u64x1 = transmute(vget_low_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_p8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vget_low_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_p16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vget_low_p16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vget_low_f32() { + let a = f32x4::new(1.0, 2.0, 3.0, 4.0); + let e = f32x2::new(1.0, 2.0); + let r: f32x2 = transmute(vget_low_f32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s8() { + let v: i8 = 42; + let e = i8x16::new( + 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, 42, + ); + let r: i8x16 = transmute(vdupq_n_s8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s16() { + let v: i16 = 64; + let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: i16x8 = transmute(vdupq_n_s16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s32() { + let v: i32 = 64; + let e = i32x4::new(64, 64, 64, 64); + let r: i32x4 = transmute(vdupq_n_s32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_s64() { + let v: i64 = 64; + let e = i64x2::new(64, 64); + let r: i64x2 = transmute(vdupq_n_s64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u8() { + let v: u8 = 64; + let e = u8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: u8x16 = transmute(vdupq_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u16() { + let v: u16 = 64; + let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u16x8 = transmute(vdupq_n_u16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u32() { + let v: u32 = 64; + let e = u32x4::new(64, 64, 64, 64); + let r: u32x4 = transmute(vdupq_n_u32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_u64() { + let v: u64 = 64; + let e = u64x2::new(64, 64); + let r: u64x2 = transmute(vdupq_n_u64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_p8() { + let v: p8 = 64; + let e = u8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: u8x16 = 
transmute(vdupq_n_p8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_p16() { + let v: p16 = 64; + let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u16x8 = transmute(vdupq_n_p16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdupq_n_f32() { + let v: f32 = 64.0; + let e = f32x4::new(64.0, 64.0, 64.0, 64.0); + let r: f32x4 = transmute(vdupq_n_f32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_s8() { + let v: i8 = 64; + let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: i8x8 = transmute(vdup_n_s8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_s16() { + let v: i16 = 64; + let e = i16x4::new(64, 64, 64, 64); + let r: i16x4 = transmute(vdup_n_s16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_s32() { + let v: i32 = 64; + let e = i32x2::new(64, 64); + let r: i32x2 = transmute(vdup_n_s32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_s64() { + let v: i64 = 64; + let e = i64x1::new(64); + let r: i64x1 = transmute(vdup_n_s64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_u8() { + let v: u8 = 64; + let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u8x8 = transmute(vdup_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_u16() { + let v: u16 = 64; + let e = u16x4::new(64, 64, 64, 64); + let r: u16x4 = transmute(vdup_n_u16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_u32() { + let v: u32 = 64; + let e = u32x2::new(64, 64); + let r: u32x2 = transmute(vdup_n_u32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_u64() { + let v: u64 = 64; + let e = u64x1::new(64); + let r: u64x1 = transmute(vdup_n_u64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_p8() { + let v: p8 = 64; + let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u8x8 = transmute(vdup_n_p8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_p16() { + let v: p16 = 64; + let e = u16x4::new(64, 64, 64, 64); + let r: u16x4 = transmute(vdup_n_p16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vdup_n_f32() { + let v: f32 = 64.0; + let e = f32x2::new(64.0, 64.0); + let r: f32x2 = transmute(vdup_n_f32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vldrq_p128() { + let v: [p128; 2] = [1, 2]; + let e: p128 = 2; + let r: p128 = vldrq_p128(v[1..].as_ptr()); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vstrq_p128() { + let v: [p128; 2] = [1, 2]; + let e: p128 = 2; + let mut r: p128 = 1; + vstrq_p128(&mut r, v[1]); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_s8() { + let v: i8 = 64; + let e = i8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: i8x8 = transmute(vmov_n_s8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_s16() { + let v: i16 = 64; + let e = i16x4::new(64, 64, 64, 64); + let r: i16x4 = transmute(vmov_n_s16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_s32() { + let v: i32 = 64; + let e = i32x2::new(64, 64); + let r: i32x2 = transmute(vmov_n_s32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_s64() { + 
let v: i64 = 64; + let e = i64x1::new(64); + let r: i64x1 = transmute(vmov_n_s64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_u8() { + let v: u8 = 64; + let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u8x8 = transmute(vmov_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_u16() { + let v: u16 = 64; + let e = u16x4::new(64, 64, 64, 64); + let r: u16x4 = transmute(vmov_n_u16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_u32() { + let v: u32 = 64; + let e = u32x2::new(64, 64); + let r: u32x2 = transmute(vmov_n_u32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_u64() { + let v: u64 = 64; + let e = u64x1::new(64); + let r: u64x1 = transmute(vmov_n_u64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_p8() { + let v: p8 = 64; + let e = u8x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u8x8 = transmute(vmov_n_p8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_p16() { + let v: p16 = 64; + let e = u16x4::new(64, 64, 64, 64); + let r: u16x4 = transmute(vmov_n_p16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmov_n_f32() { + let v: f32 = 64.0; + let e = f32x2::new(64.0, 64.0); + let r: f32x2 = transmute(vmov_n_f32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_s8() { + let v: i8 = 64; + let e = i8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: i8x16 = transmute(vmovq_n_s8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_s16() { + let v: i16 = 64; + let e = i16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: i16x8 = transmute(vmovq_n_s16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_s32() { + let v: i32 = 64; + let e = i32x4::new(64, 64, 64, 64); + let r: i32x4 = transmute(vmovq_n_s32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_s64() { + let v: i64 = 64; + let e = i64x2::new(64, 64); + let r: i64x2 = transmute(vmovq_n_s64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u8() { + let v: u8 = 64; + let e = u8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: u8x16 = transmute(vmovq_n_u8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u16() { + let v: u16 = 64; + let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u16x8 = transmute(vmovq_n_u16(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u32() { + let v: u32 = 64; + let e = u32x4::new(64, 64, 64, 64); + let r: u32x4 = transmute(vmovq_n_u32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_u64() { + let v: u64 = 64; + let e = u64x2::new(64, 64); + let r: u64x2 = transmute(vmovq_n_u64(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_p8() { + let v: p8 = 64; + let e = u8x16::new( + 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, 64, + ); + let r: u8x16 = transmute(vmovq_n_p8(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_p16() { + let v: p16 = 64; + let e = u16x8::new(64, 64, 64, 64, 64, 64, 64, 64); + let r: u16x8 = transmute(vmovq_n_p16(v)); + assert_eq!(r, e); + } + + 
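+    // The `vmov_n_*`/`vmovq_n_*` tests (above and below) mirror the `vdup` tests:
+    // both families duplicate a scalar into every lane and lower to the same
+    // duplication instruction, so the expected vectors are identical.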
#[simd_test(enable = "neon")] + unsafe fn test_vmovq_n_f32() { + let v: f32 = 64.0; + let e = f32x4::new(64.0, 64.0, 64.0, 64.0); + let r: f32x4 = transmute(vmovq_n_f32(v)); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vgetq_lane_u64() { + let v = i64x2::new(1, 2); + let r = vgetq_lane_u64::<1>(transmute(v)); + assert_eq!(r, 2); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s8() { + test_ari_s8( + |i, j| vadd_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s8() { + testq_ari_s8( + |i, j| vaddq_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s16() { + test_ari_s16( + |i, j| vadd_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s16() { + testq_ari_s16( + |i, j| vaddq_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_s32() { + test_ari_s32( + |i, j| vadd_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_s32() { + testq_ari_s32( + |i, j| vaddq_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_add(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u8() { + test_ari_u8( + |i, j| vadd_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_u8() { + testq_ari_u8( + |i, j| vaddq_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u16() { + test_ari_u16( + |i, j| vadd_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_u16() { + testq_ari_u16( + |i, j| vaddq_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vadd_u32() { + test_ari_u32( + |i, j| vadd_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_u32() { + testq_ari_u32( + |i, j| vaddq_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_add(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vadd_f32() { + test_ari_f32(|i, j| vadd_f32(i, j), |a: f32, b: f32| -> f32 { a + b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaddq_f32() { + testq_ari_f32(|i, j| vaddq_f32(i, j), |a: f32, b: f32| -> f32 { a + b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s8() { + let v = i8::MAX; + let a = i8x8::new(v, v, v, v, v, v, v, v); + let v = 2 * (v as i16); + let e = i16x8::new(v, v, v, v, v, v, v, v); + let r: i16x8 = transmute(vaddl_s8(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s16() { + let v = i16::MAX; + let a = i16x4::new(v, v, v, v); + let v = 2 * (v as i32); + let e = i32x4::new(v, v, v, v); + let r: i32x4 = transmute(vaddl_s16(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_s32() { + let v = i32::MAX; + let a = i32x2::new(v, v); + let v = 2 * (v as i64); + let e = i64x2::new(v, v); + let r: i64x2 = transmute(vaddl_s32(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u8() { + let v = u8::MAX; + let a = 
u8x8::new(v, v, v, v, v, v, v, v); + let v = 2 * (v as u16); + let e = u16x8::new(v, v, v, v, v, v, v, v); + let r: u16x8 = transmute(vaddl_u8(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u16() { + let v = u16::MAX; + let a = u16x4::new(v, v, v, v); + let v = 2 * (v as u32); + let e = u32x4::new(v, v, v, v); + let r: u32x4 = transmute(vaddl_u16(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_u32() { + let v = u32::MAX; + let a = u32x2::new(v, v); + let v = 2 * (v as u64); + let e = u64x2::new(v, v); + let r: u64x2 = transmute(vaddl_u32(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let x = i8::MAX; + let b = i8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); + let x = x as i16; + let e = i16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15); + let r: i16x8 = transmute(vaddl_high_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let x = i16::MAX; + let b = i16x8::new(x, x, x, x, x, x, x, x); + let x = x as i32; + let e = i32x4::new(x + 4, x + 5, x + 6, x + 7); + let r: i32x4 = transmute(vaddl_high_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_s32() { + let a = i32x4::new(0, 1, 2, 3); + let x = i32::MAX; + let b = i32x4::new(x, x, x, x); + let x = x as i64; + let e = i64x2::new(x + 2, x + 3); + let r: i64x2 = transmute(vaddl_high_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let x = u8::MAX; + let b = u8x16::new(x, x, x, x, x, x, x, x, x, x, x, x, x, x, x, x); + let x = x as u16; + let e = u16x8::new(x + 8, x + 9, x + 10, x + 11, x + 12, x + 13, x + 14, x + 15); + let r: u16x8 = transmute(vaddl_high_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let x = u16::MAX; + let b = u16x8::new(x, x, x, x, x, x, x, x); + let x = x as u32; + let e = u32x4::new(x + 4, x + 5, x + 6, x + 7); + let r: u32x4 = transmute(vaddl_high_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddl_high_u32() { + let a = u32x4::new(0, 1, 2, 3); + let x = u32::MAX; + let b = u32x4::new(x, x, x, x); + let x = x as u64; + let e = u64x2::new(x + 2, x + 3); + let r: u64x2 = transmute(vaddl_high_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_s8() { + let x = i16::MAX; + let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7); + let y = i8::MAX; + let b = i8x8::new(y, y, y, y, y, y, y, y); + let y = y as i16; + let e = i16x8::new( + x.wrapping_add(y), + 1 + y, + 2 + y, + 3 + y, + 4 + y, + 5 + y, + 6 + y, + 7 + y, + ); + let r: i16x8 = transmute(vaddw_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_s16() { + let x = i32::MAX; + let a = i32x4::new(x, 1, 2, 3); + let y = i16::MAX; + let b = i16x4::new(y, y, y, y); + let y = y as i32; + let e = i32x4::new(x.wrapping_add(y), 
1 + y, 2 + y, 3 + y); + let r: i32x4 = transmute(vaddw_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_s32() { + let x = i64::MAX; + let a = i64x2::new(x, 1); + let y = i32::MAX; + let b = i32x2::new(y, y); + let y = y as i64; + let e = i64x2::new(x.wrapping_add(y), 1 + y); + let r: i64x2 = transmute(vaddw_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_u8() { + let x = u16::MAX; + let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7); + let y = u8::MAX; + let b = u8x8::new(y, y, y, y, y, y, y, y); + let y = y as u16; + let e = u16x8::new( + x.wrapping_add(y), + 1 + y, + 2 + y, + 3 + y, + 4 + y, + 5 + y, + 6 + y, + 7 + y, + ); + let r: u16x8 = transmute(vaddw_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_u16() { + let x = u32::MAX; + let a = u32x4::new(x, 1, 2, 3); + let y = u16::MAX; + let b = u16x4::new(y, y, y, y); + let y = y as u32; + let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); + let r: u32x4 = transmute(vaddw_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_u32() { + let x = u64::MAX; + let a = u64x2::new(x, 1); + let y = u32::MAX; + let b = u32x2::new(y, y); + let y = y as u64; + let e = u64x2::new(x.wrapping_add(y), 1 + y); + let r: u64x2 = transmute(vaddw_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_s8() { + let x = i16::MAX; + let a = i16x8::new(x, 1, 2, 3, 4, 5, 6, 7); + let y = i8::MAX; + let b = i8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y); + let y = y as i16; + let e = i16x8::new( + x.wrapping_add(y), + 1 + y, + 2 + y, + 3 + y, + 4 + y, + 5 + y, + 6 + y, + 7 + y, + ); + let r: i16x8 = transmute(vaddw_high_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_s16() { + let x = i32::MAX; + let a = i32x4::new(x, 1, 2, 3); + let y = i16::MAX; + let b = i16x8::new(0, 0, 0, 0, y, y, y, y); + let y = y as i32; + let e = i32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); + let r: i32x4 = transmute(vaddw_high_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_s32() { + let x = i64::MAX; + let a = i64x2::new(x, 1); + let y = i32::MAX; + let b = i32x4::new(0, 0, y, y); + let y = y as i64; + let e = i64x2::new(x.wrapping_add(y), 1 + y); + let r: i64x2 = transmute(vaddw_high_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_u8() { + let x = u16::MAX; + let a = u16x8::new(x, 1, 2, 3, 4, 5, 6, 7); + let y = u8::MAX; + let b = u8x16::new(0, 0, 0, 0, 0, 0, 0, 0, y, y, y, y, y, y, y, y); + let y = y as u16; + let e = u16x8::new( + x.wrapping_add(y), + 1 + y, + 2 + y, + 3 + y, + 4 + y, + 5 + y, + 6 + y, + 7 + y, + ); + let r: u16x8 = transmute(vaddw_high_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_u16() { + let x = u32::MAX; + let a = u32x4::new(x, 1, 2, 3); + let y = u16::MAX; + let b = u16x8::new(0, 0, 0, 0, y, y, y, y); + let y = y as u32; + let e = u32x4::new(x.wrapping_add(y), 1 + y, 2 + y, 3 + y); + let r: u32x4 = transmute(vaddw_high_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddw_high_u32() 
{ + let x = u64::MAX; + let a = u64x2::new(x, 1); + let y = u32::MAX; + let b = u32x4::new(0, 0, y, y); + let y = y as u64; + let e = u64x2::new(x.wrapping_add(y), 1 + y); + let r: u64x2 = transmute(vaddw_high_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_s16() { + let a = i16x8::new( + (0 << 8) + 1, + (1 << 8) + 1, + (2 << 8) + 1, + (3 << 8) + 1, + (4 << 8) + 1, + (5 << 8) + 1, + (6 << 8) + 1, + (7 << 8) + 1, + ); + let e = i8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let r: i8x8 = transmute(vaddhn_s16(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_s32() { + let a = i32x4::new((0 << 16) + 1, (1 << 16) + 1, (2 << 16) + 1, (3 << 16) + 1); + let e = i16x4::new(0, 2, 4, 6); + let r: i16x4 = transmute(vaddhn_s32(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_s64() { + let a = i64x2::new((0 << 32) + 1, (1 << 32) + 1); + let e = i32x2::new(0, 2); + let r: i32x2 = transmute(vaddhn_s64(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_u16() { + let a = u16x8::new( + (0 << 8) + 1, + (1 << 8) + 1, + (2 << 8) + 1, + (3 << 8) + 1, + (4 << 8) + 1, + (5 << 8) + 1, + (6 << 8) + 1, + (7 << 8) + 1, + ); + let e = u8x8::new(0, 2, 4, 6, 8, 10, 12, 14); + let r: u8x8 = transmute(vaddhn_u16(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_u32() { + let a = u32x4::new((0 << 16) + 1, (1 << 16) + 1, (2 << 16) + 1, (3 << 16) + 1); + let e = u16x4::new(0, 2, 4, 6); + let r: u16x4 = transmute(vaddhn_u32(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_u64() { + let a = u64x2::new((0 << 32) + 1, (1 << 32) + 1); + let e = u32x2::new(0, 2); + let r: u32x2 = transmute(vaddhn_u64(transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_high_s16() { + let r = i8x8::splat(42); + let a = i16x8::new( + (0 << 8) + 1, + (1 << 8) + 1, + (2 << 8) + 1, + (3 << 8) + 1, + (4 << 8) + 1, + (5 << 8) + 1, + (6 << 8) + 1, + (7 << 8) + 1, + ); + let e = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 0, 2, 4, 6, 8, 10, 12, 14); + let r: i8x16 = transmute(vaddhn_high_s16(transmute(r), transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_high_s32() { + let r = i16x4::splat(42); + let a = i32x4::new((0 << 16) + 1, (1 << 16) + 1, (2 << 16) + 1, (3 << 16) + 1); + let e = i16x8::new(42, 42, 42, 42, 0, 2, 4, 6); + let r: i16x8 = transmute(vaddhn_high_s32(transmute(r), transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_high_s64() { + let r = i32x2::splat(42); + let a = i64x2::new((0 << 32) + 1, (1 << 32) + 1); + let e = i32x4::new(42, 42, 0, 2); + let r: i32x4 = transmute(vaddhn_high_s64(transmute(r), transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_high_u16() { + let r = u8x8::splat(42); + let a = u16x8::new( + (0 << 8) + 1, + (1 << 8) + 1, + (2 << 8) + 1, + (3 << 8) + 1, + (4 << 8) + 1, + (5 << 8) + 1, + (6 << 8) + 1, + (7 << 8) + 1, + ); + let e = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 0, 2, 4, 6, 8, 10, 12, 14); + let r: u8x16 = transmute(vaddhn_high_u16(transmute(r), transmute(a), transmute(a))); + assert_eq!(r, e); + } + + 
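+    // In the `vaddhn*` tests, every input lane has the form (n << S) + 1, where S
+    // is half the lane width (8, 16 or 32). Then a + a = (n << (S + 1)) + 2, and
+    // the narrowing high half of that sum is 2 * n, giving the 0, 2, 4, ...
+    // patterns expected here; the `_high` variants place the narrowed result in
+    // the upper half of the output while keeping the splatted accumulator (42)
+    // in the lower half.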
#[simd_test(enable = "neon")] + unsafe fn test_vaddhn_high_u32() { + let r = u16x4::splat(42); + let a = u32x4::new((0 << 16) + 1, (1 << 16) + 1, (2 << 16) + 1, (3 << 16) + 1); + let e = u16x8::new(42, 42, 42, 42, 0, 2, 4, 6); + let r: u16x8 = transmute(vaddhn_high_u32(transmute(r), transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaddhn_high_u64() { + let r = u32x2::splat(42); + let a = u64x2::new((0 << 32) + 1, (1 << 32) + 1); + let e = u32x4::new(42, 42, 0, 2); + let r: u32x4 = transmute(vaddhn_high_u64(transmute(r), transmute(a), transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_s16() { + let round_constant: i16 = (1 << 8) - 1; + let a = i16x8::new( + 0 << 8, + 1 << 8, + 2 << 8, + 3 << 8, + 4 << 8, + 5 << 8, + 6 << 8, + 7 << 8, + ); + let b = i16x8::new( + 0 << 8, + (1 << 8) + round_constant, + 2 << 8, + (3 << 8) + round_constant, + 4 << 8, + (5 << 8) + round_constant, + 6 << 8, + (7 << 8) + round_constant, + ); + let e = i8x8::new(0, 3, 4, 7, 8, 11, 12, 15); + let r: i8x8 = transmute(vraddhn_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_s32() { + let round_constant: i32 = (1 << 16) - 1; + let a = i32x4::new(0 << 16, 1 << 16, 2 << 16, 3 << 16); + let b = i32x4::new( + 0 << 16, + (1 << 16) + round_constant, + 2 << 16, + (3 << 16) + round_constant, + ); + let e = i16x4::new(0, 3, 4, 7); + let r: i16x4 = transmute(vraddhn_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_s64() { + let round_constant: i64 = (1 << 32) - 1; + let a = i64x2::new(0 << 32, 1 << 32); + let b = i64x2::new(0 << 32, (1 << 32) + round_constant); + let e = i32x2::new(0, 3); + let r: i32x2 = transmute(vraddhn_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_u16() { + let round_constant: u16 = (1 << 8) - 1; + let a = u16x8::new( + 0 << 8, + 1 << 8, + 2 << 8, + 3 << 8, + 4 << 8, + 5 << 8, + 6 << 8, + 7 << 8, + ); + let b = u16x8::new( + 0 << 8, + (1 << 8) + round_constant, + 2 << 8, + (3 << 8) + round_constant, + 4 << 8, + (5 << 8) + round_constant, + 6 << 8, + (7 << 8) + round_constant, + ); + let e = u8x8::new(0, 3, 4, 7, 8, 11, 12, 15); + let r: u8x8 = transmute(vraddhn_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_u32() { + let round_constant: u32 = (1 << 16) - 1; + let a = u32x4::new(0 << 16, 1 << 16, 2 << 16, 3 << 16); + let b = u32x4::new( + 0 << 16, + (1 << 16) + round_constant, + 2 << 16, + (3 << 16) + round_constant, + ); + let e = u16x4::new(0, 3, 4, 7); + let r: u16x4 = transmute(vraddhn_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_u64() { + let round_constant: u64 = (1 << 32) - 1; + let a = u64x2::new(0 << 32, 1 << 32); + let b = u64x2::new(0 << 32, (1 << 32) + round_constant); + let e = u32x2::new(0, 3); + let r: u32x2 = transmute(vraddhn_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_high_s16() { + let r = i8x8::splat(42); + let round_constant: i16 = (1 << 8) - 1; + let a = i16x8::new( + 0 << 8, + 1 << 8, + 2 << 8, + 3 << 8, + 4 << 8, + 5 << 8, + 6 << 8, + 7 << 8, + ); + let b = i16x8::new( + 0 << 8, + (1 << 8) + round_constant, + 2 << 8, + (3 << 8) + round_constant, + 4 << 8, + (5 << 8) + 
round_constant, + 6 << 8, + (7 << 8) + round_constant, + ); + let e = i8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 0, 3, 4, 7, 8, 11, 12, 15); + let r: i8x16 = transmute(vraddhn_high_s16(transmute(r), transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_high_s32() { + let r = i16x4::splat(42); + let round_constant: i32 = (1 << 16) - 1; + let a = i32x4::new(0 << 16, 1 << 16, 2 << 16, 3 << 16); + let b = i32x4::new( + 0 << 16, + (1 << 16) + round_constant, + 2 << 16, + (3 << 16) + round_constant, + ); + let e = i16x8::new(42, 42, 42, 42, 0, 3, 4, 7); + let r: i16x8 = transmute(vraddhn_high_s32(transmute(r), transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_high_s64() { + let r = i32x2::splat(42); + let round_constant: i64 = (1 << 32) - 1; + let a = i64x2::new(0 << 32, 1 << 32); + let b = i64x2::new(0 << 32, (1 << 32) + round_constant); + let e = i32x4::new(42, 42, 0, 3); + let r: i32x4 = transmute(vraddhn_high_s64(transmute(r), transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_high_u16() { + let r = u8x8::splat(42); + let round_constant: u16 = (1 << 8) - 1; + let a = u16x8::new( + 0 << 8, + 1 << 8, + 2 << 8, + 3 << 8, + 4 << 8, + 5 << 8, + 6 << 8, + 7 << 8, + ); + let b = u16x8::new( + 0 << 8, + (1 << 8) + round_constant, + 2 << 8, + (3 << 8) + round_constant, + 4 << 8, + (5 << 8) + round_constant, + 6 << 8, + (7 << 8) + round_constant, + ); + let e = u8x16::new(42, 42, 42, 42, 42, 42, 42, 42, 0, 3, 4, 7, 8, 11, 12, 15); + let r: u8x16 = transmute(vraddhn_high_u16(transmute(r), transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_high_u32() { + let r = u16x4::splat(42); + let round_constant: u32 = (1 << 16) - 1; + let a = u32x4::new(0 << 16, 1 << 16, 2 << 16, 3 << 16); + let b = u32x4::new( + 0 << 16, + (1 << 16) + round_constant, + 2 << 16, + (3 << 16) + round_constant, + ); + let e = u16x8::new(42, 42, 42, 42, 0, 3, 4, 7); + let r: u16x8 = transmute(vraddhn_high_u32(transmute(r), transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vraddhn_high_u64() { + let r = u32x2::splat(42); + let round_constant: u64 = (1 << 32) - 1; + let a = u64x2::new(0 << 32, 1 << 32); + let b = u64x2::new(0 << 32, (1 << 32) + round_constant); + let e = u32x4::new(42, 42, 0, 3); + let r: u32x4 = transmute(vraddhn_high_u64(transmute(r), transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddl_s8() { + let a = i8x8::new(-4, -3, -2, -1, 0, 1, 2, 3); + let r: i16x4 = transmute(vpaddl_s8(transmute(a))); + let e = i16x4::new(-7, -3, 1, 5); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddl_s16() { + let a = i16x4::new(-2, -1, 0, 1); + let r: i32x2 = transmute(vpaddl_s16(transmute(a))); + let e = i32x2::new(-3, 1); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddl_s32() { + let a = i32x2::new(-1, 0); + let r: i64x1 = transmute(vpaddl_s32(transmute(a))); + let e = i64x1::new(-1); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddlq_s8() { + let a = i8x16::new(-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vpaddlq_s8(transmute(a))); + let e = i16x8::new(-15, -11, -7, -3, 1, 5, 9, 13); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe
fn test_vpaddlq_s16() { + let a = i16x8::new(-4, -3, -2, -1, 0, 1, 2, 3); + let r: i32x4 = transmute(vpaddlq_s16(transmute(a))); + let e = i32x4::new(-7, -3, 1, 5); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddlq_s32() { + let a = i32x4::new(-2, -1, 0, 1); + let r: i64x2 = transmute(vpaddlq_s32(transmute(a))); + let e = i64x2::new(-3, 1); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddl_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, u8::MAX); + let r: u16x4 = transmute(vpaddl_u8(transmute(a))); + let e = u16x4::new(1, 5, 9, u8::MAX as u16 + 6); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddl_u16() { + let a = u16x4::new(0, 1, 2, u16::MAX); + let r: u32x2 = transmute(vpaddl_u16(transmute(a))); + let e = u32x2::new(1, u16::MAX as u32 + 2); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddl_u32() { + let a = u32x2::new(1, u32::MAX); + let r: u64x1 = transmute(vpaddl_u32(transmute(a))); + let e = u64x1::new(u32::MAX as u64 + 1); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddlq_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, u8::MAX); + let r: u16x8 = transmute(vpaddlq_u8(transmute(a))); + let e = u16x8::new(1, 5, 9, 13, 17, 21, 25, u8::MAX as u16 + 14); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddlq_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, u16::MAX); + let r: u32x4 = transmute(vpaddlq_u16(transmute(a))); + let e = u32x4::new(1, 5, 9, u16::MAX as u32 + 6); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpaddlq_u32() { + let a = u32x4::new(0, 1, 2, u32::MAX); + let r: u64x2 = transmute(vpaddlq_u32(transmute(a))); + let e = u64x2::new(1, u32::MAX as u64 + 2); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadal_s8() { + let a = i16x4::new(42, 42, 42, 42); + let b = i8x8::new(-4, -3, -2, -1, 0, 1, 2, 3); + let r: i16x4 = transmute(vpadal_s8(transmute(a), transmute(b))); + let e = i16x4::new(35, 39, 43, 47); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadal_s16() { + let a = i32x2::new(42, 42); + let b = i16x4::new(-2, -1, 0, 1); + let r: i32x2 = transmute(vpadal_s16(transmute(a), transmute(b))); + let e = i32x2::new(39, 43); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadal_s32() { + let a = i64x1::new(42); + let b = i32x2::new(-1, 0); + let r: i64x1 = transmute(vpadal_s32(transmute(a), transmute(b))); + let e = i64x1::new(41); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadalq_s8() { + let a = i16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b = i8x16::new(-8, -7, -6, -5, -4, -3, -2, -1, 0, 1, 2, 3, 4, 5, 6, 7); + let r: i16x8 = transmute(vpadalq_s8(transmute(a), transmute(b))); + let e = i16x8::new(27, 31, 35, 39, 43, 47, 51, 55); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadalq_s16() { + let a = i32x4::new(42, 42, 42, 42); + let b = i16x8::new(-4, -3, -2, -1, 0, 1, 2, 3); + let r: i32x4 = transmute(vpadalq_s16(transmute(a), transmute(b))); + let e = i32x4::new(35, 39, 43, 47); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadalq_s32() { + let a = i64x2::new(42, 42); + let b = i32x4::new(-2, -1, 0, 1); + let r: i64x2 = transmute(vpadalq_s32(transmute(a), transmute(b))); + let e = i64x2::new(39, 43); + assert_eq!(r, e); + } + + 
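+    // `vpadal*` ("pairwise add and accumulate long") is `vpaddl*` followed by an
+    // accumulate: the second operand is widened and pairwise-added, then added to
+    // the accumulator. In `test_vpadal_u8` below, lane 0 is 42 + (0 + 1) = 43 and
+    // the last lane is 42 + (6 + 255) = 303.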
#[simd_test(enable = "neon")] + unsafe fn test_vpadal_u8() { + let a = u16x4::new(42, 42, 42, 42); + let b = u8x8::new(0, 1, 2, 3, 4, 5, 6, u8::MAX); + let r: u16x4 = transmute(vpadal_u8(transmute(a), transmute(b))); + let e = u16x4::new(43, 47, 51, u8::MAX as u16 + 48); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadal_u16() { + let a = u32x2::new(42, 42); + let b = u16x4::new(0, 1, 2, u16::MAX); + let r: u32x2 = transmute(vpadal_u16(transmute(a), transmute(b))); + let e = u32x2::new(43, u16::MAX as u32 + 44); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadal_u32() { + let a = u64x1::new(42); + let b = u32x2::new(1, u32::MAX); + let r: u64x1 = transmute(vpadal_u32(transmute(a), transmute(b))); + let e = u64x1::new(u32::MAX as u64 + 43); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadalq_u8() { + let a = u16x8::new(42, 42, 42, 42, 42, 42, 42, 42); + let b = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, u8::MAX); + let r: u16x8 = transmute(vpadalq_u8(transmute(a), transmute(b))); + let e = u16x8::new(43, 47, 51, 55, 59, 63, 67, u8::MAX as u16 + 56); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadalq_u16() { + let a = u32x4::new(42, 42, 42, 42); + let b = u16x8::new(0, 1, 2, 3, 4, 5, 6, u16::MAX); + let r: u32x4 = transmute(vpadalq_u16(transmute(a), transmute(b))); + let e = u32x4::new(43, 47, 51, u16::MAX as u32 + 48); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadalq_u32() { + let a = u64x2::new(42, 42); + let b = u32x4::new(0, 1, 2, u32::MAX); + let r: u64x2 = transmute(vpadalq_u32(transmute(a), transmute(b))); + let e = u64x2::new(43, u32::MAX as u64 + 44); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = i8x8::new(-1, -2, -3, -4, -5, -6, -7, -8); + let r: i8x8 = transmute(vmvn_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = i8x16::new( + -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, -16, + ); + let r: i8x16 = transmute(vmvnq_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s16() { + let a = i16x4::new(0, 1, 2, 3); + let e = i16x4::new(-1, -2, -3, -4); + let r: i16x4 = transmute(vmvn_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = i16x8::new(-1, -2, -3, -4, -5, -6, -7, -8); + let r: i16x8 = transmute(vmvnq_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_s32() { + let a = i32x2::new(0, 1); + let e = i32x2::new(-1, -2); + let r: i32x2 = transmute(vmvn_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_s32() { + let a = i32x4::new(0, 1, 2, 3); + let e = i32x4::new(-1, -2, -3, -4); + let r: i32x4 = transmute(vmvnq_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248); + let r: u8x8 = transmute(vmvn_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 
8, 9, 10, 11, 12, 13, 14, 15); + let e = u8x16::new( + 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, + ); + let r: u8x16 = transmute(vmvnq_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u16() { + let a = u16x4::new(0, 1, 2, 3); + let e = u16x4::new(65_535, 65_534, 65_533, 65_532); + let r: u16x4 = transmute(vmvn_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = u16x8::new( + 65_535, 65_534, 65_533, 65_532, 65_531, 65_530, 65_529, 65_528, + ); + let r: u16x8 = transmute(vmvnq_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_u32() { + let a = u32x2::new(0, 1); + let e = u32x2::new(4_294_967_295, 4_294_967_294); + let r: u32x2 = transmute(vmvn_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_u32() { + let a = u32x4::new(0, 1, 2, 3); + let e = u32x4::new(4_294_967_295, 4_294_967_294, 4_294_967_293, 4_294_967_292); + let r: u32x4 = transmute(vmvnq_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvn_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let e = u8x8::new(255, 254, 253, 252, 251, 250, 249, 248); + let r: u8x8 = transmute(vmvn_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmvnq_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let e = u8x16::new( + 255, 254, 253, 252, 251, 250, 249, 248, 247, 246, 245, 244, 243, 242, 241, 240, + ); + let r: u8x16 = transmute(vmvnq_p8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_s8() { + let a = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let b = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e = i8x8::new(0, -2, -2, -4, -4, -6, -6, -8); + let r: i8x8 = transmute(vbic_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_s8() { + let a = i8x16::new( + 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, + ); + let b = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = i8x16::new( + 0, -2, -2, -4, -4, -6, -6, -8, -8, -10, -10, -12, -12, -14, -14, -16, + ); + let r: i8x16 = transmute(vbicq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_s16() { + let a = i16x4::new(0, -1, -2, -3); + let b = i16x4::new(1, 1, 1, 1); + let e = i16x4::new(0, -2, -2, -4); + let r: i16x4 = transmute(vbic_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_s16() { + let a = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let b = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e = i16x8::new(0, -2, -2, -4, -4, -6, -6, -8); + let r: i16x8 = transmute(vbicq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_s32() { + let a = i32x2::new(0, -1); + let b = i32x2::new(1, 1); + let e = i32x2::new(0, -2); + let r: i32x2 = transmute(vbic_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_s32() { + let a = i32x4::new(0, -1, -2, -3); + let b = i32x4::new(1, 1, 1, 1); + let e = i32x4::new(0, -2, -2, -4); + let r: i32x4 = transmute(vbicq_s32(transmute(a), transmute(b))); + 
assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_s64() { + let a = i64x1::new(-1); + let b = i64x1::new(1); + let e = i64x1::new(-2); + let r: i64x1 = transmute(vbic_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_s64() { + let a = i64x2::new(0, -1); + let b = i64x2::new(1, 1); + let e = i64x2::new(0, -2); + let r: i64x2 = transmute(vbicq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e = u8x8::new(0, 0, 2, 2, 4, 4, 6, 6); + let r: u8x8 = transmute(vbic_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let b = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let e = u8x16::new(0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14); + let r: u8x16 = transmute(vbicq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_u16() { + let a = u16x4::new(0, 1, 2, 3); + let b = u16x4::new(1, 1, 1, 1); + let e = u16x4::new(0, 0, 2, 2); + let r: u16x4 = transmute(vbic_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let b = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let e = u16x8::new(0, 0, 2, 2, 4, 4, 6, 6); + let r: u16x8 = transmute(vbicq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_u32() { + let a = u32x2::new(0, 1); + let b = u32x2::new(1, 1); + let e = u32x2::new(0, 0); + let r: u32x2 = transmute(vbic_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_u32() { + let a = u32x4::new(0, 1, 2, 3); + let b = u32x4::new(1, 1, 1, 1); + let e = u32x4::new(0, 0, 2, 2); + let r: u32x4 = transmute(vbicq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbic_u64() { + let a = u64x1::new(1); + let b = u64x1::new(1); + let e = u64x1::new(0); + let r: u64x1 = transmute(vbic_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbicq_u64() { + let a = u64x2::new(0, 1); + let b = u64x2::new(1, 1); + let e = u64x2::new(0, 0); + let r: u64x2 = transmute(vbicq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_s8() { + let a = u8x8::new(u8::MAX, 0, u8::MAX, 0, u8::MAX, 0, u8::MAX, 0); + let b = i8x8::new( + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + ); + let c = i8x8::new( + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + ); + let e = i8x8::new( + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + ); + let r: i8x8 = transmute(vbsl_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_s16() { + let a = u16x4::new(u16::MAX, 0, u16::MAX, 0); + let b = i16x4::new(i16::MAX, i16::MAX, i16::MAX, i16::MAX); + let c = i16x4::new(i16::MIN, i16::MIN, i16::MIN, i16::MIN); + let e = i16x4::new(i16::MAX, i16::MIN, i16::MAX, i16::MIN); + let r: 
i16x4 = transmute(vbsl_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_s32() { + let a = u32x2::new(u32::MAX, u32::MIN); + let b = i32x2::new(i32::MAX, i32::MAX); + let c = i32x2::new(i32::MIN, i32::MIN); + let e = i32x2::new(i32::MAX, i32::MIN); + let r: i32x2 = transmute(vbsl_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_s64() { + let a = u64x1::new(u64::MAX); + let b = i64x1::new(i64::MAX); + let c = i64x1::new(i64::MIN); + let e = i64x1::new(i64::MAX); + let r: i64x1 = transmute(vbsl_s64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_u8() { + let a = u8x8::new(u8::MAX, 0, u8::MAX, 0, u8::MAX, 0, u8::MAX, 0); + let b = u8x8::new( + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + ); + let c = u8x8::new( + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + ); + let e = u8x8::new( + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + ); + let r: u8x8 = transmute(vbsl_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_u16() { + let a = u16x4::new(u16::MAX, 0, u16::MAX, 0); + let b = u16x4::new(u16::MAX, u16::MAX, u16::MAX, u16::MAX); + let c = u16x4::new(u16::MIN, u16::MIN, u16::MIN, u16::MIN); + let e = u16x4::new(u16::MAX, u16::MIN, u16::MAX, u16::MIN); + let r: u16x4 = transmute(vbsl_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_u32() { + let a = u32x2::new(u32::MAX, 0); + let b = u32x2::new(u32::MAX, u32::MAX); + let c = u32x2::new(u32::MIN, u32::MIN); + let e = u32x2::new(u32::MAX, u32::MIN); + let r: u32x2 = transmute(vbsl_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_u64() { + let a = u64x1::new(u64::MAX); + let b = u64x1::new(u64::MAX); + let c = u64x1::new(u64::MIN); + let e = u64x1::new(u64::MAX); + let r: u64x1 = transmute(vbsl_u64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_f32() { + let a = u32x2::new(u32::MAX, 0); + let b = f32x2::new(f32::MAX, f32::MAX); + let c = f32x2::new(f32::MIN, f32::MIN); + let e = f32x2::new(f32::MAX, f32::MIN); + let r: f32x2 = transmute(vbsl_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_p8() { + let a = u8x8::new(u8::MAX, 0, u8::MAX, 0, u8::MAX, 0, u8::MAX, 0); + let b = u8x8::new( + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + ); + let c = u8x8::new( + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + ); + let e = u8x8::new( + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + ); + let r: u8x8 = transmute(vbsl_p8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbsl_p16() { + let a = u16x4::new(u16::MAX, 0, u16::MAX, 0); + let b = u16x4::new(u16::MAX, u16::MAX, u16::MAX, u16::MAX); + let c = u16x4::new(u16::MIN, u16::MIN, u16::MIN, u16::MIN); + let e = u16x4::new(u16::MAX, u16::MIN, u16::MAX, u16::MIN); + let r: u16x4 = 
transmute(vbsl_p16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_s8() { + let a = u8x16::new( + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + ); + let b = i8x16::new( + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + i8::MAX, + ); + let c = i8x16::new( + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + i8::MIN, + ); + let e = i8x16::new( + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + i8::MAX, + i8::MIN, + ); + let r: i8x16 = transmute(vbslq_s8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_s16() { + let a = u16x8::new(u16::MAX, 0, u16::MAX, 0, u16::MAX, 0, u16::MAX, 0); + let b = i16x8::new( + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + i16::MAX, + ); + let c = i16x8::new( + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + i16::MIN, + ); + let e = i16x8::new( + i16::MAX, + i16::MIN, + i16::MAX, + i16::MIN, + i16::MAX, + i16::MIN, + i16::MAX, + i16::MIN, + ); + let r: i16x8 = transmute(vbslq_s16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_s32() { + let a = u32x4::new(u32::MAX, 0, u32::MAX, 0); + let b = i32x4::new(i32::MAX, i32::MAX, i32::MAX, i32::MAX); + let c = i32x4::new(i32::MIN, i32::MIN, i32::MIN, i32::MIN); + let e = i32x4::new(i32::MAX, i32::MIN, i32::MAX, i32::MIN); + let r: i32x4 = transmute(vbslq_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_s64() { + let a = u64x2::new(u64::MAX, 0); + let b = i64x2::new(i64::MAX, i64::MAX); + let c = i64x2::new(i64::MIN, i64::MIN); + let e = i64x2::new(i64::MAX, i64::MIN); + let r: i64x2 = transmute(vbslq_s64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_u8() { + let a = u8x16::new( + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + ); + let b = u8x16::new( + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + ); + let c = u8x16::new( + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + ); + let e = u8x16::new( + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + ); + let r: u8x16 = transmute(vbslq_u8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_u16() { + let a = u16x8::new(u16::MAX, 0, u16::MAX, 0, u16::MAX, 0, u16::MAX, 0); + let b = u16x8::new( + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + ); + let 
c = u16x8::new( + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + ); + let e = u16x8::new( + u16::MAX, + u16::MIN, + u16::MAX, + u16::MIN, + u16::MAX, + u16::MIN, + u16::MAX, + u16::MIN, + ); + let r: u16x8 = transmute(vbslq_u16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_u32() { + let a = u32x4::new(u32::MAX, 0, u32::MAX, 0); + let b = u32x4::new(u32::MAX, u32::MAX, u32::MAX, u32::MAX); + let c = u32x4::new(u32::MIN, u32::MIN, u32::MIN, u32::MIN); + let e = u32x4::new(u32::MAX, u32::MIN, u32::MAX, u32::MIN); + let r: u32x4 = transmute(vbslq_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_u64() { + let a = u64x2::new(u64::MAX, 0); + let b = u64x2::new(u64::MAX, u64::MAX); + let c = u64x2::new(u64::MIN, u64::MIN); + let e = u64x2::new(u64::MAX, u64::MIN); + let r: u64x2 = transmute(vbslq_u64(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_f32() { + let a = u32x4::new(u32::MAX, 0, u32::MAX, 0); + let b = f32x4::new(f32::MAX, f32::MAX, f32::MAX, f32::MAX); + let c = f32x4::new(f32::MIN, f32::MIN, f32::MIN, f32::MIN); + let e = f32x4::new(f32::MAX, f32::MIN, f32::MAX, f32::MIN); + let r: f32x4 = transmute(vbslq_f32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_p8() { + let a = u8x16::new( + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + u8::MAX, + 0, + ); + let b = u8x16::new( + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + u8::MAX, + ); + let c = u8x16::new( + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + u8::MIN, + ); + let e = u8x16::new( + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + u8::MAX, + u8::MIN, + ); + let r: u8x16 = transmute(vbslq_p8(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vbslq_p16() { + let a = u16x8::new(u16::MAX, 0, u16::MAX, 0, u16::MAX, 0, u16::MAX, 0); + let b = u16x8::new( + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + u16::MAX, + ); + let c = u16x8::new( + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + u16::MIN, + ); + let e = u16x8::new( + u16::MAX, + u16::MIN, + u16::MAX, + u16::MIN, + u16::MAX, + u16::MIN, + u16::MAX, + u16::MIN, + ); + let r: u16x8 = transmute(vbslq_p16(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_s8() { + let a = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let b = i8x8::new(-2, -2, -2, -2, -2, -2, -2, -2); + let e = i8x8::new(1, -1, -1, -3, -3, -5, -5, -7); + let r: i8x8 = transmute(vorn_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_s8() { + let a = i8x16::new( + 0, -1, -2, -3, -4, -5, -6, -7, -8, -9, -10, -11, -12, -13, -14, -15, + ); + let b = i8x16::new( + -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 
-2, -2, -2, -2, -2, -2, + ); + let e = i8x16::new( + 1, -1, -1, -3, -3, -5, -5, -7, -7, -9, -9, -11, -11, -13, -13, -15, + ); + let r: i8x16 = transmute(vornq_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_s16() { + let a = i16x4::new(0, -1, -2, -3); + let b = i16x4::new(-2, -2, -2, -2); + let e = i16x4::new(1, -1, -1, -3); + let r: i16x4 = transmute(vorn_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_s16() { + let a = i16x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let b = i16x8::new(-2, -2, -2, -2, -2, -2, -2, -2); + let e = i16x8::new(1, -1, -1, -3, -3, -5, -5, -7); + let r: i16x8 = transmute(vornq_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_s32() { + let a = i32x2::new(0, -1); + let b = i32x2::new(-2, -2); + let e = i32x2::new(1, -1); + let r: i32x2 = transmute(vorn_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_s32() { + let a = i32x4::new(0, -1, -2, -3); + let b = i32x4::new(-2, -2, -2, -2); + let e = i32x4::new(1, -1, -1, -3); + let r: i32x4 = transmute(vornq_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_s64() { + let a = i64x1::new(0); + let b = i64x1::new(-2); + let e = i64x1::new(1); + let r: i64x1 = transmute(vorn_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_s64() { + let a = i64x2::new(0, -1); + let b = i64x2::new(-2, -2); + let e = i64x2::new(1, -1); + let r: i64x2 = transmute(vornq_s64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let t = u8::MAX - 1; + let b = u8x8::new(t, t, t, t, t, t, t, t); + let e = u8x8::new(1, 1, 3, 3, 5, 5, 7, 7); + let r: u8x8 = transmute(vorn_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let t = u8::MAX - 1; + let b = u8x16::new(t, t, t, t, t, t, t, t, t, t, t, t, t, t, t, t); + let e = u8x16::new(1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15); + let r: u8x16 = transmute(vornq_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_u16() { + let a = u16x4::new(0, 1, 2, 3); + let t = u16::MAX - 1; + let b = u16x4::new(t, t, t, t); + let e = u16x4::new(1, 1, 3, 3); + let r: u16x4 = transmute(vorn_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let t = u16::MAX - 1; + let b = u16x8::new(t, t, t, t, t, t, t, t); + let e = u16x8::new(1, 1, 3, 3, 5, 5, 7, 7); + let r: u16x8 = transmute(vornq_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_u32() { + let a = u32x2::new(0, 1); + let t = u32::MAX - 1; + let b = u32x2::new(t, t); + let e = u32x2::new(1, 1); + let r: u32x2 = transmute(vorn_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_u32() { + let a = u32x4::new(0, 1, 2, 3); + let t = u32::MAX - 1; + let b = u32x4::new(t, t, t, t); + let e = u32x4::new(1, 1, 3, 3); + let r: 
u32x4 = transmute(vornq_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorn_u64() { + let a = u64x1::new(0); + let t = u64::MAX - 1; + let b = u64x1::new(t); + let e = u64x1::new(1); + let r: u64x1 = transmute(vorn_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vornq_u64() { + let a = u64x2::new(0, 1); + let t = u64::MAX - 1; + let b = u64x2::new(t, t); + let e = u64x2::new(1, 1); + let r: u64x2 = transmute(vornq_u64(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i8x8 = transmute(vmovn_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s32() { + let a = i32x4::new(1, 2, 3, 4); + let e = i16x4::new(1, 2, 3, 4); + let r: i16x4 = transmute(vmovn_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_s64() { + let a = i64x2::new(1, 2); + let e = i32x2::new(1, 2); + let r: i32x2 = transmute(vmovn_s64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let e = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u8x8 = transmute(vmovn_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u32() { + let a = u32x4::new(1, 2, 3, 4); + let e = u16x4::new(1, 2, 3, 4); + let r: u16x4 = transmute(vmovn_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovn_u64() { + let a = u64x2::new(1, 2); + let e = u32x2::new(1, 2); + let r: u32x2 = transmute(vmovn_u64(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s8() { + let e = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: i16x8 = transmute(vmovl_s8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s16() { + let e = i32x4::new(1, 2, 3, 4); + let a = i16x4::new(1, 2, 3, 4); + let r: i32x4 = transmute(vmovl_s16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_s32() { + let e = i64x2::new(1, 2); + let a = i32x2::new(1, 2); + let r: i64x2 = transmute(vmovl_s32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u8() { + let e = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let r: u16x8 = transmute(vmovl_u8(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u16() { + let e = u32x4::new(1, 2, 3, 4); + let a = u16x4::new(1, 2, 3, 4); + let r: u32x4 = transmute(vmovl_u16(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmovl_u32() { + let e = u64x2::new(1, 2); + let a = u32x2::new(1, 2); + let r: u64x2 = transmute(vmovl_u32(transmute(a))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_s8() { + let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); + let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let e = i8x8::new(-2, -4, 5, 7, 0, 2, 4, 6); + let r: i8x8 = transmute(vpmin_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_s16() { + let a = i16x4::new(1, 2, 3, -4); + let 
b = i16x4::new(0, 3, 2, 5); + let e = i16x4::new(1, -4, 0, 2); + let r: i16x4 = transmute(vpmin_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_s32() { + let a = i32x2::new(1, -2); + let b = i32x2::new(0, 3); + let e = i32x2::new(-2, 0); + let r: i32x2 = transmute(vpmin_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_u8() { + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let e = u8x8::new(1, 3, 5, 7, 0, 2, 4, 6); + let r: u8x8 = transmute(vpmin_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_u16() { + let a = u16x4::new(1, 2, 3, 4); + let b = u16x4::new(0, 3, 2, 5); + let e = u16x4::new(1, 3, 0, 2); + let r: u16x4 = transmute(vpmin_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_u32() { + let a = u32x2::new(1, 2); + let b = u32x2::new(0, 3); + let e = u32x2::new(1, 0); + let r: u32x2 = transmute(vpmin_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmin_f32() { + let a = f32x2::new(1., -2.); + let b = f32x2::new(0., 3.); + let e = f32x2::new(-2., 0.); + let r: f32x2 = transmute(vpmin_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_s8() { + let a = i8x8::new(1, -2, 3, -4, 5, 6, 7, 8); + let b = i8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let e = i8x8::new(1, 3, 6, 8, 3, 5, 7, 9); + let r: i8x8 = transmute(vpmax_s8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_s16() { + let a = i16x4::new(1, 2, 3, -4); + let b = i16x4::new(0, 3, 2, 5); + let e = i16x4::new(2, 3, 3, 5); + let r: i16x4 = transmute(vpmax_s16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_s32() { + let a = i32x2::new(1, -2); + let b = i32x2::new(0, 3); + let e = i32x2::new(1, 3); + let r: i32x2 = transmute(vpmax_s32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_u8() { + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u8x8::new(0, 3, 2, 5, 4, 7, 6, 9); + let e = u8x8::new(2, 4, 6, 8, 3, 5, 7, 9); + let r: u8x8 = transmute(vpmax_u8(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_u16() { + let a = u16x4::new(1, 2, 3, 4); + let b = u16x4::new(0, 3, 2, 5); + let e = u16x4::new(2, 4, 3, 5); + let r: u16x4 = transmute(vpmax_u16(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_u32() { + let a = u32x2::new(1, 2); + let b = u32x2::new(0, 3); + let e = u32x2::new(2, 3); + let r: u32x2 = transmute(vpmax_u32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpmax_f32() { + let a = f32x2::new(1., -2.); + let b = f32x2::new(0., 3.); + let e = f32x2::new(1., 3.); + let r: f32x2 = transmute(vpmax_f32(transmute(a), transmute(b))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_s8() { + test_bit_s8(|i, j| vand_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s8() { + testq_bit_s8(|i, j| vandq_s8(i, j), |a: i8, b: i8| -> i8 { a & b }); + } + #[simd_test(enable = 
"neon")] + unsafe fn test_vand_s16() { + test_bit_s16(|i, j| vand_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s16() { + testq_bit_s16(|i, j| vandq_s16(i, j), |a: i16, b: i16| -> i16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_s32() { + test_bit_s32(|i, j| vand_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s32() { + testq_bit_s32(|i, j| vandq_s32(i, j), |a: i32, b: i32| -> i32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_s64() { + test_bit_s64(|i, j| vand_s64(i, j), |a: i64, b: i64| -> i64 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_s64() { + testq_bit_s64(|i, j| vandq_s64(i, j), |a: i64, b: i64| -> i64 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vand_u8() { + test_bit_u8(|i, j| vand_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u8() { + testq_bit_u8(|i, j| vandq_u8(i, j), |a: u8, b: u8| -> u8 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u16() { + test_bit_u16(|i, j| vand_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u16() { + testq_bit_u16(|i, j| vandq_u16(i, j), |a: u16, b: u16| -> u16 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u32() { + test_bit_u32(|i, j| vand_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u32() { + testq_bit_u32(|i, j| vandq_u32(i, j), |a: u32, b: u32| -> u32 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vand_u64() { + test_bit_u64(|i, j| vand_u64(i, j), |a: u64, b: u64| -> u64 { a & b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vandq_u64() { + testq_bit_u64(|i, j| vandq_u64(i, j), |a: u64, b: u64| -> u64 { a & b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s8() { + test_bit_s8(|i, j| vorr_s8(i, j), |a: i8, b: i8| -> i8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s8() { + testq_bit_s8(|i, j| vorrq_s8(i, j), |a: i8, b: i8| -> i8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s16() { + test_bit_s16(|i, j| vorr_s16(i, j), |a: i16, b: i16| -> i16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s16() { + testq_bit_s16(|i, j| vorrq_s16(i, j), |a: i16, b: i16| -> i16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s32() { + test_bit_s32(|i, j| vorr_s32(i, j), |a: i32, b: i32| -> i32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s32() { + testq_bit_s32(|i, j| vorrq_s32(i, j), |a: i32, b: i32| -> i32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_s64() { + test_bit_s64(|i, j| vorr_s64(i, j), |a: i64, b: i64| -> i64 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_s64() { + testq_bit_s64(|i, j| vorrq_s64(i, j), |a: i64, b: i64| -> i64 { a | b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u8() { + test_bit_u8(|i, j| vorr_u8(i, j), |a: u8, b: u8| -> u8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u8() { + testq_bit_u8(|i, j| vorrq_u8(i, j), |a: u8, b: u8| -> u8 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u16() { + test_bit_u16(|i, j| vorr_u16(i, j), |a: u16, b: u16| -> u16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u16() { + 
testq_bit_u16(|i, j| vorrq_u16(i, j), |a: u16, b: u16| -> u16 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u32() { + test_bit_u32(|i, j| vorr_u32(i, j), |a: u32, b: u32| -> u32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u32() { + testq_bit_u32(|i, j| vorrq_u32(i, j), |a: u32, b: u32| -> u32 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorr_u64() { + test_bit_u64(|i, j| vorr_u64(i, j), |a: u64, b: u64| -> u64 { a | b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vorrq_u64() { + testq_bit_u64(|i, j| vorrq_u64(i, j), |a: u64, b: u64| -> u64 { a | b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_s8() { + test_bit_s8(|i, j| veor_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s8() { + testq_bit_s8(|i, j| veorq_s8(i, j), |a: i8, b: i8| -> i8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s16() { + test_bit_s16(|i, j| veor_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s16() { + testq_bit_s16(|i, j| veorq_s16(i, j), |a: i16, b: i16| -> i16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s32() { + test_bit_s32(|i, j| veor_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s32() { + testq_bit_s32(|i, j| veorq_s32(i, j), |a: i32, b: i32| -> i32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_s64() { + test_bit_s64(|i, j| veor_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_s64() { + testq_bit_s64(|i, j| veorq_s64(i, j), |a: i64, b: i64| -> i64 { a ^ b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_veor_u8() { + test_bit_u8(|i, j| veor_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u8() { + testq_bit_u8(|i, j| veorq_u8(i, j), |a: u8, b: u8| -> u8 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u16() { + test_bit_u16(|i, j| veor_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u16() { + testq_bit_u16(|i, j| veorq_u16(i, j), |a: u16, b: u16| -> u16 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u32() { + test_bit_u32(|i, j| veor_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u32() { + testq_bit_u32(|i, j| veorq_u32(i, j), |a: u32, b: u32| -> u32 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veor_u64() { + test_bit_u64(|i, j| veor_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_veorq_u64() { + testq_bit_u64(|i, j| veorq_u64(i, j), |a: u64, b: u64| -> u64 { a ^ b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s8() { + test_cmp_s8( + |i, j| vceq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a == b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s8() { + testq_cmp_s8( + |i, j| vceqq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a == b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s16() { + test_cmp_s16( + |i, j| vceq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a == b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s16() { + testq_cmp_s16( + |i, j| vceqq_s16(i, j), + |a: i16, 
b: i16| -> u16 { + if a == b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_s32() { + test_cmp_s32( + |i, j| vceq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_s32() { + testq_cmp_s32( + |i, j| vceqq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u8() { + test_cmp_u8( + |i, j| vceq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a == b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u8() { + testq_cmp_u8( + |i, j| vceqq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a == b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u16() { + test_cmp_u16( + |i, j| vceq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a == b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u16() { + testq_cmp_u16( + |i, j| vceqq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a == b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceq_u32() { + test_cmp_u32( + |i, j| vceq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_u32() { + testq_cmp_u32( + |i, j| vceqq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vceq_f32() { + test_cmp_f32( + |i, j| vceq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vceqq_f32() { + testq_cmp_f32( + |i, j| vceqq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a == b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s8() { + test_cmp_s8( + |i, j| vcgt_s8(i, j), + |a: i8, b: i8| -> u8 { + if a > b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s8() { + testq_cmp_s8( + |i, j| vcgtq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a > b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s16() { + test_cmp_s16( + |i, j| vcgt_s16(i, j), + |a: i16, b: i16| -> u16 { + if a > b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s16() { + testq_cmp_s16( + |i, j| vcgtq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a > b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_s32() { + test_cmp_s32( + |i, j| vcgt_s32(i, j), + |a: i32, b: i32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_s32() { + testq_cmp_s32( + |i, j| vcgtq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u8() { + test_cmp_u8( + |i, j| vcgt_u8(i, j), + |a: u8, b: u8| -> u8 { + if a > b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u8() { + testq_cmp_u8( + |i, j| vcgtq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a > b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u16() { + test_cmp_u16( + |i, j| vcgt_u16(i, j), + 
|a: u16, b: u16| -> u16 { + if a > b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u16() { + testq_cmp_u16( + |i, j| vcgtq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a > b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_u32() { + test_cmp_u32( + |i, j| vcgt_u32(i, j), + |a: u32, b: u32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_u32() { + testq_cmp_u32( + |i, j| vcgtq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcgt_f32() { + test_cmp_f32( + |i, j| vcgt_f32(i, j), + |a: f32, b: f32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgtq_f32() { + testq_cmp_f32( + |i, j| vcgtq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a > b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s8() { + test_cmp_s8( + |i, j| vclt_s8(i, j), + |a: i8, b: i8| -> u8 { + if a < b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s8() { + testq_cmp_s8( + |i, j| vcltq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a < b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s16() { + test_cmp_s16( + |i, j| vclt_s16(i, j), + |a: i16, b: i16| -> u16 { + if a < b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s16() { + testq_cmp_s16( + |i, j| vcltq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a < b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_s32() { + test_cmp_s32( + |i, j| vclt_s32(i, j), + |a: i32, b: i32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_s32() { + testq_cmp_s32( + |i, j| vcltq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u8() { + test_cmp_u8( + |i, j| vclt_u8(i, j), + |a: u8, b: u8| -> u8 { + if a < b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u8() { + testq_cmp_u8( + |i, j| vcltq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a < b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u16() { + test_cmp_u16( + |i, j| vclt_u16(i, j), + |a: u16, b: u16| -> u16 { + if a < b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u16() { + testq_cmp_u16( + |i, j| vcltq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a < b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vclt_u32() { + test_cmp_u32( + |i, j| vclt_u32(i, j), + |a: u32, b: u32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_u32() { + testq_cmp_u32( + |i, j| vcltq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vclt_f32() { + test_cmp_f32( + |i, j| vclt_f32(i, j), + |a: f32, b: f32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcltq_f32() { + testq_cmp_f32( + |i, j| 
vcltq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a < b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s8() { + test_cmp_s8( + |i, j| vcle_s8(i, j), + |a: i8, b: i8| -> u8 { + if a <= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s8() { + testq_cmp_s8( + |i, j| vcleq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a <= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s16() { + test_cmp_s16( + |i, j| vcle_s16(i, j), + |a: i16, b: i16| -> u16 { + if a <= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s16() { + testq_cmp_s16( + |i, j| vcleq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a <= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_s32() { + test_cmp_s32( + |i, j| vcle_s32(i, j), + |a: i32, b: i32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_s32() { + testq_cmp_s32( + |i, j| vcleq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u8() { + test_cmp_u8( + |i, j| vcle_u8(i, j), + |a: u8, b: u8| -> u8 { + if a <= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u8() { + testq_cmp_u8( + |i, j| vcleq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a <= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u16() { + test_cmp_u16( + |i, j| vcle_u16(i, j), + |a: u16, b: u16| -> u16 { + if a <= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u16() { + testq_cmp_u16( + |i, j| vcleq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a <= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcle_u32() { + test_cmp_u32( + |i, j| vcle_u32(i, j), + |a: u32, b: u32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_u32() { + testq_cmp_u32( + |i, j| vcleq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcle_f32() { + test_cmp_f32( + |i, j| vcle_f32(i, j), + |a: f32, b: f32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcleq_f32() { + testq_cmp_f32( + |i, j| vcleq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a <= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s8() { + test_cmp_s8( + |i, j| vcge_s8(i, j), + |a: i8, b: i8| -> u8 { + if a >= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s8() { + testq_cmp_s8( + |i, j| vcgeq_s8(i, j), + |a: i8, b: i8| -> u8 { + if a >= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s16() { + test_cmp_s16( + |i, j| vcge_s16(i, j), + |a: i16, b: i16| -> u16 { + if a >= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s16() { + testq_cmp_s16( + |i, j| vcgeq_s16(i, j), + |a: i16, b: i16| -> u16 { + if a >= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_s32() { + 
test_cmp_s32( + |i, j| vcge_s32(i, j), + |a: i32, b: i32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_s32() { + testq_cmp_s32( + |i, j| vcgeq_s32(i, j), + |a: i32, b: i32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u8() { + test_cmp_u8( + |i, j| vcge_u8(i, j), + |a: u8, b: u8| -> u8 { + if a >= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u8() { + testq_cmp_u8( + |i, j| vcgeq_u8(i, j), + |a: u8, b: u8| -> u8 { + if a >= b { + 0xFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u16() { + test_cmp_u16( + |i, j| vcge_u16(i, j), + |a: u16, b: u16| -> u16 { + if a >= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u16() { + testq_cmp_u16( + |i, j| vcgeq_u16(i, j), + |a: u16, b: u16| -> u16 { + if a >= b { + 0xFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcge_u32() { + test_cmp_u32( + |i, j| vcge_u32(i, j), + |a: u32, b: u32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_u32() { + testq_cmp_u32( + |i, j| vcgeq_u32(i, j), + |a: u32, b: u32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vcge_f32() { + test_cmp_f32( + |i, j| vcge_f32(i, j), + |a: f32, b: f32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcgeq_f32() { + testq_cmp_f32( + |i, j| vcgeq_f32(i, j), + |a: f32, b: f32| -> u32 { + if a >= b { + 0xFFFFFFFF + } else { + 0 + } + }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s8() { + test_ari_s8( + |i, j| vqsub_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s8() { + testq_ari_s8( + |i, j| vqsubq_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s16() { + test_ari_s16( + |i, j| vqsub_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s16() { + testq_ari_s16( + |i, j| vqsubq_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_s32() { + test_ari_s32( + |i, j| vqsub_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_s32() { + testq_ari_s32( + |i, j| vqsubq_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_sub(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u8() { + test_ari_u8( + |i, j| vqsub_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u8() { + testq_ari_u8( + |i, j| vqsubq_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u16() { + test_ari_u16( + |i, j| vqsub_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u16() { + testq_ari_u16( + |i, j| vqsubq_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsub_u32() { + 
test_ari_u32( + |i, j| vqsub_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_sub(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqsubq_u32() { + testq_ari_u32( + |i, j| vqsubq_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_sub(b) }, + ); + } + + // The reference closures below widen before adding so that the halving-add + // semantics, (a + b) >> 1 (and (a + b + 1) >> 1 for the rounding variant), + // are computed without overflowing the element type. + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s8() { + test_ari_s8( + |i, j| vhadd_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) + (b as i16)) >> 1) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s8() { + testq_ari_s8( + |i, j| vhaddq_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) + (b as i16)) >> 1) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s16() { + test_ari_s16( + |i, j| vhadd_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) + (b as i32)) >> 1) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s16() { + testq_ari_s16( + |i, j| vhaddq_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) + (b as i32)) >> 1) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_s32() { + test_ari_s32( + |i, j| vhadd_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) + (b as i64)) >> 1) as i32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_s32() { + testq_ari_s32( + |i, j| vhaddq_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) + (b as i64)) >> 1) as i32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u8() { + test_ari_u8( + |i, j| vhadd_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) + (b as u16)) >> 1) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u8() { + testq_ari_u8( + |i, j| vhaddq_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) + (b as u16)) >> 1) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u16() { + test_ari_u16( + |i, j| vhadd_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u32) + (b as u32)) >> 1) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u16() { + testq_ari_u16( + |i, j| vhaddq_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u32) + (b as u32)) >> 1) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhadd_u32() { + test_ari_u32( + |i, j| vhadd_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) + (b as u64)) >> 1) as u32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhaddq_u32() { + testq_ari_u32( + |i, j| vhaddq_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) + (b as u64)) >> 1) as u32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s8() { + test_ari_s8( + |i, j| vrhadd_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) + (b as i16) + 1) >> 1) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s8() { + testq_ari_s8( + |i, j| vrhaddq_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) + (b as i16) + 1) >> 1) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s16() { + test_ari_s16( + |i, j| vrhadd_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) + (b as i32) + 1) >> 1) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s16() { + testq_ari_s16( + |i, j| vrhaddq_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) + (b as i32) + 1) >> 1) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_s32() { + test_ari_s32( + |i, j| vrhadd_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) + (b as i64) + 1) >> 1) as i32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_s32() { + testq_ari_s32( + |i, j| vrhaddq_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) + (b as i64) + 1) >> 1) as i32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u8() { + test_ari_u8( + |i, j| vrhadd_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) + (b as u16) + 1) >> 1) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u8() { + testq_ari_u8( + |i, j| vrhaddq_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) + (b as u16) + 1) >> 1) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u16() { + test_ari_u16( + |i, j| vrhadd_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u32) + (b as u32) + 1) >> 1) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u16() { + testq_ari_u16( + |i, j| vrhaddq_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u32) + (b as u32) + 1) >> 1) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrhadd_u32() { + test_ari_u32( + |i, j| vrhadd_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) + (b as u64) + 1) >> 1) as u32 }, + ); + } + 
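+ // Illustrative note: the rounding halving add differs from vhadd only in its + // rounding term; e.g. for lanes 7 and 8, vhadd_u8 gives (7 + 8) >> 1 == 7, + // while vrhadd_u8 gives (7 + 8 + 1) >> 1 == 8.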
#[simd_test(enable = "neon")] + unsafe fn test_vrhaddq_u32() { + testq_ari_u32( + |i, j| vrhaddq_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) + (b as u64) + 1) >> 1) as u32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s8() { + test_ari_s8( + |i, j| vqadd_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s8() { + testq_ari_s8( + |i, j| vqaddq_s8(i, j), + |a: i8, b: i8| -> i8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s16() { + test_ari_s16( + |i, j| vqadd_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s16() { + testq_ari_s16( + |i, j| vqaddq_s16(i, j), + |a: i16, b: i16| -> i16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_s32() { + test_ari_s32( + |i, j| vqadd_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_s32() { + testq_ari_s32( + |i, j| vqaddq_s32(i, j), + |a: i32, b: i32| -> i32 { a.saturating_add(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u8() { + test_ari_u8( + |i, j| vqadd_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u8() { + testq_ari_u8( + |i, j| vqaddq_u8(i, j), + |a: u8, b: u8| -> u8 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u16() { + test_ari_u16( + |i, j| vqadd_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u16() { + testq_ari_u16( + |i, j| vqaddq_u16(i, j), + |a: u16, b: u16| -> u16 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqadd_u32() { + test_ari_u32( + |i, j| vqadd_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_add(b) }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vqaddq_u32() { + testq_ari_u32( + |i, j| vqaddq_u32(i, j), + |a: u32, b: u32| -> u32 { a.saturating_add(b) }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s8() { + test_ari_s8( + |i, j| vmul_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s8() { + testq_ari_s8( + |i, j| vmulq_s8(i, j), + |a: i8, b: i8| -> i8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s16() { + test_ari_s16( + |i, j| vmul_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s16() { + testq_ari_s16( + |i, j| vmulq_s16(i, j), + |a: i16, b: i16| -> i16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_s32() { + test_ari_s32( + |i, j| vmul_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_s32() { + testq_ari_s32( + |i, j| vmulq_s32(i, j), + |a: i32, b: i32| -> i32 { a.overflowing_mul(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u8() { + test_ari_u8( + |i, j| vmul_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u8() { + testq_ari_u8( + |i, j| vmulq_u8(i, j), + |a: u8, b: u8| -> u8 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u16() { + test_ari_u16( + |i, j| vmul_u16(i, j), + |a: 
u16, b: u16| -> u16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u16() { + testq_ari_u16( + |i, j| vmulq_u16(i, j), + |a: u16, b: u16| -> u16 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmul_u32() { + test_ari_u32( + |i, j| vmul_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_u32() { + testq_ari_u32( + |i, j| vmulq_u32(i, j), + |a: u32, b: u32| -> u32 { a.overflowing_mul(b).0 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vmul_f32() { + test_ari_f32(|i, j| vmul_f32(i, j), |a: f32, b: f32| -> f32 { a * b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vmulq_f32() { + testq_ari_f32(|i, j| vmulq_f32(i, j), |a: f32, b: f32| -> f32 { a * b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s8() { + test_ari_s8(|i, j| vsub_s8(i, j), |a: i8, b: i8| -> i8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s8() { + testq_ari_s8(|i, j| vsubq_s8(i, j), |a: i8, b: i8| -> i8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s16() { + test_ari_s16(|i, j| vsub_s16(i, j), |a: i16, b: i16| -> i16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s16() { + testq_ari_s16(|i, j| vsubq_s16(i, j), |a: i16, b: i16| -> i16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_s32() { + test_ari_s32(|i, j| vsub_s32(i, j), |a: i32, b: i32| -> i32 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_s32() { + testq_ari_s32(|i, j| vsubq_s32(i, j), |a: i32, b: i32| -> i32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u8() { + test_ari_u8(|i, j| vsub_u8(i, j), |a: u8, b: u8| -> u8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u8() { + testq_ari_u8(|i, j| vsubq_u8(i, j), |a: u8, b: u8| -> u8 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u16() { + test_ari_u16(|i, j| vsub_u16(i, j), |a: u16, b: u16| -> u16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u16() { + testq_ari_u16(|i, j| vsubq_u16(i, j), |a: u16, b: u16| -> u16 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsub_u32() { + test_ari_u32(|i, j| vsub_u32(i, j), |a: u32, b: u32| -> u32 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_u32() { + testq_ari_u32(|i, j| vsubq_u32(i, j), |a: u32, b: u32| -> u32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vsub_f32() { + test_ari_f32(|i, j| vsub_f32(i, j), |a: f32, b: f32| -> f32 { a - b }); + } + #[simd_test(enable = "neon")] + unsafe fn test_vsubq_f32() { + testq_ari_f32(|i, j| vsubq_f32(i, j), |a: f32, b: f32| -> f32 { a - b }); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s8() { + test_ari_s8( + |i, j| vhsub_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s8() { + testq_ari_s8( + |i, j| vhsubq_s8(i, j), + |a: i8, b: i8| -> i8 { (((a as i16) - (b as i16)) / 2) as i8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_s16() { + test_ari_s16( + |i, j| vhsub_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s16() { + testq_ari_s16( + |i, j| vhsubq_s16(i, j), + |a: i16, b: i16| -> i16 { (((a as i32) - (b as i32)) / 2) as i16 }, + ); + } + 
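+ // Note: VHSUB computes (a - b) >> 1 in twice the element width, truncating toward + // negative infinity, whereas the / 2 in these reference closures truncates toward + // zero; the two agree whenever a - b is non-negative or even, which the generated + // test inputs are assumed to satisfy.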
#[simd_test(enable = "neon")] + unsafe fn test_vhsub_s32() { + test_ari_s32( + |i, j| vhsub_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_s32() { + testq_ari_s32( + |i, j| vhsubq_s32(i, j), + |a: i32, b: i32| -> i32 { (((a as i64) - (b as i64)) / 2) as i32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u8() { + test_ari_u8( + |i, j| vhsub_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u8() { + testq_ari_u8( + |i, j| vhsubq_u8(i, j), + |a: u8, b: u8| -> u8 { (((a as u16) - (b as u16)) / 2) as u8 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u16() { + test_ari_u16( + |i, j| vhsub_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u32) - (b as u32)) / 2) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u16() { + testq_ari_u16( + |i, j| vhsubq_u16(i, j), + |a: u16, b: u16| -> u16 { (((a as u32) - (b as u32)) / 2) as u16 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsub_u32() { + test_ari_u32( + |i, j| vhsub_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 }, + ); + } + #[simd_test(enable = "neon")] + unsafe fn test_vhsubq_u32() { + testq_ari_u32( + |i, j| vhsubq_u32(i, j), + |a: u32, b: u32| -> u32 { (((a as u64) - (b as u64)) / 2) as u32 }, + ); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vabs_s8() { + let a = i8x8::new(-1, 0, 1, -2, 0, 2, -128, 127); + let r: i8x8 = transmute(vabs_s8(transmute(a))); + let e = i8x8::new(1, 0, 1, 2, 0, 2, -128, 127); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabsq_s8() { + let a = i8x16::new(-1, 0, 1, -2, 0, 2, -128, 127, -1, 0, 1, -2, 0, 2, -128, 127); + let r: i8x16 = transmute(vabsq_s8(transmute(a))); + let e = i8x16::new(1, 0, 1, 2, 0, 2, -128, 127, 1, 0, 1, 2, 0, 2, -128, 127); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabs_s16() { + let a = i16x4::new(-1, 0, i16::MIN, i16::MAX); + let r: i16x4 = transmute(vabs_s16(transmute(a))); + let e = i16x4::new(1, 0, i16::MIN, i16::MAX); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabsq_s16() { + let a = i16x8::new(-1, 0, i16::MIN, i16::MAX, -1, 0, i16::MIN, i16::MAX); + let r: i16x8 = transmute(vabsq_s16(transmute(a))); + let e = i16x8::new(1, 0, i16::MIN, i16::MAX, 1, 0, i16::MIN, i16::MAX); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabs_s32() { + let a = i32x2::new(i32::MIN, i32::MIN + 1); + let r: i32x2 = transmute(vabs_s32(transmute(a))); + let e = i32x2::new(i32::MIN, i32::MAX); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabsq_s32() { + let a = i32x4::new(i32::MIN, i32::MIN + 1, 0, -1); + let r: i32x4 = transmute(vabsq_s32(transmute(a))); + let e = i32x4::new(i32::MIN, i32::MAX, 0, 1); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vaba_s8() { + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = i8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let c = i8x8::new(10, 9, 8, 7, 6, 5, 4, 3); + let r: i8x8 = transmute(vaba_s8(transmute(a), transmute(b), transmute(c))); + let e = i8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_s16() { + let a = i16x4::new(1, 2, 3, 4); + let b = i16x4::new(1, 1, 1, 1); + let c = i16x4::new(10, 9, 8, 7); + let r: i16x4 = 
transmute(vaba_s16(transmute(a), transmute(b), transmute(c))); + let e = i16x4::new(10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_s32() { + let a = i32x2::new(1, 2); + let b = i32x2::new(1, 1); + let c = i32x2::new(10, 9); + let r: i32x2 = transmute(vaba_s32(transmute(a), transmute(b), transmute(c))); + let e = i32x2::new(10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_u8() { + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u8x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let c = u8x8::new(10, 9, 8, 7, 6, 5, 4, 3); + let r: u8x8 = transmute(vaba_u8(transmute(a), transmute(b), transmute(c))); + let e = u8x8::new(10, 10, 10, 10, 10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_u16() { + let a = u16x4::new(1, 2, 3, 4); + let b = u16x4::new(1, 1, 1, 1); + let c = u16x4::new(10, 9, 8, 7); + let r: u16x4 = transmute(vaba_u16(transmute(a), transmute(b), transmute(c))); + let e = u16x4::new(10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vaba_u32() { + let a = u32x2::new(1, 2); + let b = u32x2::new(1, 1); + let c = u32x2::new(10, 9); + let r: u32x2 = transmute(vaba_u32(transmute(a), transmute(b), transmute(c))); + let e = u32x2::new(10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_s8() { + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2); + let b = i8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let c = i8x16::new(10, 9, 8, 7, 6, 5, 4, 3, 12, 13, 14, 15, 16, 17, 18, 19); + let r: i8x16 = transmute(vabaq_s8(transmute(a), transmute(b), transmute(c))); + let e = i8x16::new( + 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, + ); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_s16() { + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = i16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let c = i16x8::new(10, 9, 8, 7, 6, 5, 4, 3); + let r: i16x8 = transmute(vabaq_s16(transmute(a), transmute(b), transmute(c))); + let e = i16x8::new(10, 10, 10, 10, 10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_s32() { + let a = i32x4::new(1, 2, 3, 4); + let b = i32x4::new(1, 1, 1, 1); + let c = i32x4::new(10, 9, 8, 7); + let r: i32x4 = transmute(vabaq_s32(transmute(a), transmute(b), transmute(c))); + let e = i32x4::new(10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_u8() { + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 8, 7, 6, 5, 4, 3, 2); + let b = u8x16::new(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1); + let c = u8x16::new(10, 9, 8, 7, 6, 5, 4, 3, 12, 13, 14, 15, 16, 17, 18, 19); + let r: u8x16 = transmute(vabaq_u8(transmute(a), transmute(b), transmute(c))); + let e = u8x16::new( + 10, 10, 10, 10, 10, 10, 10, 10, 20, 20, 20, 20, 20, 20, 20, 20, + ); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_u16() { + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u16x8::new(1, 1, 1, 1, 1, 1, 1, 1); + let c = u16x8::new(10, 9, 8, 7, 6, 5, 4, 3); + let r: u16x8 = transmute(vabaq_u16(transmute(a), transmute(b), transmute(c))); + let e = u16x8::new(10, 10, 10, 10, 10, 10, 10, 10); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vabaq_u32() { + let a = u32x4::new(1, 2, 3, 4); + let b = u32x4::new(1, 1, 1, 1); + let c = u32x4::new(10, 9, 8, 7); + let r: u32x4 = transmute(vabaq_u32(transmute(a), 
transmute(b), transmute(c))); + let e = u32x4::new(10, 10, 10, 10); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon")] + unsafe fn test_vpadd_s16() { + let a = i16x4::new(1, 2, 3, 4); + let b = i16x4::new(0, -1, -2, -3); + let r: i16x4 = transmute(vpadd_s16(transmute(a), transmute(b))); + let e = i16x4::new(3, 7, -1, -5); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vpadd_s32() { + let a = i32x2::new(1, 2); + let b = i32x2::new(0, -1); + let r: i32x2 = transmute(vpadd_s32(transmute(a), transmute(b))); + let e = i32x2::new(3, -1); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vpadd_s8() { + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = i8x8::new(0, -1, -2, -3, -4, -5, -6, -7); + let r: i8x8 = transmute(vpadd_s8(transmute(a), transmute(b))); + let e = i8x8::new(3, 7, 11, 15, -1, -5, -9, -13); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vpadd_u16() { + let a = u16x4::new(1, 2, 3, 4); + let b = u16x4::new(30, 31, 32, 33); + let r: u16x4 = transmute(vpadd_u16(transmute(a), transmute(b))); + let e = u16x4::new(3, 7, 61, 65); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vpadd_u32() { + let a = u32x2::new(1, 2); + let b = u32x2::new(30, 31); + let r: u32x2 = transmute(vpadd_u32(transmute(a), transmute(b))); + let e = u32x2::new(3, 61); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vpadd_u8() { + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + let b = u8x8::new(30, 31, 32, 33, 34, 35, 36, 37); + let r: u8x8 = transmute(vpadd_u8(transmute(a), transmute(b))); + let e = u8x8::new(3, 7, 11, 15, 61, 65, 69, 73); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcnt_s8() { + let a: i8x8 = transmute(u8x8::new( + 0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000, + 0b00111111, + )); + let e = i8x8::new(3, 8, 0, 7, 2, 4, 1, 6); + let r: i8x8 = transmute(vcnt_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcntq_s8() { + let a: i8x16 = transmute(u8x16::new( + 0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000, + 0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111, + 0b11100000, 0b00010000, + )); + let e = i8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1); + let r: i8x16 = transmute(vcntq_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcnt_u8() { + let a = u8x8::new( + 0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000, + 0b00111111, + ); + let e = u8x8::new(3, 8, 0, 7, 2, 4, 1, 6); + let r: u8x8 = transmute(vcnt_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcntq_u8() { + let a = u8x16::new( + 0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000, + 0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111, + 0b11100000, 0b00010000, + ); + let e = u8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1); + let r: u8x16 = transmute(vcntq_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vcnt_p8() { + let a = u8x8::new( + 0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000, + 0b00111111, + ); + let e = u8x8::new(3, 8, 0, 7, 2, 4, 1, 6); + let r: u8x8 = transmute(vcnt_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + 
unsafe fn test_vcntq_p8() { + let a = u8x16::new( + 0b11001000, 0b11111111, 0b00000000, 0b11011111, 0b10000001, 0b10101001, 0b00001000, + 0b00111111, 0b11101110, 0b00000000, 0b11111111, 0b00100001, 0b11111111, 0b10010111, + 0b11100000, 0b00010000, + ); + let e = u8x16::new(3, 8, 0, 7, 2, 4, 1, 6, 6, 0, 8, 2, 8, 5, 3, 1); + let r: u8x16 = transmute(vcntq_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i8x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: i8x8 = transmute(vrev16_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16q_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = i8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + let e: i8x16 = transmute(vrev16q_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: u8x8 = transmute(vrev16_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16q_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + let e: u8x16 = transmute(vrev16q_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16_p8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i8x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: i8x8 = transmute(vrev16_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev16q_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14); + let e: u8x16 = transmute(vrev16q_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i8x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: i8x8 = transmute(vrev32_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = i8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + let e: i8x16 = transmute(vrev32q_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: u8x8 = transmute(vrev32_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + let e: u8x16 = transmute(vrev32q_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_s16() { + let a = i16x4::new(0, 1, 2, 3); + let r = i16x4::new(1, 0, 3, 2); + let e: i16x4 = transmute(vrev32_s16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: i16x8 = transmute(vrev32q_s16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_p16() { + let a = i16x4::new(0, 1, 2, 
3); + let r = i16x4::new(1, 0, 3, 2); + let e: i16x4 = transmute(vrev32_p16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_p16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i16x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: i16x8 = transmute(vrev32q_p16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_u16() { + let a = u16x4::new(0, 1, 2, 3); + let r = u16x4::new(1, 0, 3, 2); + let e: u16x4 = transmute(vrev32_u16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u16x8::new(1, 0, 3, 2, 5, 4, 7, 6); + let e: u16x8 = transmute(vrev32q_u16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: u8x8 = transmute(vrev32_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev32q_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12); + let e: u8x16 = transmute(vrev32q_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_s8() { + let a = i8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i8x8::new(7, 6, 5, 4, 3, 2, 1, 0); + let e: i8x8 = transmute(vrev64_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_s8() { + let a = i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = i8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + let e: i8x16 = transmute(vrev64q_s8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_s16() { + let a = i16x4::new(0, 1, 2, 3); + let r = i16x4::new(3, 2, 1, 0); + let e: i16x4 = transmute(vrev64_s16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_s16() { + let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = i16x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: i16x8 = transmute(vrev64q_s16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_s32() { + let a = i32x2::new(0, 1); + let r = i32x2::new(1, 0); + let e: i32x2 = transmute(vrev64_s32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_s32() { + let a = i32x4::new(0, 1, 2, 3); + let r = i32x4::new(1, 0, 3, 2); + let e: i32x4 = transmute(vrev64q_s32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_u8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(7, 6, 5, 4, 3, 2, 1, 0); + let e: u8x8 = transmute(vrev64_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_u8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + let e: u8x16 = transmute(vrev64q_u8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_u16() { + let a = u16x4::new(0, 1, 2, 3); + let r = u16x4::new(3, 2, 1, 0); + let e: u16x4 = transmute(vrev64_u16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_u16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = 
u16x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: u16x8 = transmute(vrev64q_u16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_u32() { + let a = u32x2::new(0, 1); + let r = u32x2::new(1, 0); + let e: u32x2 = transmute(vrev64_u32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_u32() { + let a = u32x4::new(0, 1, 2, 3); + let r = u32x4::new(1, 0, 3, 2); + let e: u32x4 = transmute(vrev64q_u32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_f32() { + let a = f32x2::new(1.0, 2.0); + let r = f32x2::new(2.0, 1.0); + let e: f32x2 = transmute(vrev64_f32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_f32() { + let a = f32x4::new(1.0, 2.0, -2.0, -1.0); + let r = f32x4::new(2.0, 1.0, -1.0, -2.0); + let e: f32x4 = transmute(vrev64q_f32(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_p8() { + let a = u8x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u8x8::new(7, 6, 5, 4, 3, 2, 1, 0); + let e: u8x8 = transmute(vrev64_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_p8() { + let a = u8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = u8x16::new(7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8); + let e: u8x16 = transmute(vrev64q_p8(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64_p16() { + let a = u16x4::new(0, 1, 2, 3); + let r = u16x4::new(3, 2, 1, 0); + let e: u16x4 = transmute(vrev64_p16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon")] + unsafe fn test_vrev64q_p16() { + let a = u16x8::new(0, 1, 2, 3, 4, 5, 6, 7); + let r = u16x8::new(3, 2, 1, 0, 7, 6, 5, 4); + let e: u16x8 = transmute(vrev64q_p16(transmute(a))); + assert_eq!(r, e); + } + #[simd_test(enable = "neon,i8mm")] + unsafe fn test_vmmlaq_s32() { + let a: i32x4 = i32x4::new(1, 3, 4, 9); + let b: i8x16 = i8x16::new(1, 21, 31, 14, 5, 6, 17, 8, 9, 13, 15, 12, 13, 19, 20, 16); + let c: i8x16 = i8x16::new(12, 22, 3, 4, 5, 56, 7, 8, 91, 10, 11, 15, 13, 14, 17, 16); + // Hand-computed from the SMMLA semantics, with b and c viewed as 2x8 row-major + // byte matrices: e[2*i + j] = a[2*i + j] + dot(row i of b, row j of c). + let e: i32x4 = i32x4::new(1168, 1421, 1888, 2334); + let r: i32x4 = transmute(vmmlaq_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon,i8mm")] + unsafe fn test_vmmlaq_u32() { + let a: u32x4 = u32x4::new(1, 3, 4, 9); + let b: i8x16 = i8x16::new(1, 21, 31, 14, 5, 6, 17, 8, 9, 13, 15, 12, 13, 19, 20, 16); + let c: i8x16 = i8x16::new(12, 22, 3, 4, 5, 56, 7, 8, 91, 10, 11, 15, 13, 14, 17, 16); + // Same operands as the signed case and every byte is non-negative, so the + // expected matrix product is identical. + let e: u32x4 = u32x4::new(1168, 1421, 1888, 2334); + let r: u32x4 = transmute(vmmlaq_u32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } + + #[simd_test(enable = "neon,i8mm")] + unsafe fn test_vusmmlaq_s32() { + let a: i32x4 = i32x4::new(1, 3, 4, 9); + let b: i8x16 = i8x16::new(1, 21, 31, 14, 5, 6, 17, 8, 9, 13, 15, 12, 13, 19, 20, 16); + let c: i8x16 = i8x16::new(12, 22, 3, 4, 5, 56, 7, 8, 91, 10, 11, 15, 13, 14, 17, 16); + // All bytes fit in 0..=127, so the unsigned-by-signed product also matches the + // signed case. + let e: i32x4 = i32x4::new(1168, 1421, 1888, 2334); + let r: i32x4 = transmute(vusmmlaq_s32(transmute(a), transmute(b), transmute(c))); + assert_eq!(r, e); + } +} + +#[cfg(all(test, target_arch = "arm", target_endian = "little"))] +mod table_lookup_tests; + +#[cfg(all(test, target_arch = "arm"))] +mod shift_and_insert_tests; + +#[cfg(all(test, target_arch = "arm"))] +mod load_tests; + +#[cfg(all(test, target_arch = "arm"))] +mod store_tests; diff --git
a/library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs new file mode 100644 index 000000000..ebb8b7b9e --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/shift_and_insert_tests.rs @@ -0,0 +1,93 @@ +//! Tests for ARM+v7+neon shift and insert (vsli[q]_n, vsri[q]_n) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. + +use super::*; + +#[cfg(target_arch = "aarch64")] +use crate::core_arch::aarch64::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +use crate::core_arch::simd::*; +use std::mem::transmute; +use stdarch_test::simd_test; + +macro_rules! test_vsli { + ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => { + #[simd_test(enable = "neon")] + #[allow(unused_assignments)] + unsafe fn $test_id() { + let a = [$($a as $t),*]; + let b = [$($b as $t),*]; + let n_bit_mask: $t = (1 << $n) - 1; + let e = [$(($a as $t & n_bit_mask) | ($b as $t << $n)),*]; + let r = $fn_id::<$n>(transmute(a), transmute(b)); + let mut d = e; + d = transmute(r); + assert_eq!(d, e); + } + } +} +test_vsli!(test_vsli_n_s8, i8 => vsli_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5)); +test_vsli!(test_vsliq_n_s8, i8 => vsliq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2)); +test_vsli!(test_vsli_n_s16, i16 => vsli_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7)); +test_vsli!(test_vsliq_n_s16, i16 => vsliq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14)); +test_vsli!(test_vsli_n_s32, i32 => vsli_n_s32([125683, -78901], [-128, -112944], 23)); +test_vsli!(test_vsliq_n_s32, i32 => vsliq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15)); +test_vsli!(test_vsli_n_s64, i64 => vsli_n_s64([-333333], [1028], 45)); +test_vsli!(test_vsliq_n_s64, i64 => vsliq_n_s64([-333333, -52023], [1028, -99814], 33)); +test_vsli!(test_vsli_n_u8, u8 => vsli_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5)); +test_vsli!(test_vsliq_n_u8, u8 => vsliq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); +test_vsli!(test_vsli_n_u16, u16 => vsli_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); +test_vsli!(test_vsliq_n_u16, u16 => vsliq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); +test_vsli!(test_vsli_n_u32, u32 => vsli_n_u32([125683, 78901], [128, 112944], 23)); +test_vsli!(test_vsliq_n_u32, u32 => vsliq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15)); +test_vsli!(test_vsli_n_u64, u64 => vsli_n_u64([333333], [1028], 45)); +test_vsli!(test_vsliq_n_u64, u64 => vsliq_n_u64([333333, 52023], [1028, 99814], 33)); +test_vsli!(test_vsli_n_p8, i8 => vsli_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5)); +test_vsli!(test_vsliq_n_p8, i8 => vsliq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); +test_vsli!(test_vsli_n_p16, i16 => vsli_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); +test_vsli!(test_vsliq_n_p16, i16 => vsliq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); 
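+// Hand-worked sanity check of the VSLI model encoded by the macro above (an
+// illustrative, hand-computed example, not one of the generated tests): in
+// `test_vsli_n_s8` (N = 5), lane 1 has a = -44 = 0b1101_0100 and
+// b = -14 = 0b1111_0010; with mask = (1 << 5) - 1 = 0b0001_1111 the macro expects
+// (a & mask) | (b << 5) = 0b0001_0100 | 0b0100_0000 = 84, i.e. the low five bits
+// of `a` survive and the low three bits of `b` are inserted above them.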
+test_vsli!(test_vsli_n_p64, i64 => vsli_n_p64([333333], [1028], 45)); +test_vsli!(test_vsliq_n_p64, i64 => vsliq_n_p64([333333, 52023], [1028, 99814], 33)); + +macro_rules! test_vsri { + ($test_id:ident, $t:ty => $fn_id:ident ([$($a:expr),*], [$($b:expr),*], $n:expr)) => { + #[simd_test(enable = "neon")] + #[allow(unused_assignments)] + unsafe fn $test_id() { + let a = [$($a as $t),*]; + let b = [$($b as $t),*]; + let n_bit_mask = ((1 as $t << $n) - 1).rotate_right($n); + let e = [$(($a as $t & n_bit_mask) | (($b as $t >> $n) & !n_bit_mask)),*]; + let r = $fn_id::<$n>(transmute(a), transmute(b)); + let mut d = e; + d = transmute(r); + assert_eq!(d, e); + } + } +} +test_vsri!(test_vsri_n_s8, i8 => vsri_n_s8([3, -44, 127, -56, 0, 24, -97, 10], [-128, -14, 125, -77, 27, 8, -1, 110], 5)); +test_vsri!(test_vsriq_n_s8, i8 => vsriq_n_s8([3, -44, 127, -56, 0, 24, -97, 10, -33, 1, -6, -39, 15, 101, -80, -1], [-128, -14, 125, -77, 27, 8, -1, 110, -4, -92, 111, 32, 1, -4, -29, 99], 2)); +test_vsri!(test_vsri_n_s16, i16 => vsri_n_s16([3304, -44, 2300, -546], [-1208, -140, 1225, -707], 7)); +test_vsri!(test_vsriq_n_s16, i16 => vsriq_n_s16([3304, -44, 2300, -20046, 0, 9924, -907, 1190], [-1208, -140, 4225, -707, 2701, 804, -71, 2110], 14)); +test_vsri!(test_vsri_n_s32, i32 => vsri_n_s32([125683, -78901], [-128, -112944], 23)); +test_vsri!(test_vsriq_n_s32, i32 => vsriq_n_s32([125683, -78901, 127, -12009], [-128, -112944, 125, -707], 15)); +test_vsri!(test_vsri_n_s64, i64 => vsri_n_s64([-333333], [1028], 45)); +test_vsri!(test_vsriq_n_s64, i64 => vsriq_n_s64([-333333, -52023], [1028, -99814], 33)); +test_vsri!(test_vsri_n_u8, u8 => vsri_n_u8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5)); +test_vsri!(test_vsriq_n_u8, u8 => vsriq_n_u8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); +test_vsri!(test_vsri_n_u16, u16 => vsri_n_u16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); +test_vsri!(test_vsriq_n_u16, u16 => vsriq_n_u16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); +test_vsri!(test_vsri_n_u32, u32 => vsri_n_u32([125683, 78901], [128, 112944], 23)); +test_vsri!(test_vsriq_n_u32, u32 => vsriq_n_u32([125683, 78901, 127, 12009], [128, 112944, 125, 707], 15)); +test_vsri!(test_vsri_n_u64, u64 => vsri_n_u64([333333], [1028], 45)); +test_vsri!(test_vsriq_n_u64, u64 => vsriq_n_u64([333333, 52023], [1028, 99814], 33)); +test_vsri!(test_vsri_n_p8, i8 => vsri_n_p8([3, 44, 127, 56, 0, 24, 97, 10], [127, 14, 125, 77, 27, 8, 1, 110], 5)); +test_vsri!(test_vsriq_n_p8, i8 => vsriq_n_p8([3, 44, 127, 56, 0, 24, 97, 10, 33, 1, 6, 39, 15, 101, 80, 1], [127, 14, 125, 77, 27, 8, 1, 110, 4, 92, 111, 32, 1, 4, 29, 99], 2)); +test_vsri!(test_vsri_n_p16, i16 => vsri_n_p16([3304, 44, 2300, 546], [1208, 140, 1225, 707], 7)); +test_vsri!(test_vsriq_n_p16, i16 => vsriq_n_p16([3304, 44, 2300, 20046, 0, 9924, 907, 1190], [1208, 140, 4225, 707, 2701, 804, 71, 2110], 14)); +test_vsri!(test_vsri_n_p64, i64 => vsri_n_p64([333333], [1028], 45)); +test_vsri!(test_vsriq_n_p64, i64 => vsriq_n_p64([333333, 52023], [1028, 99814], 33)); diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs new file mode 100644 index 000000000..cad660e87 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/store_tests.rs @@ -0,0 +1,389 @@ +//! 
Tests for ARM+v7+neon store (vst1) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. + +use super::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +#[cfg(target_arch = "aarch64")] +use crate::core_arch::aarch64::*; + +use crate::core_arch::simd::*; +use stdarch_test::simd_test; + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s8() { + let mut vals = [0_i8; 9]; + let a = i8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_s8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s8() { + let mut vals = [0_i8; 17]; + let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_s8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s16() { + let mut vals = [0_i16; 5]; + let a = i16x4::new(1, 2, 3, 4); + + vst1_s16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s16() { + let mut vals = [0_i16; 9]; + let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_s16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s32() { + let mut vals = [0_i32; 3]; + let a = i32x2::new(1, 2); + + vst1_s32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s32() { + let mut vals = [0_i32; 5]; + let a = i32x4::new(1, 2, 3, 4); + + vst1q_s32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_s64() { + let mut vals = [0_i64; 2]; + let a = i64x1::new(1); + + vst1_s64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_s64() { + let mut vals = [0_i64; 3]; + let a = i64x2::new(1, 2); + + vst1q_s64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u8() { + let mut vals = [0_u8; 9]; + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_u8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); 
+ assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u8() { + let mut vals = [0_u8; 17]; + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_u8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u16() { + let mut vals = [0_u16; 5]; + let a = u16x4::new(1, 2, 3, 4); + + vst1_u16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u16() { + let mut vals = [0_u16; 9]; + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_u16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u32() { + let mut vals = [0_u32; 3]; + let a = u32x2::new(1, 2); + + vst1_u32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u32() { + let mut vals = [0_u32; 5]; + let a = u32x4::new(1, 2, 3, 4); + + vst1q_u32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_u64() { + let mut vals = [0_u64; 2]; + let a = u64x1::new(1); + + vst1_u64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_u64() { + let mut vals = [0_u64; 3]; + let a = u64x2::new(1, 2); + + vst1q_u64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_p8() { + let mut vals = [0_u8; 9]; + let a = u8x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1_p8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_p8() { + let mut vals = [0_u8; 17]; + let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + + vst1q_p8(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); + assert_eq!(vals[9], 9); + assert_eq!(vals[10], 10); + assert_eq!(vals[11], 11); + assert_eq!(vals[12], 12); + 
assert_eq!(vals[13], 13); + assert_eq!(vals[14], 14); + assert_eq!(vals[15], 15); + assert_eq!(vals[16], 16); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_p16() { + let mut vals = [0_u16; 5]; + let a = u16x4::new(1, 2, 3, 4); + + vst1_p16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_p16() { + let mut vals = [0_u16; 9]; + let a = u16x8::new(1, 2, 3, 4, 5, 6, 7, 8); + + vst1q_p16(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); + assert_eq!(vals[3], 3); + assert_eq!(vals[4], 4); + assert_eq!(vals[5], 5); + assert_eq!(vals[6], 6); + assert_eq!(vals[7], 7); + assert_eq!(vals[8], 8); +} + +#[simd_test(enable = "neon,aes")] +unsafe fn test_vst1_p64() { + let mut vals = [0_u64; 2]; + let a = u64x1::new(1); + + vst1_p64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); +} + +#[simd_test(enable = "neon,aes")] +unsafe fn test_vst1q_p64() { + let mut vals = [0_u64; 3]; + let a = u64x2::new(1, 2); + + vst1q_p64(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0); + assert_eq!(vals[1], 1); + assert_eq!(vals[2], 2); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1_f32() { + let mut vals = [0_f32; 3]; + let a = f32x2::new(1., 2.); + + vst1_f32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); +} + +#[simd_test(enable = "neon")] +unsafe fn test_vst1q_f32() { + let mut vals = [0_f32; 5]; + let a = f32x4::new(1., 2., 3., 4.); + + vst1q_f32(vals[1..].as_mut_ptr(), transmute(a)); + + assert_eq!(vals[0], 0.); + assert_eq!(vals[1], 1.); + assert_eq!(vals[2], 2.); + assert_eq!(vals[3], 3.); + assert_eq!(vals[4], 4.); +} diff --git a/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs b/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs new file mode 100644 index 000000000..15aa2f269 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/arm_shared/neon/table_lookup_tests.rs @@ -0,0 +1,1042 @@ +//! Tests for ARM+v7+neon table lookup (vtbl, vtbx) intrinsics. +//! +//! These are included in `{arm, aarch64}::neon`. + +use super::*; + +#[cfg(target_arch = "aarch64")] +use crate::core_arch::aarch64::*; + +#[cfg(target_arch = "arm")] +use crate::core_arch::arm::*; + +use crate::core_arch::simd::*; +use std::mem; +use stdarch_test::simd_test; + +macro_rules! 
test_vtbl { + ($test_name:ident => $fn_id:ident: + - table[$table_t:ident]: [$($table_v:expr),*] | + $(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|* + ) => { + #[simd_test(enable = "neon")] + unsafe fn $test_name() { + // create table as array, and transmute it to + // arm's table type + let table: $table_t = mem::transmute([$($table_v),*]); + + // For each control vector, perform a table lookup and + // verify the result: + $( + { + let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]); + let result = $fn_id(table, mem::transmute(ctrl)); + let result: $ctrl_t = mem::transmute(result); + let expected: $ctrl_t = mem::transmute([$($exp_v),*]); + assert_eq!(result, expected); + } + )* + } + } +} + +// ARM+v7+neon and AArch64+neon tests + +test_vtbl!( + test_vtbl1_s8 => vtbl1_s8: + - table[int8x8_t]: [0_i8, -11, 2, 3, 4, 5, 6, 7] | + - ctrl[i8x8]: [3_i8, 4, 1, 6, 0, 2, 7, 5] => [3_i8, 4, -11, 6, 0, 2, 7, 5] | + - ctrl[i8x8]: [3_i8, 8, 1, -9, 10, 2, 15, 5] => [3_i8, 0, -11, 0, 0, 2, 0, 5] +); + +test_vtbl!( + test_vtbl1_u8 => vtbl1_u8: + - table[uint8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 0, 1, 0, 0, 2, 0, 5] +); + +test_vtbl!( + test_vtbl1_p8 => vtbl1_p8: + - table[poly8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 0, 1, 0, 0, 2, 0, 5] +); + +test_vtbl!( + test_vtbl2_s8 => vtbl2_s8: + - table[int8x8x2_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [0_i8, -121, -17, -72, 34, -116, 51, -104] | + - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, -19, 7, 18] => [68_i8, -117, 0, -84, 102, 0, 119, 0] +); + +test_vtbl!( + test_vtbl2_u8 => vtbl2_u8: + - table[uint8x8x2_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 255, 17, 238, 34, 221, 51, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 0] +); + +test_vtbl!( + test_vtbl2_p8 => vtbl2_p8: + - table[poly8x8x2_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 255, 17, 238, 34, 221, 51, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 0] +); + +test_vtbl!( + test_vtbl3_s8 => vtbl3_s8: + - table[int8x8x3_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121, + 0, 1, -2, 3, 4, -5, 6, 7 + ] | + - ctrl[i8x8]: [127_i8, 15, 1, 19, 2, 13, 21, 12] => [0_i8, -121, -17, 3, 34, -116, -5, -104] | + - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, -27, 7, 18] => [68_i8, -117, 0, -84, 102, 0, 119, -2] +); + +test_vtbl!( + test_vtbl3_u8 => vtbl3_u8: + - table[uint8x8x3_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255, + 0, 1, 2, 3, 4, 5, 6, 7 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 2] +); + +test_vtbl!( + test_vtbl3_p8 => vtbl3_p8: + - table[poly8x8x3_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255, + 0, 1, 2, 3, 4, 5, 6, 7 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => 
[0_u8, 255, 17, 3, 34, 221, 5, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 0, 119, 2] +); + +test_vtbl!( + test_vtbl4_s8 => vtbl4_s8: + - table[int8x8x4_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121, + 0, 1, -2, 3, 4, -5, 6, 7, + 8, -9, 10, 11, 12, -13, 14, 15 + ] | + - ctrl[i8x8]: [127_i8, 15, 1, 19, 2, 13, 25, 12] => [0_i8, -121, -17, 3, 34, -116, -9, -104] | + - ctrl[i8x8]: [4_i8, 11, 32, 10, -33, 27, 7, 18] => [68_i8, -117, 0, -84, 0, 11, 119, -2] +); + +test_vtbl!( + test_vtbl4_u8 => vtbl4_u8: + - table[uint8x8x4_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 11, 119, 2] +); + +test_vtbl!( + test_vtbl4_p8 => vtbl4_p8: + - table[poly8x8x4_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 136, 153, 170, 187, 204, 221, 238, 255, + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 19, 2, 13, 21, 12] => [0_u8, 255, 17, 3, 34, 221, 5, 204] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 27, 7, 18] => [68_u8, 187, 0, 170, 102, 11, 119, 2] +); + +macro_rules! test_vtbx { + ($test_name:ident => $fn_id:ident: + - table[$table_t:ident]: [$($table_v:expr),*] | + - ext[$ext_t:ident]: [$($ext_v:expr),*] | + $(- ctrl[$ctrl_t:ident]: [$($ctrl_v:expr),*] => [$($exp_v:expr),*])|* + ) => { + #[simd_test(enable = "neon")] + unsafe fn $test_name() { + // create table as array, and transmute it to + // arm's table type + let table: $table_t = mem::transmute([$($table_v),*]); + let ext: $ext_t = mem::transmute([$($ext_v),*]); + + // For each control vector, perform a table lookup and + // verify the result: + $( + { + let ctrl: $ctrl_t = mem::transmute([$($ctrl_v),*]); + let result = $fn_id(ext, table, mem::transmute(ctrl)); + let result: $ctrl_t = mem::transmute(result); + let expected: $ctrl_t = mem::transmute([$($exp_v),*]); + assert_eq!(result, expected); + } + )* + } + } +} + +test_vtbx!( + test_vtbx1_s8 => vtbx1_s8: + - table[int8x8_t]: [0_i8, 1, 2, -3, 4, 5, 6, 7] | + - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[i8x8]: [3_i8, 4, 1, 6, 0, 2, 7, 5] => [-3_i8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[i8x8]: [3_i8, 8, 1, 9, 10, 2, -15, 5] => [-3_i8, 51, 1, 53, 54, 2, 56, 5] +); + +test_vtbx!( + test_vtbx1_u8 => vtbx1_u8: + - table[uint8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | + - ext[uint8x8_t]: [50_u8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 51, 1, 53, 54, 2, 56, 5] +); + +test_vtbx!( + test_vtbx1_p8 => vtbx1_p8: + - table[poly8x8_t]: [0_u8, 1, 2, 3, 4, 5, 6, 7] | + - ext[poly8x8_t]: [50_u8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 0, 2, 7, 5] => [3_u8, 4, 1, 6, 0, 2, 7, 5] | + - ctrl[u8x8]: [3_u8, 8, 1, 9, 10, 2, 15, 5] => [3_u8, 51, 1, 53, 54, 2, 56, 5] +); + +test_vtbx!( + test_vtbx2_s8 => vtbx2_s8: + - table[int8x8x2_t]: [0_i8, 1, 2, -3, 4, 5, 6, 7, 8, 9, -10, 11, 12, -13, 14, 15] | + - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[i8x8]: [3_i8, 4, 1, 6, 10, 2, 7, 15] => [-3_i8, 4, 1, 6, -10, 2, 7, 15] | + - ctrl[i8x8]: [3_i8, 8, 1, 10, 17, 2, 15, -19] => [-3_i8, 8, 1, -10, 54, 2, 15, 57] +); + +test_vtbx!( + test_vtbx2_u8 => vtbx2_u8: + - 
table[uint8x8x2_t]: [0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | + - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 10, 2, 7, 15] => [3_i8, 4, 1, 6, 10, 2, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 1, 10, 17, 2, 15, 19] => [3_i8, 8, 1, 10, 54, 2, 15, 57] +); + +test_vtbx!( + test_vtbx2_p8 => vtbx2_p8: + - table[poly8x8x2_t]: [0_i8, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] | + - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 1, 6, 10, 2, 7, 15] => [3_i8, 4, 1, 6, 10, 2, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 1, 10, 17, 2, 15, 19] => [3_i8, 8, 1, 10, 54, 2, 15, 57] +); + +test_vtbx!( + test_vtbx3_s8 => vtbx3_s8: + - table[int8x8x3_t]: [ + 0_i8, 1, 2, -3, 4, 5, 6, 7, + 8, 9, -10, 11, 12, -13, 14, 15, + 16, -17, 18, 19, 20, 21, 22, 23 ] | + - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[i8x8]: [3_i8, 4, 17, 22, 10, 2, 7, 15] => [-3_i8, 4, -17, 22, -10, 2, 7, 15] | + - ctrl[i8x8]: [3_i8, 8, 17, 10, 37, 2, 19, -29] => [-3_i8, 8, -17, -10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx3_u8 => vtbx3_u8: + - table[uint8x8x3_t]: [ + 0_i8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23 ] | + - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 17, 22, 10, 2, 7, 15] => [3_i8, 4, 17, 22, 10, 2, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 29] => [3_i8, 8, 17, 10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx3_p8 => vtbx3_p8: + - table[poly8x8x3_t]: [ + 0_i8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23 ] | + - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 4, 17, 22, 10, 2, 7, 15] => [3_i8, 4, 17, 22, 10, 2, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 29] => [3_i8, 8, 17, 10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx4_s8 => vtbx4_s8: + - table[int8x8x4_t]: [ + 0_i8, 1, 2, -3, 4, 5, 6, 7, + 8, 9, -10, 11, 12, -13, 14, 15, + 16, -17, 18, 19, 20, 21, 22, 23, + -24, 25, 26, -27, 28, -29, 30, 31] | + - ext[int8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[i8x8]: [3_i8, 31, 17, 22, 10, 29, 7, 15] => [-3_i8, 31, -17, 22, -10, -29, 7, 15] | + - ctrl[i8x8]: [3_i8, 8, 17, 10, 37, 2, 19, -42] => [-3_i8, 8, -17, -10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx4_u8 => vtbx4_u8: + - table[uint8x8x4_t]: [ + 0_i8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31] | + - ext[uint8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 31, 17, 22, 10, 29, 7, 15] => [3_i8, 31, 17, 22, 10, 29, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 42] => [3_i8, 8, 17, 10, 54, 2, 19, 57] +); + +test_vtbx!( + test_vtbx4_p8 => vtbx4_p8: + - table[poly8x8x4_t]: [ + 0_i8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31] | + - ext[poly8x8_t]: [50_i8, 51, 52, 53, 54, 55, 56, 57] | + - ctrl[u8x8]: [3_u8, 31, 17, 22, 10, 29, 7, 15] => [3_i8, 31, 17, 22, 10, 29, 7, 15] | + - ctrl[u8x8]: [3_u8, 8, 17, 10, 37, 2, 19, 42] => [3_i8, 8, 17, 10, 54, 2, 19, 57] +); + +// Aarch64 tests + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl1_s8 => vqtbl1_s8: + - table[int8x16_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [0_i8, -121, -17, -72, 34, -116, 51, -104] | + - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, 19, 7, 18] 
=> [68_i8, -117, 0, -84, 102, 0, 119, 0] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl1q_s8 => vqtbl1q_s8: + - table[int8x16_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ctrl[i8x16]: [127_i8, 15, 1, 14, 2, 13, 3, 12, 4_i8, 11, 16, 10, 6, 19, 7, 18] + => [0_i8, -121, -17, -72, 34, -116, 51, -104, 68, -117, 0, -84, 102, 0, 119, 0] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl1_u8 => vqtbl1_u8: + - table[uint8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 121, 17, 72, 34, 116, 51, 104] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 0, 84, 102, 0, 119, 0] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl1q_u8 => vqtbl1q_u8: + - table[uint8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] + => [0_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 0, 84, 102, 0, 119, 0] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl1_p8 => vqtbl1_p8: + - table[poly8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [0_u8, 121, 17, 72, 34, 116, 51, 104] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 0, 84, 102, 0, 119, 0] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl1q_p8 => vqtbl1q_p8: + - table[poly8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] + => [0_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 0, 84, 102, 0, 119, 0] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl2_s8 => vqtbl2_s8: + - table[int8x16x2_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31 + ] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 31, 32, 10, 6, 49, 7, 18] => [4_i8, -31, 0, 10, 6, 0, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl2q_s8 => vqtbl2q_s8: + - table[int8x16x2_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 31, 32, 10, 6, 49, 7, 18] + => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, -31, 0, 10, 6, 0, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl2_u8 => vqtbl2_u8: + - table[uint8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 0, 10, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl2q_u8 => vqtbl2q_u8: + - table[uint8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 0, 10, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( 
+ test_vqtbl2_p8 => vqtbl2_p8: + - table[poly8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 0, 10, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl2q_p8 => vqtbl2q_p8: + - table[poly8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 0, 10, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl3_s8 => vqtbl3_s8: + - table[int8x16x3_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47 + ] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 32, 46, 51, 6, 49, 7, 18] => [4_i8, 32, 46, 0, 6, 0, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl3q_s8 => vqtbl3q_s8: + - table[int8x16x3_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 32, 46, 51, 6, 49, 7, 18] + => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, 32, 46, 0, 6, 0, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl3_u8 => vqtbl3_u8: + - table[uint8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 0, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl3q_u8 => vqtbl3q_u8: + - table[uint8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 0, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl3_p8 => vqtbl3_p8: + - table[poly8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 0, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl3q_p8 => vqtbl3q_p8: + - table[poly8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 
51, 6, 49, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 0, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl4_s8 => vqtbl4_s8: + - table[int8x16x4_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47, + 48, -49, 50, -51, 52, -53, 54, -55, + 56, -57, 58, -59, 60, -61, 62, -63 + ] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [0_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 46, 64, 51, 6, 71, 7, 18] => [4_i8, 46, 0, -51, 6, 0, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl4q_s8 => vqtbl4q_s8: + - table[int8x16x4_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47, + 48, -49, 50, -51, 52, -53, 54, -55, + 56, -57, 58, -59, 60, -61, 62, -63 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 46, 64, 51, 6, 71, 7, 18] + => [0_i8, -15, -1, 24, 2, -13, -3, -29, 4, 46, 0, -51, 6, 0, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl4_u8 => vqtbl4_u8: + - table[uint8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 0, 51, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl4q_u8 => vqtbl4q_u8: + - table[uint8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 0, 51, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl4_p8 => vqtbl4_p8: + - table[poly8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [0_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 0, 51, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbl!( + test_vqtbl4q_p8 => vqtbl4q_p8: + - table[poly8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] + => [0_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 0, 51, 6, 0, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx1_s8 => vqtbx1_s8: + - table[int8x16_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, 
-117, -104, -116, -72, -121 + ] | + - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | + - ctrl[i8x8]: [127_i8, 15, 1, 14, 2, 13, 3, 12] => [100_i8, -121, -17, -72, 34, -116, 51, -104] | + - ctrl[i8x8]: [4_i8, 11, 16, 10, 6, 19, 7, 18] => [68_i8, -117, 102, -84, 102, -105, 119, -107] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx1q_s8 => vqtbx1q_s8: + - table[int8x16_t]: [ + 0_i8, -17, 34, 51, 68, 85, 102, 119, + -106, -93, -84, -117, -104, -116, -72, -121 + ] | + - ext[int8x16_t]: [ + 100_i8, -101, 102, -103, 104, -105, 106, -107, + 108, -109, 110, -111, 112, -113, 114, -115 + ] | + - ctrl[i8x16]: [127_i8, 15, 1, 14, 2, 13, 3, 12, 4_i8, 11, 16, 10, 6, 19, 7, 18] + => [100_i8, -121, -17, -72, 34, -116, 51, -104, 68, -117, 110, -84, 102, -113, 119, -115] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx1_u8 => vqtbx1_u8: + - table[uint8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [100_u8, 121, 17, 72, 34, 116, 51, 104] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 102, 84, 102, 105, 119, 107] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx1q_u8 => vqtbx1q_u8: + - table[uint8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ext[uint8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] + => [100_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 110, 84, 102, 113, 119, 115] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx1_p8 => vqtbx1_p8: + - table[poly8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [127_u8, 15, 1, 14, 2, 13, 3, 12] => [100_u8, 121, 17, 72, 34, 116, 51, 104] | + - ctrl[u8x8]: [4_u8, 11, 16, 10, 6, 19, 7, 18] => [68_u8, 117, 102, 84, 102, 105, 119, 107] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx1q_p8 => vqtbx1q_p8: + - table[poly8x16_t]: [ + 0_u8, 17, 34, 51, 68, 85, 102, 119, + 106, 93, 84, 117, 104, 116, 72, 121 + ] | + - ext[poly8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [127_u8, 15, 1, 14, 2, 13, 3, 12, 4_u8, 11, 16, 10, 6, 19, 7, 18] + => [100_u8, 121, 17, 72, 34, 116, 51, 104, 68, 117, 110, 84, 102, 113, 119, 115] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx2_s8 => vqtbx2_s8: + - table[int8x16x2_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31 + ] | + - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 31, 32, 10, 6, 49, 7, 18] => [4_i8, -31, 102, 10, 6, -105, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx2q_s8 => vqtbx2q_s8: + - table[int8x16x2_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31 + ] | + - ext[int8x16_t]: [ + 100_i8, -101, 102, -103, 104, -105, 106, -107, + 108, -109, 110, -111, 112, -113, 114, -115 + ] | + - ctrl[i8x16]: [80_i8, 
15, 1, 24, 2, 13, 3, 29, 4_i8, 31, 32, 10, 6, 49, 7, 18] + => [100_i8, -15, -1, 24, 2, -13, -3, -29, 4, -31, 110, 10, 6, -113, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx2_u8 => vqtbx2_u8: + - table[uint8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 102, 10, 6, 105, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx2q_u8 => vqtbx2q_u8: + - table[uint8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ext[uint8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 110, 10, 6, 113, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx2_p8 => vqtbx2_p8: + - table[poly8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 31, 32, 10, 6, 49, 7, 18] => [4_u8, 31, 102, 10, 6, 105, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx2q_p8 => vqtbx2q_p8: + - table[poly8x16x2_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31 + ] | + - ext[poly8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 31, 32, 10, 6, 49, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 31, 110, 10, 6, 113, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx3_s8 => vqtbx3_s8: + - table[int8x16x3_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47 + ] | + - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 32, 46, 51, 6, 49, 7, 18] => [4_i8, 32, 46, -103, 6, -105, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx3q_s8 => vqtbx3q_s8: + - table[int8x16x3_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47 + ] | + - ext[int8x16_t]: [ + 100_i8, -101, 102, -103, 104, -105, 106, -107, + 108, -109, 110, -111, 112, -113, 114, -115 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 32, 46, 51, 6, 49, 7, 18] + => [100_i8, -15, -1, 24, 2, -13, -3, -29, 4, 32, 46, -111, 6, -113, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx3_u8 => vqtbx3_u8: + - table[uint8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 
26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 103, 6, 105, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx3q_u8 => vqtbx3q_u8: + - table[uint8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ext[uint8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 111, 6, 113, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx3_p8 => vqtbx3_p8: + - table[poly8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 32, 46, 51, 6, 49, 7, 18] => [4_u8, 32, 46, 103, 6, 105, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx3q_p8 => vqtbx3q_p8: + - table[poly8x16x3_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47 + ] | + - ext[poly8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 32, 46, 51, 6, 49, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 32, 46, 111, 6, 113, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx4_s8 => vqtbx4_s8: + - table[int8x16x4_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47, + 48, -49, 50, -51, 52, -53, 54, -55, + 56, -57, 58, -59, 60, -61, 62, -63 + ] | + - ext[int8x8_t]: [100_i8, -101, 102, -103, 104, -105, 106, -107] | + - ctrl[i8x8]: [80_i8, 15, 1, 24, 2, 13, 3, 29] => [100_i8, -15, -1, 24, 2, -13, -3, -29] | + - ctrl[i8x8]: [4_i8, 46, 64, 51, 6, 71, 7, 18] => [4_i8, 46, 102, -51, 6, -105, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx4q_s8 => vqtbx4q_s8: + - table[int8x16x4_t]: [ + 0_i8, -1, 2, -3, 4, -5, 6, -7, + 8, -9, 10, -11, 12, -13, 14, -15, + 16, -17, 18, -19, 20, -21, 22, -23, + 24, -25, 26, -27, 28, -29, 30, -31, + 32, -33, 34, -35, 36, -37, 38, -39, + 40, -41, 42, -43, 44, -45, 46, -47, + 48, -49, 50, -51, 52, -53, 54, -55, + 56, -57, 58, -59, 60, -61, 62, -63 + ] | + - ext[int8x16_t]: [ + 100_i8, -101, 102, -103, 104, -105, 106, -107, + 108, -109, 110, -111, 112, -113, 114, -115 + ] | + - ctrl[i8x16]: [80_i8, 15, 1, 24, 2, 13, 3, 29, 4_i8, 46, 64, 51, 6, 71, 7, 18] + => [100_i8, -15, -1, 24, 2, -13, -3, -29, 4, 46, 110, -51, 6, -113, -7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx4_u8 => vqtbx4_u8: + - table[uint8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 
9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ext[uint8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 102, 51, 6, 105, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx4q_u8 => vqtbx4q_u8: + - table[uint8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ext[uint8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 110, 51, 6, 113, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx4_p8 => vqtbx4_p8: + - table[poly8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ext[poly8x8_t]: [100_u8, 101, 102, 103, 104, 105, 106, 107] | + - ctrl[u8x8]: [80_u8, 15, 1, 24, 2, 13, 3, 29] => [100_u8, 15, 1, 24, 2, 13, 3, 29] | + - ctrl[u8x8]: [4_u8, 46, 64, 51, 6, 71, 7, 18] => [4_u8, 46, 102, 51, 6, 105, 7, 18] +); + +#[cfg(target_arch = "aarch64")] +test_vtbx!( + test_vqtbx4q_p8 => vqtbx4q_p8: + - table[poly8x16x4_t]: [ + 0_u8, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63 + ] | + - ext[poly8x16_t]: [ + 100_u8, 101, 102, 103, 104, 105, 106, 107, + 108, 109, 110, 111, 112, 113, 114, 115 + ] | + - ctrl[u8x16]: [80_u8, 15, 1, 24, 2, 13, 3, 29, 4_u8, 46, 64, 51, 6, 71, 7, 18] + => [100_u8, 15, 1, 24, 2, 13, 3, 29, 4, 46, 110, 51, 6, 113, 7, 18] +); -- cgit v1.2.3
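For anyone cross-checking the vtbl/vtbx expectations above, the lookup rule is: each control byte selects a table byte, out-of-range indices yield 0 for vtbl, and vtbx keeps the corresponding `ext` lane instead. Below is a minimal scalar sketch in plain Rust; the `vtbl1_ref`/`vtbx1_ref` helpers are illustrative names and not part of the patch:

// Scalar model of the 8-byte vtbl lookup: out-of-range indices produce 0.
fn vtbl1_ref(table: [u8; 8], ctrl: [u8; 8]) -> [u8; 8] {
    let mut out = [0u8; 8];
    for i in 0..8 {
        out[i] = *table.get(ctrl[i] as usize).unwrap_or(&0);
    }
    out
}

// vtbx variant: out-of-range indices keep the corresponding `ext` lane.
fn vtbx1_ref(ext: [u8; 8], table: [u8; 8], ctrl: [u8; 8]) -> [u8; 8] {
    let mut out = ext;
    for i in 0..8 {
        if let Some(&v) = table.get(ctrl[i] as usize) {
            out[i] = v;
        }
    }
    out
}

fn main() {
    // Reproduces the second expectation of `test_vtbl1_u8` above.
    assert_eq!(
        vtbl1_ref([0, 1, 2, 3, 4, 5, 6, 7], [3, 8, 1, 9, 10, 2, 15, 5]),
        [3, 0, 1, 0, 0, 2, 0, 5]
    );
    // Reproduces the second expectation of `test_vtbx1_u8` above.
    assert_eq!(
        vtbx1_ref(
            [50, 51, 52, 53, 54, 55, 56, 57],
            [0, 1, 2, 3, 4, 5, 6, 7],
            [3, 8, 1, 9, 10, 2, 15, 5]
        ),
        [3, 51, 1, 53, 54, 2, 56, 5]
    );
}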