summaryrefslogtreecommitdiffstats
path: root/library/stdarch/crates/core_arch/src/x86_64
diff options
context:
space:
mode:
Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86_64')
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/abm.rs62
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/adx.rs148
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/avx.rs48
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/avx2.rs47
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/avx512f.rs12346
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/bmi.rs183
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/bmi2.rs139
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/bswap.rs29
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/bt.rs135
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs73
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/fxsr.rs112
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/macros.rs36
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/mod.rs55
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/rdrand.rs44
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/sse.rs148
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/sse2.rs209
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/sse41.rs62
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/sse42.rs37
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/xsave.rs227
19 files changed, 14140 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86_64/abm.rs b/library/stdarch/crates/core_arch/src/x86_64/abm.rs
new file mode 100644
index 000000000..988074d67
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/abm.rs
@@ -0,0 +1,62 @@
+//! Advanced Bit Manipulation (ABM) instructions
+//!
+//! The POPCNT and LZCNT have their own CPUID bits to indicate support.
+//!
+//! The references are:
+//!
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+//! Instruction Set Reference, A-Z][intel64_ref].
+//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+//! System Instructions][amd64_ref].
+//!
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
+//! available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [wikipedia_bmi]:
+//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Counts the leading most significant zero bits.
+///
+/// When the operand is zero, it returns its size in bits.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u64)
+#[inline]
+#[target_feature(enable = "lzcnt")]
+#[cfg_attr(test, assert_instr(lzcnt))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
+ x.leading_zeros() as u64
+}
+
+/// Counts the bits that are set.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt64)
+#[inline]
+#[target_feature(enable = "popcnt")]
+#[cfg_attr(test, assert_instr(popcnt))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _popcnt64(x: i64) -> i32 {
+ x.count_ones() as i32
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::arch::x86_64::*;
+
+ #[simd_test(enable = "lzcnt")]
+ unsafe fn test_lzcnt_u64() {
+ assert_eq!(_lzcnt_u64(0b0101_1010), 57);
+ }
+
+ #[simd_test(enable = "popcnt")]
+ unsafe fn test_popcnt64() {
+ assert_eq!(_popcnt64(0b0101_1010), 4);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs
new file mode 100644
index 000000000..a54d71136
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs
@@ -0,0 +1,148 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "unadjusted" {
+ #[link_name = "llvm.x86.addcarry.64"]
+ fn llvm_addcarry_u64(a: u8, b: u64, c: u64) -> (u8, u64);
+ #[link_name = "llvm.x86.addcarryx.u64"]
+ fn llvm_addcarryx_u64(a: u8, b: u64, c: u64, d: *mut u8) -> u8;
+ #[link_name = "llvm.x86.subborrow.64"]
+ fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64);
+}
+
+/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`
+/// (carry flag), and store the unsigned 64-bit result in `out`, and the carry-out
+/// is returned (carry or overflow flag).
+#[inline]
+#[cfg_attr(test, assert_instr(adc))]
+#[stable(feature = "simd_x86_adx", since = "1.33.0")]
+pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 {
+ let (a, b) = llvm_addcarry_u64(c_in, a, b);
+ *out = b;
+ a
+}
+
+/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`
+/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and
+/// the carry-out is returned (carry or overflow flag).
+#[inline]
+#[target_feature(enable = "adx")]
+#[cfg_attr(test, assert_instr(adc))]
+#[stable(feature = "simd_x86_adx", since = "1.33.0")]
+pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 {
+ llvm_addcarryx_u64(c_in, a, b, out as *mut _ as *mut u8)
+}
+
+/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`.
+/// (carry or overflow flag), and store the unsigned 64-bit result in `out`, and
+/// the carry-out is returned (carry or overflow flag).
+#[inline]
+#[cfg_attr(test, assert_instr(sbb))]
+#[stable(feature = "simd_x86_adx", since = "1.33.0")]
+pub unsafe fn _subborrow_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 {
+ let (a, b) = llvm_subborrow_u64(c_in, a, b);
+ *out = b;
+ a
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86_64::*;
+
+ #[test]
+ fn test_addcarry_u64() {
+ unsafe {
+ let a = u64::MAX;
+ let mut out = 0;
+
+ let r = _addcarry_u64(0, a, 1, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, 0);
+
+ let r = _addcarry_u64(0, a, 0, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, a);
+
+ let r = _addcarry_u64(1, a, 1, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, 1);
+
+ let r = _addcarry_u64(1, a, 0, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, 0);
+
+ let r = _addcarry_u64(0, 3, 4, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, 7);
+
+ let r = _addcarry_u64(1, 3, 4, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, 8);
+ }
+ }
+
+ #[simd_test(enable = "adx")]
+ unsafe fn test_addcarryx_u64() {
+ let a = u64::MAX;
+ let mut out = 0;
+
+ let r = _addcarry_u64(0, a, 1, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, 0);
+
+ let r = _addcarry_u64(0, a, 0, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, a);
+
+ let r = _addcarry_u64(1, a, 1, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, 1);
+
+ let r = _addcarry_u64(1, a, 0, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, 0);
+
+ let r = _addcarry_u64(0, 3, 4, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, 7);
+
+ let r = _addcarry_u64(1, 3, 4, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, 8);
+ }
+
+ #[test]
+ fn test_subborrow_u64() {
+ unsafe {
+ let a = u64::MAX;
+ let mut out = 0;
+
+ let r = _subborrow_u64(0, 0, 1, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, a);
+
+ let r = _subborrow_u64(0, 0, 0, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, 0);
+
+ let r = _subborrow_u64(1, 0, 1, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, a - 1);
+
+ let r = _subborrow_u64(1, 0, 0, &mut out);
+ assert_eq!(r, 1);
+ assert_eq!(out, a);
+
+ let r = _subborrow_u64(0, 7, 3, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, 4);
+
+ let r = _subborrow_u64(1, 7, 3, &mut out);
+ assert_eq!(r, 0);
+ assert_eq!(out, 3);
+ }
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx.rs b/library/stdarch/crates/core_arch/src/x86_64/avx.rs
new file mode 100644
index 000000000..7ba26371c
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx.rs
@@ -0,0 +1,48 @@
+//! Advanced Vector Extensions (AVX)
+//!
+//! The references are:
+//!
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+//! Instruction Set Reference, A-Z][intel64_ref]. - [AMD64 Architecture
+//! Programmer's Manual, Volume 3: General-Purpose and System
+//! Instructions][amd64_ref].
+//!
+//! [Wikipedia][wiki] provides a quick overview of the instructions available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
+
+use crate::{
+ core_arch::{simd_llvm::*, x86::*},
+ mem::transmute,
+};
+
+/// Copies `a` to result, and insert the 64-bit integer `i` into result
+/// at the location specified by `index`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64)
+#[inline]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "avx")]
+// This intrinsic has no corresponding instruction.
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m256i {
+ static_assert_imm2!(INDEX);
+ transmute(simd_insert(a.as_i64x4(), INDEX as u32, i))
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86::*;
+
+ #[simd_test(enable = "avx")]
+ unsafe fn test_mm256_insert_epi64() {
+ let a = _mm256_setr_epi64x(1, 2, 3, 4);
+ let r = _mm256_insert_epi64::<3>(a, 0);
+ let e = _mm256_setr_epi64x(1, 2, 3, 0);
+ assert_eq_m256i(r, e);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs
new file mode 100644
index 000000000..14447a137
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs
@@ -0,0 +1,47 @@
+//! Advanced Vector Extensions 2 (AVX)
+//!
+//! AVX2 expands most AVX commands to 256-bit wide vector registers and
+//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
+//!
+//! The references are:
+//!
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+//! Instruction Set Reference, A-Z][intel64_ref].
+//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+//! System Instructions][amd64_ref].
+//!
+//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
+//! overview of the instructions available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
+//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate
+
+use crate::core_arch::{simd_llvm::*, x86::*};
+
+/// Extracts a 64-bit integer from `a`, selected with `INDEX`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[rustc_legacy_const_generics(1)]
+// This intrinsic has no corresponding instruction.
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 {
+ static_assert_imm2!(INDEX);
+ simd_extract(a.as_i64x4(), INDEX as u32)
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::arch::x86_64::*;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "avx2")]
+ unsafe fn test_mm256_extract_epi64() {
+ let a = _mm256_setr_epi64x(0, 1, 2, 3);
+ let r = _mm256_extract_epi64::<3>(a);
+ assert_eq!(r, 3);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
new file mode 100644
index 000000000..5eed0502c
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
@@ -0,0 +1,12346 @@
+use crate::{
+ core_arch::{simd::*, simd_llvm::*, x86::*, x86_64::*},
+ mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_i64&expand=1792)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si))]
+pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 {
+ _mm_cvtsd_si64(a)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_i64&expand=1894)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si))]
+pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
+ _mm_cvtss_si64(a)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_u64&expand=1902)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi))]
+pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
+ transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_u64&expand=1800)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi))]
+pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
+ transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=mm_cvti32_ss&expand=1643)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss))]
+pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
+ let b = b as f32;
+ let r = simd_insert(a, 0, b);
+ transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sd&expand=1644)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd))]
+pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
+ let b = b as f64;
+ let r = simd_insert(a, 0, b);
+ transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_ss&expand=2035)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2ss))]
+pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
+ let b = b as f32;
+ let r = simd_insert(a, 0, b);
+ transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sd&expand=2034)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2sd))]
+pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
+ let b = b as f64;
+ let r = simd_insert(a, 0, b);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i64&expand=2016)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si))]
+pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
+ transmute(vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u64&expand=2021)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi))]
+pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
+ transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=#text=_mm_cvttss_i64&expand=2023)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si))]
+pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
+ transmute(vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u64&expand=2027)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi))]
+pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
+ transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sd&expand=1313)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f64x2();
+ let r = vcvtsi2sd64(a, b, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_sd&expand=1367)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f64x2();
+ let r = vcvtsi2sd64(a, b, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_ss&expand=1314)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f32x4();
+ let r = vcvtsi2ss64(a, b, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sd&expand=1379)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __m128d {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f64x2();
+ let r = vcvtusi2sd64(a, b, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_ss&expand=1368)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f32x4();
+ let r = vcvtsi2ss64(a, b, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_ss&expand=1380)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m128 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f32x4();
+ let r = vcvtusi2ss64(a, b, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si64&expand=1360)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f64x2();
+ let r = vcvtsd2si64(a, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i64&expand=1358)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f64x2();
+ let r = vcvtsd2si64(a, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u64&expand=1365)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f64x2();
+ let r = vcvtsd2usi64(a, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si64&expand=1375)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f32x4();
+ let r = vcvtss2si64(a, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i64&expand=1370)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f32x4();
+ let r = vcvtss2si64(a, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u64&expand=1377)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
+ static_assert_rounding!(ROUNDING);
+ let a = a.as_f32x4();
+ let r = vcvtss2usi64(a, ROUNDING);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si64&expand=1931)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
+ static_assert_sae!(SAE);
+ let a = a.as_f64x2();
+ let r = vcvtsd2si64(a, SAE);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i64&expand=1929)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
+ static_assert_sae!(SAE);
+ let a = a.as_f64x2();
+ let r = vcvtsd2si64(a, SAE);
+ transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u64&expand=1933)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
+ static_assert_sae!(SAE);
+ let a = a.as_f64x2();
+ let r = vcvtsd2usi64(a, SAE);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i64&expand=1935)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
+ static_assert_sae!(SAE);
+ let a = a.as_f32x4();
+ let r = vcvtss2si64(a, SAE);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si64&expand=1937)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
+ static_assert_sae!(SAE);
+ let a = a.as_f32x4();
+ let r = vcvtss2si64(a, SAE);
+ transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u64&expand=1939)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundss_u64<const SAE: i32>(a: __m128) -> u64 {
+ static_assert_sae!(SAE);
+ let a = a.as_f32x4();
+ let r = vcvtss2usi64(a, SAE);
+ transmute(r)
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512.vcvtss2si64"]
+ fn vcvtss2si64(a: f32x4, rounding: i32) -> i64;
+ #[link_name = "llvm.x86.avx512.vcvtss2usi64"]
+ fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64;
+ #[link_name = "llvm.x86.avx512.vcvtsd2si64"]
+ fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64;
+ #[link_name = "llvm.x86.avx512.vcvtsd2usi64"]
+ fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64;
+
+ #[link_name = "llvm.x86.avx512.cvtsi2ss64"]
+ fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4;
+ #[link_name = "llvm.x86.avx512.cvtsi2sd64"]
+ fn vcvtsi2sd64(a: f64x2, b: i64, rounding: i32) -> f64x2;
+ #[link_name = "llvm.x86.avx512.cvtusi642ss"]
+ fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
+ #[link_name = "llvm.x86.avx512.cvtusi642sd"]
+ fn vcvtusi2sd64(a: f64x2, b: u64, rounding: i32) -> f64x2;
+}
+
+#[cfg(test)]
+mod tests {
+
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86::*;
+ use crate::core_arch::x86_64::*;
+ use crate::hint::black_box;
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_abs_epi64() {
+ let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let r = _mm512_abs_epi64(a);
+ let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_abs_epi64() {
+ let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let r = _mm512_mask_abs_epi64(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_abs_epi64(a, 0b11111111, a);
+ let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, 100, 32);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_abs_epi64() {
+ let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let r = _mm512_maskz_abs_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_abs_epi64(0b11111111, a);
+ let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, 100, 32);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_abs_epi64() {
+ let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+ let r = _mm256_abs_epi64(a);
+ let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_abs_epi64() {
+ let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+ let r = _mm256_mask_abs_epi64(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_abs_epi64(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_abs_epi64() {
+ let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+ let r = _mm256_maskz_abs_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_abs_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_abs_pd() {
+ let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+ let r = _mm512_abs_pd(a);
+ let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MAX, 100., 100., 32.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_abs_pd() {
+ let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+ let r = _mm512_mask_abs_pd(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_abs_pd(a, 0b00001111, a);
+ let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MIN, 100., -100., -32.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_mov_epi64() {
+ let src = _mm512_set1_epi64(1);
+ let a = _mm512_set1_epi64(2);
+ let r = _mm512_mask_mov_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_mov_epi64(src, 0b11111111, a);
+ assert_eq_m512i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_mov_epi64() {
+ let a = _mm512_set1_epi64(2);
+ let r = _mm512_maskz_mov_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_mov_epi64(0b11111111, a);
+ assert_eq_m512i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_mov_epi64() {
+ let src = _mm256_set1_epi64x(1);
+ let a = _mm256_set1_epi64x(2);
+ let r = _mm256_mask_mov_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_mov_epi64(src, 0b00001111, a);
+ assert_eq_m256i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_mov_epi64() {
+ let a = _mm256_set1_epi64x(2);
+ let r = _mm256_maskz_mov_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_mov_epi64(0b00001111, a);
+ assert_eq_m256i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_mov_epi64() {
+ let src = _mm_set1_epi64x(1);
+ let a = _mm_set1_epi64x(2);
+ let r = _mm_mask_mov_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_mov_epi64(src, 0b00000011, a);
+ assert_eq_m128i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_mov_epi64() {
+ let a = _mm_set1_epi64x(2);
+ let r = _mm_maskz_mov_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_mov_epi64(0b00000011, a);
+ assert_eq_m128i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_mov_pd() {
+ let src = _mm512_set1_pd(1.);
+ let a = _mm512_set1_pd(2.);
+ let r = _mm512_mask_mov_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_mov_pd(src, 0b11111111, a);
+ assert_eq_m512d(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_mov_pd() {
+ let a = _mm512_set1_pd(2.);
+ let r = _mm512_maskz_mov_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_mov_pd(0b11111111, a);
+ assert_eq_m512d(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_mov_pd() {
+ let src = _mm256_set1_pd(1.);
+ let a = _mm256_set1_pd(2.);
+ let r = _mm256_mask_mov_pd(src, 0, a);
+ assert_eq_m256d(r, src);
+ let r = _mm256_mask_mov_pd(src, 0b00001111, a);
+ assert_eq_m256d(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_mov_pd() {
+ let a = _mm256_set1_pd(2.);
+ let r = _mm256_maskz_mov_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_mov_pd(0b00001111, a);
+ assert_eq_m256d(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_mov_pd() {
+ let src = _mm_set1_pd(1.);
+ let a = _mm_set1_pd(2.);
+ let r = _mm_mask_mov_pd(src, 0, a);
+ assert_eq_m128d(r, src);
+ let r = _mm_mask_mov_pd(src, 0b00000011, a);
+ assert_eq_m128d(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_mov_pd() {
+ let a = _mm_set1_pd(2.);
+ let r = _mm_maskz_mov_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_mov_pd(0b00000011, a);
+ assert_eq_m128d(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_add_epi64() {
+ let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let b = _mm512_set1_epi64(1);
+ let r = _mm512_add_epi64(a, b);
+ let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN + 1, 101, -99, -31);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_add_epi64() {
+ let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let b = _mm512_set1_epi64(1);
+ let r = _mm512_mask_add_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_add_epi64(a, 0b00001111, a, b);
+ let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN, 100, -100, -32);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_add_epi64() {
+ let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let b = _mm512_set1_epi64(1);
+ let r = _mm512_maskz_add_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_add_epi64(0b00001111, a, b);
+ let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_add_epi64() {
+ let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
+ let b = _mm256_set1_epi64x(1);
+ let r = _mm256_mask_add_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_add_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(2, 0, i64::MIN, i64::MIN + 1);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_add_epi64() {
+ let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
+ let b = _mm256_set1_epi64x(1);
+ let r = _mm256_maskz_add_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_add_epi64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(2, 0, i64::MIN, i64::MIN + 1);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_add_epi64() {
+ let a = _mm_set_epi64x(i64::MAX, i64::MIN);
+ let b = _mm_set1_epi64x(1);
+ let r = _mm_mask_add_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_add_epi64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(i64::MIN, i64::MIN + 1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_add_epi64() {
+ let a = _mm_set_epi64x(i64::MAX, i64::MIN);
+ let b = _mm_set1_epi64x(1);
+ let r = _mm_maskz_add_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_add_epi64(0b00000011, a, b);
+ let e = _mm_set_epi64x(i64::MIN, i64::MIN + 1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_add_pd() {
+ let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+ let b = _mm512_set1_pd(1.);
+ let r = _mm512_add_pd(a, b);
+ let e = _mm512_setr_pd(1., 2., 0., f64::MAX, f64::MIN + 1., 101., -99., -31.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_add_pd() {
+ let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+ let b = _mm512_set1_pd(1.);
+ let r = _mm512_mask_add_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_add_pd(a, 0b00001111, a, b);
+ let e = _mm512_setr_pd(1., 2., 0., f64::MAX, f64::MIN, 100., -100., -32.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_add_pd() {
+ let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+ let b = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_add_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_add_pd(0b00001111, a, b);
+ let e = _mm512_setr_pd(1., 2., 0., f64::MAX, 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_add_pd() {
+ let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
+ let b = _mm256_set1_pd(1.);
+ let r = _mm256_mask_add_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_add_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(2., 0., f64::MAX, f64::MIN + 1.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_add_pd() {
+ let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
+ let b = _mm256_set1_pd(1.);
+ let r = _mm256_maskz_add_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_add_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(2., 0., f64::MAX, f64::MIN + 1.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_add_pd() {
+ let a = _mm_set_pd(f64::MAX, f64::MIN);
+ let b = _mm_set1_pd(1.);
+ let r = _mm_mask_add_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_add_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(f64::MAX, f64::MIN + 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_add_pd() {
+ let a = _mm_set_pd(f64::MAX, f64::MIN);
+ let b = _mm_set1_pd(1.);
+ let r = _mm_maskz_add_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_add_pd(0b00000011, a, b);
+ let e = _mm_set_pd(f64::MAX, f64::MIN + 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sub_epi64() {
+ let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let b = _mm512_set1_epi64(1);
+ let r = _mm512_sub_epi64(a, b);
+ let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, i64::MAX, 99, -101, -33);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sub_epi64() {
+ let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let b = _mm512_set1_epi64(1);
+ let r = _mm512_mask_sub_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_sub_epi64(a, 0b00001111, a, b);
+ let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, i64::MIN, 100, -100, -32);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sub_epi64() {
+ let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+ let b = _mm512_set1_epi64(1);
+ let r = _mm512_maskz_sub_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_sub_epi64(0b00001111, a, b);
+ let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_sub_epi64() {
+ let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
+ let b = _mm256_set1_epi64x(1);
+ let r = _mm256_mask_sub_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_sub_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(0, -2, i64::MAX - 1, i64::MAX);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_epi64() {
+ let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN);
+ let b = _mm256_set1_epi64x(1);
+ let r = _mm256_maskz_sub_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_sub_epi64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(0, -2, i64::MAX - 1, i64::MAX);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_sub_epi64() {
+ let a = _mm_set_epi64x(i64::MAX, i64::MIN);
+ let b = _mm_set1_epi64x(1);
+ let r = _mm_mask_sub_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_sub_epi64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(i64::MAX - 1, i64::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_sub_epi64() {
+ let a = _mm_set_epi64x(i64::MAX, i64::MIN);
+ let b = _mm_set1_epi64x(1);
+ let r = _mm_maskz_sub_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_sub_epi64(0b00000011, a, b);
+ let e = _mm_set_epi64x(i64::MAX - 1, i64::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sub_pd() {
+ let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+ let b = _mm512_set1_pd(1.);
+ let r = _mm512_sub_pd(a, b);
+ let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., f64::MIN, 99., -101., -33.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sub_pd() {
+ let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+ let b = _mm512_set1_pd(1.);
+ let r = _mm512_mask_sub_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_sub_pd(a, 0b00001111, a, b);
+ let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., f64::MIN, 100., -100., -32.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sub_pd() {
+ let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.);
+ let b = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_sub_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_sub_pd(0b00001111, a, b);
+ let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_sub_pd() {
+ let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
+ let b = _mm256_set1_pd(1.);
+ let r = _mm256_mask_sub_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_sub_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(0., -2., f64::MAX - 1., f64::MIN);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_sub_pd() {
+ let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN);
+ let b = _mm256_set1_pd(1.);
+ let r = _mm256_maskz_sub_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_sub_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(0., -2., f64::MAX - 1., f64::MIN);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_sub_pd() {
+ let a = _mm_set_pd(f64::MAX, f64::MIN);
+ let b = _mm_set1_pd(1.);
+ let r = _mm_mask_sub_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_sub_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(f64::MAX - 1., f64::MIN);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_sub_pd() {
+ let a = _mm_set_pd(f64::MAX, f64::MIN);
+ let b = _mm_set1_pd(1.);
+ let r = _mm_maskz_sub_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_sub_pd(0b00000011, a, b);
+ let e = _mm_set_pd(f64::MAX - 1., f64::MIN);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mul_epi32() {
+ let a = _mm512_set1_epi32(1);
+ let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_mul_epi32(a, b);
+ let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_mul_epi32() {
+ let a = _mm512_set1_epi32(1);
+ let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_mask_mul_epi32(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_mul_epi32(a, 0b00001111, a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32,
+ 7, 5, 3, 1,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_mul_epi32() {
+ let a = _mm512_set1_epi32(1);
+ let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_maskz_mul_epi32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_mul_epi32(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 7, 5, 3, 1);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_mul_epi32() {
+ let a = _mm256_set1_epi32(1);
+ let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_mask_mul_epi32(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_mul_epi32(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(2, 4, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_epi32() {
+ let a = _mm256_set1_epi32(1);
+ let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_maskz_mul_epi32(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_mul_epi32(0b00001111, a, b);
+ let e = _mm256_set_epi64x(2, 4, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_mul_epi32() {
+ let a = _mm_set1_epi32(1);
+ let b = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_mask_mul_epi32(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_mul_epi32(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(2, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_mul_epi32() {
+ let a = _mm_set1_epi32(1);
+ let b = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_maskz_mul_epi32(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_mul_epi32(0b00000011, a, b);
+ let e = _mm_set_epi64x(2, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mul_epu32() {
+ let a = _mm512_set1_epi32(1);
+ let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_mul_epu32(a, b);
+ let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_mul_epu32() {
+ let a = _mm512_set1_epi32(1);
+ let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_mask_mul_epu32(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_mul_epu32(a, 0b00001111, a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32,
+ 7, 5, 3, 1,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_mul_epu32() {
+ let a = _mm512_set1_epi32(1);
+ let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+ let r = _mm512_maskz_mul_epu32(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_mul_epu32(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 7, 5, 3, 1);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_mul_epu32() {
+ let a = _mm256_set1_epi32(1);
+ let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_mask_mul_epu32(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_mul_epu32(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(2, 4, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_epu32() {
+ let a = _mm256_set1_epi32(1);
+ let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm256_maskz_mul_epu32(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_mul_epu32(0b00001111, a, b);
+ let e = _mm256_set_epi64x(2, 4, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_mul_epu32() {
+ let a = _mm_set1_epi32(1);
+ let b = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_mask_mul_epu32(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_mul_epu32(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(2, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_mul_epu32() {
+ let a = _mm_set1_epi32(1);
+ let b = _mm_set_epi32(1, 2, 3, 4);
+ let r = _mm_maskz_mul_epu32(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_mul_epu32(0b00000011, a, b);
+ let e = _mm_set_epi64x(2, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mullox_epi64() {
+ let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32);
+ let b = _mm512_set1_epi64(2);
+ let r = _mm512_mullox_epi64(a, b);
+ let e = _mm512_setr_epi64(0, 2, -2, 0, -2, 200, -200, -64);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_mullox_epi64() {
+ let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32);
+ let b = _mm512_set1_epi64(2);
+ let r = _mm512_mask_mullox_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_mullox_epi64(a, 0b00001111, a, b);
+ let e = _mm512_setr_epi64(0, 2, -2, 0, i64::MAX, 100, -100, -32);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mul_pd() {
+ let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+ let b = _mm512_set1_pd(2.);
+ let r = _mm512_mul_pd(a, b);
+ #[rustfmt::skip]
+ let e = _mm512_setr_pd(
+ 0., 2., f64::INFINITY, f64::NEG_INFINITY,
+ f64::INFINITY, f64::NEG_INFINITY, -200., -64.,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_mul_pd() {
+ let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+ let b = _mm512_set1_pd(2.);
+ let r = _mm512_mask_mul_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_mul_pd(a, 0b00001111, a, b);
+ #[rustfmt::skip]
+ let e = _mm512_setr_pd(
+ 0., 2., f64::INFINITY, f64::NEG_INFINITY,
+ f64::MAX, f64::MIN, -100., -32.,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_mul_pd() {
+ let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+ let b = _mm512_set1_pd(2.);
+ let r = _mm512_maskz_mul_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_mul_pd(0b00001111, a, b);
+ let e = _mm512_setr_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY, 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_mul_pd() {
+ let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
+ let b = _mm256_set1_pd(2.);
+ let r = _mm256_mask_mul_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_mul_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_mul_pd() {
+ let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
+ let b = _mm256_set1_pd(2.);
+ let r = _mm256_maskz_mul_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_mul_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_mul_pd() {
+ let a = _mm_set_pd(f64::MAX, f64::MIN);
+ let b = _mm_set1_pd(2.);
+ let r = _mm_mask_mul_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_mul_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_mul_pd() {
+ let a = _mm_set_pd(f64::MAX, f64::MIN);
+ let b = _mm_set1_pd(2.);
+ let r = _mm_maskz_mul_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_mul_pd(0b00000011, a, b);
+ let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_div_pd() {
+ let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+ let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
+ let r = _mm512_div_pd(a, b);
+ #[rustfmt::skip]
+ let e = _mm512_setr_pd(
+ 0., 0.5, f64::INFINITY, f64::NEG_INFINITY,
+ f64::INFINITY, f64::NEG_INFINITY, -50., -16.,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_div_pd() {
+ let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+ let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
+ let r = _mm512_mask_div_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_div_pd(a, 0b00001111, a, b);
+ #[rustfmt::skip]
+ let e = _mm512_setr_pd(
+ 0., 0.5, f64::INFINITY, f64::NEG_INFINITY,
+ f64::MAX, f64::MIN, -100., -32.,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_div_pd() {
+ let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.);
+ let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.);
+ let r = _mm512_maskz_div_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_div_pd(0b00001111, a, b);
+ let e = _mm512_setr_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY, 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_div_pd() {
+ let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
+ let b = _mm256_set_pd(2., 2., 0., 0.);
+ let r = _mm256_mask_div_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_div_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_div_pd() {
+ let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN);
+ let b = _mm256_set_pd(2., 2., 0., 0.);
+ let r = _mm256_maskz_div_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_div_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_div_pd() {
+ let a = _mm_set_pd(f64::MAX, f64::MIN);
+ let b = _mm_set_pd(0., 0.);
+ let r = _mm_mask_div_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_div_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_div_pd() {
+ let a = _mm_set_pd(f64::MAX, f64::MIN);
+ let b = _mm_set_pd(0., 0.);
+ let r = _mm_maskz_div_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_div_pd(0b00000011, a, b);
+ let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_max_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_max_epi64(a, b);
+ let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_max_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_mask_max_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_max_epi64(a, 0b00001111, a, b);
+ let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_max_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_maskz_max_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_max_epi64(0b00001111, a, b);
+ let e = _mm512_setr_epi64(7, 6, 5, 4, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_max_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_max_epi64(a, b);
+ let e = _mm256_set_epi64x(3, 2, 2, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_max_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_mask_max_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_max_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(3, 2, 2, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_max_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_maskz_max_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_max_epi64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(3, 2, 2, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_max_epi64() {
+ let a = _mm_set_epi64x(2, 3);
+ let b = _mm_set_epi64x(3, 2);
+ let r = _mm_max_epi64(a, b);
+ let e = _mm_set_epi64x(3, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_max_epi64() {
+ let a = _mm_set_epi64x(2, 3);
+ let b = _mm_set_epi64x(3, 2);
+ let r = _mm_mask_max_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_max_epi64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(3, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_max_epi64() {
+ let a = _mm_set_epi64x(2, 3);
+ let b = _mm_set_epi64x(3, 2);
+ let r = _mm_maskz_max_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_max_epi64(0b00000011, a, b);
+ let e = _mm_set_epi64x(3, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_max_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_max_pd(a, b);
+ let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_max_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_mask_max_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_max_pd(a, 0b00001111, a, b);
+ let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_max_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_maskz_max_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_max_pd(0b00001111, a, b);
+ let e = _mm512_setr_pd(7., 6., 5., 4., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_max_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let b = _mm256_set_pd(3., 2., 1., 0.);
+ let r = _mm256_mask_max_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_max_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(3., 2., 2., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_max_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let b = _mm256_set_pd(3., 2., 1., 0.);
+ let r = _mm256_maskz_max_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_max_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(3., 2., 2., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_max_pd() {
+ let a = _mm_set_pd(2., 3.);
+ let b = _mm_set_pd(3., 2.);
+ let r = _mm_mask_max_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_max_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(3., 3.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_max_pd() {
+ let a = _mm_set_pd(2., 3.);
+ let b = _mm_set_pd(3., 2.);
+ let r = _mm_maskz_max_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_max_pd(0b00000011, a, b);
+ let e = _mm_set_pd(3., 3.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_max_epu64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_max_epu64(a, b);
+ let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_max_epu64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_mask_max_epu64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_max_epu64(a, 0b00001111, a, b);
+ let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_max_epu64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_maskz_max_epu64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_max_epu64(0b00001111, a, b);
+ let e = _mm512_setr_epi64(7, 6, 5, 4, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_max_epu64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_max_epu64(a, b);
+ let e = _mm256_set_epi64x(3, 2, 2, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_max_epu64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_mask_max_epu64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_max_epu64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(3, 2, 2, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_max_epu64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_maskz_max_epu64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_max_epu64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(3, 2, 2, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_max_epu64() {
+ let a = _mm_set_epi64x(2, 3);
+ let b = _mm_set_epi64x(3, 2);
+ let r = _mm_max_epu64(a, b);
+ let e = _mm_set_epi64x(3, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_max_epu64() {
+ let a = _mm_set_epi64x(2, 3);
+ let b = _mm_set_epi64x(3, 2);
+ let r = _mm_mask_max_epu64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_max_epu64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(3, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_max_epu64() {
+ let a = _mm_set_epi64x(2, 3);
+ let b = _mm_set_epi64x(3, 2);
+ let r = _mm_maskz_max_epu64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_max_epu64(0b00000011, a, b);
+ let e = _mm_set_epi64x(3, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_min_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_min_epi64(a, b);
+ let e = _mm512_setr_epi64(0, 1, 2, 3, 3, 2, 1, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_min_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_mask_min_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_min_epi64(a, 0b00001111, a, b);
+ let e = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_min_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_maskz_min_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_min_epi64(0b00001111, a, b);
+ let e = _mm512_setr_epi64(0, 1, 2, 3, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_min_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_min_epi64(a, b);
+ let e = _mm256_set_epi64x(0, 1, 1, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_min_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_mask_min_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_min_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(0, 1, 1, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_min_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_maskz_min_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_min_epi64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(0, 1, 1, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_min_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_min_pd(a, b);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 3., 2., 1., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_min_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_mask_min_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_min_pd(a, 0b00001111, a, b);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_min_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_maskz_min_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_min_pd(0b00001111, a, b);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_min_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let b = _mm256_set_pd(3., 2., 1., 0.);
+ let r = _mm256_mask_min_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_min_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(0., 1., 1., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_min_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let b = _mm256_set_pd(3., 2., 1., 0.);
+ let r = _mm256_maskz_min_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_min_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(0., 1., 1., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_min_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let b = _mm_set_pd(1., 0.);
+ let r = _mm_mask_min_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_min_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(0., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_min_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let b = _mm_set_pd(1., 0.);
+ let r = _mm_maskz_min_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_min_pd(0b00000011, a, b);
+ let e = _mm_set_pd(0., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_min_epu64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_min_epu64(a, b);
+ let e = _mm512_setr_epi64(0, 1, 2, 3, 3, 2, 1, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_min_epu64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_mask_min_epu64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_min_epu64(a, 0b00001111, a, b);
+ let e = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_min_epu64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0);
+ let r = _mm512_maskz_min_epu64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_min_epu64(0b00001111, a, b);
+ let e = _mm512_setr_epi64(0, 1, 2, 3, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_min_epu64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_min_epu64(a, b);
+ let e = _mm256_set_epi64x(0, 1, 1, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_min_epu64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_mask_min_epu64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_min_epu64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(0, 1, 1, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_min_epu64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_maskz_min_epu64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_min_epu64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(0, 1, 1, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_min_epu64() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(1, 0);
+ let r = _mm_min_epu64(a, b);
+ let e = _mm_set_epi64x(0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_min_epu64() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(1, 0);
+ let r = _mm_mask_min_epu64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_min_epu64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_min_epu64() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(1, 0);
+ let r = _mm_maskz_min_epu64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_min_epu64(0b00000011, a, b);
+ let e = _mm_set_epi64x(0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sqrt_pd() {
+ let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
+ let r = _mm512_sqrt_pd(a);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sqrt_pd() {
+ let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
+ let r = _mm512_mask_sqrt_pd(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_sqrt_pd(a, 0b00001111, a);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 16., 25., 36., 49.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sqrt_pd() {
+ let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.);
+ let r = _mm512_maskz_sqrt_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_sqrt_pd(0b00001111, a);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_sqrt_pd() {
+ let a = _mm256_set_pd(0., 1., 4., 9.);
+ let r = _mm256_mask_sqrt_pd(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_sqrt_pd(a, 0b00001111, a);
+ let e = _mm256_set_pd(0., 1., 2., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_sqrt_pd() {
+ let a = _mm256_set_pd(0., 1., 4., 9.);
+ let r = _mm256_maskz_sqrt_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_sqrt_pd(0b00001111, a);
+ let e = _mm256_set_pd(0., 1., 2., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_sqrt_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let r = _mm_mask_sqrt_pd(a, 0, a);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_sqrt_pd(a, 0b00000011, a);
+ let e = _mm_set_pd(0., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_sqrt_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let r = _mm_maskz_sqrt_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_sqrt_pd(0b00000011, a);
+ let e = _mm_set_pd(0., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fmadd_pd() {
+ let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
+ let r = _mm512_fmadd_pd(a, b, c);
+ let e = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fmadd_pd() {
+ let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
+ let r = _mm512_mask_fmadd_pd(a, 0, b, c);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fmadd_pd(a, 0b00001111, b, c);
+ let e = _mm512_setr_pd(1., 2., 3., 4., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fmadd_pd() {
+ let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
+ let r = _mm512_maskz_fmadd_pd(0, a, b, c);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fmadd_pd(0b00001111, a, b, c);
+ let e = _mm512_setr_pd(1., 2., 3., 4., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fmadd_pd() {
+ let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
+ let r = _mm512_mask3_fmadd_pd(a, b, c, 0);
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fmadd_pd(a, b, c, 0b00001111);
+ let e = _mm512_setr_pd(1., 2., 3., 4., 2., 2., 2., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_fmadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask_fmadd_pd(a, 0, b, c);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_fmadd_pd(a, 0b00001111, b, c);
+ let e = _mm256_set_pd(1., 2., 3., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_fmadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_maskz_fmadd_pd(0, a, b, c);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_fmadd_pd(0b00001111, a, b, c);
+ let e = _mm256_set_pd(1., 2., 3., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask3_fmadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask3_fmadd_pd(a, b, c, 0);
+ assert_eq_m256d(r, c);
+ let r = _mm256_mask3_fmadd_pd(a, b, c, 0b00001111);
+ let e = _mm256_set_pd(1., 2., 3., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_fmadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask_fmadd_pd(a, 0, b, c);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_fmadd_pd(a, 0b00000011, b, c);
+ let e = _mm_set_pd(1., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_fmadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_maskz_fmadd_pd(0, a, b, c);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_fmadd_pd(0b00000011, a, b, c);
+ let e = _mm_set_pd(1., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask3_fmadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask3_fmadd_pd(a, b, c, 0);
+ assert_eq_m128d(r, c);
+ let r = _mm_mask3_fmadd_pd(a, b, c, 0b00000011);
+ let e = _mm_set_pd(1., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fmsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_fmsub_pd(a, b, c);
+ let e = _mm512_setr_pd(-1., 0., 1., 2., 3., 4., 5., 6.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fmsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask_fmsub_pd(a, 0, b, c);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fmsub_pd(a, 0b00001111, b, c);
+ let e = _mm512_setr_pd(-1., 0., 1., 2., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fmsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_fmsub_pd(0, a, b, c);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fmsub_pd(0b00001111, a, b, c);
+ let e = _mm512_setr_pd(-1., 0., 1., 2., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fmsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
+ let r = _mm512_mask3_fmsub_pd(a, b, c, 0);
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fmsub_pd(a, b, c, 0b00001111);
+ let e = _mm512_setr_pd(-1., 0., 1., 2., 2., 2., 2., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_fmsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask_fmsub_pd(a, 0, b, c);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_fmsub_pd(a, 0b00001111, b, c);
+ let e = _mm256_set_pd(-1., 0., 1., 2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_fmsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_maskz_fmsub_pd(0, a, b, c);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_fmsub_pd(0b00001111, a, b, c);
+ let e = _mm256_set_pd(-1., 0., 1., 2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask3_fmsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask3_fmsub_pd(a, b, c, 0);
+ assert_eq_m256d(r, c);
+ let r = _mm256_mask3_fmsub_pd(a, b, c, 0b00001111);
+ let e = _mm256_set_pd(-1., 0., 1., 2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_fmsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask_fmsub_pd(a, 0, b, c);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_fmsub_pd(a, 0b00000011, b, c);
+ let e = _mm_set_pd(-1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_fmsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_maskz_fmsub_pd(0, a, b, c);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_fmsub_pd(0b00000011, a, b, c);
+ let e = _mm_set_pd(-1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask3_fmsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask3_fmsub_pd(a, b, c, 0);
+ assert_eq_m128d(r, c);
+ let r = _mm_mask3_fmsub_pd(a, b, c, 0b00000011);
+ let e = _mm_set_pd(-1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fmaddsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_fmaddsub_pd(a, b, c);
+ let e = _mm512_setr_pd(-1., 2., 1., 4., 3., 6., 5., 8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fmaddsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask_fmaddsub_pd(a, 0, b, c);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fmaddsub_pd(a, 0b00001111, b, c);
+ let e = _mm512_setr_pd(-1., 2., 1., 4., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fmaddsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_fmaddsub_pd(0, a, b, c);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fmaddsub_pd(0b00001111, a, b, c);
+ let e = _mm512_setr_pd(-1., 2., 1., 4., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fmaddsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
+ let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0);
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0b00001111);
+ let e = _mm512_setr_pd(-1., 2., 1., 4., 2., 2., 2., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_fmaddsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask_fmaddsub_pd(a, 0, b, c);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_fmaddsub_pd(a, 0b00001111, b, c);
+ let e = _mm256_set_pd(1., 0., 3., 2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_fmaddsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_maskz_fmaddsub_pd(0, a, b, c);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_fmaddsub_pd(0b00001111, a, b, c);
+ let e = _mm256_set_pd(1., 0., 3., 2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask3_fmaddsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask3_fmaddsub_pd(a, b, c, 0);
+ assert_eq_m256d(r, c);
+ let r = _mm256_mask3_fmaddsub_pd(a, b, c, 0b00001111);
+ let e = _mm256_set_pd(1., 0., 3., 2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_fmaddsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask_fmaddsub_pd(a, 0, b, c);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_fmaddsub_pd(a, 0b00000011, b, c);
+ let e = _mm_set_pd(1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_fmaddsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_maskz_fmaddsub_pd(0, a, b, c);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_fmaddsub_pd(0b00000011, a, b, c);
+ let e = _mm_set_pd(1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask3_fmaddsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask3_fmaddsub_pd(a, b, c, 0);
+ assert_eq_m128d(r, c);
+ let r = _mm_mask3_fmaddsub_pd(a, b, c, 0b00000011);
+ let e = _mm_set_pd(1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fmsubadd_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_fmsubadd_pd(a, b, c);
+ let e = _mm512_setr_pd(1., 0., 3., 2., 5., 4., 7., 6.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fmsubadd_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask_fmsubadd_pd(a, 0, b, c);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fmsubadd_pd(a, 0b00001111, b, c);
+ let e = _mm512_setr_pd(1., 0., 3., 2., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fmsubadd_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_fmsubadd_pd(0, a, b, c);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fmsubadd_pd(0b00001111, a, b, c);
+ let e = _mm512_setr_pd(1., 0., 3., 2., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fmsubadd_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
+ let r = _mm512_mask3_fmsubadd_pd(a, b, c, 0);
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fmsubadd_pd(a, b, c, 0b00001111);
+ let e = _mm512_setr_pd(1., 0., 3., 2., 2., 2., 2., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_fmsubadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask_fmsubadd_pd(a, 0, b, c);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_fmsubadd_pd(a, 0b00001111, b, c);
+ let e = _mm256_set_pd(-1., 2., 1., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_fmsubadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_maskz_fmsubadd_pd(0, a, b, c);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_fmsubadd_pd(0b00001111, a, b, c);
+ let e = _mm256_set_pd(-1., 2., 1., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask3_fmsubadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask3_fmsubadd_pd(a, b, c, 0);
+ assert_eq_m256d(r, c);
+ let r = _mm256_mask3_fmsubadd_pd(a, b, c, 0b00001111);
+ let e = _mm256_set_pd(-1., 2., 1., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_fmsubadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask_fmsubadd_pd(a, 0, b, c);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_fmsubadd_pd(a, 0b00000011, b, c);
+ let e = _mm_set_pd(-1., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_fmsubadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_maskz_fmsubadd_pd(0, a, b, c);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_fmsubadd_pd(0b00000011, a, b, c);
+ let e = _mm_set_pd(-1., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask3_fmsubadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask3_fmsubadd_pd(a, b, c, 0);
+ assert_eq_m128d(r, c);
+ let r = _mm_mask3_fmsubadd_pd(a, b, c, 0b00000011);
+ let e = _mm_set_pd(-1., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fnmadd_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_fnmadd_pd(a, b, c);
+ let e = _mm512_setr_pd(1., 0., -1., -2., -3., -4., -5., -6.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fnmadd_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask_fnmadd_pd(a, 0, b, c);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fnmadd_pd(a, 0b00001111, b, c);
+ let e = _mm512_setr_pd(1., 0., -1., -2., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fnmadd_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_fnmadd_pd(0, a, b, c);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fnmadd_pd(0b00001111, a, b, c);
+ let e = _mm512_setr_pd(1., 0., -1., -2., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fnmadd_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
+ let r = _mm512_mask3_fnmadd_pd(a, b, c, 0);
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fnmadd_pd(a, b, c, 0b00001111);
+ let e = _mm512_setr_pd(1., 0., -1., -2., 2., 2., 2., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_fnmadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask_fnmadd_pd(a, 0, b, c);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_fnmadd_pd(a, 0b00001111, b, c);
+ let e = _mm256_set_pd(1., 0., -1., -2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_fnmadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_maskz_fnmadd_pd(0, a, b, c);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_fnmadd_pd(0b00001111, a, b, c);
+ let e = _mm256_set_pd(1., 0., -1., -2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask3_fnmadd_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask3_fnmadd_pd(a, b, c, 0);
+ assert_eq_m256d(r, c);
+ let r = _mm256_mask3_fnmadd_pd(a, b, c, 0b00001111);
+ let e = _mm256_set_pd(1., 0., -1., -2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_fnmadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask_fnmadd_pd(a, 0, b, c);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_fnmadd_pd(a, 0b00000011, b, c);
+ let e = _mm_set_pd(1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_fnmadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_maskz_fnmadd_pd(0, a, b, c);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_fnmadd_pd(0b00000011, a, b, c);
+ let e = _mm_set_pd(1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask3_fnmadd_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask3_fnmadd_pd(a, b, c, 0);
+ assert_eq_m128d(r, c);
+ let r = _mm_mask3_fnmadd_pd(a, b, c, 0b00000011);
+ let e = _mm_set_pd(1., 0.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fnmsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_fnmsub_pd(a, b, c);
+ let e = _mm512_setr_pd(-1., -2., -3., -4., -5., -6., -7., -8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fnmsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask_fnmsub_pd(a, 0, b, c);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fnmsub_pd(a, 0b00001111, b, c);
+ let e = _mm512_setr_pd(-1., -2., -3., -4., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fnmsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_fnmsub_pd(0, a, b, c);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fnmsub_pd(0b00001111, a, b, c);
+ let e = _mm512_setr_pd(-1., -2., -3., -4., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fnmsub_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.);
+ let r = _mm512_mask3_fnmsub_pd(a, b, c, 0);
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fnmsub_pd(a, b, c, 0b00001111);
+ let e = _mm512_setr_pd(-1., -2., -3., -4., 2., 2., 2., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_fnmsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask_fnmsub_pd(a, 0, b, c);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_fnmsub_pd(a, 0b00001111, b, c);
+ let e = _mm256_set_pd(-1., -2., -3., -4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_fnmsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_maskz_fnmsub_pd(0, a, b, c);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_fnmsub_pd(0b00001111, a, b, c);
+ let e = _mm256_set_pd(-1., -2., -3., -4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask3_fnmsub_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set_pd(0., 1., 2., 3.);
+ let c = _mm256_set1_pd(1.);
+ let r = _mm256_mask3_fnmsub_pd(a, b, c, 0);
+ assert_eq_m256d(r, c);
+ let r = _mm256_mask3_fnmsub_pd(a, b, c, 0b00001111);
+ let e = _mm256_set_pd(-1., -2., -3., -4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_fnmsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask_fnmsub_pd(a, 0, b, c);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_fnmsub_pd(a, 0b00000011, b, c);
+ let e = _mm_set_pd(-1., -2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_fnmsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_maskz_fnmsub_pd(0, a, b, c);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_fnmsub_pd(0b00000011, a, b, c);
+ let e = _mm_set_pd(-1., -2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask3_fnmsub_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set_pd(0., 1.);
+ let c = _mm_set1_pd(1.);
+ let r = _mm_mask3_fnmsub_pd(a, b, c, 0);
+ assert_eq_m128d(r, c);
+ let r = _mm_mask3_fnmsub_pd(a, b, c, 0b00000011);
+ let e = _mm_set_pd(-1., -2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_rcp14_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_rcp14_pd(a);
+ let e = _mm512_set1_pd(0.3333320617675781);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_rcp14_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_mask_rcp14_pd(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_rcp14_pd(a, 0b11110000, a);
+ #[rustfmt::skip]
+ let e = _mm512_setr_pd(
+ 3., 3., 3., 3.,
+ 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, 0.3333320617675781,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_rcp14_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_maskz_rcp14_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_rcp14_pd(0b11110000, a);
+ #[rustfmt::skip]
+ let e = _mm512_setr_pd(
+ 0., 0., 0., 0.,
+ 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, 0.3333320617675781,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_rcp14_pd() {
+ let a = _mm256_set1_pd(3.);
+ let r = _mm256_rcp14_pd(a);
+ let e = _mm256_set1_pd(0.3333320617675781);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_rcp14_pd() {
+ let a = _mm256_set1_pd(3.);
+ let r = _mm256_mask_rcp14_pd(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_rcp14_pd(a, 0b00001111, a);
+ let e = _mm256_set1_pd(0.3333320617675781);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_rcp14_pd() {
+ let a = _mm256_set1_pd(3.);
+ let r = _mm256_maskz_rcp14_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_rcp14_pd(0b00001111, a);
+ let e = _mm256_set1_pd(0.3333320617675781);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_rcp14_pd() {
+ let a = _mm_set1_pd(3.);
+ let r = _mm_rcp14_pd(a);
+ let e = _mm_set1_pd(0.3333320617675781);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_rcp14_pd() {
+ let a = _mm_set1_pd(3.);
+ let r = _mm_mask_rcp14_pd(a, 0, a);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_rcp14_pd(a, 0b00000011, a);
+ let e = _mm_set1_pd(0.3333320617675781);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_rcp14_pd() {
+ let a = _mm_set1_pd(3.);
+ let r = _mm_maskz_rcp14_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_rcp14_pd(0b00000011, a);
+ let e = _mm_set1_pd(0.3333320617675781);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_rsqrt14_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_rsqrt14_pd(a);
+ let e = _mm512_set1_pd(0.5773391723632813);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_rsqrt14_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_mask_rsqrt14_pd(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_rsqrt14_pd(a, 0b11110000, a);
+ #[rustfmt::skip]
+ let e = _mm512_setr_pd(
+ 3., 3., 3., 3.,
+ 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, 0.5773391723632813,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_rsqrt14_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_maskz_rsqrt14_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_rsqrt14_pd(0b11110000, a);
+ #[rustfmt::skip]
+ let e = _mm512_setr_pd(
+ 0., 0., 0., 0.,
+ 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, 0.5773391723632813,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_rsqrt14_pd() {
+ let a = _mm256_set1_pd(3.);
+ let r = _mm256_mask_rsqrt14_pd(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_rsqrt14_pd(a, 0b00001111, a);
+ let e = _mm256_set1_pd(0.5773391723632813);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_rsqrt14_pd() {
+ let a = _mm256_set1_pd(3.);
+ let r = _mm256_maskz_rsqrt14_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_rsqrt14_pd(0b00001111, a);
+ let e = _mm256_set1_pd(0.5773391723632813);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_rsqrt14_pd() {
+ let a = _mm_set1_pd(3.);
+ let r = _mm_mask_rsqrt14_pd(a, 0, a);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_rsqrt14_pd(a, 0b00000011, a);
+ let e = _mm_set1_pd(0.5773391723632813);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_rsqrt14_pd() {
+ let a = _mm_set1_pd(3.);
+ let r = _mm_maskz_rsqrt14_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_rsqrt14_pd(0b00000011, a);
+ let e = _mm_set1_pd(0.5773391723632813);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_getexp_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_getexp_pd(a);
+ let e = _mm512_set1_pd(1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_getexp_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_mask_getexp_pd(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_getexp_pd(a, 0b11110000, a);
+ let e = _mm512_setr_pd(3., 3., 3., 3., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_getexp_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_maskz_getexp_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_getexp_pd(0b11110000, a);
+ let e = _mm512_setr_pd(0., 0., 0., 0., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_getexp_pd() {
+ let a = _mm256_set1_pd(3.);
+ let r = _mm256_getexp_pd(a);
+ let e = _mm256_set1_pd(1.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_getexp_pd() {
+ let a = _mm256_set1_pd(3.);
+ let r = _mm256_mask_getexp_pd(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_getexp_pd(a, 0b00001111, a);
+ let e = _mm256_set1_pd(1.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_getexp_pd() {
+ let a = _mm256_set1_pd(3.);
+ let r = _mm256_maskz_getexp_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_getexp_pd(0b00001111, a);
+ let e = _mm256_set1_pd(1.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_getexp_pd() {
+ let a = _mm_set1_pd(3.);
+ let r = _mm_getexp_pd(a);
+ let e = _mm_set1_pd(1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_getexp_pd() {
+ let a = _mm_set1_pd(3.);
+ let r = _mm_mask_getexp_pd(a, 0, a);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_getexp_pd(a, 0b00000011, a);
+ let e = _mm_set1_pd(1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_getexp_pd() {
+ let a = _mm_set1_pd(3.);
+ let r = _mm_maskz_getexp_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_getexp_pd(0b00000011, a);
+ let e = _mm_set1_pd(1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_roundscale_pd() {
+ let a = _mm512_set1_pd(1.1);
+ let r = _mm512_roundscale_pd::<0b00_00_00_00>(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_roundscale_pd() {
+ let a = _mm512_set1_pd(1.1);
+ let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
+ let e = _mm512_set1_pd(1.1);
+ assert_eq_m512d(r, e);
+ let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0b11111111, a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_roundscale_pd() {
+ let a = _mm512_set1_pd(1.1);
+ let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0b11111111, a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_roundscale_pd() {
+ let a = _mm256_set1_pd(1.1);
+ let r = _mm256_roundscale_pd::<0b00_00_00_00>(a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_roundscale_pd() {
+ let a = _mm256_set1_pd(1.1);
+ let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00001111, a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_roundscale_pd() {
+ let a = _mm256_set1_pd(1.1);
+ let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0b00001111, a);
+ let e = _mm256_set1_pd(1.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_roundscale_pd() {
+ let a = _mm_set1_pd(1.1);
+ let r = _mm_roundscale_pd::<0b00_00_00_00>(a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_roundscale_pd() {
+ let a = _mm_set1_pd(1.1);
+ let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a);
+ let e = _mm_set1_pd(1.1);
+ assert_eq_m128d(r, e);
+ let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00000011, a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_roundscale_pd() {
+ let a = _mm_set1_pd(1.1);
+ let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0b00000011, a);
+ let e = _mm_set1_pd(1.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_scalef_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r = _mm512_scalef_pd(a, b);
+ let e = _mm512_set1_pd(8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_scalef_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r = _mm512_mask_scalef_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_scalef_pd(a, 0b11110000, a, b);
+ let e = _mm512_set_pd(8., 8., 8., 8., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_scalef_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r = _mm512_maskz_scalef_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_scalef_pd(0b11110000, a, b);
+ let e = _mm512_set_pd(8., 8., 8., 8., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_scalef_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set1_pd(3.);
+ let r = _mm256_scalef_pd(a, b);
+ let e = _mm256_set1_pd(8.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_scalef_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set1_pd(3.);
+ let r = _mm256_mask_scalef_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_scalef_pd(a, 0b00001111, a, b);
+ let e = _mm256_set1_pd(8.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_scalef_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set1_pd(3.);
+ let r = _mm256_maskz_scalef_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_scalef_pd(0b00001111, a, b);
+ let e = _mm256_set1_pd(8.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_scalef_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set1_pd(3.);
+ let r = _mm_scalef_pd(a, b);
+ let e = _mm_set1_pd(8.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_scalef_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set1_pd(3.);
+ let r = _mm_mask_scalef_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_scalef_pd(a, 0b00000011, a, b);
+ let e = _mm_set1_pd(8.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_scalef_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set1_pd(3.);
+ let r = _mm_maskz_scalef_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_scalef_pd(0b00000011, a, b);
+ let e = _mm_set1_pd(8.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fixupimm_pd() {
+ let a = _mm512_set1_pd(f64::NAN);
+ let b = _mm512_set1_pd(f64::MAX);
+ let c = _mm512_set1_epi64(i32::MAX as i64);
+ let r = _mm512_fixupimm_pd::<5>(a, b, c);
+ let e = _mm512_set1_pd(0.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fixupimm_pd() {
+ let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
+ let b = _mm512_set1_pd(f64::MAX);
+ let c = _mm512_set1_epi64(i32::MAX as i64);
+ let r = _mm512_mask_fixupimm_pd::<5>(a, 0b11110000, b, c);
+ let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fixupimm_pd() {
+ let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
+ let b = _mm512_set1_pd(f64::MAX);
+ let c = _mm512_set1_epi64(i32::MAX as i64);
+ let r = _mm512_maskz_fixupimm_pd::<5>(0b11110000, a, b, c);
+ let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_fixupimm_pd() {
+ let a = _mm256_set1_pd(f64::NAN);
+ let b = _mm256_set1_pd(f64::MAX);
+ let c = _mm256_set1_epi64x(i32::MAX as i64);
+ let r = _mm256_fixupimm_pd::<5>(a, b, c);
+ let e = _mm256_set1_pd(0.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_fixupimm_pd() {
+ let a = _mm256_set1_pd(f64::NAN);
+ let b = _mm256_set1_pd(f64::MAX);
+ let c = _mm256_set1_epi64x(i32::MAX as i64);
+ let r = _mm256_mask_fixupimm_pd::<5>(a, 0b00001111, b, c);
+ let e = _mm256_set1_pd(0.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_fixupimm_pd() {
+ let a = _mm256_set1_pd(f64::NAN);
+ let b = _mm256_set1_pd(f64::MAX);
+ let c = _mm256_set1_epi64x(i32::MAX as i64);
+ let r = _mm256_maskz_fixupimm_pd::<5>(0b00001111, a, b, c);
+ let e = _mm256_set1_pd(0.0);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_fixupimm_pd() {
+ let a = _mm_set1_pd(f64::NAN);
+ let b = _mm_set1_pd(f64::MAX);
+ let c = _mm_set1_epi64x(i32::MAX as i64);
+ let r = _mm_fixupimm_pd::<5>(a, b, c);
+ let e = _mm_set1_pd(0.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_fixupimm_pd() {
+ let a = _mm_set1_pd(f64::NAN);
+ let b = _mm_set1_pd(f64::MAX);
+ let c = _mm_set1_epi64x(i32::MAX as i64);
+ let r = _mm_mask_fixupimm_pd::<5>(a, 0b00000011, b, c);
+ let e = _mm_set1_pd(0.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_fixupimm_pd() {
+ let a = _mm_set1_pd(f64::NAN);
+ let b = _mm_set1_pd(f64::MAX);
+ let c = _mm_set1_epi64x(i32::MAX as i64);
+ let r = _mm_maskz_fixupimm_pd::<5>(0b00000011, a, b, c);
+ let e = _mm_set1_pd(0.0);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_ternarylogic_epi64() {
+ let a = _mm512_set1_epi64(1 << 2);
+ let b = _mm512_set1_epi64(1 << 1);
+ let c = _mm512_set1_epi64(1 << 0);
+ let r = _mm512_ternarylogic_epi64::<8>(a, b, c);
+ let e = _mm512_set1_epi64(0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_ternarylogic_epi64() {
+ let src = _mm512_set1_epi64(1 << 2);
+ let a = _mm512_set1_epi64(1 << 1);
+ let b = _mm512_set1_epi64(1 << 0);
+ let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0, a, b);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0b11111111, a, b);
+ let e = _mm512_set1_epi64(0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_ternarylogic_epi64() {
+ let a = _mm512_set1_epi64(1 << 2);
+ let b = _mm512_set1_epi64(1 << 1);
+ let c = _mm512_set1_epi64(1 << 0);
+ let r = _mm512_maskz_ternarylogic_epi64::<8>(0, a, b, c);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_ternarylogic_epi64::<8>(0b11111111, a, b, c);
+ let e = _mm512_set1_epi64(0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_ternarylogic_epi64() {
+ let a = _mm256_set1_epi64x(1 << 2);
+ let b = _mm256_set1_epi64x(1 << 1);
+ let c = _mm256_set1_epi64x(1 << 0);
+ let r = _mm256_ternarylogic_epi64::<8>(a, b, c);
+ let e = _mm256_set1_epi64x(0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_ternarylogic_epi64() {
+ let src = _mm256_set1_epi64x(1 << 2);
+ let a = _mm256_set1_epi64x(1 << 1);
+ let b = _mm256_set1_epi64x(1 << 0);
+ let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0, a, b);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0b00001111, a, b);
+ let e = _mm256_set1_epi64x(0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_ternarylogic_epi64() {
+ let a = _mm256_set1_epi64x(1 << 2);
+ let b = _mm256_set1_epi64x(1 << 1);
+ let c = _mm256_set1_epi64x(1 << 0);
+ let r = _mm256_maskz_ternarylogic_epi64::<9>(0, a, b, c);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_ternarylogic_epi64::<8>(0b00001111, a, b, c);
+ let e = _mm256_set1_epi64x(0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_ternarylogic_epi64() {
+ let a = _mm_set1_epi64x(1 << 2);
+ let b = _mm_set1_epi64x(1 << 1);
+ let c = _mm_set1_epi64x(1 << 0);
+ let r = _mm_ternarylogic_epi64::<8>(a, b, c);
+ let e = _mm_set1_epi64x(0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_ternarylogic_epi64() {
+ let src = _mm_set1_epi64x(1 << 2);
+ let a = _mm_set1_epi64x(1 << 1);
+ let b = _mm_set1_epi64x(1 << 0);
+ let r = _mm_mask_ternarylogic_epi64::<8>(src, 0, a, b);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_ternarylogic_epi64::<8>(src, 0b00000011, a, b);
+ let e = _mm_set1_epi64x(0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_ternarylogic_epi64() {
+ let a = _mm_set1_epi64x(1 << 2);
+ let b = _mm_set1_epi64x(1 << 1);
+ let c = _mm_set1_epi64x(1 << 0);
+ let r = _mm_maskz_ternarylogic_epi64::<9>(0, a, b, c);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_ternarylogic_epi64::<8>(0b00000011, a, b, c);
+ let e = _mm_set1_epi64x(0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_getmant_pd() {
+ let a = _mm512_set1_pd(10.);
+ let r = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
+ let e = _mm512_set1_pd(1.25);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_getmant_pd() {
+ let a = _mm512_set1_pd(10.);
+ let r = _mm512_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11110000, a);
+ let e = _mm512_setr_pd(10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_getmant_pd() {
+ let a = _mm512_set1_pd(10.);
+ let r = _mm512_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11110000, a);
+ let e = _mm512_setr_pd(0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_getmant_pd() {
+ let a = _mm256_set1_pd(10.);
+ let r = _mm256_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
+ let e = _mm256_set1_pd(1.25);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_getmant_pd() {
+ let a = _mm256_set1_pd(10.);
+ let r = _mm256_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a);
+ let e = _mm256_set1_pd(1.25);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_getmant_pd() {
+ let a = _mm256_set1_pd(10.);
+ let r = _mm256_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a);
+ let e = _mm256_set1_pd(1.25);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_getmant_pd() {
+ let a = _mm_set1_pd(10.);
+ let r = _mm_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a);
+ let e = _mm_set1_pd(1.25);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_getmant_pd() {
+ let a = _mm_set1_pd(10.);
+ let r = _mm_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00000011, a);
+ let e = _mm_set1_pd(1.25);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_getmant_pd() {
+ let a = _mm_set1_pd(10.);
+ let r = _mm_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00000011, a);
+ let e = _mm_set1_pd(1.25);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtps_pd() {
+ let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvtps_pd(a);
+ let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtps_pd() {
+ let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm512_set1_pd(0.);
+ let r = _mm512_mask_cvtps_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_cvtps_pd(src, 0b00001111, a);
+ let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtps_pd() {
+ let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvtps_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_cvtps_pd(0b00001111, a);
+ let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtpslo_pd() {
+ let v2 = _mm512_setr_ps(
+ 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 100., 100., 100., 100., 100., 100., 100., 100.,
+ );
+ let r = _mm512_cvtpslo_pd(v2);
+ let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtpslo_pd() {
+ let v2 = _mm512_setr_ps(
+ 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 100., 100., 100., 100., 100., 100., 100., 100.,
+ );
+ let src = _mm512_set1_pd(0.);
+ let r = _mm512_mask_cvtpslo_pd(src, 0, v2);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_cvtpslo_pd(src, 0b00001111, v2);
+ let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtpd_ps() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvtpd_ps(a);
+ let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtpd_ps() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_ps(0.);
+ let r = _mm512_mask_cvtpd_ps(src, 0, a);
+ assert_eq_m256(r, src);
+ let r = _mm512_mask_cvtpd_ps(src, 0b00001111, a);
+ let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtpd_ps() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvtpd_ps(0, a);
+ assert_eq_m256(r, _mm256_setzero_ps());
+ let r = _mm512_maskz_cvtpd_ps(0b00001111, a);
+ let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtpd_ps() {
+ let a = _mm256_set_pd(4., -5.5, 6., -7.5);
+ let src = _mm_set1_ps(0.);
+ let r = _mm256_mask_cvtpd_ps(src, 0, a);
+ assert_eq_m128(r, src);
+ let r = _mm256_mask_cvtpd_ps(src, 0b00001111, a);
+ let e = _mm_set_ps(4., -5.5, 6., -7.5);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtpd_ps() {
+ let a = _mm256_set_pd(4., -5.5, 6., -7.5);
+ let r = _mm256_maskz_cvtpd_ps(0, a);
+ assert_eq_m128(r, _mm_setzero_ps());
+ let r = _mm256_maskz_cvtpd_ps(0b00001111, a);
+ let e = _mm_set_ps(4., -5.5, 6., -7.5);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtpd_ps() {
+ let a = _mm_set_pd(6., -7.5);
+ let src = _mm_set1_ps(0.);
+ let r = _mm_mask_cvtpd_ps(src, 0, a);
+ assert_eq_m128(r, src);
+ let r = _mm_mask_cvtpd_ps(src, 0b00000011, a);
+ let e = _mm_set_ps(0., 0., 6., -7.5);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtpd_ps() {
+ let a = _mm_set_pd(6., -7.5);
+ let r = _mm_maskz_cvtpd_ps(0, a);
+ assert_eq_m128(r, _mm_setzero_ps());
+ let r = _mm_maskz_cvtpd_ps(0b00000011, a);
+ let e = _mm_set_ps(0., 0., 6., -7.5);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvtpd_epi32(a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvtpd_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtpd_epi32(src, 0b11111111, a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvtpd_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtpd_epi32(0b11111111, a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtpd_epi32() {
+ let a = _mm256_set_pd(4., -5.5, 6., -7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvtpd_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtpd_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, -6, 6, -8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtpd_epi32() {
+ let a = _mm256_set_pd(4., -5.5, 6., -7.5);
+ let r = _mm256_maskz_cvtpd_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtpd_epi32(0b00001111, a);
+ let e = _mm_set_epi32(4, -6, 6, -8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtpd_epi32() {
+ let a = _mm_set_pd(6., -7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtpd_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtpd_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, -8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtpd_epi32() {
+ let a = _mm_set_pd(6., -7.5);
+ let r = _mm_maskz_cvtpd_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtpd_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, -8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtpd_epu32() {
+ let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
+ let r = _mm512_cvtpd_epu32(a);
+ let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtpd_epu32() {
+ let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvtpd_epu32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtpd_epu32(src, 0b11111111, a);
+ let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtpd_epu32() {
+ let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5);
+ let r = _mm512_maskz_cvtpd_epu32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtpd_epu32(0b11111111, a);
+ let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let r = _mm256_cvtpd_epu32(a);
+ let e = _mm_set_epi32(4, 6, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvtpd_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtpd_epu32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, 6, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let r = _mm256_maskz_cvtpd_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtpd_epu32(0b00001111, a);
+ let e = _mm_set_epi32(4, 6, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let r = _mm_cvtpd_epu32(a);
+ let e = _mm_set_epi32(0, 0, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtpd_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtpd_epu32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let r = _mm_maskz_cvtpd_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtpd_epu32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, 8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtpd_pslo() {
+ let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvtpd_pslo(v2);
+ let e = _mm512_setr_ps(
+ 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtpd_pslo() {
+ let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm512_set1_ps(0.);
+ let r = _mm512_mask_cvtpd_pslo(src, 0, v2);
+ assert_eq_m512(r, src);
+ let r = _mm512_mask_cvtpd_pslo(src, 0b00001111, v2);
+ let e = _mm512_setr_ps(
+ 0., -1.5, 2., -3.5, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi8_epi64(a);
+ let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_epi64(-1);
+ let r = _mm512_mask_cvtepi8_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_cvtepi8_epi64(src, 0b00001111, a);
+ let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepi8_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_cvtepi8_epi64(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepi8_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepi8_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepi8_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepi8_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepi8_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepi8_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepi8_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepi8_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepu8_epi64(a);
+ let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_epi64(-1);
+ let r = _mm512_mask_cvtepu8_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_cvtepu8_epi64(src, 0b00001111, a);
+ let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepu8_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_cvtepu8_epi64(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepu8_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepu8_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu8_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepu8_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepu8_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepu8_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu8_epi64() {
+ let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu8_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepu8_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi16_epi64(a);
+ let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_epi64(-1);
+ let r = _mm512_mask_cvtepi16_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_cvtepi16_epi64(src, 0b00001111, a);
+ let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepi16_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_cvtepi16_epi64(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepi16_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepi16_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepi16_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepi16_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepi16_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepi16_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepi16_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepi16_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepu16_epi64(a);
+ let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_epi64(-1);
+ let r = _mm512_mask_cvtepu16_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_cvtepu16_epi64(src, 0b00001111, a);
+ let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepu16_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_cvtepu16_epi64(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepu16_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepu16_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu16_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepu16_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepu16_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepu16_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu16_epi64() {
+ let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu16_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepu16_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi32_epi64() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi32_epi64(a);
+ let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi32_epi64() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_epi64(-1);
+ let r = _mm512_mask_cvtepi32_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_cvtepi32_epi64(src, 0b00001111, a);
+ let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepi32_epi64() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepi32_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_cvtepi32_epi64(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi32_epi64() {
+ let a = _mm_set_epi32(8, 9, 10, 11);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepi32_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepi32_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(8, 9, 10, 11);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi32_epi64() {
+ let a = _mm_set_epi32(8, 9, 10, 11);
+ let r = _mm256_maskz_cvtepi32_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepi32_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(8, 9, 10, 11);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi32_epi64() {
+ let a = _mm_set_epi32(8, 9, 10, 11);
+ let src = _mm_set1_epi64x(0);
+ let r = _mm_mask_cvtepi32_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepi32_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(10, 11);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi32_epi64() {
+ let a = _mm_set_epi32(8, 9, 10, 11);
+ let r = _mm_maskz_cvtepi32_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepi32_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(10, 11);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepu32_epi64() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepu32_epi64(a);
+ let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepu32_epi64() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_epi64(-1);
+ let r = _mm512_mask_cvtepu32_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_cvtepu32_epi64(src, 0b00001111, a);
+ let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepu32_epi64() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepu32_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_cvtepu32_epi64(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu32_epi64() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm256_set1_epi64x(-1);
+ let r = _mm256_mask_cvtepu32_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_cvtepu32_epi64(src, 0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu32_epi64() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu32_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_cvtepu32_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu32_epi64() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm_set1_epi64x(-1);
+ let r = _mm_mask_cvtepu32_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepu32_epi64(src, 0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu32_epi64() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu32_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepu32_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi32_pd() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi32_pd(a);
+ let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi32_pd() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_cvtepi32_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_cvtepi32_pd(src, 0b00001111, a);
+ let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepi32_pd() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepi32_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_cvtepi32_pd(0b00001111, a);
+ let e = _mm512_set_pd(0., 0., 0., 0., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm256_set1_pd(-1.);
+ let r = _mm256_mask_cvtepi32_pd(src, 0, a);
+ assert_eq_m256d(r, src);
+ let r = _mm256_mask_cvtepi32_pd(src, 0b00001111, a);
+ let e = _mm256_set_pd(12., 13., 14., 15.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepi32_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_cvtepi32_pd(0b00001111, a);
+ let e = _mm256_set_pd(12., 13., 14., 15.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm_set1_pd(-1.);
+ let r = _mm_mask_cvtepi32_pd(src, 0, a);
+ assert_eq_m128d(r, src);
+ let r = _mm_mask_cvtepi32_pd(src, 0b00000011, a);
+ let e = _mm_set_pd(14., 15.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm_maskz_cvtepi32_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_cvtepi32_pd(0b00000011, a);
+ let e = _mm_set_pd(14., 15.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepu32_pd() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepu32_pd(a);
+ let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepu32_pd() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_cvtepu32_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_cvtepu32_pd(src, 0b00001111, a);
+ let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepu32_pd() {
+ let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepu32_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_cvtepu32_pd(0b00001111, a);
+ let e = _mm512_set_pd(0., 0., 0., 0., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm256_cvtepu32_pd(a);
+ let e = _mm256_set_pd(12., 13., 14., 15.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm256_set1_pd(-1.);
+ let r = _mm256_mask_cvtepu32_pd(src, 0, a);
+ assert_eq_m256d(r, src);
+ let r = _mm256_mask_cvtepu32_pd(src, 0b00001111, a);
+ let e = _mm256_set_pd(12., 13., 14., 15.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepu32_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_cvtepu32_pd(0b00001111, a);
+ let e = _mm256_set_pd(12., 13., 14., 15.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm_cvtepu32_pd(a);
+ let e = _mm_set_pd(14., 15.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let src = _mm_set1_pd(-1.);
+ let r = _mm_mask_cvtepu32_pd(src, 0, a);
+ assert_eq_m128d(r, src);
+ let r = _mm_mask_cvtepu32_pd(src, 0b00000011, a);
+ let e = _mm_set_pd(14., 15.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepu32_pd() {
+ let a = _mm_set_epi32(12, 13, 14, 15);
+ let r = _mm_maskz_cvtepu32_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_cvtepu32_pd(0b00000011, a);
+ let e = _mm_set_pd(14., 15.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi32lo_pd() {
+ let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi32lo_pd(a);
+ let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi32lo_pd() {
+ let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_cvtepi32lo_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_cvtepi32lo_pd(src, 0b00001111, a);
+ let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepu32lo_pd() {
+ let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepu32lo_pd(a);
+ let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepu32lo_pd() {
+ let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_cvtepu32lo_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_cvtepu32lo_pd(src, 0b00001111, a);
+ let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi64_epi32() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi64_epi32(a);
+ let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi64_epi32() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm256_set1_epi32(-1);
+ let r = _mm512_mask_cvtepi64_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtepi64_epi32(src, 0b00001111, a);
+ let e = _mm256_set_epi32(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepi64_epi32() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepi64_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtepi64_epi32(0b00001111, a);
+ let e = _mm256_set_epi32(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtepi64_epi32() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let r = _mm256_cvtepi64_epi32(a);
+ let e = _mm_set_epi32(1, 2, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi64_epi32() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvtepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtepi64_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(1, 2, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi64_epi32() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let r = _mm256_maskz_cvtepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtepi64_epi32(0b00001111, a);
+ let e = _mm_set_epi32(1, 2, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtepi64_epi32() {
+ let a = _mm_set_epi64x(3, 4);
+ let r = _mm_cvtepi64_epi32(a);
+ let e = _mm_set_epi32(0, 0, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi64_epi32() {
+ let a = _mm_set_epi64x(3, 4);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepi64_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi64_epi32() {
+ let a = _mm_set_epi64x(3, 4);
+ let r = _mm_maskz_cvtepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepi64_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi64_epi16() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi64_epi16(a);
+ let e = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi64_epi16() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set1_epi16(-1);
+ let r = _mm512_mask_cvtepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm512_mask_cvtepi64_epi16(src, 0b00001111, a);
+ let e = _mm_set_epi16(-1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepi64_epi16() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm512_maskz_cvtepi64_epi16(0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtepi64_epi16() {
+ let a = _mm256_set_epi64x(12, 13, 14, 15);
+ let r = _mm256_cvtepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi64_epi16() {
+ let a = _mm256_set_epi64x(12, 13, 14, 15);
+ let src = _mm_set1_epi16(0);
+ let r = _mm256_mask_cvtepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtepi64_epi16(src, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi64_epi16() {
+ let a = _mm256_set_epi64x(12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtepi64_epi16(0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtepi64_epi16() {
+ let a = _mm_set_epi64x(14, 15);
+ let r = _mm_cvtepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi64_epi16() {
+ let a = _mm_set_epi64x(14, 15);
+ let src = _mm_set1_epi16(0);
+ let r = _mm_mask_cvtepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepi64_epi16(src, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi64_epi16() {
+ let a = _mm_set_epi64x(14, 15);
+ let r = _mm_maskz_cvtepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepi64_epi16(0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtepi64_epi8() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_cvtepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi64_epi8() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let r = _mm512_mask_cvtepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm512_mask_cvtepi64_epi8(src, 0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtepi64_epi8() {
+ let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15);
+ let r = _mm512_maskz_cvtepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm512_maskz_cvtepi64_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtepi64_epi8() {
+ let a = _mm256_set_epi64x(12, 13, 14, 15);
+ let r = _mm256_cvtepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi64_epi8() {
+ let a = _mm256_set_epi64x(12, 13, 14, 15);
+ let src = _mm_set1_epi8(0);
+ let r = _mm256_mask_cvtepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtepi64_epi8(src, 0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtepi64_epi8() {
+ let a = _mm256_set_epi64x(12, 13, 14, 15);
+ let r = _mm256_maskz_cvtepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtepi64_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtepi64_epi8() {
+ let a = _mm_set_epi64x(14, 15);
+ let r = _mm_cvtepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi64_epi8() {
+ let a = _mm_set_epi64x(14, 15);
+ let src = _mm_set1_epi8(0);
+ let r = _mm_mask_cvtepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtepi64_epi8(src, 0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtepi64_epi8() {
+ let a = _mm_set_epi64x(14, 15);
+ let r = _mm_maskz_cvtepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtepi64_epi8(0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtsepi64_epi32() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let r = _mm512_cvtsepi64_epi32(a);
+ let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, i32::MIN, i32::MAX);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_epi32() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let src = _mm256_set1_epi32(-1);
+ let r = _mm512_mask_cvtsepi64_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtsepi64_epi32(src, 0b00001111, a);
+ let e = _mm256_set_epi32(-1, -1, -1, -1, 4, 5, i32::MIN, i32::MAX);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtsepi64_epi32() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let r = _mm512_maskz_cvtsepi64_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtsepi64_epi32(0b00001111, a);
+ let e = _mm256_set_epi32(0, 0, 0, 0, 4, 5, i32::MIN, i32::MAX);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtsepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_cvtsepi64_epi32(a);
+ let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let src = _mm_set1_epi32(-1);
+ let r = _mm256_mask_cvtsepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtsepi64_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtsepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_maskz_cvtsepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtsepi64_epi32(0b00001111, a);
+ let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtsepi64_epi32() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_cvtsepi64_epi32(a);
+ let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_epi32() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtsepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtsepi64_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtsepi64_epi32() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_maskz_cvtsepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtsepi64_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtsepi64_epi16() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let r = _mm512_cvtsepi64_epi16(a);
+ let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_epi16() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let src = _mm_set1_epi16(-1);
+ let r = _mm512_mask_cvtsepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm512_mask_cvtsepi64_epi16(src, 0b00001111, a);
+ let e = _mm_set_epi16(-1, -1, -1, -1, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtsepi64_epi16() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let r = _mm512_maskz_cvtsepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm512_maskz_cvtsepi64_epi16(0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtsepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_cvtsepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let src = _mm_set1_epi16(0);
+ let r = _mm256_mask_cvtsepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtsepi64_epi16(src, 0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtsepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_maskz_cvtsepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtsepi64_epi16(0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtsepi64_epi16() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_cvtsepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_epi16() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let src = _mm_set1_epi16(0);
+ let r = _mm_mask_cvtsepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtsepi64_epi16(src, 0b00000011, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtsepi64_epi16() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_maskz_cvtsepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtsepi64_epi16(0b00000011, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtsepi64_epi8() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let r = _mm512_cvtsepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_epi8() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let r = _mm512_mask_cvtsepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm512_mask_cvtsepi64_epi8(src, 0b00001111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ -1, -1, -1, -1,
+ 4, 5, i8::MIN, i8::MAX,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtsepi64_epi8() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX);
+ let r = _mm512_maskz_cvtsepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm512_maskz_cvtsepi64_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtsepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_cvtsepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm256_mask_cvtsepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtsepi64_epi8(src, 0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtsepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX);
+ let r = _mm256_maskz_cvtsepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtsepi64_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtsepi64_epi8() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_cvtsepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_epi8() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm_mask_cvtsepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtsepi64_epi8(src, 0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtsepi64_epi8() {
+ let a = _mm_set_epi64x(i64::MIN, i64::MAX);
+ let r = _mm_maskz_cvtsepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtsepi64_epi8(0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtusepi64_epi32() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let r = _mm512_cvtusepi64_epi32(a);
+ let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, -1, -1);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_epi32() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let src = _mm256_set1_epi32(-1);
+ let r = _mm512_mask_cvtusepi64_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtusepi64_epi32(src, 0b00001111, a);
+ let e = _mm256_set_epi32(-1, -1, -1, -1, 4, 5, -1, -1);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtusepi64_epi32() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let r = _mm512_maskz_cvtusepi64_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtusepi64_epi32(0b00001111, a);
+ let e = _mm256_set_epi32(0, 0, 0, 0, 4, 5, -1, -1);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtusepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_cvtusepi64_epi32(a);
+ let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvtusepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtusepi64_epi32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtusepi64_epi32() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_maskz_cvtusepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtusepi64_epi32(0b00001111, a);
+ let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtusepi64_epi32() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_cvtusepi64_epi32(a);
+ let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_epi32() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvtusepi64_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtusepi64_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtusepi64_epi32() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_maskz_cvtusepi64_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtusepi64_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtusepi64_epi16() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let r = _mm512_cvtusepi64_epi16(a);
+ let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, -1, -1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_epi16() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let src = _mm_set1_epi16(-1);
+ let r = _mm512_mask_cvtusepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm512_mask_cvtusepi64_epi16(src, 0b00001111, a);
+ let e = _mm_set_epi16(-1, -1, -1, -1, 4, 5, -1, -1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtusepi64_epi16() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let r = _mm512_maskz_cvtusepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm512_maskz_cvtusepi64_epi16(0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, -1, -1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtusepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_cvtusepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let src = _mm_set1_epi16(0);
+ let r = _mm256_mask_cvtusepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtusepi64_epi16(src, 0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtusepi64_epi16() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_maskz_cvtusepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtusepi64_epi16(0b00001111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtusepi64_epi16() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_cvtusepi64_epi16(a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_epi16() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let src = _mm_set1_epi16(0);
+ let r = _mm_mask_cvtusepi64_epi16(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtusepi64_epi16(src, 0b00000011, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtusepi64_epi16() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_maskz_cvtusepi64_epi16(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtusepi64_epi16(0b00000011, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtusepi64_epi8() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let r = _mm512_cvtusepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, -1, -1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_epi8() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1);
+ let r = _mm512_mask_cvtusepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm512_mask_cvtusepi64_epi8(src, 0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 4, 5, -1, -1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtusepi64_epi8() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN);
+ let r = _mm512_maskz_cvtusepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm512_maskz_cvtusepi64_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, -1, -1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvtusepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_cvtusepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm256_mask_cvtusepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvtusepi64_epi8(src, 0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvtusepi64_epi8() {
+ let a = _mm256_set_epi64x(4, 5, 6, i64::MAX);
+ let r = _mm256_maskz_cvtusepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvtusepi64_epi8(0b00001111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvtusepi64_epi8() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_cvtusepi64_epi8(a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_epi8() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let src = _mm_set1_epi8(0);
+ let r = _mm_mask_cvtusepi64_epi8(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvtusepi64_epi8(src, 0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvtusepi64_epi8() {
+ let a = _mm_set_epi64x(6, i64::MAX);
+ let r = _mm_maskz_cvtusepi64_epi8(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvtusepi64_epi8(0b00000011, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtt_roundpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtt_roundpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtt_roundpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvtt_roundpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtt_roundpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvtt_roundpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvttpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvttpd_epi32(a);
+ let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvttpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvttpd_epi32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvttpd_epi32(src, 0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvttpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvttpd_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvttpd_epi32(0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvttpd_epi32() {
+ let a = _mm256_setr_pd(4., -5.5, 6., -7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvttpd_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvttpd_epi32(src, 0b00001111, a);
+ let e = _mm_setr_epi32(4, -5, 6, -7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttpd_epi32() {
+ let a = _mm256_setr_pd(4., -5.5, 6., -7.5);
+ let r = _mm256_maskz_cvttpd_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvttpd_epi32(0b00001111, a);
+ let e = _mm_setr_epi32(4, -5, 6, -7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvttpd_epi32() {
+ let a = _mm_set_pd(6., -7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvttpd_epi32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvttpd_epi32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, -7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvttpd_epi32() {
+ let a = _mm_set_pd(6., -7.5);
+ let r = _mm_maskz_cvttpd_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvttpd_epi32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, -7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvttpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvttpd_epu32(a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvttpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvttpd_epu32(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvttpd_epu32(src, 0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvttpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvttpd_epu32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvttpd_epu32(0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cvttpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let r = _mm256_cvttpd_epu32(a);
+ let e = _mm_set_epi32(4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvttpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm256_mask_cvttpd_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm256_mask_cvttpd_epu32(src, 0b00001111, a);
+ let e = _mm_set_epi32(4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_cvttpd_epu32() {
+ let a = _mm256_set_pd(4., 5.5, 6., 7.5);
+ let r = _mm256_maskz_cvttpd_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm256_maskz_cvttpd_epu32(0b00001111, a);
+ let e = _mm_set_epi32(4, 5, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cvttpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let r = _mm_cvttpd_epu32(a);
+ let e = _mm_set_epi32(0, 0, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvttpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let src = _mm_set1_epi32(0);
+ let r = _mm_mask_cvttpd_epu32(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_cvttpd_epu32(src, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_cvttpd_epu32() {
+ let a = _mm_set_pd(6., 7.5);
+ let r = _mm_maskz_cvttpd_epu32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_cvttpd_epu32(0b00000011, a);
+ let e = _mm_set_epi32(0, 0, 6, 7);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_add_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
+ let b = _mm512_set1_pd(-1.);
+ let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0);
+ assert_eq_m512d(r, e);
+ let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_add_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
+ let b = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, a, b,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b11110000, a, b,
+ );
+ let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_add_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
+ let b = _mm512_set1_pd(-1.);
+ let r =
+ _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b11110000, a, b,
+ );
+ let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sub_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let r = _mm512_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0);
+ assert_eq_m512d(r, e);
+ let r = _mm512_sub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sub_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let r = _mm512_mask_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, a, b,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b11110000, a, b,
+ );
+ let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sub_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let r =
+ _mm512_maskz_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b11110000, a, b,
+ );
+ let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mul_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.);
+ let b = _mm512_set1_pd(0.1);
+ let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_setr_pd(
+ 0.8,
+ 0.9500000000000001,
+ 1.,
+ 1.1500000000000001,
+ 1.2000000000000002,
+ 1.35,
+ 1.4000000000000001,
+ 0.,
+ );
+ assert_eq_m512d(r, e);
+ let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_setr_pd(0.8, 0.95, 1.0, 1.15, 1.2, 1.3499999999999999, 1.4, 0.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_mul_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.);
+ let b = _mm512_set1_pd(0.1);
+ let r = _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, a, b,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b11110000, a, b,
+ );
+ let e = _mm512_setr_pd(
+ 8.,
+ 9.5,
+ 10.,
+ 11.5,
+ 1.2000000000000002,
+ 1.35,
+ 1.4000000000000001,
+ 0.,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_mul_round_pd() {
+ let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.);
+ let b = _mm512_set1_pd(0.1);
+ let r =
+ _mm512_maskz_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b11110000, a, b,
+ );
+ let e = _mm512_setr_pd(
+ 0.,
+ 0.,
+ 0.,
+ 0.,
+ 1.2000000000000002,
+ 1.35,
+ 1.4000000000000001,
+ 0.,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_div_round_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pd(0.3333333333333333);
+ assert_eq_m512d(r, e);
+ let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pd(0.3333333333333333);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_div_round_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r = _mm512_mask_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, a, b,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b11110000, a, b,
+ );
+ let e = _mm512_setr_pd(
+ 1.,
+ 1.,
+ 1.,
+ 1.,
+ 0.3333333333333333,
+ 0.3333333333333333,
+ 0.3333333333333333,
+ 0.3333333333333333,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_div_round_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r =
+ _mm512_maskz_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b11110000, a, b,
+ );
+ let e = _mm512_setr_pd(
+ 0.,
+ 0.,
+ 0.,
+ 0.,
+ 0.3333333333333333,
+ 0.3333333333333333,
+ 0.3333333333333333,
+ 0.3333333333333333,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sqrt_round_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set1_pd(1.7320508075688772);
+ assert_eq_m512d(r, e);
+ let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a);
+ let e = _mm512_set1_pd(1.7320508075688774);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sqrt_round_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r =
+ _mm512_mask_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b11110000, a,
+ );
+ let e = _mm512_setr_pd(
+ 3.,
+ 3.,
+ 3.,
+ 3.,
+ 1.7320508075688772,
+ 1.7320508075688772,
+ 1.7320508075688772,
+ 1.7320508075688772,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sqrt_round_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r =
+ _mm512_maskz_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b11110000, a,
+ );
+ let e = _mm512_setr_pd(
+ 0.,
+ 0.,
+ 0.,
+ 0.,
+ 1.7320508075688772,
+ 1.7320508075688772,
+ 1.7320508075688772,
+ 1.7320508075688772,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fmadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pd(-1.);
+ assert_eq_m512d(r, e);
+ let r = _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pd(-0.9999999999999999);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fmadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b00001111, b, c,
+ );
+ let e = _mm512_setr_pd(
+ -1.,
+ -1.,
+ -1.,
+ -1.,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fmadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_maskz_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00001111, a, b, c,
+ );
+ let e = _mm512_setr_pd(-1., -1., -1., -1., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fmadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_mask3_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0b00001111,
+ );
+ let e = _mm512_setr_pd(-1., -1., -1., -1., -1., -1., -1., -1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fmsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pd(-1.);
+ assert_eq_m512d(r, e);
+ let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pd(-0.9999999999999999);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fmsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b00001111, b, c,
+ );
+ let e = _mm512_setr_pd(
+ -1.,
+ -1.,
+ -1.,
+ -1.,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fmsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00001111, a, b, c,
+ );
+ let e = _mm512_setr_pd(-1., -1., -1., -1., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fmsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask3_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0b00001111,
+ );
+ let e = _mm512_setr_pd(-1., -1., -1., -1., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fmaddsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r =
+ _mm512_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_setr_pd(1., -1., 1., -1., 1., -1., 1., -1.);
+ assert_eq_m512d(r, e);
+ let r = _mm512_fmaddsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_setr_pd(
+ 1.,
+ -0.9999999999999999,
+ 1.,
+ -0.9999999999999999,
+ 1.,
+ -0.9999999999999999,
+ 1.,
+ -0.9999999999999999,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fmaddsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b00001111, b, c,
+ );
+ let e = _mm512_setr_pd(
+ 1.,
+ -1.,
+ 1.,
+ -1.,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fmaddsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_maskz_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00001111, a, b, c,
+ );
+ let e = _mm512_setr_pd(1., -1., 1., -1., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fmaddsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_mask3_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0b00001111,
+ );
+ let e = _mm512_setr_pd(1., -1., 1., -1., -1., -1., -1., -1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fmsubadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r =
+ _mm512_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_setr_pd(-1., 1., -1., 1., -1., 1., -1., 1.);
+ assert_eq_m512d(r, e);
+ let r = _mm512_fmsubadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_setr_pd(
+ -0.9999999999999999,
+ 1.,
+ -0.9999999999999999,
+ 1.,
+ -0.9999999999999999,
+ 1.,
+ -0.9999999999999999,
+ 1.,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fmsubadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b00001111, b, c,
+ );
+ let e = _mm512_setr_pd(
+ -1.,
+ 1.,
+ -1.,
+ 1.,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fmsubadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_maskz_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00001111, a, b, c,
+ );
+ let e = _mm512_setr_pd(-1., 1., -1., 1., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fmsubadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_mask3_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0b00001111,
+ );
+ let e = _mm512_setr_pd(-1., 1., -1., 1., -1., -1., -1., -1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fnmadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(1.);
+ let r =
+ _mm512_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pd(1.);
+ assert_eq_m512d(r, e);
+ let r = _mm512_fnmadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pd(0.9999999999999999);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fnmadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b00001111, b, c,
+ );
+ let e = _mm512_setr_pd(
+ 1.,
+ 1.,
+ 1.,
+ 1.,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fnmadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_maskz_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00001111, a, b, c,
+ );
+ let e = _mm512_setr_pd(1., 1., 1., 1., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fnmadd_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(1.);
+ let r = _mm512_mask3_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0b00001111,
+ );
+ let e = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fnmsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r =
+ _mm512_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pd(1.);
+ assert_eq_m512d(r, e);
+ let r = _mm512_fnmsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c);
+ let e = _mm512_set1_pd(0.9999999999999999);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fnmsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_mask_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, b, c,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b00001111, b, c,
+ );
+ let e = _mm512_setr_pd(
+ 1.,
+ 1.,
+ 1.,
+ 1.,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ 0.000000000000000007,
+ );
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fnmsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_maskz_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b, c,
+ );
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b00001111, a, b, c,
+ );
+ let e = _mm512_setr_pd(1., 1., 1., 1., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask3_fnmsub_round_pd() {
+ let a = _mm512_set1_pd(0.000000000000000007);
+ let b = _mm512_set1_pd(1.);
+ let c = _mm512_set1_pd(-1.);
+ let r = _mm512_mask3_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0,
+ );
+ assert_eq_m512d(r, c);
+ let r = _mm512_mask3_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, b, c, 0b00001111,
+ );
+ let e = _mm512_setr_pd(1., 1., 1., 1., -1., -1., -1., -1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_max_round_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b);
+ let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_max_round_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_mask_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b00001111, a, b);
+ let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_max_round_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_maskz_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a, b);
+ let e = _mm512_setr_pd(7., 6., 5., 4., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_min_round_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 3., 2., 1., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_min_round_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_mask_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b00001111, a, b);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_min_round_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.);
+ let r = _mm512_maskz_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a, b);
+ let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_getexp_round_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e = _mm512_set1_pd(1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_getexp_round_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_mask_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11110000, a);
+ let e = _mm512_setr_pd(3., 3., 3., 3., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_getexp_round_pd() {
+ let a = _mm512_set1_pd(3.);
+ let r = _mm512_maskz_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b11110000, a);
+ let e = _mm512_setr_pd(0., 0., 0., 0., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_roundscale_round_pd() {
+ let a = _mm512_set1_pd(1.1);
+ let r = _mm512_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_roundscale_round_pd() {
+ let a = _mm512_set1_pd(1.1);
+ let r = _mm512_mask_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a);
+ let e = _mm512_set1_pd(1.1);
+ assert_eq_m512d(r, e);
+ let r = _mm512_mask_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_roundscale_round_pd() {
+ let a = _mm512_set1_pd(1.1);
+ let r = _mm512_maskz_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a);
+ let e = _mm512_set1_pd(1.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_scalef_round_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r = _mm512_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm512_set1_pd(8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_scalef_round_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r = _mm512_mask_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0, a, b,
+ );
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ a, 0b11110000, a, b,
+ );
+ let e = _mm512_set_pd(8., 8., 8., 8., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_scalef_round_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(3.);
+ let r = _mm512_maskz_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0, a, b,
+ );
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(
+ 0b11110000, a, b,
+ );
+ let e = _mm512_set_pd(8., 8., 8., 8., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_fixupimm_round_pd() {
+ let a = _mm512_set1_pd(f64::NAN);
+ let b = _mm512_set1_pd(f64::MAX);
+ let c = _mm512_set1_epi64(i32::MAX as i64);
+ let r = _mm512_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c);
+ let e = _mm512_set1_pd(0.0);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_fixupimm_round_pd() {
+ let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
+ let b = _mm512_set1_pd(f64::MAX);
+ let c = _mm512_set1_epi64(i32::MAX as i64);
+ let r = _mm512_mask_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11110000, b, c);
+ let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_fixupimm_round_pd() {
+ let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.);
+ let b = _mm512_set1_pd(f64::MAX);
+ let c = _mm512_set1_epi64(i32::MAX as i64);
+ let r = _mm512_maskz_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(0b11110000, a, b, c);
+ let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_getmant_round_pd() {
+ let a = _mm512_set1_pd(10.);
+ let r = _mm512_getmant_round_pd::<
+ _MM_MANT_NORM_1_2,
+ _MM_MANT_SIGN_SRC,
+ _MM_FROUND_CUR_DIRECTION,
+ >(a);
+ let e = _mm512_set1_pd(1.25);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_getmant_round_pd() {
+ let a = _mm512_set1_pd(10.);
+ let r = _mm512_mask_getmant_round_pd::<
+ _MM_MANT_NORM_1_2,
+ _MM_MANT_SIGN_SRC,
+ _MM_FROUND_CUR_DIRECTION,
+ >(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_getmant_round_pd::<
+ _MM_MANT_NORM_1_2,
+ _MM_MANT_SIGN_SRC,
+ _MM_FROUND_CUR_DIRECTION,
+ >(a, 0b11110000, a);
+ let e = _mm512_setr_pd(10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_getmant_round_pd() {
+ let a = _mm512_set1_pd(10.);
+ let r = _mm512_maskz_getmant_round_pd::<
+ _MM_MANT_NORM_1_2,
+ _MM_MANT_SIGN_SRC,
+ _MM_FROUND_CUR_DIRECTION,
+ >(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_getmant_round_pd::<
+ _MM_MANT_NORM_1_2,
+ _MM_MANT_SIGN_SRC,
+ _MM_FROUND_CUR_DIRECTION,
+ >(0b11110000, a);
+ let e = _mm512_setr_pd(0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvt_roundps_pd() {
+ let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvt_roundps_pd() {
+ let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm512_set1_pd(0.);
+ let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a);
+ let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvt_roundps_pd() {
+ let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a);
+ let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvt_roundpd_ps() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvt_roundpd_ps() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_ps(0.);
+ let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
+ assert_eq_m256(r, src);
+ let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a);
+ let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvt_roundpd_ps() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0, a);
+ assert_eq_m256(r, _mm256_setzero_ps());
+ let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a);
+ let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.);
+ assert_eq_m256(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvt_roundpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvt_roundpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvt_roundpd_epi32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a);
+ let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cvt_roundpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvt_roundpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let src = _mm256_set1_epi32(0);
+ let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_cvt_roundpd_epu32() {
+ let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+ let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a);
+ let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_setzero_pd() {
+ assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.));
+ }
+
+ unsafe fn test_mm512_set1_epi64() {
+ let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2);
+ assert_eq_m512i(r, _mm512_set1_epi64(2));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_set1_pd() {
+ let expected = _mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.);
+ assert_eq_m512d(expected, _mm512_set1_pd(2.));
+ }
+
+ unsafe fn test_mm512_set4_epi64() {
+ let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
+ assert_eq_m512i(r, _mm512_set4_epi64(4, 3, 2, 1));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_set4_pd() {
+ let r = _mm512_set_pd(4., 3., 2., 1., 4., 3., 2., 1.);
+ assert_eq_m512d(r, _mm512_set4_pd(4., 3., 2., 1.));
+ }
+
+ unsafe fn test_mm512_setr4_epi64() {
+ let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
+ assert_eq_m512i(r, _mm512_setr4_epi64(1, 2, 3, 4));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_setr4_pd() {
+ let r = _mm512_set_pd(4., 3., 2., 1., 4., 3., 2., 1.);
+ assert_eq_m512d(r, _mm512_setr4_pd(1., 2., 3., 4.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmplt_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let m = _mm512_cmplt_pd_mask(a, b);
+ assert_eq!(m, 0b00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmplt_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let mask = 0b01100110;
+ let r = _mm512_mask_cmplt_pd_mask(mask, a, b);
+ assert_eq!(r, 0b00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpnlt_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ assert_eq!(_mm512_cmpnlt_pd_mask(a, b), !_mm512_cmplt_pd_mask(a, b));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpnlt_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let mask = 0b01111010;
+ assert_eq!(_mm512_mask_cmpnlt_pd_mask(mask, a, b), 0b01111010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmple_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ assert_eq!(_mm512_cmple_pd_mask(a, b), 0b00100101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmple_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let mask = 0b01111010;
+ assert_eq!(_mm512_mask_cmple_pd_mask(mask, a, b), 0b00100000);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpnle_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let m = _mm512_cmpnle_pd_mask(b, a);
+ assert_eq!(m, 0b00001101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpnle_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let mask = 0b01100110;
+ let r = _mm512_mask_cmpnle_pd_mask(mask, b, a);
+ assert_eq!(r, 0b00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpeq_pd_mask() {
+ let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
+ let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
+ let m = _mm512_cmpeq_pd_mask(b, a);
+ assert_eq!(m, 0b11001101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpeq_pd_mask() {
+ let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
+ let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpeq_pd_mask(mask, b, a);
+ assert_eq!(r, 0b01001000);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpneq_pd_mask() {
+ let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
+ let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
+ let m = _mm512_cmpneq_pd_mask(b, a);
+ assert_eq!(m, 0b00110010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpneq_pd_mask() {
+ let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.);
+ let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpneq_pd_mask(mask, b, a);
+ assert_eq!(r, 0b00110010)
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmp_pd_mask() {
+ let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let m = _mm512_cmp_pd_mask::<_CMP_LT_OQ>(a, b);
+ assert_eq!(m, 0b00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmp_pd_mask() {
+ let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let mask = 0b01100110;
+ let r = _mm512_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b);
+ assert_eq!(r, 0b00000100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmp_pd_mask() {
+ let a = _mm256_set_pd(0., 1., -1., 13.);
+ let b = _mm256_set1_pd(1.);
+ let m = _mm256_cmp_pd_mask::<_CMP_LT_OQ>(a, b);
+ assert_eq!(m, 0b00001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmp_pd_mask() {
+ let a = _mm256_set_pd(0., 1., -1., 13.);
+ let b = _mm256_set1_pd(1.);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b);
+ assert_eq!(r, 0b00001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmp_pd_mask() {
+ let a = _mm_set_pd(0., 1.);
+ let b = _mm_set1_pd(1.);
+ let m = _mm_cmp_pd_mask::<_CMP_LT_OQ>(a, b);
+ assert_eq!(m, 0b00000010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmp_pd_mask() {
+ let a = _mm_set_pd(0., 1.);
+ let b = _mm_set1_pd(1.);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b);
+ assert_eq!(r, 0b00000010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmp_round_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let m = _mm512_cmp_round_pd_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b);
+ assert_eq!(m, 0b00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmp_round_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.);
+ let b = _mm512_set1_pd(-1.);
+ let mask = 0b01100110;
+ let r = _mm512_mask_cmp_round_pd_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b);
+ assert_eq!(r, 0b00000100);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpord_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
+ #[rustfmt::skip]
+ let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.);
+ let m = _mm512_cmpord_pd_mask(a, b);
+ assert_eq!(m, 0b00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpord_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
+ #[rustfmt::skip]
+ let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.);
+ let mask = 0b11000011;
+ let m = _mm512_mask_cmpord_pd_mask(mask, a, b);
+ assert_eq!(m, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpunord_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
+ #[rustfmt::skip]
+ let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.);
+ let m = _mm512_cmpunord_pd_mask(a, b);
+
+ assert_eq!(m, 0b11111010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpunord_pd_mask() {
+ #[rustfmt::skip]
+ let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.);
+ #[rustfmt::skip]
+ let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.);
+ let mask = 0b00001111;
+ let m = _mm512_mask_cmpunord_pd_mask(mask, a, b);
+ assert_eq!(m, 0b000001010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmplt_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let m = _mm512_cmplt_epu64_mask(a, b);
+ assert_eq!(m, 0b11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmplt_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmplt_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b01001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmplt_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, 2, 100);
+ let b = _mm256_set1_epi64x(2);
+ let r = _mm256_cmplt_epu64_mask(a, b);
+ assert_eq!(r, 0b00001100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmplt_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, 2, 100);
+ let b = _mm256_set1_epi64x(2);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmplt_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00001100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmplt_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(2);
+ let r = _mm_cmplt_epu64_mask(a, b);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmplt_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(2);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmplt_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpgt_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let m = _mm512_cmpgt_epu64_mask(b, a);
+ assert_eq!(m, 0b11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpgt_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpgt_epu64_mask(mask, b, a);
+ assert_eq!(r, 0b01001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpgt_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set1_epi64x(1);
+ let r = _mm256_cmpgt_epu64_mask(a, b);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpgt_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let b = _mm256_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpgt_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpgt_epu64_mask() {
+ let a = _mm_set_epi64x(1, 2);
+ let b = _mm_set1_epi64x(1);
+ let r = _mm_cmpgt_epu64_mask(a, b);
+ assert_eq!(r, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpgt_epu64_mask() {
+ let a = _mm_set_epi64x(1, 2);
+ let b = _mm_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpgt_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmple_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ assert_eq!(
+ _mm512_cmple_epu64_mask(a, b),
+ !_mm512_cmpgt_epu64_mask(a, b)
+ )
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmple_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b01111010;
+ assert_eq!(_mm512_mask_cmple_epu64_mask(mask, a, b), 0b01111010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmple_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, 2, 1);
+ let b = _mm256_set1_epi64x(1);
+ let r = _mm256_cmple_epu64_mask(a, b);
+ assert_eq!(r, 0b00001101)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmple_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, 2, 1);
+ let b = _mm256_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmple_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00001101)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmple_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let r = _mm_cmple_epu64_mask(a, b);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmple_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmple_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpge_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ assert_eq!(
+ _mm512_cmpge_epu64_mask(a, b),
+ !_mm512_cmplt_epu64_mask(a, b)
+ );
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpge_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b11111111;
+ let r = _mm512_mask_cmpge_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00110000);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpge_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, 2, u64::MAX as i64);
+ let b = _mm256_set1_epi64x(1);
+ let r = _mm256_cmpge_epu64_mask(a, b);
+ assert_eq!(r, 0b00000111);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpge_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, 2, u64::MAX as i64);
+ let b = _mm256_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpge_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00000111);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpge_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let r = _mm_cmpge_epu64_mask(a, b);
+ assert_eq!(r, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpge_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpge_epu64_mask(mask, a, b);
+ assert_eq!(r, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpeq_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let m = _mm512_cmpeq_epu64_mask(b, a);
+ assert_eq!(m, 0b11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpeq_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpeq_epu64_mask(mask, b, a);
+ assert_eq!(r, 0b01001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpeq_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let m = _mm256_cmpeq_epu64_mask(b, a);
+ assert_eq!(m, 0b00001100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpeq_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpeq_epu64_mask(mask, b, a);
+ assert_eq!(r, 0b00001100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpeq_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(0, 1);
+ let m = _mm_cmpeq_epu64_mask(b, a);
+ assert_eq!(m, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpeq_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(0, 1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpeq_epu64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpneq_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let m = _mm512_cmpneq_epu64_mask(b, a);
+ assert_eq!(m, !_mm512_cmpeq_epu64_mask(b, a));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpneq_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, -100, 100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpneq_epu64_mask(mask, b, a);
+ assert_eq!(r, 0b00110010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpneq_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let r = _mm256_cmpneq_epu64_mask(b, a);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpneq_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpneq_epu64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpneq_epu64_mask() {
+ let a = _mm_set_epi64x(-1, u64::MAX as i64);
+ let b = _mm_set_epi64x(13, 42);
+ let r = _mm_cmpneq_epu64_mask(b, a);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpneq_epu64_mask() {
+ let a = _mm_set_epi64x(-1, u64::MAX as i64);
+ let b = _mm_set_epi64x(13, 42);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpneq_epu64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmp_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let m = _mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
+ assert_eq!(m, 0b11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmp_epu64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b);
+ assert_eq!(r, 0b01001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmp_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 100);
+ let b = _mm256_set1_epi64x(1);
+ let m = _mm256_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
+ assert_eq!(m, 0b00001000);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmp_epu64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 100);
+ let b = _mm256_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b);
+ assert_eq!(r, 0b00001000);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmp_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let m = _mm_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b);
+ assert_eq!(m, 0b00000010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmp_epu64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b);
+ assert_eq!(r, 0b00000010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmplt_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let m = _mm512_cmplt_epi64_mask(a, b);
+ assert_eq!(m, 0b00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmplt_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b01100110;
+ let r = _mm512_mask_cmplt_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00000100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmplt_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, -13);
+ let b = _mm256_set1_epi64x(-1);
+ let r = _mm256_cmplt_epi64_mask(a, b);
+ assert_eq!(r, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmplt_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, -13);
+ let b = _mm256_set1_epi64x(-1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmplt_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmplt_epi64_mask() {
+ let a = _mm_set_epi64x(-1, -13);
+ let b = _mm_set1_epi64x(-1);
+ let r = _mm_cmplt_epi64_mask(a, b);
+ assert_eq!(r, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmplt_epi64_mask() {
+ let a = _mm_set_epi64x(-1, -13);
+ let b = _mm_set1_epi64x(-1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmplt_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00000001);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpgt_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let m = _mm512_cmpgt_epi64_mask(b, a);
+ assert_eq!(m, 0b00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpgt_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b01100110;
+ let r = _mm512_mask_cmpgt_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00000100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpgt_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set1_epi64x(-1);
+ let r = _mm256_cmpgt_epi64_mask(a, b);
+ assert_eq!(r, 0b00001101);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpgt_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set1_epi64x(-1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpgt_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00001101);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpgt_epi64_mask() {
+ let a = _mm_set_epi64x(0, -1);
+ let b = _mm_set1_epi64x(-1);
+ let r = _mm_cmpgt_epi64_mask(a, b);
+ assert_eq!(r, 0b00000010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpgt_epi64_mask() {
+ let a = _mm_set_epi64x(0, -1);
+ let b = _mm_set1_epi64x(-1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpgt_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00000010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmple_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ assert_eq!(
+ _mm512_cmple_epi64_mask(a, b),
+ !_mm512_cmpgt_epi64_mask(a, b)
+ )
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmple_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b01111010;
+ assert_eq!(_mm512_mask_cmple_epi64_mask(mask, a, b), 0b00110000);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmple_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, i64::MAX);
+ let b = _mm256_set1_epi64x(-1);
+ let r = _mm256_cmple_epi64_mask(a, b);
+ assert_eq!(r, 0b00000010)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmple_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, i64::MAX);
+ let b = _mm256_set1_epi64x(-1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmple_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00000010)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmple_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let r = _mm_cmple_epi64_mask(a, b);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmple_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmple_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpge_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ assert_eq!(
+ _mm512_cmpge_epi64_mask(a, b),
+ !_mm512_cmplt_epi64_mask(a, b)
+ )
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpge_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b11111111;
+ let r = _mm512_mask_cmpge_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b11111010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpge_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, i64::MAX);
+ let b = _mm256_set1_epi64x(-1);
+ let r = _mm256_cmpge_epi64_mask(a, b);
+ assert_eq!(r, 0b00001111);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpge_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, i64::MAX);
+ let b = _mm256_set1_epi64x(-1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpge_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00001111);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpge_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(-1);
+ let r = _mm_cmpge_epi64_mask(a, b);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpge_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(-1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpge_epi64_mask(mask, a, b);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpeq_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let m = _mm512_cmpeq_epi64_mask(b, a);
+ assert_eq!(m, 0b11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpeq_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpeq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b01001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpeq_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let m = _mm256_cmpeq_epi64_mask(b, a);
+ assert_eq!(m, 0b00001100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpeq_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpeq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00001100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpeq_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(0, 1);
+ let m = _mm_cmpeq_epi64_mask(b, a);
+ assert_eq!(m, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpeq_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(0, 1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpeq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_set_epi64() {
+ let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0))
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_setr_epi64() {
+ let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
+ }
+
+ unsafe fn test_mm512_cmpneq_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let m = _mm512_cmpneq_epi64_mask(b, a);
+ assert_eq!(m, !_mm512_cmpeq_epi64_mask(b, a));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpneq_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, -100, 100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpneq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00110010)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpneq_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let r = _mm256_cmpneq_epi64_mask(b, a);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpneq_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpneq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpneq_epi64_mask() {
+ let a = _mm_set_epi64x(-1, 13);
+ let b = _mm_set_epi64x(13, 42);
+ let r = _mm_cmpneq_epi64_mask(b, a);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpneq_epi64_mask() {
+ let a = _mm_set_epi64x(-1, 13);
+ let b = _mm_set_epi64x(13, 42);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpneq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmp_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let m = _mm512_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b);
+ assert_eq!(m, 0b00000101);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmp_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set1_epi64(-1);
+ let mask = 0b01100110;
+ let r = _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b);
+ assert_eq!(r, 0b00000100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmp_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set1_epi64x(1);
+ let m = _mm256_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b);
+ assert_eq!(m, 0b00001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmp_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b);
+ assert_eq!(r, 0b00001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmp_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let m = _mm_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b);
+ assert_eq!(m, 0b00000010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmp_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set1_epi64x(1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b);
+ assert_eq!(r, 0b00000010);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32gather_pd() {
+ let mut arr = [0f64; 128];
+ for i in 0..128 {
+ arr[i] = i as f64;
+ }
+ // A multiplier of 8 is word-addressing
+ let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
+ let r = _mm512_i32gather_pd::<8>(index, arr.as_ptr() as *const u8);
+ assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32gather_pd() {
+ let mut arr = [0f64; 128];
+ for i in 0..128 {
+ arr[i] = i as f64;
+ }
+ let src = _mm512_set1_pd(2.);
+ let mask = 0b10101010;
+ let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
+ // A multiplier of 8 is word-addressing
+ let r = _mm512_mask_i32gather_pd::<8>(src, mask, index, arr.as_ptr() as *const u8);
+ assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i64gather_pd() {
+ let mut arr = [0f64; 128];
+ for i in 0..128 {
+ arr[i] = i as f64;
+ }
+ // A multiplier of 8 is word-addressing
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let r = _mm512_i64gather_pd::<8>(index, arr.as_ptr() as *const u8);
+ assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i64gather_pd() {
+ let mut arr = [0f64; 128];
+ for i in 0..128 {
+ arr[i] = i as f64;
+ }
+ let src = _mm512_set1_pd(2.);
+ let mask = 0b10101010;
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ // A multiplier of 8 is word-addressing
+ let r = _mm512_mask_i64gather_pd::<8>(src, mask, index, arr.as_ptr() as *const u8);
+ assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i64gather_ps() {
+ let mut arr = [0f32; 128];
+ for i in 0..128 {
+ arr[i] = i as f32;
+ }
+ // A multiplier of 4 is word-addressing
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let r = _mm512_i64gather_ps::<4>(index, arr.as_ptr() as *const u8);
+ assert_eq_m256(r, _mm256_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i64gather_ps() {
+ let mut arr = [0f32; 128];
+ for i in 0..128 {
+ arr[i] = i as f32;
+ }
+ let src = _mm256_set1_ps(2.);
+ let mask = 0b10101010;
+ #[rustfmt::skip]
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ // A multiplier of 4 is word-addressing
+ let r = _mm512_mask_i64gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8);
+ assert_eq_m256(r, _mm256_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32gather_epi64() {
+ let mut arr = [0i64; 128];
+ for i in 0..128i64 {
+ arr[i as usize] = i;
+ }
+ // A multiplier of 8 is word-addressing
+ let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
+ let r = _mm512_i32gather_epi64::<8>(index, arr.as_ptr() as *const u8);
+ assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32gather_epi64() {
+ let mut arr = [0i64; 128];
+ for i in 0..128i64 {
+ arr[i as usize] = i;
+ }
+ let src = _mm512_set1_epi64(2);
+ let mask = 0b10101010;
+ let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
+ // A multiplier of 8 is word-addressing
+ let r = _mm512_mask_i32gather_epi64::<8>(src, mask, index, arr.as_ptr() as *const u8);
+ assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i64gather_epi64() {
+ let mut arr = [0i64; 128];
+ for i in 0..128i64 {
+ arr[i as usize] = i;
+ }
+ // A multiplier of 8 is word-addressing
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let r = _mm512_i64gather_epi64::<8>(index, arr.as_ptr() as *const u8);
+ assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i64gather_epi64() {
+ let mut arr = [0i64; 128];
+ for i in 0..128i64 {
+ arr[i as usize] = i;
+ }
+ let src = _mm512_set1_epi64(2);
+ let mask = 0b10101010;
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ // A multiplier of 8 is word-addressing
+ let r = _mm512_mask_i64gather_epi64::<8>(src, mask, index, arr.as_ptr() as *const u8);
+ assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i64gather_epi32() {
+ let mut arr = [0i64; 128];
+ for i in 0..128i64 {
+ arr[i as usize] = i;
+ }
+ // A multiplier of 8 is word-addressing
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let r = _mm512_i64gather_epi32::<8>(index, arr.as_ptr() as *const u8);
+ assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i64gather_epi32() {
+ let mut arr = [0i64; 128];
+ for i in 0..128i64 {
+ arr[i as usize] = i;
+ }
+ let src = _mm256_set1_epi32(2);
+ let mask = 0b10101010;
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ // A multiplier of 8 is word-addressing
+ let r = _mm512_mask_i64gather_epi32::<8>(src, mask, index, arr.as_ptr() as *const u8);
+ assert_eq_m256i(r, _mm256_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32scatter_pd() {
+ let mut arr = [0f64; 128];
+ let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ // A multiplier of 8 is word-addressing
+ _mm512_i32scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, index, src);
+ let mut expected = [0f64; 128];
+ for i in 0..8 {
+ expected[i * 16] = (i + 1) as f64;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32scatter_pd() {
+ let mut arr = [0f64; 128];
+ let mask = 0b10101010;
+ let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ // A multiplier of 8 is word-addressing
+ _mm512_mask_i32scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+ let mut expected = [0f64; 128];
+ for i in 0..4 {
+ expected[i * 32 + 16] = 2. * (i + 1) as f64;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i64scatter_pd() {
+ let mut arr = [0f64; 128];
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ // A multiplier of 8 is word-addressing
+ _mm512_i64scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, index, src);
+ let mut expected = [0f64; 128];
+ for i in 0..8 {
+ expected[i * 16] = (i + 1) as f64;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i64scatter_pd() {
+ let mut arr = [0f64; 128];
+ let mask = 0b10101010;
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ // A multiplier of 8 is word-addressing
+ _mm512_mask_i64scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+ let mut expected = [0f64; 128];
+ for i in 0..4 {
+ expected[i * 32 + 16] = 2. * (i + 1) as f64;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i64scatter_ps() {
+ let mut arr = [0f32; 128];
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+ // A multiplier of 4 is word-addressing
+ _mm512_i64scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src);
+ let mut expected = [0f32; 128];
+ for i in 0..8 {
+ expected[i * 16] = (i + 1) as f32;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i64scatter_ps() {
+ let mut arr = [0f32; 128];
+ let mask = 0b10101010;
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.);
+ // A multiplier of 4 is word-addressing
+ _mm512_mask_i64scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+ let mut expected = [0f32; 128];
+ for i in 0..4 {
+ expected[i * 32 + 16] = 2. * (i + 1) as f32;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i32scatter_epi64() {
+ let mut arr = [0i64; 128];
+ let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ // A multiplier of 8 is word-addressing
+ _mm512_i32scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, index, src);
+ let mut expected = [0i64; 128];
+ for i in 0..8 {
+ expected[i * 16] = (i + 1) as i64;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i32scatter_epi64() {
+ let mut arr = [0i64; 128];
+ let mask = 0b10101010;
+ let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ // A multiplier of 8 is word-addressing
+ _mm512_mask_i32scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+ let mut expected = [0i64; 128];
+ for i in 0..4 {
+ expected[i * 32 + 16] = 2 * (i + 1) as i64;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i64scatter_epi64() {
+ let mut arr = [0i64; 128];
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ // A multiplier of 8 is word-addressing
+ _mm512_i64scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, index, src);
+ let mut expected = [0i64; 128];
+ for i in 0..8 {
+ expected[i * 16] = (i + 1) as i64;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i64scatter_epi64() {
+ let mut arr = [0i64; 128];
+ let mask = 0b10101010;
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ // A multiplier of 8 is word-addressing
+ _mm512_mask_i64scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+ let mut expected = [0i64; 128];
+ for i in 0..4 {
+ expected[i * 32 + 16] = 2 * (i + 1) as i64;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_i64scatter_epi32() {
+ let mut arr = [0i32; 128];
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ // A multiplier of 4 is word-addressing
+ _mm512_i64scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src);
+ let mut expected = [0i32; 128];
+ for i in 0..8 {
+ expected[i * 16] = (i + 1) as i32;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_i64scatter_epi32() {
+ let mut arr = [0i32; 128];
+ let mask = 0b10101010;
+ let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112);
+ let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8);
+ // A multiplier of 4 is word-addressing
+ _mm512_mask_i64scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src);
+ let mut expected = [0i32; 128];
+ for i in 0..4 {
+ expected[i * 32 + 16] = 2 * (i + 1) as i32;
+ }
+ assert_eq!(&arr[..], &expected[..],);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_rol_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 63, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let r = _mm512_rol_epi64::<1>(a);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 0, 1 << 33, 1 << 33, 1 << 33,
+ 1 << 33, 1 << 33, 1 << 33, 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_rol_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 63, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let r = _mm512_mask_rol_epi64::<1>(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_rol_epi64::<1>(a, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 0, 1 << 33, 1 << 33, 1 << 33,
+ 1 << 33, 1 << 33, 1 << 33, 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_rol_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 63,
+ );
+ let r = _mm512_maskz_rol_epi64::<1>(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_rol_epi64::<1>(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_rol_epi64() {
+ let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
+ let r = _mm256_rol_epi64::<1>(a);
+ let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_rol_epi64() {
+ let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
+ let r = _mm256_mask_rol_epi64::<1>(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_rol_epi64::<1>(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_rol_epi64() {
+ let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
+ let r = _mm256_maskz_rol_epi64::<1>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_rol_epi64::<1>(0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_rol_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let r = _mm_rol_epi64::<1>(a);
+ let e = _mm_set_epi64x(1 << 0, 1 << 33);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_rol_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let r = _mm_mask_rol_epi64::<1>(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_rol_epi64::<1>(a, 0b00000011, a);
+ let e = _mm_set_epi64x(1 << 0, 1 << 33);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_rol_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let r = _mm_maskz_rol_epi64::<1>(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_rol_epi64::<1>(0b00000011, a);
+ let e = _mm_set_epi64x(1 << 0, 1 << 33);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_ror_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 0, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let r = _mm512_ror_epi64::<1>(a);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 63, 1 << 31, 1 << 31, 1 << 31,
+ 1 << 31, 1 << 31, 1 << 31, 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_ror_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 0, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let r = _mm512_mask_ror_epi64::<1>(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_ror_epi64::<1>(a, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 63, 1 << 31, 1 << 31, 1 << 31,
+ 1 << 31, 1 << 31, 1 << 31, 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_ror_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 0,
+ );
+ let r = _mm512_maskz_ror_epi64::<1>(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_ror_epi64::<1>(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_ror_epi64() {
+ let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32);
+ let r = _mm256_ror_epi64::<1>(a);
+ let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_ror_epi64() {
+ let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32);
+ let r = _mm256_mask_ror_epi64::<1>(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_ror_epi64::<1>(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_ror_epi64() {
+ let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32);
+ let r = _mm256_maskz_ror_epi64::<1>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_ror_epi64::<1>(0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_ror_epi64() {
+ let a = _mm_set_epi64x(1 << 0, 1 << 32);
+ let r = _mm_ror_epi64::<1>(a);
+ let e = _mm_set_epi64x(1 << 63, 1 << 31);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_ror_epi64() {
+ let a = _mm_set_epi64x(1 << 0, 1 << 32);
+ let r = _mm_mask_ror_epi64::<1>(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_ror_epi64::<1>(a, 0b00000011, a);
+ let e = _mm_set_epi64x(1 << 63, 1 << 31);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_ror_epi64() {
+ let a = _mm_set_epi64x(1 << 0, 1 << 32);
+ let r = _mm_maskz_ror_epi64::<1>(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_ror_epi64::<1>(0b00000011, a);
+ let e = _mm_set_epi64x(1 << 63, 1 << 31);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_slli_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 63, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let r = _mm512_slli_epi64::<1>(a);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 1 << 33, 1 << 33, 1 << 33,
+ 1 << 33, 1 << 33, 1 << 33, 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_slli_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 63, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let r = _mm512_mask_slli_epi64::<1>(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_slli_epi64::<1>(a, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 1 << 33, 1 << 33, 1 << 33,
+ 1 << 33, 1 << 33, 1 << 33, 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_slli_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 63,
+ );
+ let r = _mm512_maskz_slli_epi64::<1>(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_slli_epi64::<1>(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_slli_epi64() {
+ let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
+ let r = _mm256_mask_slli_epi64::<1>(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_slli_epi64::<1>(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_slli_epi64() {
+ let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
+ let r = _mm256_maskz_slli_epi64::<1>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_slli_epi64::<1>(0b00001111, a);
+ let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_slli_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let r = _mm_mask_slli_epi64::<1>(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_slli_epi64::<1>(a, 0b00000011, a);
+ let e = _mm_set_epi64x(0, 1 << 33);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_slli_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let r = _mm_maskz_slli_epi64::<1>(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_slli_epi64::<1>(0b00000011, a);
+ let e = _mm_set_epi64x(0, 1 << 33);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srli_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 0, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let r = _mm512_srli_epi64::<1>(a);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 1 << 31, 1 << 31, 1 << 31,
+ 1 << 31, 1 << 31, 1 << 31, 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srli_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 0, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let r = _mm512_mask_srli_epi64::<1>(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_srli_epi64::<1>(a, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 1 << 31, 1 << 31, 1 << 31,
+ 1 << 31, 1 << 31, 1 << 31, 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srli_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 0,
+ );
+ let r = _mm512_maskz_srli_epi64::<1>(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_srli_epi64::<1>(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_srli_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let r = _mm256_mask_srli_epi64::<1>(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_srli_epi64::<1>(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_srli_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let r = _mm256_maskz_srli_epi64::<1>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_srli_epi64::<1>(0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_srli_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let r = _mm_mask_srli_epi64::<1>(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_srli_epi64::<1>(a, 0b00000011, a);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_srli_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let r = _mm_maskz_srli_epi64::<1>(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_srli_epi64::<1>(0b00000011, a);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_rolv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 63, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_rolv_epi64(a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 32, 1 << 0, 1 << 34, 1 << 35,
+ 1 << 36, 1 << 37, 1 << 38, 1 << 39,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_rolv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 63, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_rolv_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 32, 1 << 0, 1 << 34, 1 << 35,
+ 1 << 36, 1 << 37, 1 << 38, 1 << 39,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_rolv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 62,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2);
+ let r = _mm512_maskz_rolv_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_rolv_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 1 << 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_rolv_epi64() {
+ let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32);
+ let b = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_rolv_epi64(a, b);
+ let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_rolv_epi64() {
+ let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32);
+ let b = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_mask_rolv_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_rolv_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_rolv_epi64() {
+ let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32);
+ let b = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_maskz_rolv_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_rolv_epi64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_rolv_epi64() {
+ let a = _mm_set_epi64x(1 << 32, 1 << 63);
+ let b = _mm_set_epi64x(0, 1);
+ let r = _mm_rolv_epi64(a, b);
+ let e = _mm_set_epi64x(1 << 32, 1 << 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_rolv_epi64() {
+ let a = _mm_set_epi64x(1 << 32, 1 << 63);
+ let b = _mm_set_epi64x(0, 1);
+ let r = _mm_mask_rolv_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_rolv_epi64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(1 << 32, 1 << 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_rolv_epi64() {
+ let a = _mm_set_epi64x(1 << 32, 1 << 63);
+ let b = _mm_set_epi64x(0, 1);
+ let r = _mm_maskz_rolv_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_rolv_epi64(0b00000011, a, b);
+ let e = _mm_set_epi64x(1 << 32, 1 << 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_rorv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 0, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_rorv_epi64(a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 32, 1 << 63, 1 << 30, 1 << 29,
+ 1 << 28, 1 << 27, 1 << 26, 1 << 25,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_rorv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 0, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_rorv_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 32, 1 << 63, 1 << 30, 1 << 29,
+ 1 << 28, 1 << 27, 1 << 26, 1 << 25,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_rorv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 0,
+ );
+ let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2);
+ let r = _mm512_maskz_rorv_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_rorv_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 1 << 62);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_rorv_epi64() {
+ let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32);
+ let b = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_rorv_epi64(a, b);
+ let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_rorv_epi64() {
+ let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32);
+ let b = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_mask_rorv_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_rorv_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_rorv_epi64() {
+ let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32);
+ let b = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_maskz_rorv_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_rorv_epi64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_rorv_epi64() {
+ let a = _mm_set_epi64x(1 << 32, 1 << 0);
+ let b = _mm_set_epi64x(0, 1);
+ let r = _mm_rorv_epi64(a, b);
+ let e = _mm_set_epi64x(1 << 32, 1 << 63);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_rorv_epi64() {
+ let a = _mm_set_epi64x(1 << 32, 1 << 0);
+ let b = _mm_set_epi64x(0, 1);
+ let r = _mm_mask_rorv_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_rorv_epi64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(1 << 32, 1 << 63);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_rorv_epi64() {
+ let a = _mm_set_epi64x(1 << 32, 1 << 0);
+ let b = _mm_set_epi64x(0, 1);
+ let r = _mm_maskz_rorv_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_rorv_epi64(0b00000011, a, b);
+ let e = _mm_set_epi64x(1 << 32, 1 << 63);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sllv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 63, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let count = _mm512_set_epi64(0, 2, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_sllv_epi64(a, count);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 32, 0, 1 << 34, 1 << 35,
+ 1 << 36, 1 << 37, 1 << 38, 1 << 39,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sllv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 63, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_sllv_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 32, 1 << 33, 0, 1 << 35,
+ 1 << 36, 1 << 37, 1 << 38, 1 << 39,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sllv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 63,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 1);
+ let r = _mm512_maskz_sllv_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_sllv_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_sllv_epi64() {
+ let a = _mm256_set_epi64x(1 << 32, 1 << 32, 1 << 63, 1 << 32);
+ let count = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_mask_sllv_epi64(a, 0, a, count);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_sllv_epi64(a, 0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 32, 1 << 33, 0, 1 << 35);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_sllv_epi64() {
+ let a = _mm256_set_epi64x(1 << 32, 1 << 32, 1 << 63, 1 << 32);
+ let count = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_maskz_sllv_epi64(0, a, count);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_sllv_epi64(0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 32, 1 << 33, 0, 1 << 35);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_sllv_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let count = _mm_set_epi64x(2, 3);
+ let r = _mm_mask_sllv_epi64(a, 0, a, count);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_sllv_epi64(a, 0b00000011, a, count);
+ let e = _mm_set_epi64x(0, 1 << 35);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_sllv_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let count = _mm_set_epi64x(2, 3);
+ let r = _mm_maskz_sllv_epi64(0, a, count);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_sllv_epi64(0b00000011, a, count);
+ let e = _mm_set_epi64x(0, 1 << 35);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srlv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 0, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_srlv_epi64(a, count);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 32, 0, 1 << 30, 1 << 29,
+ 1 << 28, 1 << 27, 1 << 26, 1 << 25,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srlv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 0, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_srlv_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 32, 0, 1 << 30, 1 << 29,
+ 1 << 28, 1 << 27, 1 << 26, 1 << 25,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srlv_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 0,
+ );
+ let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_maskz_srlv_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_srlv_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_srlv_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm256_set1_epi64x(1);
+ let r = _mm256_mask_srlv_epi64(a, 0, a, count);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_srlv_epi64(a, 0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_srlv_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm256_set1_epi64x(1);
+ let r = _mm256_maskz_srlv_epi64(0, a, count);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_srlv_epi64(0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_srlv_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set1_epi64x(1);
+ let r = _mm_mask_srlv_epi64(a, 0, a, count);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_srlv_epi64(a, 0b00000011, a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_srlv_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set1_epi64x(1);
+ let r = _mm_maskz_srlv_epi64(0, a, count);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_srlv_epi64(0b00000011, a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sll_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 63, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_sll_epi64(a, count);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 1 << 33, 1 << 33, 1 << 33,
+ 1 << 33, 1 << 33, 1 << 33, 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ let count = _mm_set_epi64x(1, 0);
+ let r = _mm512_sll_epi64(a, count);
+ assert_eq_m512i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sll_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 63, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_mask_sll_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 1 << 33, 1 << 33, 1 << 33,
+ 1 << 33, 1 << 33, 1 << 33, 1 << 33,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sll_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 63,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_maskz_sll_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_sll_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_sll_epi64() {
+ let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm256_mask_sll_epi64(a, 0, a, count);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_sll_epi64(a, 0b00001111, a, count);
+ let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_sll_epi64() {
+ let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm256_maskz_sll_epi64(0, a, count);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_sll_epi64(0b00001111, a, count);
+ let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_sll_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm_mask_sll_epi64(a, 0, a, count);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_sll_epi64(a, 0b00000011, a, count);
+ let e = _mm_set_epi64x(0, 1 << 33);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_sll_epi64() {
+ let a = _mm_set_epi64x(1 << 63, 1 << 32);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm_maskz_sll_epi64(0, a, count);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_sll_epi64(0b00000011, a, count);
+ let e = _mm_set_epi64x(0, 1 << 33);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srl_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 0, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_srl_epi64(a, count);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 1 << 31, 1 << 31, 1 << 31,
+ 1 << 31, 1 << 31, 1 << 31, 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srl_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 0, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_mask_srl_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 1 << 31, 1 << 31, 1 << 31,
+ 1 << 31, 1 << 31, 1 << 31, 1 << 31,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srl_epi64() {
+ #[rustfmt::skip]
+ let a = _mm512_set_epi64(
+ 1 << 32, 1 << 32, 1 << 32, 1 << 32,
+ 1 << 32, 1 << 32, 1 << 32, 1 << 0,
+ );
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm512_maskz_srl_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_srl_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_srl_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm256_mask_srl_epi64(a, 0, a, count);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_srl_epi64(a, 0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_srl_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm256_maskz_srl_epi64(0, a, count);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_srl_epi64(0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_srl_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm_mask_srl_epi64(a, 0, a, count);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_srl_epi64(a, 0b00000011, a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_srl_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm_maskz_srl_epi64(0, a, count);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_srl_epi64(0b00000011, a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_sra_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm_set_epi64x(0, 2);
+ let r = _mm512_sra_epi64(a, count);
+ let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_sra_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm_set_epi64x(0, 2);
+ let r = _mm512_mask_sra_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_sra_epi64(a, 0b11111111, a, count);
+ let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_sra_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm_set_epi64x(0, 2);
+ let r = _mm512_maskz_sra_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_sra_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_sra_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm256_sra_epi64(a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_sra_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm256_mask_sra_epi64(a, 0, a, count);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_sra_epi64(a, 0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_sra_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm256_maskz_sra_epi64(0, a, count);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_sra_epi64(0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_sra_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm_sra_epi64(a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_sra_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm_mask_sra_epi64(a, 0, a, count);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_sra_epi64(a, 0b00000011, a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_sra_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set_epi64x(0, 1);
+ let r = _mm_maskz_sra_epi64(0, a, count);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_sra_epi64(0b00000011, a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srav_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
+ let r = _mm512_srav_epi64(a, count);
+ let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srav_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
+ let r = _mm512_mask_srav_epi64(a, 0, a, count);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_srav_epi64(a, 0b11111111, a, count);
+ let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srav_epi64() {
+ let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16);
+ let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1);
+ let r = _mm512_maskz_srav_epi64(0, a, count);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_srav_epi64(0b00001111, a, count);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_srav_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm256_set1_epi64x(1);
+ let r = _mm256_srav_epi64(a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_srav_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm256_set1_epi64x(1);
+ let r = _mm256_mask_srav_epi64(a, 0, a, count);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_srav_epi64(a, 0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_srav_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let count = _mm256_set1_epi64x(1);
+ let r = _mm256_maskz_srav_epi64(0, a, count);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_srav_epi64(0b00001111, a, count);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_srav_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set1_epi64x(1);
+ let r = _mm_srav_epi64(a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_srav_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set1_epi64x(1);
+ let r = _mm_mask_srav_epi64(a, 0, a, count);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_srav_epi64(a, 0b00000011, a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_srav_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let count = _mm_set1_epi64x(1);
+ let r = _mm_maskz_srav_epi64(0, a, count);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_srav_epi64(0b00000011, a, count);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_srai_epi64() {
+ let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
+ let r = _mm512_srai_epi64::<2>(a);
+ let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_srai_epi64() {
+ let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
+ let r = _mm512_mask_srai_epi64::<2>(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_srai_epi64::<2>(a, 0b11111111, a);
+ let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_srai_epi64() {
+ let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16);
+ let r = _mm512_maskz_srai_epi64::<2>(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_srai_epi64::<2>(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, -4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_srai_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let r = _mm256_srai_epi64::<1>(a);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_srai_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let r = _mm256_mask_srai_epi64::<1>(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_srai_epi64::<1>(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_srai_epi64() {
+ let a = _mm256_set_epi64x(1 << 5, 0, 0, 0);
+ let r = _mm256_maskz_srai_epi64::<1>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_srai_epi64::<1>(0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 4, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_srai_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let r = _mm_srai_epi64::<1>(a);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_srai_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let r = _mm_mask_srai_epi64::<1>(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_srai_epi64::<1>(a, 0b00000011, a);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_srai_epi64() {
+ let a = _mm_set_epi64x(1 << 5, 0);
+ let r = _mm_maskz_srai_epi64::<1>(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_srai_epi64::<1>(0b00000011, a);
+ let e = _mm_set_epi64x(1 << 4, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permute_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_permute_pd::<0b11_11_11_11>(a);
+ let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permute_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_mask_permute_pd::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_permute_pd::<0b11_11_11_11>(a, 0b11111111, a);
+ let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permute_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_maskz_permute_pd::<0b11_11_11_11>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_permute_pd::<0b11_11_11_11>(0b11111111, a);
+ let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permute_pd() {
+ let a = _mm256_set_pd(3., 2., 1., 0.);
+ let r = _mm256_mask_permute_pd::<0b11_11>(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_permute_pd::<0b11_11>(a, 0b00001111, a);
+ let e = _mm256_set_pd(3., 3., 1., 1.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_permute_pd() {
+ let a = _mm256_set_pd(3., 2., 1., 0.);
+ let r = _mm256_maskz_permute_pd::<0b11_11>(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_permute_pd::<0b11_11>(0b00001111, a);
+ let e = _mm256_set_pd(3., 3., 1., 1.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_permute_pd() {
+ let a = _mm_set_pd(1., 0.);
+ let r = _mm_mask_permute_pd::<0b11>(a, 0, a);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_permute_pd::<0b11>(a, 0b00000011, a);
+ let e = _mm_set_pd(1., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_permute_pd() {
+ let a = _mm_set_pd(1., 0.);
+ let r = _mm_maskz_permute_pd::<0b11>(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_permute_pd::<0b11>(0b00000011, a);
+ let e = _mm_set_pd(1., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permutex_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_permutex_epi64::<0b11_11_11_11>(a);
+ let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutex_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_permutex_epi64::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_permutex_epi64::<0b11_11_11_11>(a, 0b11111111, a);
+ let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutex_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_maskz_permutex_epi64::<0b11_11_11_11>(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_permutex_epi64::<0b11_11_11_11>(0b11111111, a);
+ let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_permutex_epi64() {
+ let a = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_permutex_epi64::<0b11_11_11_11>(a);
+ let e = _mm256_set_epi64x(3, 3, 3, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutex_epi64() {
+ let a = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_mask_permutex_epi64::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_permutex_epi64::<0b11_11_11_11>(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(3, 3, 3, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm256_maskz_permutex_epi64() {
+ let a = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0b00001111, a);
+ let e = _mm256_set_epi64x(3, 3, 3, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permutex_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_permutex_pd::<0b11_11_11_11>(a);
+ let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutex_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_mask_permutex_pd::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_permutex_pd::<0b11_11_11_11>(a, 0b11111111, a);
+ let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutex_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_maskz_permutex_pd::<0b11_11_11_11>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_permutex_pd::<0b11_11_11_11>(0b11111111, a);
+ let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_permutex_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_permutex_pd::<0b11_11_11_11>(a);
+ let e = _mm256_set_pd(0., 0., 0., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutex_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_mask_permutex_pd::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_permutex_pd::<0b11_11_11_11>(a, 0b00001111, a);
+ let e = _mm256_set_pd(0., 0., 0., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_permutex_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_maskz_permutex_pd::<0b11_11_11_11>(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_permutex_pd::<0b11_11_11_11>(0b00001111, a);
+ let e = _mm256_set_pd(0., 0., 0., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permutevar_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_set1_epi64(0b1);
+ let r = _mm512_permutevar_pd(a, b);
+ let e = _mm512_set_pd(1., 1., 3., 3., 5., 5., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutevar_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_set1_epi64(0b1);
+ let r = _mm512_mask_permutevar_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_permutevar_pd(a, 0b11111111, a, b);
+ let e = _mm512_set_pd(1., 1., 3., 3., 5., 5., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutevar_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let b = _mm512_set1_epi64(0b1);
+ let r = _mm512_maskz_permutevar_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_permutevar_pd(0b00001111, a, b);
+ let e = _mm512_set_pd(0., 0., 0., 0., 5., 5., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutevar_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let b = _mm256_set1_epi64x(0b1);
+ let r = _mm256_mask_permutevar_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_permutevar_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(1., 1., 3., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_permutevar_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let b = _mm256_set1_epi64x(0b1);
+ let r = _mm256_maskz_permutevar_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_permutevar_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(1., 1., 3., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_permutevar_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let b = _mm_set1_epi64x(0b1);
+ let r = _mm_mask_permutevar_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_permutevar_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(1., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_permutevar_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let b = _mm_set1_epi64x(0b1);
+ let r = _mm_maskz_permutevar_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_permutevar_pd(0b00000011, a, b);
+ let e = _mm_set_pd(1., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permutexvar_epi64() {
+ let idx = _mm512_set1_epi64(1);
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_permutexvar_epi64(idx, a);
+ let e = _mm512_set1_epi64(6);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutexvar_epi64() {
+ let idx = _mm512_set1_epi64(1);
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_permutexvar_epi64(a, 0, idx, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_permutexvar_epi64(a, 0b11111111, idx, a);
+ let e = _mm512_set1_epi64(6);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutexvar_epi64() {
+ let idx = _mm512_set1_epi64(1);
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_maskz_permutexvar_epi64(0, idx, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_permutexvar_epi64(0b00001111, idx, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 6, 6, 6, 6);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_permutexvar_epi64() {
+ let idx = _mm256_set1_epi64x(1);
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_permutexvar_epi64(idx, a);
+ let e = _mm256_set1_epi64x(2);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutexvar_epi64() {
+ let idx = _mm256_set1_epi64x(1);
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_mask_permutexvar_epi64(a, 0, idx, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_permutexvar_epi64(a, 0b00001111, idx, a);
+ let e = _mm256_set1_epi64x(2);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_permutexvar_epi64() {
+ let idx = _mm256_set1_epi64x(1);
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_maskz_permutexvar_epi64(0, idx, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_permutexvar_epi64(0b00001111, idx, a);
+ let e = _mm256_set1_epi64x(2);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permutexvar_pd() {
+ let idx = _mm512_set1_epi64(1);
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_permutexvar_pd(idx, a);
+ let e = _mm512_set1_pd(6.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutexvar_pd() {
+ let idx = _mm512_set1_epi64(1);
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_mask_permutexvar_pd(a, 0, idx, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_permutexvar_pd(a, 0b11111111, idx, a);
+ let e = _mm512_set1_pd(6.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutexvar_pd() {
+ let idx = _mm512_set1_epi64(1);
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_maskz_permutexvar_pd(0, idx, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_permutexvar_pd(0b00001111, idx, a);
+ let e = _mm512_set_pd(0., 0., 0., 0., 6., 6., 6., 6.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_permutexvar_pd() {
+ let idx = _mm256_set1_epi64x(1);
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_permutexvar_pd(idx, a);
+ let e = _mm256_set1_pd(2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutexvar_pd() {
+ let idx = _mm256_set1_epi64x(1);
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_mask_permutexvar_pd(a, 0, idx, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_permutexvar_pd(a, 0b00001111, idx, a);
+ let e = _mm256_set1_pd(2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_permutexvar_pd() {
+ let idx = _mm256_set1_epi64x(1);
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_maskz_permutexvar_pd(0, idx, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_permutexvar_pd(0b00001111, idx, a);
+ let e = _mm256_set1_pd(2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permutex2var_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
+ let b = _mm512_set1_epi64(100);
+ let r = _mm512_permutex2var_epi64(a, idx, b);
+ let e = _mm512_set_epi64(6, 100, 5, 100, 4, 100, 3, 100);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutex2var_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
+ let b = _mm512_set1_epi64(100);
+ let r = _mm512_mask_permutex2var_epi64(a, 0, idx, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_permutex2var_epi64(a, 0b11111111, idx, b);
+ let e = _mm512_set_epi64(6, 100, 5, 100, 4, 100, 3, 100);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutex2var_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
+ let b = _mm512_set1_epi64(100);
+ let r = _mm512_maskz_permutex2var_epi64(0, a, idx, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_permutex2var_epi64(0b00001111, a, idx, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 4, 100, 3, 100);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask2_permutex2var_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let idx = _mm512_set_epi64(1000, 1 << 3, 2000, 1 << 3, 3, 1 << 3, 4, 1 << 3);
+ let b = _mm512_set1_epi64(100);
+ let r = _mm512_mask2_permutex2var_epi64(a, idx, 0, b);
+ assert_eq_m512i(r, idx);
+ let r = _mm512_mask2_permutex2var_epi64(a, idx, 0b00001111, b);
+ let e = _mm512_set_epi64(1000, 1 << 3, 2000, 1 << 3, 4, 100, 3, 100);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_permutex2var_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
+ let b = _mm256_set1_epi64x(100);
+ let r = _mm256_permutex2var_epi64(a, idx, b);
+ let e = _mm256_set_epi64x(2, 100, 1, 100);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutex2var_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
+ let b = _mm256_set1_epi64x(100);
+ let r = _mm256_mask_permutex2var_epi64(a, 0, idx, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_permutex2var_epi64(a, 0b00001111, idx, b);
+ let e = _mm256_set_epi64x(2, 100, 1, 100);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_permutex2var_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
+ let b = _mm256_set1_epi64x(100);
+ let r = _mm256_maskz_permutex2var_epi64(0, a, idx, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_permutex2var_epi64(0b00001111, a, idx, b);
+ let e = _mm256_set_epi64x(2, 100, 1, 100);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask2_permutex2var_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
+ let b = _mm256_set1_epi64x(100);
+ let r = _mm256_mask2_permutex2var_epi64(a, idx, 0, b);
+ assert_eq_m256i(r, idx);
+ let r = _mm256_mask2_permutex2var_epi64(a, idx, 0b00001111, b);
+ let e = _mm256_set_epi64x(2, 100, 1, 100);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_permutex2var_epi64() {
+ let a = _mm_set_epi64x(0, 1);
+ let idx = _mm_set_epi64x(1, 1 << 1);
+ let b = _mm_set1_epi64x(100);
+ let r = _mm_permutex2var_epi64(a, idx, b);
+ let e = _mm_set_epi64x(0, 100);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_permutex2var_epi64() {
+ let a = _mm_set_epi64x(0, 1);
+ let idx = _mm_set_epi64x(1, 1 << 1);
+ let b = _mm_set1_epi64x(100);
+ let r = _mm_mask_permutex2var_epi64(a, 0, idx, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_permutex2var_epi64(a, 0b00000011, idx, b);
+ let e = _mm_set_epi64x(0, 100);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_permutex2var_epi64() {
+ let a = _mm_set_epi64x(0, 1);
+ let idx = _mm_set_epi64x(1, 1 << 1);
+ let b = _mm_set1_epi64x(100);
+ let r = _mm_maskz_permutex2var_epi64(0, a, idx, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_permutex2var_epi64(0b00000011, a, idx, b);
+ let e = _mm_set_epi64x(0, 100);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask2_permutex2var_epi64() {
+ let a = _mm_set_epi64x(0, 1);
+ let idx = _mm_set_epi64x(1, 1 << 1);
+ let b = _mm_set1_epi64x(100);
+ let r = _mm_mask2_permutex2var_epi64(a, idx, 0, b);
+ assert_eq_m128i(r, idx);
+ let r = _mm_mask2_permutex2var_epi64(a, idx, 0b00000011, b);
+ let e = _mm_set_epi64x(0, 100);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permutex2var_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
+ let b = _mm512_set1_pd(100.);
+ let r = _mm512_permutex2var_pd(a, idx, b);
+ let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutex2var_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
+ let b = _mm512_set1_pd(100.);
+ let r = _mm512_mask_permutex2var_pd(a, 0, idx, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_permutex2var_pd(a, 0b11111111, idx, b);
+ let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutex2var_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
+ let b = _mm512_set1_pd(100.);
+ let r = _mm512_maskz_permutex2var_pd(0, a, idx, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_permutex2var_pd(0b00001111, a, idx, b);
+ let e = _mm512_set_pd(0., 0., 0., 0., 4., 100., 3., 100.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask2_permutex2var_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
+ let b = _mm512_set1_pd(100.);
+ let r = _mm512_mask2_permutex2var_pd(a, idx, 0, b);
+ assert_eq_m512d(r, _mm512_castsi512_pd(idx));
+ let r = _mm512_mask2_permutex2var_pd(a, idx, 0b11111111, b);
+ let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_permutex2var_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
+ let b = _mm256_set1_pd(100.);
+ let r = _mm256_permutex2var_pd(a, idx, b);
+ let e = _mm256_set_pd(2., 100., 1., 100.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutex2var_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
+ let b = _mm256_set1_pd(100.);
+ let r = _mm256_mask_permutex2var_pd(a, 0, idx, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_permutex2var_pd(a, 0b00001111, idx, b);
+ let e = _mm256_set_pd(2., 100., 1., 100.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_permutex2var_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
+ let b = _mm256_set1_pd(100.);
+ let r = _mm256_maskz_permutex2var_pd(0, a, idx, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_permutex2var_pd(0b00001111, a, idx, b);
+ let e = _mm256_set_pd(2., 100., 1., 100.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask2_permutex2var_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2);
+ let b = _mm256_set1_pd(100.);
+ let r = _mm256_mask2_permutex2var_pd(a, idx, 0, b);
+ assert_eq_m256d(r, _mm256_castsi256_pd(idx));
+ let r = _mm256_mask2_permutex2var_pd(a, idx, 0b00001111, b);
+ let e = _mm256_set_pd(2., 100., 1., 100.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_permutex2var_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let idx = _mm_set_epi64x(1, 1 << 1);
+ let b = _mm_set1_pd(100.);
+ let r = _mm_permutex2var_pd(a, idx, b);
+ let e = _mm_set_pd(0., 100.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_permutex2var_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let idx = _mm_set_epi64x(1, 1 << 1);
+ let b = _mm_set1_pd(100.);
+ let r = _mm_mask_permutex2var_pd(a, 0, idx, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_permutex2var_pd(a, 0b00000011, idx, b);
+ let e = _mm_set_pd(0., 100.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_permutex2var_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let idx = _mm_set_epi64x(1, 1 << 1);
+ let b = _mm_set1_pd(100.);
+ let r = _mm_maskz_permutex2var_pd(0, a, idx, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_permutex2var_pd(0b00000011, a, idx, b);
+ let e = _mm_set_pd(0., 100.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask2_permutex2var_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let idx = _mm_set_epi64x(1, 1 << 1);
+ let b = _mm_set1_pd(100.);
+ let r = _mm_mask2_permutex2var_pd(a, idx, 0, b);
+ assert_eq_m128d(r, _mm_castsi128_pd(idx));
+ let r = _mm_mask2_permutex2var_pd(a, idx, 0b00000011, b);
+ let e = _mm_set_pd(0., 100.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_shuffle_pd() {
+ let a = _mm256_set_pd(1., 4., 5., 8.);
+ let b = _mm256_set_pd(2., 3., 6., 7.);
+ let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(2., 1., 6., 5.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_shuffle_pd() {
+ let a = _mm256_set_pd(1., 4., 5., 8.);
+ let b = _mm256_set_pd(2., 3., 6., 7.);
+ let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b);
+ let e = _mm256_set_pd(2., 1., 6., 5.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_shuffle_pd() {
+ let a = _mm_set_pd(1., 4.);
+ let b = _mm_set_pd(2., 3.);
+ let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00000011, a, b);
+ let e = _mm_set_pd(2., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_shuffle_pd() {
+ let a = _mm_set_pd(1., 4.);
+ let b = _mm_set_pd(2., 3.);
+ let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0b00000011, a, b);
+ let e = _mm_set_pd(2., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_shuffle_i64x2() {
+ let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16);
+ let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15);
+ let r = _mm512_shuffle_i64x2::<0b00_00_00_00>(a, b);
+ let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_shuffle_i64x2() {
+ let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16);
+ let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15);
+ let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0b11111111, a, b);
+ let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_shuffle_i64x2() {
+ let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16);
+ let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15);
+ let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0b00001111, a, b);
+ let e = _mm512_setr_epi64(1, 4, 1, 4, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_shuffle_i64x2() {
+ let a = _mm256_set_epi64x(1, 4, 5, 8);
+ let b = _mm256_set_epi64x(2, 3, 6, 7);
+ let r = _mm256_shuffle_i64x2::<0b00>(a, b);
+ let e = _mm256_set_epi64x(6, 7, 5, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_shuffle_i64x2() {
+ let a = _mm256_set_epi64x(1, 4, 5, 8);
+ let b = _mm256_set_epi64x(2, 3, 6, 7);
+ let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(6, 7, 5, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_shuffle_i64x2() {
+ let a = _mm256_set_epi64x(1, 4, 5, 8);
+ let b = _mm256_set_epi64x(2, 3, 6, 7);
+ let r = _mm256_maskz_shuffle_i64x2::<0b00>(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_shuffle_i64x2::<0b00>(0b00001111, a, b);
+ let e = _mm256_set_epi64x(6, 7, 5, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_shuffle_f64x2() {
+ let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.);
+ let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.);
+ let r = _mm512_shuffle_f64x2::<0b00_00_00_00>(a, b);
+ let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_shuffle_f64x2() {
+ let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.);
+ let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.);
+ let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0b11111111, a, b);
+ let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_shuffle_f64x2() {
+ let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.);
+ let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.);
+ let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0b00001111, a, b);
+ let e = _mm512_setr_pd(1., 4., 1., 4., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_shuffle_f64x2() {
+ let a = _mm256_set_pd(1., 4., 5., 8.);
+ let b = _mm256_set_pd(2., 3., 6., 7.);
+ let r = _mm256_shuffle_f64x2::<0b00>(a, b);
+ let e = _mm256_set_pd(6., 7., 5., 8.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_shuffle_f64x2() {
+ let a = _mm256_set_pd(1., 4., 5., 8.);
+ let b = _mm256_set_pd(2., 3., 6., 7.);
+ let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(6., 7., 5., 8.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_shuffle_f64x2() {
+ let a = _mm256_set_pd(1., 4., 5., 8.);
+ let b = _mm256_set_pd(2., 3., 6., 7.);
+ let r = _mm256_maskz_shuffle_f64x2::<0b00>(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_shuffle_f64x2::<0b00>(0b00001111, a, b);
+ let e = _mm256_set_pd(6., 7., 5., 8.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_movedup_pd() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_movedup_pd(a);
+ let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_movedup_pd() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_mask_movedup_pd(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_movedup_pd(a, 0b11111111, a);
+ let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_movedup_pd() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_maskz_movedup_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_movedup_pd(0b00001111, a);
+ let e = _mm512_setr_pd(1., 1., 3., 3., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_movedup_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let r = _mm256_mask_movedup_pd(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_movedup_pd(a, 0b00001111, a);
+ let e = _mm256_set_pd(2., 2., 4., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_movedup_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let r = _mm256_maskz_movedup_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_movedup_pd(0b00001111, a);
+ let e = _mm256_set_pd(2., 2., 4., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_movedup_pd() {
+ let a = _mm_set_pd(1., 2.);
+ let r = _mm_mask_movedup_pd(a, 0, a);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_movedup_pd(a, 0b00000011, a);
+ let e = _mm_set_pd(2., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_movedup_pd() {
+ let a = _mm_set_pd(1., 2.);
+ let r = _mm_maskz_movedup_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_movedup_pd(0b00000011, a);
+ let e = _mm_set_pd(2., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_inserti64x4() {
+ let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm256_setr_epi64x(17, 18, 19, 20);
+ let r = _mm512_inserti64x4::<1>(a, b);
+ let e = _mm512_setr_epi64(1, 2, 3, 4, 17, 18, 19, 20);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_inserti64x4() {
+ let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm256_setr_epi64x(17, 18, 19, 20);
+ let r = _mm512_mask_inserti64x4::<1>(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_inserti64x4::<1>(a, 0b11111111, a, b);
+ let e = _mm512_setr_epi64(1, 2, 3, 4, 17, 18, 19, 20);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_inserti64x4() {
+ let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm256_setr_epi64x(17, 18, 19, 20);
+ let r = _mm512_maskz_inserti64x4::<1>(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_inserti64x4::<1>(0b00001111, a, b);
+ let e = _mm512_setr_epi64(1, 2, 3, 4, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_insertf64x4() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm256_setr_pd(17., 18., 19., 20.);
+ let r = _mm512_insertf64x4::<1>(a, b);
+ let e = _mm512_setr_pd(1., 2., 3., 4., 17., 18., 19., 20.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_insertf64x4() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm256_setr_pd(17., 18., 19., 20.);
+ let r = _mm512_mask_insertf64x4::<1>(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_insertf64x4::<1>(a, 0b11111111, a, b);
+ let e = _mm512_setr_pd(1., 2., 3., 4., 17., 18., 19., 20.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_insertf64x4() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm256_setr_pd(17., 18., 19., 20.);
+ let r = _mm512_maskz_insertf64x4::<1>(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_insertf64x4::<1>(0b00001111, a, b);
+ let e = _mm512_setr_pd(1., 2., 3., 4., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castpd128_pd512() {
+ let a = _mm_setr_pd(17., 18.);
+ let r = _mm512_castpd128_pd512(a);
+ let e = _mm512_setr_pd(17., 18., -1., -1., -1., -1., -1., -1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castpd256_pd512() {
+ let a = _mm256_setr_pd(17., 18., 19., 20.);
+ let r = _mm512_castpd256_pd512(a);
+ let e = _mm512_setr_pd(17., 18., 19., 20., -1., -1., -1., -1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_zextpd128_pd512() {
+ let a = _mm_setr_pd(17., 18.);
+ let r = _mm512_zextpd128_pd512(a);
+ let e = _mm512_setr_pd(17., 18., 0., 0., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_zextpd256_pd512() {
+ let a = _mm256_setr_pd(17., 18., 19., 20.);
+ let r = _mm512_zextpd256_pd512(a);
+ let e = _mm512_setr_pd(17., 18., 19., 20., 0., 0., 0., 0.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castpd512_pd128() {
+ let a = _mm512_setr_pd(17., 18., -1., -1., -1., -1., -1., -1.);
+ let r = _mm512_castpd512_pd128(a);
+ let e = _mm_setr_pd(17., 18.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castpd512_pd256() {
+ let a = _mm512_setr_pd(17., 18., 19., 20., -1., -1., -1., -1.);
+ let r = _mm512_castpd512_pd256(a);
+ let e = _mm256_setr_pd(17., 18., 19., 20.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castpd_ps() {
+ let a = _mm512_set1_pd(1.);
+ let r = _mm512_castpd_ps(a);
+ let e = _mm512_set_ps(
+ 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0,
+ 1.875, 0.0,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castpd_si512() {
+ let a = _mm512_set1_pd(1.);
+ let r = _mm512_castpd_si512(a);
+ let e = _mm512_set_epi32(
+ 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248,
+ 0, 1072693248, 0, 1072693248, 0,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castsi128_si512() {
+ let a = _mm_setr_epi64x(17, 18);
+ let r = _mm512_castsi128_si512(a);
+ let e = _mm512_setr_epi64(17, 18, -1, -1, -1, -1, -1, -1);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castsi256_si512() {
+ let a = _mm256_setr_epi64x(17, 18, 19, 20);
+ let r = _mm512_castsi256_si512(a);
+ let e = _mm512_setr_epi64(17, 18, 19, 20, -1, -1, -1, -1);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_zextsi128_si512() {
+ let a = _mm_setr_epi64x(17, 18);
+ let r = _mm512_zextsi128_si512(a);
+ let e = _mm512_setr_epi64(17, 18, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_zextsi256_si512() {
+ let a = _mm256_setr_epi64x(17, 18, 19, 20);
+ let r = _mm512_zextsi256_si512(a);
+ let e = _mm512_setr_epi64(17, 18, 19, 20, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castsi512_si128() {
+ let a = _mm512_setr_epi64(17, 18, -1, -1, -1, -1, -1, -1);
+ let r = _mm512_castsi512_si128(a);
+ let e = _mm_setr_epi64x(17, 18);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castsi512_si256() {
+ let a = _mm512_setr_epi64(17, 18, 19, 20, -1, -1, -1, -1);
+ let r = _mm512_castsi512_si256(a);
+ let e = _mm256_setr_epi64x(17, 18, 19, 20);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castsi512_ps() {
+ let a = _mm512_set1_epi64(1 << 62);
+ let r = _mm512_castsi512_ps(a);
+ let e = _mm512_set_ps(
+ 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0.,
+ );
+ assert_eq_m512(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_castsi512_pd() {
+ let a = _mm512_set1_epi64(1 << 62);
+ let r = _mm512_castsi512_pd(a);
+ let e = _mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_broadcastq_epi64() {
+ let a = _mm_setr_epi64x(17, 18);
+ let r = _mm512_broadcastq_epi64(a);
+ let e = _mm512_set1_epi64(17);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_broadcastq_epi64() {
+ let src = _mm512_set1_epi64(18);
+ let a = _mm_setr_epi64x(17, 18);
+ let r = _mm512_mask_broadcastq_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_broadcastq_epi64(src, 0b11111111, a);
+ let e = _mm512_set1_epi64(17);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_broadcastq_epi64() {
+ let a = _mm_setr_epi64x(17, 18);
+ let r = _mm512_maskz_broadcastq_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_broadcastq_epi64(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 17, 17, 17, 17);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_broadcastq_epi64() {
+ let src = _mm256_set1_epi64x(18);
+ let a = _mm_set_epi64x(17, 18);
+ let r = _mm256_mask_broadcastq_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_broadcastq_epi64(src, 0b00001111, a);
+ let e = _mm256_set1_epi64x(18);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_broadcastq_epi64() {
+ let a = _mm_set_epi64x(17, 18);
+ let r = _mm256_maskz_broadcastq_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_broadcastq_epi64(0b00001111, a);
+ let e = _mm256_set1_epi64x(18);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_broadcastq_epi64() {
+ let src = _mm_set1_epi64x(18);
+ let a = _mm_set_epi64x(17, 18);
+ let r = _mm_mask_broadcastq_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_broadcastq_epi64(src, 0b00000011, a);
+ let e = _mm_set1_epi64x(18);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_broadcastq_epi64() {
+ let a = _mm_set_epi64x(17, 18);
+ let r = _mm_maskz_broadcastq_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_broadcastq_epi64(0b00000011, a);
+ let e = _mm_set1_epi64x(18);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_broadcastsd_pd() {
+ let a = _mm_set_pd(17., 18.);
+ let r = _mm512_broadcastsd_pd(a);
+ let e = _mm512_set1_pd(18.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_broadcastsd_pd() {
+ let src = _mm512_set1_pd(18.);
+ let a = _mm_set_pd(17., 18.);
+ let r = _mm512_mask_broadcastsd_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_broadcastsd_pd(src, 0b11111111, a);
+ let e = _mm512_set1_pd(18.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_broadcastsd_pd() {
+ let a = _mm_set_pd(17., 18.);
+ let r = _mm512_maskz_broadcastsd_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_broadcastsd_pd(0b00001111, a);
+ let e = _mm512_set_pd(0., 0., 0., 0., 18., 18., 18., 18.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_broadcastsd_pd() {
+ let src = _mm256_set1_pd(18.);
+ let a = _mm_set_pd(17., 18.);
+ let r = _mm256_mask_broadcastsd_pd(src, 0, a);
+ assert_eq_m256d(r, src);
+ let r = _mm256_mask_broadcastsd_pd(src, 0b00001111, a);
+ let e = _mm256_set1_pd(18.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_broadcastsd_pd() {
+ let a = _mm_set_pd(17., 18.);
+ let r = _mm256_maskz_broadcastsd_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_broadcastsd_pd(0b00001111, a);
+ let e = _mm256_set1_pd(18.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_broadcast_i64x4() {
+ let a = _mm256_set_epi64x(17, 18, 19, 20);
+ let r = _mm512_broadcast_i64x4(a);
+ let e = _mm512_set_epi64(17, 18, 19, 20, 17, 18, 19, 20);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_broadcast_i64x4() {
+ let src = _mm512_set1_epi64(18);
+ let a = _mm256_set_epi64x(17, 18, 19, 20);
+ let r = _mm512_mask_broadcast_i64x4(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_broadcast_i64x4(src, 0b11111111, a);
+ let e = _mm512_set_epi64(17, 18, 19, 20, 17, 18, 19, 20);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_broadcast_i64x4() {
+ let a = _mm256_set_epi64x(17, 18, 19, 20);
+ let r = _mm512_maskz_broadcast_i64x4(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_broadcast_i64x4(0b00001111, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 17, 18, 19, 20);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_broadcast_f64x4() {
+ let a = _mm256_set_pd(17., 18., 19., 20.);
+ let r = _mm512_broadcast_f64x4(a);
+ let e = _mm512_set_pd(17., 18., 19., 20., 17., 18., 19., 20.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_broadcast_f64x4() {
+ let src = _mm512_set1_pd(18.);
+ let a = _mm256_set_pd(17., 18., 19., 20.);
+ let r = _mm512_mask_broadcast_f64x4(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_broadcast_f64x4(src, 0b11111111, a);
+ let e = _mm512_set_pd(17., 18., 19., 20., 17., 18., 19., 20.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_broadcast_f64x4() {
+ let a = _mm256_set_pd(17., 18., 19., 20.);
+ let r = _mm512_maskz_broadcast_f64x4(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_broadcast_f64x4(0b00001111, a);
+ let e = _mm512_set_pd(0., 0., 0., 0., 17., 18., 19., 20.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_blend_epi64() {
+ let a = _mm512_set1_epi64(1);
+ let b = _mm512_set1_epi64(2);
+ let r = _mm512_mask_blend_epi64(0b11110000, a, b);
+ let e = _mm512_set_epi64(2, 2, 2, 2, 1, 1, 1, 1);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_blend_epi64() {
+ let a = _mm256_set1_epi64x(1);
+ let b = _mm256_set1_epi64x(2);
+ let r = _mm256_mask_blend_epi64(0b00001111, a, b);
+ let e = _mm256_set1_epi64x(2);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_blend_epi64() {
+ let a = _mm_set1_epi64x(1);
+ let b = _mm_set1_epi64x(2);
+ let r = _mm_mask_blend_epi64(0b00000011, a, b);
+ let e = _mm_set1_epi64x(2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_blend_pd() {
+ let a = _mm512_set1_pd(1.);
+ let b = _mm512_set1_pd(2.);
+ let r = _mm512_mask_blend_pd(0b11110000, a, b);
+ let e = _mm512_set_pd(2., 2., 2., 2., 1., 1., 1., 1.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_blend_pd() {
+ let a = _mm256_set1_pd(1.);
+ let b = _mm256_set1_pd(2.);
+ let r = _mm256_mask_blend_pd(0b00001111, a, b);
+ let e = _mm256_set1_pd(2.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_blend_pd() {
+ let a = _mm_set1_pd(1.);
+ let b = _mm_set1_pd(2.);
+ let r = _mm_mask_blend_pd(0b00000011, a, b);
+ let e = _mm_set1_pd(2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_unpackhi_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_unpackhi_epi64(a, b);
+ let e = _mm512_set_epi64(17, 1, 19, 3, 21, 5, 23, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_unpackhi_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_mask_unpackhi_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_unpackhi_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(17, 1, 19, 3, 21, 5, 23, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_unpackhi_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_maskz_unpackhi_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_unpackhi_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 21, 5, 23, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_unpackhi_epi64() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let b = _mm256_set_epi64x(17, 18, 19, 20);
+ let r = _mm256_mask_unpackhi_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_unpackhi_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(17, 1, 19, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_unpackhi_epi64() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let b = _mm256_set_epi64x(17, 18, 19, 20);
+ let r = _mm256_maskz_unpackhi_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_unpackhi_epi64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(17, 1, 19, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_unpackhi_epi64() {
+ let a = _mm_set_epi64x(1, 2);
+ let b = _mm_set_epi64x(17, 18);
+ let r = _mm_mask_unpackhi_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_unpackhi_epi64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(17, 1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_unpackhi_epi64() {
+ let a = _mm_set_epi64x(1, 2);
+ let b = _mm_set_epi64x(17, 18);
+ let r = _mm_maskz_unpackhi_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_unpackhi_epi64(0b00000011, a, b);
+ let e = _mm_set_epi64x(17, 1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_unpackhi_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_unpackhi_pd(a, b);
+ let e = _mm512_set_pd(17., 1., 19., 3., 21., 5., 23., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_unpackhi_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_mask_unpackhi_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_unpackhi_pd(a, 0b11111111, a, b);
+ let e = _mm512_set_pd(17., 1., 19., 3., 21., 5., 23., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_unpackhi_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_maskz_unpackhi_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_unpackhi_pd(0b00001111, a, b);
+ let e = _mm512_set_pd(0., 0., 0., 0., 21., 5., 23., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_unpackhi_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let b = _mm256_set_pd(17., 18., 19., 20.);
+ let r = _mm256_mask_unpackhi_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_unpackhi_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(17., 1., 19., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_unpackhi_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let b = _mm256_set_pd(17., 18., 19., 20.);
+ let r = _mm256_maskz_unpackhi_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_unpackhi_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(17., 1., 19., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_unpackhi_pd() {
+ let a = _mm_set_pd(1., 2.);
+ let b = _mm_set_pd(17., 18.);
+ let r = _mm_mask_unpackhi_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_unpackhi_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(17., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_unpackhi_pd() {
+ let a = _mm_set_pd(1., 2.);
+ let b = _mm_set_pd(17., 18.);
+ let r = _mm_maskz_unpackhi_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_unpackhi_pd(0b00000011, a, b);
+ let e = _mm_set_pd(17., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_unpacklo_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_unpacklo_epi64(a, b);
+ let e = _mm512_set_epi64(18, 2, 20, 4, 22, 6, 24, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_unpacklo_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_mask_unpacklo_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_unpacklo_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(18, 2, 20, 4, 22, 6, 24, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_unpacklo_epi64() {
+ let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24);
+ let r = _mm512_maskz_unpacklo_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_unpacklo_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 22, 6, 24, 8);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_unpacklo_epi64() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let b = _mm256_set_epi64x(17, 18, 19, 20);
+ let r = _mm256_mask_unpacklo_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_unpacklo_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(18, 2, 20, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_unpacklo_epi64() {
+ let a = _mm256_set_epi64x(1, 2, 3, 4);
+ let b = _mm256_set_epi64x(17, 18, 19, 20);
+ let r = _mm256_maskz_unpacklo_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_unpacklo_epi64(0b00001111, a, b);
+ let e = _mm256_set_epi64x(18, 2, 20, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_unpacklo_epi64() {
+ let a = _mm_set_epi64x(1, 2);
+ let b = _mm_set_epi64x(17, 18);
+ let r = _mm_mask_unpacklo_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_unpacklo_epi64(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(18, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_unpacklo_epi64() {
+ let a = _mm_set_epi64x(1, 2);
+ let b = _mm_set_epi64x(17, 18);
+ let r = _mm_maskz_unpacklo_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_unpacklo_epi64(0b00000011, a, b);
+ let e = _mm_set_epi64x(18, 2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_unpacklo_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_unpacklo_pd(a, b);
+ let e = _mm512_set_pd(18., 2., 20., 4., 22., 6., 24., 8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_unpacklo_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_mask_unpacklo_pd(a, 0, a, b);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_unpacklo_pd(a, 0b11111111, a, b);
+ let e = _mm512_set_pd(18., 2., 20., 4., 22., 6., 24., 8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_unpacklo_pd() {
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.);
+ let r = _mm512_maskz_unpacklo_pd(0, a, b);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_unpacklo_pd(0b00001111, a, b);
+ let e = _mm512_set_pd(0., 0., 0., 0., 22., 6., 24., 8.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_unpacklo_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let b = _mm256_set_pd(17., 18., 19., 20.);
+ let r = _mm256_mask_unpacklo_pd(a, 0, a, b);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_unpacklo_pd(a, 0b00001111, a, b);
+ let e = _mm256_set_pd(18., 2., 20., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_unpacklo_pd() {
+ let a = _mm256_set_pd(1., 2., 3., 4.);
+ let b = _mm256_set_pd(17., 18., 19., 20.);
+ let r = _mm256_maskz_unpacklo_pd(0, a, b);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_unpacklo_pd(0b00001111, a, b);
+ let e = _mm256_set_pd(18., 2., 20., 4.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_unpacklo_pd() {
+ let a = _mm_set_pd(1., 2.);
+ let b = _mm_set_pd(17., 18.);
+ let r = _mm_mask_unpacklo_pd(a, 0, a, b);
+ assert_eq_m128d(r, a);
+ let r = _mm_mask_unpacklo_pd(a, 0b00000011, a, b);
+ let e = _mm_set_pd(18., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_unpacklo_pd() {
+ let a = _mm_set_pd(1., 2.);
+ let b = _mm_set_pd(17., 18.);
+ let r = _mm_maskz_unpacklo_pd(0, a, b);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_unpacklo_pd(0b00000011, a, b);
+ let e = _mm_set_pd(18., 2.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_alignr_epi64() {
+ let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1);
+ let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9);
+ let r = _mm512_alignr_epi64::<0>(a, b);
+ assert_eq_m512i(r, b);
+ let r = _mm512_alignr_epi64::<8>(a, b);
+ assert_eq_m512i(r, b);
+ let r = _mm512_alignr_epi64::<1>(a, b);
+ let e = _mm512_set_epi64(1, 16, 15, 14, 13, 12, 11, 10);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_alignr_epi64() {
+ let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1);
+ let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9);
+ let r = _mm512_mask_alignr_epi64::<1>(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_alignr_epi64::<1>(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(1, 16, 15, 14, 13, 12, 11, 10);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_alignr_epi64() {
+ let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1);
+ let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9);
+ let r = _mm512_maskz_alignr_epi64::<1>(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_alignr_epi64::<1>(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 13, 12, 11, 10);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_alignr_epi64() {
+ let a = _mm256_set_epi64x(4, 3, 2, 1);
+ let b = _mm256_set_epi64x(8, 7, 6, 5);
+ let r = _mm256_alignr_epi64::<0>(a, b);
+ let e = _mm256_set_epi64x(8, 7, 6, 5);
+ assert_eq_m256i(r, e);
+ let r = _mm256_alignr_epi64::<6>(a, b);
+ let e = _mm256_set_epi64x(6, 5, 4, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_alignr_epi64() {
+ let a = _mm256_set_epi64x(4, 3, 2, 1);
+ let b = _mm256_set_epi64x(8, 7, 6, 5);
+ let r = _mm256_mask_alignr_epi64::<1>(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_alignr_epi64::<0>(a, 0b00001111, a, b);
+ let e = _mm256_set_epi64x(8, 7, 6, 5);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_alignr_epi64() {
+ let a = _mm256_set_epi64x(4, 3, 2, 1);
+ let b = _mm256_set_epi64x(8, 7, 6, 5);
+ let r = _mm256_maskz_alignr_epi64::<1>(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_alignr_epi64::<0>(0b00001111, a, b);
+ let e = _mm256_set_epi64x(8, 7, 6, 5);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_alignr_epi64() {
+ let a = _mm_set_epi64x(2, 1);
+ let b = _mm_set_epi64x(4, 3);
+ let r = _mm_alignr_epi64::<0>(a, b);
+ let e = _mm_set_epi64x(4, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_alignr_epi64() {
+ let a = _mm_set_epi64x(2, 1);
+ let b = _mm_set_epi64x(4, 3);
+ let r = _mm_mask_alignr_epi64::<1>(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_alignr_epi64::<0>(a, 0b00000011, a, b);
+ let e = _mm_set_epi64x(4, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_alignr_epi64() {
+ let a = _mm_set_epi64x(2, 1);
+ let b = _mm_set_epi64x(4, 3);
+ let r = _mm_maskz_alignr_epi64::<1>(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_alignr_epi64::<0>(0b00000011, a, b);
+ let e = _mm_set_epi64x(4, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_and_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_and_epi64(a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_and_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_mask_and_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_and_epi64(a, 0b01111111, a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_and_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_maskz_and_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_and_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_and_epi64() {
+ let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm256_set1_epi64x(1 << 0);
+ let r = _mm256_mask_and_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_and_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set1_epi64x(1 << 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_and_epi64() {
+ let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm256_set1_epi64x(1 << 0);
+ let r = _mm256_maskz_and_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_and_epi64(0b00001111, a, b);
+ let e = _mm256_set1_epi64x(1 << 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_and_epi64() {
+ let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm_set1_epi64x(1 << 0);
+ let r = _mm_mask_and_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_and_epi64(a, 0b00000011, a, b);
+ let e = _mm_set1_epi64x(1 << 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_and_epi64() {
+ let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm_set1_epi64x(1 << 0);
+ let r = _mm_maskz_and_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_and_epi64(0b00000011, a, b);
+ let e = _mm_set1_epi64x(1 << 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_and_si512() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_and_epi64(a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_or_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_or_epi64(a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0,
+ 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_or_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_mask_or_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_or_epi64(a, 0b11111111, a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0,
+ 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_or_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_maskz_or_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_or_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_or_epi64() {
+ let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm256_set1_epi64x(1 << 13);
+ let r = _mm256_or_epi64(a, b);
+ let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_or_epi64() {
+ let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm256_set1_epi64x(1 << 13);
+ let r = _mm256_mask_or_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_or_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_or_epi64() {
+ let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm256_set1_epi64x(1 << 13);
+ let r = _mm256_maskz_or_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_or_epi64(0b00001111, a, b);
+ let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_or_epi64() {
+ let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm_set1_epi64x(1 << 13);
+ let r = _mm_or_epi64(a, b);
+ let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_or_epi64() {
+ let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm_set1_epi64x(1 << 13);
+ let r = _mm_mask_or_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_or_epi64(a, 0b00000011, a, b);
+ let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_or_epi64() {
+ let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm_set1_epi64x(1 << 13);
+ let r = _mm_maskz_or_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_or_epi64(0b00000011, a, b);
+ let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_or_si512() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_or_epi64(a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0,
+ 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_xor_epi64(a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_xor_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_mask_xor_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_xor_epi64() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_maskz_xor_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_xor_epi64(0b00001111, a, b);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_xor_epi64() {
+ let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm256_set1_epi64x(1 << 13);
+ let r = _mm256_xor_epi64(a, b);
+ let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_xor_epi64() {
+ let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm256_set1_epi64x(1 << 13);
+ let r = _mm256_mask_xor_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_xor_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_xor_epi64() {
+ let a = _mm256_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm256_set1_epi64x(1 << 13);
+ let r = _mm256_maskz_xor_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_xor_epi64(0b00001111, a, b);
+ let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_xor_epi64() {
+ let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm_set1_epi64x(1 << 13);
+ let r = _mm_xor_epi64(a, b);
+ let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_xor_epi64() {
+ let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm_set1_epi64x(1 << 13);
+ let r = _mm_mask_xor_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_xor_epi64(a, 0b00000011, a, b);
+ let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_xor_epi64() {
+ let a = _mm_set1_epi64x(1 << 0 | 1 << 15);
+ let b = _mm_set1_epi64x(1 << 13);
+ let r = _mm_maskz_xor_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_xor_epi64(0b00000011, a, b);
+ let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_xor_si512() {
+ let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3);
+ let r = _mm512_xor_epi64(a, b);
+ let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_andnot_epi64() {
+ let a = _mm512_set1_epi64(0);
+ let b = _mm512_set1_epi64(1 << 3 | 1 << 4);
+ let r = _mm512_andnot_epi64(a, b);
+ let e = _mm512_set1_epi64(1 << 3 | 1 << 4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_andnot_epi64() {
+ let a = _mm512_set1_epi64(1 << 1 | 1 << 2);
+ let b = _mm512_set1_epi64(1 << 3 | 1 << 4);
+ let r = _mm512_mask_andnot_epi64(a, 0, a, b);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_andnot_epi64(a, 0b11111111, a, b);
+ let e = _mm512_set1_epi64(1 << 3 | 1 << 4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_andnot_epi64() {
+ let a = _mm512_set1_epi64(1 << 1 | 1 << 2);
+ let b = _mm512_set1_epi64(1 << 3 | 1 << 4);
+ let r = _mm512_maskz_andnot_epi64(0, a, b);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_andnot_epi64(0b00001111, a, b);
+ #[rustfmt::skip]
+ let e = _mm512_set_epi64(
+ 0, 0, 0, 0,
+ 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_andnot_epi64() {
+ let a = _mm256_set1_epi64x(1 << 1 | 1 << 2);
+ let b = _mm256_set1_epi64x(1 << 3 | 1 << 4);
+ let r = _mm256_mask_andnot_epi64(a, 0, a, b);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_andnot_epi64(a, 0b00001111, a, b);
+ let e = _mm256_set1_epi64x(1 << 3 | 1 << 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_andnot_epi64() {
+ let a = _mm256_set1_epi64x(1 << 1 | 1 << 2);
+ let b = _mm256_set1_epi64x(1 << 3 | 1 << 4);
+ let r = _mm256_maskz_andnot_epi64(0, a, b);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_andnot_epi64(0b00001111, a, b);
+ let e = _mm256_set1_epi64x(1 << 3 | 1 << 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_andnot_epi64() {
+ let a = _mm_set1_epi64x(1 << 1 | 1 << 2);
+ let b = _mm_set1_epi64x(1 << 3 | 1 << 4);
+ let r = _mm_mask_andnot_epi64(a, 0, a, b);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_andnot_epi64(a, 0b00000011, a, b);
+ let e = _mm_set1_epi64x(1 << 3 | 1 << 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_andnot_epi64() {
+ let a = _mm_set1_epi64x(1 << 1 | 1 << 2);
+ let b = _mm_set1_epi64x(1 << 3 | 1 << 4);
+ let r = _mm_maskz_andnot_epi64(0, a, b);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_andnot_epi64(0b00000011, a, b);
+ let e = _mm_set1_epi64x(1 << 3 | 1 << 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_andnot_si512() {
+ let a = _mm512_set1_epi64(0);
+ let b = _mm512_set1_epi64(1 << 3 | 1 << 4);
+ let r = _mm512_andnot_si512(a, b);
+ let e = _mm512_set1_epi64(1 << 3 | 1 << 4);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_add_epi64() {
+ let a = _mm512_set1_epi64(1);
+ let e: i64 = _mm512_reduce_add_epi64(a);
+ assert_eq!(8, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_add_epi64() {
+ let a = _mm512_set1_epi64(1);
+ let e: i64 = _mm512_mask_reduce_add_epi64(0b11110000, a);
+ assert_eq!(4, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_add_pd() {
+ let a = _mm512_set1_pd(1.);
+ let e: f64 = _mm512_reduce_add_pd(a);
+ assert_eq!(8., e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_add_pd() {
+ let a = _mm512_set1_pd(1.);
+ let e: f64 = _mm512_mask_reduce_add_pd(0b11110000, a);
+ assert_eq!(4., e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_mul_epi64() {
+ let a = _mm512_set1_epi64(2);
+ let e: i64 = _mm512_reduce_mul_epi64(a);
+ assert_eq!(256, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_mul_epi64() {
+ let a = _mm512_set1_epi64(2);
+ let e: i64 = _mm512_mask_reduce_mul_epi64(0b11110000, a);
+ assert_eq!(16, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_mul_pd() {
+ let a = _mm512_set1_pd(2.);
+ let e: f64 = _mm512_reduce_mul_pd(a);
+ assert_eq!(256., e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_mul_pd() {
+ let a = _mm512_set1_pd(2.);
+ let e: f64 = _mm512_mask_reduce_mul_pd(0b11110000, a);
+ assert_eq!(16., e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_max_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i64 = _mm512_reduce_max_epi64(a);
+ assert_eq!(7, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_max_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i64 = _mm512_mask_reduce_max_epi64(0b11110000, a);
+ assert_eq!(3, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_max_epu64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u64 = _mm512_reduce_max_epu64(a);
+ assert_eq!(7, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_max_epu64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u64 = _mm512_mask_reduce_max_epu64(0b11110000, a);
+ assert_eq!(3, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_max_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let e: f64 = _mm512_reduce_max_pd(a);
+ assert_eq!(7., e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_max_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let e: f64 = _mm512_mask_reduce_max_pd(0b11110000, a);
+ assert_eq!(3., e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_min_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i64 = _mm512_reduce_min_epi64(a);
+ assert_eq!(0, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_min_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: i64 = _mm512_mask_reduce_min_epi64(0b11110000, a);
+ assert_eq!(0, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_min_epu64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u64 = _mm512_reduce_min_epu64(a);
+ assert_eq!(0, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_min_epu64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let e: u64 = _mm512_mask_reduce_min_epu64(0b11110000, a);
+ assert_eq!(0, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_min_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let e: f64 = _mm512_reduce_min_pd(a);
+ assert_eq!(0., e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_min_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let e: f64 = _mm512_mask_reduce_min_pd(0b11110000, a);
+ assert_eq!(0., e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_and_epi64() {
+ let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2);
+ let e: i64 = _mm512_reduce_and_epi64(a);
+ assert_eq!(0, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_and_epi64() {
+ let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2);
+ let e: i64 = _mm512_mask_reduce_and_epi64(0b11110000, a);
+ assert_eq!(1, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_reduce_or_epi64() {
+ let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2);
+ let e: i64 = _mm512_reduce_or_epi64(a);
+ assert_eq!(3, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_reduce_or_epi64() {
+ let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2);
+ let e: i64 = _mm512_mask_reduce_or_epi64(0b11110000, a);
+ assert_eq!(1, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_extractf64x4_pd() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_extractf64x4_pd::<1>(a);
+ let e = _mm256_setr_pd(5., 6., 7., 8.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_extractf64x4_pd() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let src = _mm256_set1_pd(100.);
+ let r = _mm512_mask_extractf64x4_pd::<1>(src, 0, a);
+ assert_eq_m256d(r, src);
+ let r = _mm512_mask_extractf64x4_pd::<1>(src, 0b11111111, a);
+ let e = _mm256_setr_pd(5., 6., 7., 8.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_extractf64x4_pd() {
+ let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.);
+ let r = _mm512_maskz_extractf64x4_pd::<1>(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm512_maskz_extractf64x4_pd::<1>(0b00000001, a);
+ let e = _mm256_setr_pd(5., 0., 0., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_extracti64x4_epi64() {
+ let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_extracti64x4_epi64::<0x1>(a);
+ let e = _mm256_setr_epi64x(5, 6, 7, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_extracti64x4_epi64() {
+ let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let src = _mm256_set1_epi64x(100);
+ let r = _mm512_mask_extracti64x4_epi64::<0x1>(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm512_mask_extracti64x4_epi64::<0x1>(src, 0b11111111, a);
+ let e = _mm256_setr_epi64x(5, 6, 7, 8);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_extracti64x4_epi64() {
+ let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8);
+ let r = _mm512_maskz_extracti64x4_epi64::<0x1>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm512_maskz_extracti64x4_epi64::<0x1>(0b00000001, a);
+ let e = _mm256_setr_epi64x(5, 0, 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_compress_epi64() {
+ let src = _mm512_set1_epi64(200);
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_compress_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_compress_epi64(src, 0b01010101, a);
+ let e = _mm512_set_epi64(200, 200, 200, 200, 1, 3, 5, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_compress_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_maskz_compress_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_compress_epi64(0b01010101, a);
+ let e = _mm512_set_epi64(0, 0, 0, 0, 1, 3, 5, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_compress_epi64() {
+ let src = _mm256_set1_epi64x(200);
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_mask_compress_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_compress_epi64(src, 0b00000101, a);
+ let e = _mm256_set_epi64x(200, 200, 1, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_compress_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_maskz_compress_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_compress_epi64(0b00000101, a);
+ let e = _mm256_set_epi64x(0, 0, 1, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_compress_epi64() {
+ let src = _mm_set1_epi64x(200);
+ let a = _mm_set_epi64x(0, 1);
+ let r = _mm_mask_compress_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_compress_epi64(src, 0b00000001, a);
+ let e = _mm_set_epi64x(200, 1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_compress_epi64() {
+ let a = _mm_set_epi64x(0, 1);
+ let r = _mm_maskz_compress_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_compress_epi64(0b00000001, a);
+ let e = _mm_set_epi64x(0, 1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_compress_pd() {
+ let src = _mm512_set1_pd(200.);
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_mask_compress_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_compress_pd(src, 0b01010101, a);
+ let e = _mm512_set_pd(200., 200., 200., 200., 1., 3., 5., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_compress_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_maskz_compress_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_compress_pd(0b01010101, a);
+ let e = _mm512_set_pd(0., 0., 0., 0., 1., 3., 5., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_compress_pd() {
+ let src = _mm256_set1_pd(200.);
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_mask_compress_pd(src, 0, a);
+ assert_eq_m256d(r, src);
+ let r = _mm256_mask_compress_pd(src, 0b00000101, a);
+ let e = _mm256_set_pd(200., 200., 1., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_compress_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_maskz_compress_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_compress_pd(0b00000101, a);
+ let e = _mm256_set_pd(0., 0., 1., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_compress_pd() {
+ let src = _mm_set1_pd(200.);
+ let a = _mm_set_pd(0., 1.);
+ let r = _mm_mask_compress_pd(src, 0, a);
+ assert_eq_m128d(r, src);
+ let r = _mm_mask_compress_pd(src, 0b00000001, a);
+ let e = _mm_set_pd(200., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_compress_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let r = _mm_maskz_compress_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_compress_pd(0b00000001, a);
+ let e = _mm_set_pd(0., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_expand_epi64() {
+ let src = _mm512_set1_epi64(200);
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_expand_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_expand_epi64(src, 0b01010101, a);
+ let e = _mm512_set_epi64(200, 4, 200, 5, 200, 6, 200, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_expand_epi64() {
+ let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_maskz_expand_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_expand_epi64(0b01010101, a);
+ let e = _mm512_set_epi64(0, 4, 0, 5, 0, 6, 0, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_expand_epi64() {
+ let src = _mm256_set1_epi64x(200);
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_mask_expand_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_expand_epi64(src, 0b00000101, a);
+ let e = _mm256_set_epi64x(200, 2, 200, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_expand_epi64() {
+ let a = _mm256_set_epi64x(0, 1, 2, 3);
+ let r = _mm256_maskz_expand_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_expand_epi64(0b00000101, a);
+ let e = _mm256_set_epi64x(0, 2, 0, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_expand_epi64() {
+ let src = _mm_set1_epi64x(200);
+ let a = _mm_set_epi64x(0, 1);
+ let r = _mm_mask_expand_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_expand_epi64(src, 0b00000001, a);
+ let e = _mm_set_epi64x(200, 1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_expand_epi64() {
+ let a = _mm_set_epi64x(0, 1);
+ let r = _mm_maskz_expand_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_expand_epi64(0b00000001, a);
+ let e = _mm_set_epi64x(0, 1);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_expand_pd() {
+ let src = _mm512_set1_pd(200.);
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_mask_expand_pd(src, 0, a);
+ assert_eq_m512d(r, src);
+ let r = _mm512_mask_expand_pd(src, 0b01010101, a);
+ let e = _mm512_set_pd(200., 4., 200., 5., 200., 6., 200., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_expand_pd() {
+ let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_maskz_expand_pd(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_expand_pd(0b01010101, a);
+ let e = _mm512_set_pd(0., 4., 0., 5., 0., 6., 0., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_expand_pd() {
+ let src = _mm256_set1_pd(200.);
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_mask_expand_pd(src, 0, a);
+ assert_eq_m256d(r, src);
+ let r = _mm256_mask_expand_pd(src, 0b00000101, a);
+ let e = _mm256_set_pd(200., 2., 200., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_expand_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_maskz_expand_pd(0, a);
+ assert_eq_m256d(r, _mm256_setzero_pd());
+ let r = _mm256_maskz_expand_pd(0b00000101, a);
+ let e = _mm256_set_pd(0., 2., 0., 3.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_expand_pd() {
+ let src = _mm_set1_pd(200.);
+ let a = _mm_set_pd(0., 1.);
+ let r = _mm_mask_expand_pd(src, 0, a);
+ assert_eq_m128d(r, src);
+ let r = _mm_mask_expand_pd(src, 0b00000001, a);
+ let e = _mm_set_pd(200., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_expand_pd() {
+ let a = _mm_set_pd(0., 1.);
+ let r = _mm_maskz_expand_pd(0, a);
+ assert_eq_m128d(r, _mm_setzero_pd());
+ let r = _mm_maskz_expand_pd(0b00000001, a);
+ let e = _mm_set_pd(0., 1.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_loadu_epi64() {
+ let a = &[4, 3, 2, 5, -8, -9, -64, -50];
+ let p = a.as_ptr();
+ let r = _mm512_loadu_epi64(black_box(p));
+ let e = _mm512_setr_epi64(4, 3, 2, 5, -8, -9, -64, -50);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_loadu_epi64() {
+ let a = &[4, 3, 2, 5];
+ let p = a.as_ptr();
+ let r = _mm256_loadu_epi64(black_box(p));
+ let e = _mm256_setr_epi64x(4, 3, 2, 5);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_loadu_epi64() {
+ let a = &[4, 3];
+ let p = a.as_ptr();
+ let r = _mm_loadu_epi64(black_box(p));
+ let e = _mm_setr_epi64x(4, 3);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi64_storeu_epi16() {
+ let a = _mm512_set1_epi64(9);
+ let mut r = _mm_undefined_si128();
+ _mm512_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set1_epi16(9);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi64_storeu_epi16() {
+ let a = _mm256_set1_epi64x(9);
+ let mut r = _mm_set1_epi16(0);
+ _mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi64_storeu_epi16() {
+ let a = _mm_set1_epi64x(9);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 9, 9);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_storeu_epi16() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm_undefined_si128();
+ _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set1_epi16(i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_storeu_epi16() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_storeu_epi16() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_storeu_epi16() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm_undefined_si128();
+ _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set1_epi16(u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_storeu_epi16() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(
+ 0,
+ 0,
+ 0,
+ 0,
+ u16::MAX as i16,
+ u16::MAX as i16,
+ u16::MAX as i16,
+ u16::MAX as i16,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_storeu_epi16() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi64_storeu_epi8() {
+ let a = _mm512_set1_epi64(9);
+ let mut r = _mm_set1_epi8(0);
+ _mm512_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi64_storeu_epi8() {
+ let a = _mm256_set1_epi64x(9);
+ let mut r = _mm_set1_epi8(0);
+ _mm256_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi64_storeu_epi8() {
+ let a = _mm_set1_epi64x(9);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_storeu_epi8() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm512_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_storeu_epi8() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm256_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ i8::MAX, i8::MAX, i8::MAX, i8::MAX,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_storeu_epi8() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_storeu_epi8() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm512_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_storeu_epi8() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm256_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_storeu_epi8() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi8(0);
+ _mm_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ #[rustfmt::skip]
+ let e = _mm_set_epi8(
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, u8::MAX as i8, u8::MAX as i8,
+ );
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtepi64_storeu_epi32() {
+ let a = _mm512_set1_epi64(9);
+ let mut r = _mm256_undefined_si256();
+ _mm512_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm256_set1_epi32(9);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtepi64_storeu_epi32() {
+ let a = _mm256_set1_epi64x(9);
+ let mut r = _mm_set1_epi32(0);
+ _mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi32(9, 9, 9, 9);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtepi64_storeu_epi32() {
+ let a = _mm_set1_epi64x(9);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm_set_epi32(0, 0, 9, 9);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtsepi64_storeu_epi32() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm256_undefined_si256();
+ _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm256_set1_epi32(i32::MAX);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtsepi64_storeu_epi32() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi32(0);
+ _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a);
+ let e = _mm_set1_epi32(i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtsepi64_storeu_epi32() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, i32::MAX, i32::MAX);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cvtusepi64_storeu_epi32() {
+ let a = _mm512_set1_epi64(i64::MAX);
+ let mut r = _mm256_undefined_si256();
+ _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a);
+ let e = _mm256_set1_epi32(u32::MAX as i32);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cvtusepi64_storeu_epi32() {
+ let a = _mm256_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi32(0);
+ _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a);
+ let e = _mm_set1_epi32(u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cvtusepi64_storeu_epi32() {
+ let a = _mm_set1_epi64x(i64::MAX);
+ let mut r = _mm_set1_epi16(0);
+ _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a);
+ let e = _mm_set_epi32(0, 0, u32::MAX as i32, u32::MAX as i32);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_storeu_epi64() {
+ let a = _mm512_set1_epi64(9);
+ let mut r = _mm512_set1_epi64(0);
+ _mm512_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+ assert_eq_m512i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_storeu_epi64() {
+ let a = _mm256_set1_epi64x(9);
+ let mut r = _mm256_set1_epi64x(0);
+ _mm256_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+ assert_eq_m256i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_storeu_epi64() {
+ let a = _mm_set1_epi64x(9);
+ let mut r = _mm_set1_epi64x(0);
+ _mm_storeu_epi64(&mut r as *mut _ as *mut i64, a);
+ assert_eq_m128i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_load_epi64() {
+ #[repr(align(64))]
+ struct Align {
+ data: [i64; 8], // 64 bytes
+ }
+ let a = Align {
+ data: [4, 3, 2, 5, -8, -9, -64, -50],
+ };
+ let p = (a.data).as_ptr();
+ let r = _mm512_load_epi64(black_box(p));
+ let e = _mm512_setr_epi64(4, 3, 2, 5, -8, -9, -64, -50);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_load_epi64() {
+ #[repr(align(64))]
+ struct Align {
+ data: [i64; 4],
+ }
+ let a = Align { data: [4, 3, 2, 5] };
+ let p = (a.data).as_ptr();
+ let r = _mm256_load_epi64(black_box(p));
+ let e = _mm256_set_epi64x(5, 2, 3, 4);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_load_epi64() {
+ #[repr(align(64))]
+ struct Align {
+ data: [i64; 2],
+ }
+ let a = Align { data: [4, 3] };
+ let p = (a.data).as_ptr();
+ let r = _mm_load_epi64(black_box(p));
+ let e = _mm_set_epi64x(3, 4);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_store_epi64() {
+ let a = _mm512_set1_epi64(9);
+ let mut r = _mm512_set1_epi64(0);
+ _mm512_store_epi64(&mut r as *mut _ as *mut i64, a);
+ assert_eq_m512i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_store_epi64() {
+ let a = _mm256_set1_epi64x(9);
+ let mut r = _mm256_set1_epi64x(0);
+ _mm256_store_epi64(&mut r as *mut _ as *mut i64, a);
+ assert_eq_m256i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_store_epi64() {
+ let a = _mm_set1_epi64x(9);
+ let mut r = _mm_set1_epi64x(0);
+ _mm_store_epi64(&mut r as *mut _ as *mut i64, a);
+ assert_eq_m128i(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_load_pd() {
+ #[repr(align(64))]
+ struct Align {
+ data: [f64; 8], // 64 bytes
+ }
+ let a = Align {
+ data: [4., 3., 2., 5., -8., -9., -64., -50.],
+ };
+ let p = (a.data).as_ptr();
+ let r = _mm512_load_pd(black_box(p));
+ let e = _mm512_setr_pd(4., 3., 2., 5., -8., -9., -64., -50.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_store_pd() {
+ let a = _mm512_set1_pd(9.);
+ let mut r = _mm512_undefined_pd();
+ _mm512_store_pd(&mut r as *mut _ as *mut f64, a);
+ assert_eq_m512d(r, a);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_test_epi64_mask() {
+ let a = _mm512_set1_epi64(1 << 0);
+ let b = _mm512_set1_epi64(1 << 0 | 1 << 1);
+ let r = _mm512_test_epi64_mask(a, b);
+ let e: __mmask8 = 0b11111111;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_test_epi64_mask() {
+ let a = _mm512_set1_epi64(1 << 0);
+ let b = _mm512_set1_epi64(1 << 0 | 1 << 1);
+ let r = _mm512_mask_test_epi64_mask(0, a, b);
+ assert_eq!(r, 0);
+ let r = _mm512_mask_test_epi64_mask(0b11111111, a, b);
+ let e: __mmask8 = 0b11111111;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_test_epi64_mask() {
+ let a = _mm256_set1_epi64x(1 << 0);
+ let b = _mm256_set1_epi64x(1 << 0 | 1 << 1);
+ let r = _mm256_test_epi64_mask(a, b);
+ let e: __mmask8 = 0b00001111;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_test_epi64_mask() {
+ let a = _mm256_set1_epi64x(1 << 0);
+ let b = _mm256_set1_epi64x(1 << 0 | 1 << 1);
+ let r = _mm256_mask_test_epi64_mask(0, a, b);
+ assert_eq!(r, 0);
+ let r = _mm256_mask_test_epi64_mask(0b00001111, a, b);
+ let e: __mmask8 = 0b00001111;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_test_epi64_mask() {
+ let a = _mm_set1_epi64x(1 << 0);
+ let b = _mm_set1_epi64x(1 << 0 | 1 << 1);
+ let r = _mm_test_epi64_mask(a, b);
+ let e: __mmask8 = 0b00000011;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_test_epi64_mask() {
+ let a = _mm_set1_epi64x(1 << 0);
+ let b = _mm_set1_epi64x(1 << 0 | 1 << 1);
+ let r = _mm_mask_test_epi64_mask(0, a, b);
+ assert_eq!(r, 0);
+ let r = _mm_mask_test_epi64_mask(0b00000011, a, b);
+ let e: __mmask8 = 0b00000011;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_testn_epi64_mask() {
+ let a = _mm512_set1_epi64(1 << 0);
+ let b = _mm512_set1_epi64(1 << 0 | 1 << 1);
+ let r = _mm512_testn_epi64_mask(a, b);
+ let e: __mmask8 = 0b00000000;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_testn_epi64_mask() {
+ let a = _mm512_set1_epi64(1 << 0);
+ let b = _mm512_set1_epi64(1 << 1);
+ let r = _mm512_mask_testn_epi64_mask(0, a, b);
+ assert_eq!(r, 0);
+ let r = _mm512_mask_testn_epi64_mask(0b11111111, a, b);
+ let e: __mmask8 = 0b11111111;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_testn_epi64_mask() {
+ let a = _mm256_set1_epi64x(1 << 0);
+ let b = _mm256_set1_epi64x(1 << 1);
+ let r = _mm256_testn_epi64_mask(a, b);
+ let e: __mmask8 = 0b00001111;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_testn_epi64_mask() {
+ let a = _mm256_set1_epi64x(1 << 0);
+ let b = _mm256_set1_epi64x(1 << 1);
+ let r = _mm256_mask_testn_epi64_mask(0, a, b);
+ assert_eq!(r, 0);
+ let r = _mm256_mask_testn_epi64_mask(0b11111111, a, b);
+ let e: __mmask8 = 0b00001111;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_testn_epi64_mask() {
+ let a = _mm_set1_epi64x(1 << 0);
+ let b = _mm_set1_epi64x(1 << 1);
+ let r = _mm_testn_epi64_mask(a, b);
+ let e: __mmask8 = 0b00000011;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_testn_epi64_mask() {
+ let a = _mm_set1_epi64x(1 << 0);
+ let b = _mm_set1_epi64x(1 << 1);
+ let r = _mm_mask_testn_epi64_mask(0, a, b);
+ assert_eq!(r, 0);
+ let r = _mm_mask_testn_epi64_mask(0b11111111, a, b);
+ let e: __mmask8 = 0b00000011;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_stream_pd() {
+ #[repr(align(64))]
+ struct Memory {
+ pub data: [f64; 8],
+ }
+ let a = _mm512_set1_pd(7.0);
+ let mut mem = Memory { data: [-1.0; 8] };
+
+ _mm512_stream_pd(&mut mem.data[0] as *mut f64, a);
+ for i in 0..8 {
+ assert_eq!(mem.data[i], get_m512d(a, i));
+ }
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_stream_si512() {
+ #[repr(align(64))]
+ struct Memory {
+ pub data: [i64; 8],
+ }
+ let a = _mm512_set1_epi64(7);
+ let mut mem = Memory { data: [-1; 8] };
+
+ _mm512_stream_si512(&mut mem.data[0] as *mut i64, a);
+ for i in 0..8 {
+ assert_eq!(mem.data[i], get_m512i(a, i));
+ }
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_set1_epi64() {
+ let src = _mm512_set1_epi64(2);
+ let a: i64 = 11;
+ let r = _mm512_mask_set1_epi64(src, 0, a);
+ assert_eq_m512i(r, src);
+ let r = _mm512_mask_set1_epi64(src, 0b11111111, a);
+ let e = _mm512_set1_epi64(11);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_set1_epi64() {
+ let a: i64 = 11;
+ let r = _mm512_maskz_set1_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_set1_epi64(0b11111111, a);
+ let e = _mm512_set1_epi64(11);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_set1_epi64() {
+ let src = _mm256_set1_epi64x(2);
+ let a: i64 = 11;
+ let r = _mm256_mask_set1_epi64(src, 0, a);
+ assert_eq_m256i(r, src);
+ let r = _mm256_mask_set1_epi64(src, 0b00001111, a);
+ let e = _mm256_set1_epi64x(11);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_set1_epi64() {
+ let a: i64 = 11;
+ let r = _mm256_maskz_set1_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_set1_epi64(0b00001111, a);
+ let e = _mm256_set1_epi64x(11);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_set1_epi64() {
+ let src = _mm_set1_epi64x(2);
+ let a: i64 = 11;
+ let r = _mm_mask_set1_epi64(src, 0, a);
+ assert_eq_m128i(r, src);
+ let r = _mm_mask_set1_epi64(src, 0b00000011, a);
+ let e = _mm_set1_epi64x(11);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_maskz_set1_epi64() {
+ let a: i64 = 11;
+ let r = _mm_maskz_set1_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_set1_epi64(0b00000011, a);
+ let e = _mm_set1_epi64x(11);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtsd_i64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtsd_i64(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtss_i64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtss_i64(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundi64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvt_roundi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsi64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvt_roundsi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvti64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvti64_ss(a, b);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvti64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvti64_sd(a, b);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsd_si64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvt_roundsd_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsd_i64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvt_roundsd_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsd_u64() {
+ let a = _mm_set_pd(1., f64::MAX);
+ let r = _mm_cvt_roundsd_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtsd_u64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtsd_u64(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundss_i64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvt_roundss_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundss_si64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvt_roundss_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
+ let e: i64 = -1;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundss_u64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvt_roundss_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtss_u64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtss_u64(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvttsd_i64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvttsd_i64(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundsd_i64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtt_roundsd_i64::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundsd_si64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtt_roundsd_si64::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundsd_u64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvtt_roundsd_u64::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvttsd_u64() {
+ let a = _mm_set_pd(1., -1.5);
+ let r = _mm_cvttsd_u64(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvttss_i64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvttss_i64(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundss_i64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtt_roundss_i64::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundss_si64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtt_roundss_si64::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e: i64 = -2;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtt_roundss_u64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvtt_roundss_u64::<_MM_FROUND_CUR_DIRECTION>(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvttss_u64() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let r = _mm_cvttss_u64(a);
+ let e: u64 = u64::MAX;
+ assert_eq!(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtu64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: u64 = 9;
+ let r = _mm_cvtu64_ss(a, b);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvtu64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: u64 = 9;
+ let r = _mm_cvtu64_sd(a, b);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundu64_ss() {
+ let a = _mm_set_ps(0., -0.5, 1., -1.5);
+ let b: u64 = 9;
+ let r = _mm_cvt_roundu64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_ps(0., -0.5, 1., 9.);
+ assert_eq_m128(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundu64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: u64 = 9;
+ let r = _mm_cvt_roundu64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundi64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvt_roundi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm_cvt_roundsi64_sd() {
+ let a = _mm_set_pd(1., -1.5);
+ let b: i64 = 9;
+ let r = _mm_cvt_roundsi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b);
+ let e = _mm_set_pd(1., 9.);
+ assert_eq_m128d(r, e);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs
new file mode 100644
index 000000000..9f71a8d38
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs
@@ -0,0 +1,183 @@
+//! Bit Manipulation Instruction (BMI) Set 1.0.
+//!
+//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
+//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
+//!
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
+//! available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Extracts bits in range [`start`, `start` + `length`) from `a` into
+/// the least significant bits of the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(bextr))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
+ _bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64)
+}
+
+/// Extracts bits of `a` specified by `control` into
+/// the least significant bits of the result.
+///
+/// Bits `[7,0]` of `control` specify the index to the first bit in the range
+/// to be extracted, and bits `[15,8]` specify the length of the range.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(bextr))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 {
+ x86_bmi_bextr_64(a, control)
+}
+
+/// Bitwise logical `AND` of inverted `a` with `b`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(andn))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 {
+ !a & b
+}
+
+/// Extracts lowest set isolated bit.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(blsi))]
+#[cfg(not(target_arch = "x86"))] // generates lots of instructions
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _blsi_u64(x: u64) -> u64 {
+ x & x.wrapping_neg()
+}
+
+/// Gets mask up to lowest set bit.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(blsmsk))]
+#[cfg(not(target_arch = "x86"))] // generates lots of instructions
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
+ x ^ (x.wrapping_sub(1_u64))
+}
+
+/// Resets the lowest set bit of `x`.
+///
+/// If `x` is sets CF.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(blsr))]
+#[cfg(not(target_arch = "x86"))] // generates lots of instructions
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _blsr_u64(x: u64) -> u64 {
+ x & (x.wrapping_sub(1))
+}
+
+/// Counts the number of trailing least significant zero bits.
+///
+/// When the source operand is `0`, it returns its size in bits.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(tzcnt))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _tzcnt_u64(x: u64) -> u64 {
+ x.trailing_zeros() as u64
+}
+
+/// Counts the number of trailing least significant zero bits.
+///
+/// When the source operand is `0`, it returns its size in bits.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(tzcnt))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 {
+ x.trailing_zeros() as i64
+}
+
+extern "C" {
+ #[link_name = "llvm.x86.bmi.bextr.64"]
+ fn x86_bmi_bextr_64(x: u64, y: u64) -> u64;
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::{x86::*, x86_64::*};
+
+ #[simd_test(enable = "bmi1")]
+ unsafe fn test_bextr_u64() {
+ let r = _bextr_u64(0b0101_0000u64, 4, 4);
+ assert_eq!(r, 0b0000_0101u64);
+ }
+
+ #[simd_test(enable = "bmi1")]
+ unsafe fn test_andn_u64() {
+ assert_eq!(_andn_u64(0, 0), 0);
+ assert_eq!(_andn_u64(0, 1), 1);
+ assert_eq!(_andn_u64(1, 0), 0);
+ assert_eq!(_andn_u64(1, 1), 0);
+
+ let r = _andn_u64(0b0000_0000u64, 0b0000_0000u64);
+ assert_eq!(r, 0b0000_0000u64);
+
+ let r = _andn_u64(0b0000_0000u64, 0b1111_1111u64);
+ assert_eq!(r, 0b1111_1111u64);
+
+ let r = _andn_u64(0b1111_1111u64, 0b0000_0000u64);
+ assert_eq!(r, 0b0000_0000u64);
+
+ let r = _andn_u64(0b1111_1111u64, 0b1111_1111u64);
+ assert_eq!(r, 0b0000_0000u64);
+
+ let r = _andn_u64(0b0100_0000u64, 0b0101_1101u64);
+ assert_eq!(r, 0b0001_1101u64);
+ }
+
+ #[simd_test(enable = "bmi1")]
+ unsafe fn test_blsi_u64() {
+ assert_eq!(_blsi_u64(0b1101_0000u64), 0b0001_0000u64);
+ }
+
+ #[simd_test(enable = "bmi1")]
+ unsafe fn test_blsmsk_u64() {
+ let r = _blsmsk_u64(0b0011_0000u64);
+ assert_eq!(r, 0b0001_1111u64);
+ }
+
+ #[simd_test(enable = "bmi1")]
+ unsafe fn test_blsr_u64() {
+ // TODO: test the behavior when the input is `0`.
+ let r = _blsr_u64(0b0011_0000u64);
+ assert_eq!(r, 0b0010_0000u64);
+ }
+
+ #[simd_test(enable = "bmi1")]
+ unsafe fn test_tzcnt_u64() {
+ assert_eq!(_tzcnt_u64(0b0000_0001u64), 0u64);
+ assert_eq!(_tzcnt_u64(0b0000_0000u64), 64u64);
+ assert_eq!(_tzcnt_u64(0b1001_0000u64), 4u64);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs
new file mode 100644
index 000000000..356d95a3d
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs
@@ -0,0 +1,139 @@
+//! Bit Manipulation Instruction (BMI) Set 2.0.
+//!
+//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
+//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
+//!
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
+//! available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [wikipedia_bmi]:
+//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Unsigned multiply without affecting flags.
+///
+/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with
+/// the low half and the high half of the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64)
+#[inline]
+#[cfg_attr(test, assert_instr(mul))]
+#[target_feature(enable = "bmi2")]
+#[cfg(not(target_arch = "x86"))] // calls an intrinsic
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
+ let result: u128 = (a as u128) * (b as u128);
+ *hi = (result >> 64) as u64;
+ result as u64
+}
+
+/// Zeroes higher bits of `a` >= `index`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64)
+#[inline]
+#[target_feature(enable = "bmi2")]
+#[cfg_attr(test, assert_instr(bzhi))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 {
+ x86_bmi2_bzhi_64(a, index as u64)
+}
+
+/// Scatter contiguous low order bits of `a` to the result at the positions
+/// specified by the `mask`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64)
+#[inline]
+#[target_feature(enable = "bmi2")]
+#[cfg_attr(test, assert_instr(pdep))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 {
+ x86_bmi2_pdep_64(a, mask)
+}
+
+/// Gathers the bits of `x` specified by the `mask` into the contiguous low
+/// order bit positions of the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64)
+#[inline]
+#[target_feature(enable = "bmi2")]
+#[cfg_attr(test, assert_instr(pext))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
+ x86_bmi2_pext_64(a, mask)
+}
+
+extern "C" {
+ #[link_name = "llvm.x86.bmi.bzhi.64"]
+ fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64;
+ #[link_name = "llvm.x86.bmi.pdep.64"]
+ fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64;
+ #[link_name = "llvm.x86.bmi.pext.64"]
+ fn x86_bmi2_pext_64(x: u64, y: u64) -> u64;
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86_64::*;
+
+ #[simd_test(enable = "bmi2")]
+ unsafe fn test_pext_u64() {
+ let n = 0b1011_1110_1001_0011u64;
+
+ let m0 = 0b0110_0011_1000_0101u64;
+ let s0 = 0b0000_0000_0011_0101u64;
+
+ let m1 = 0b1110_1011_1110_1111u64;
+ let s1 = 0b0001_0111_0100_0011u64;
+
+ assert_eq!(_pext_u64(n, m0), s0);
+ assert_eq!(_pext_u64(n, m1), s1);
+ }
+
+ #[simd_test(enable = "bmi2")]
+ unsafe fn test_pdep_u64() {
+ let n = 0b1011_1110_1001_0011u64;
+
+ let m0 = 0b0110_0011_1000_0101u64;
+ let s0 = 0b0000_0010_0000_0101u64;
+
+ let m1 = 0b1110_1011_1110_1111u64;
+ let s1 = 0b1110_1001_0010_0011u64;
+
+ assert_eq!(_pdep_u64(n, m0), s0);
+ assert_eq!(_pdep_u64(n, m1), s1);
+ }
+
+ #[simd_test(enable = "bmi2")]
+ unsafe fn test_bzhi_u64() {
+ let n = 0b1111_0010u64;
+ let s = 0b0001_0010u64;
+ assert_eq!(_bzhi_u64(n, 5), s);
+ }
+
+ #[simd_test(enable = "bmi2")]
+ #[rustfmt::skip]
+ unsafe fn test_mulx_u64() {
+ let a: u64 = 9_223_372_036_854_775_800;
+ let b: u64 = 100;
+ let mut hi = 0;
+ let lo = _mulx_u64(a, b, &mut hi);
+ /*
+result = 922337203685477580000 =
+0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000
+ ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+ assert_eq!(
+ lo,
+ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64
+ );
+ assert_eq!(hi, 0b00110001u64);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs
new file mode 100644
index 000000000..90a209ce3
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs
@@ -0,0 +1,29 @@
+//! Byte swap intrinsics.
+
+#![allow(clippy::module_name_repetitions)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Returns an integer with the reversed byte order of x
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64)
+#[inline]
+#[cfg_attr(test, assert_instr(bswap))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _bswap64(x: i64) -> i64 {
+ x.swap_bytes()
+}
+
+#[cfg(test)]
+mod tests {
+ use super::*;
+
+ #[test]
+ fn test_bswap64() {
+ unsafe {
+ assert_eq!(_bswap64(0x0EADBEEFFADECA0E), 0x0ECADEFAEFBEAD0E);
+ assert_eq!(_bswap64(0x0000000000000000), 0x0000000000000000);
+ }
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/bt.rs b/library/stdarch/crates/core_arch/src/x86_64/bt.rs
new file mode 100644
index 000000000..53da9d02f
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/bt.rs
@@ -0,0 +1,135 @@
+use crate::arch::asm;
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+// x32 wants to use a 32-bit address size, but asm! defaults to using the full
+// register name (e.g. rax). We have to explicitly override the placeholder to
+// use the 32-bit register name in that case.
+#[cfg(target_pointer_width = "32")]
+macro_rules! bt {
+ ($inst:expr) => {
+ concat!($inst, " {b}, ({p:e})")
+ };
+}
+#[cfg(target_pointer_width = "64")]
+macro_rules! bt {
+ ($inst:expr) => {
+ concat!($inst, " {b}, ({p})")
+ };
+}
+
+/// Returns the bit in position `b` of the memory addressed by `p`.
+#[inline]
+#[cfg_attr(test, assert_instr(bt))]
+#[stable(feature = "simd_x86_bittest", since = "1.55.0")]
+pub unsafe fn _bittest64(p: *const i64, b: i64) -> u8 {
+ let r: u8;
+ asm!(
+ bt!("btq"),
+ "setc {r}",
+ p = in(reg) p,
+ b = in(reg) b,
+ r = out(reg_byte) r,
+ options(readonly, nostack, pure, att_syntax)
+ );
+ r
+}
+
+/// Returns the bit in position `b` of the memory addressed by `p`, then sets the bit to `1`.
+#[inline]
+#[cfg_attr(test, assert_instr(bts))]
+#[stable(feature = "simd_x86_bittest", since = "1.55.0")]
+pub unsafe fn _bittestandset64(p: *mut i64, b: i64) -> u8 {
+ let r: u8;
+ asm!(
+ bt!("btsq"),
+ "setc {r}",
+ p = in(reg) p,
+ b = in(reg) b,
+ r = out(reg_byte) r,
+ options(nostack, att_syntax)
+ );
+ r
+}
+
+/// Returns the bit in position `b` of the memory addressed by `p`, then resets that bit to `0`.
+#[inline]
+#[cfg_attr(test, assert_instr(btr))]
+#[stable(feature = "simd_x86_bittest", since = "1.55.0")]
+pub unsafe fn _bittestandreset64(p: *mut i64, b: i64) -> u8 {
+ let r: u8;
+ asm!(
+ bt!("btrq"),
+ "setc {r}",
+ p = in(reg) p,
+ b = in(reg) b,
+ r = out(reg_byte) r,
+ options(nostack, att_syntax)
+ );
+ r
+}
+
+/// Returns the bit in position `b` of the memory addressed by `p`, then inverts that bit.
+#[inline]
+#[cfg_attr(test, assert_instr(btc))]
+#[stable(feature = "simd_x86_bittest", since = "1.55.0")]
+pub unsafe fn _bittestandcomplement64(p: *mut i64, b: i64) -> u8 {
+ let r: u8;
+ asm!(
+ bt!("btcq"),
+ "setc {r}",
+ p = in(reg) p,
+ b = in(reg) b,
+ r = out(reg_byte) r,
+ options(nostack, att_syntax)
+ );
+ r
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86_64::*;
+
+ #[test]
+ fn test_bittest64() {
+ unsafe {
+ let a = 0b0101_0000i64;
+ assert_eq!(_bittest64(&a as _, 4), 1);
+ assert_eq!(_bittest64(&a as _, 5), 0);
+ }
+ }
+
+ #[test]
+ fn test_bittestandset64() {
+ unsafe {
+ let mut a = 0b0101_0000i64;
+ assert_eq!(_bittestandset64(&mut a as _, 4), 1);
+ assert_eq!(_bittestandset64(&mut a as _, 4), 1);
+ assert_eq!(_bittestandset64(&mut a as _, 5), 0);
+ assert_eq!(_bittestandset64(&mut a as _, 5), 1);
+ }
+ }
+
+ #[test]
+ fn test_bittestandreset64() {
+ unsafe {
+ let mut a = 0b0101_0000i64;
+ assert_eq!(_bittestandreset64(&mut a as _, 4), 1);
+ assert_eq!(_bittestandreset64(&mut a as _, 4), 0);
+ assert_eq!(_bittestandreset64(&mut a as _, 5), 0);
+ assert_eq!(_bittestandreset64(&mut a as _, 5), 0);
+ }
+ }
+
+ #[test]
+ fn test_bittestandcomplement64() {
+ unsafe {
+ let mut a = 0b0101_0000i64;
+ assert_eq!(_bittestandcomplement64(&mut a as _, 4), 1);
+ assert_eq!(_bittestandcomplement64(&mut a as _, 4), 0);
+ assert_eq!(_bittestandcomplement64(&mut a as _, 4), 1);
+ assert_eq!(_bittestandcomplement64(&mut a as _, 5), 0);
+ assert_eq!(_bittestandcomplement64(&mut a as _, 5), 1);
+ }
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
new file mode 100644
index 000000000..391daed20
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
@@ -0,0 +1,73 @@
+use crate::sync::atomic::Ordering;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Compares and exchange 16 bytes (128 bits) of data atomically.
+///
+/// This intrinsic corresponds to the `cmpxchg16b` instruction on `x86_64`
+/// processors. It performs an atomic compare-and-swap, updating the `ptr`
+/// memory location to `val` if the current value in memory equals `old`.
+///
+/// # Return value
+///
+/// This function returns the previous value at the memory location. If it is
+/// equal to `old` then the memory was updated to `new`.
+///
+/// # Memory Orderings
+///
+/// This atomic operations has the same semantics of memory orderings as
+/// `AtomicUsize::compare_exchange` does, only operating on 16 bytes of memory
+/// instead of just a pointer.
+///
+/// For more information on memory orderings here see the `compare_exchange`
+/// documentation for other `Atomic*` types in the standard library.
+///
+/// # Unsafety
+///
+/// This method is unsafe because it takes a raw pointer and will attempt to
+/// read and possibly write the memory at the pointer. The pointer must also be
+/// aligned on a 16-byte boundary.
+///
+/// This method also requires the `cmpxchg16b` CPU feature to be available at
+/// runtime to work correctly. If the CPU running the binary does not actually
+/// support `cmpxchg16b` and the program enters an execution path that
+/// eventually would reach this function the behavior is undefined.
+///
+/// The `success` ordering must also be stronger or equal to `failure`, or this
+/// function call is undefined. See the `Atomic*` documentation's
+/// `compare_exchange` function for more information. When `compare_exchange`
+/// panics, this is undefined behavior. Currently this function aborts the
+/// process with an undefined instruction.
+#[inline]
+#[cfg_attr(test, assert_instr(cmpxchg16b, success = Ordering::SeqCst, failure = Ordering::SeqCst))]
+#[target_feature(enable = "cmpxchg16b")]
+pub unsafe fn cmpxchg16b(
+ dst: *mut u128,
+ old: u128,
+ new: u128,
+ success: Ordering,
+ failure: Ordering,
+) -> u128 {
+ use crate::{intrinsics, sync::atomic::Ordering::*};
+
+ debug_assert!(dst as usize % 16 == 0);
+
+ let (val, _ok) = match (success, failure) {
+ (Acquire, Acquire) => intrinsics::atomic_cxchg_acq(dst, old, new),
+ (Release, Relaxed) => intrinsics::atomic_cxchg_rel(dst, old, new),
+ (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel(dst, old, new),
+ (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed(dst, old, new),
+ (SeqCst, SeqCst) => intrinsics::atomic_cxchg(dst, old, new),
+ (Acquire, Relaxed) => intrinsics::atomic_cxchg_acq_failrelaxed(dst, old, new),
+ (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_failrelaxed(dst, old, new),
+ (SeqCst, Relaxed) => intrinsics::atomic_cxchg_failrelaxed(dst, old, new),
+ (SeqCst, Acquire) => intrinsics::atomic_cxchg_failacq(dst, old, new),
+
+ // The above block is all copied from libcore, and this statement is
+ // also copied from libcore except that it's a panic in libcore and we
+ // have a little bit more of a lightweight panic here.
+ _ => crate::core_arch::x86::ud2(),
+ };
+ val
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs
new file mode 100644
index 000000000..d02702046
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs
@@ -0,0 +1,112 @@
+//! FXSR floating-point context fast save and restore.
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.fxsave64"]
+ fn fxsave64(p: *mut u8);
+ #[link_name = "llvm.x86.fxrstor64"]
+ fn fxrstor64(p: *const u8);
+}
+
+/// Saves the `x87` FPU, `MMX` technology, `XMM`, and `MXCSR` registers to the
+/// 512-byte-long 16-byte-aligned memory region `mem_addr`.
+///
+/// A misaligned destination operand raises a general-protection (#GP) or an
+/// alignment check exception (#AC).
+///
+/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor].
+///
+/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
+/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave64)
+#[inline]
+#[target_feature(enable = "fxsr")]
+#[cfg_attr(test, assert_instr(fxsave64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _fxsave64(mem_addr: *mut u8) {
+ fxsave64(mem_addr)
+}
+
+/// Restores the `XMM`, `MMX`, `MXCSR`, and `x87` FPU registers from the
+/// 512-byte-long 16-byte-aligned memory region `mem_addr`.
+///
+/// The contents of this memory region should have been written to by a
+/// previous
+/// `_fxsave` or `_fxsave64` intrinsic.
+///
+/// A misaligned destination operand raises a general-protection (#GP) or an
+/// alignment check exception (#AC).
+///
+/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor].
+///
+/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
+/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor64)
+#[inline]
+#[target_feature(enable = "fxsr")]
+#[cfg_attr(test, assert_instr(fxrstor64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _fxrstor64(mem_addr: *const u8) {
+ fxrstor64(mem_addr)
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86_64::*;
+ use std::{cmp::PartialEq, fmt};
+ use stdarch_test::simd_test;
+
+ #[repr(align(16))]
+ struct FxsaveArea {
+ data: [u8; 512], // 512 bytes
+ }
+
+ impl FxsaveArea {
+ fn new() -> FxsaveArea {
+ FxsaveArea { data: [0; 512] }
+ }
+ fn ptr(&mut self) -> *mut u8 {
+ &mut self.data[0] as *mut _ as *mut u8
+ }
+ }
+
+ impl PartialEq<FxsaveArea> for FxsaveArea {
+ fn eq(&self, other: &FxsaveArea) -> bool {
+ for i in 0..self.data.len() {
+ if self.data[i] != other.data[i] {
+ return false;
+ }
+ }
+ true
+ }
+ }
+
+ impl fmt::Debug for FxsaveArea {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "[")?;
+ for i in 0..self.data.len() {
+ write!(f, "{}", self.data[i])?;
+ if i != self.data.len() - 1 {
+ write!(f, ", ")?;
+ }
+ }
+ write!(f, "]")
+ }
+ }
+
+ #[simd_test(enable = "fxsr")]
+ unsafe fn fxsave64() {
+ let mut a = FxsaveArea::new();
+ let mut b = FxsaveArea::new();
+
+ fxsr::_fxsave64(a.ptr());
+ fxsr::_fxrstor64(a.ptr());
+ fxsr::_fxsave64(b.ptr());
+ assert_eq!(a, b);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/macros.rs b/library/stdarch/crates/core_arch/src/x86_64/macros.rs
new file mode 100644
index 000000000..a3ea0e821
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/macros.rs
@@ -0,0 +1,36 @@
+//! Utility macros.
+
+// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is
+// not a round number.
+pub(crate) struct ValidateConstRound<const IMM: i32>;
+impl<const IMM: i32> ValidateConstRound<IMM> {
+ pub(crate) const VALID: () = {
+ assert!(
+ IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11,
+ "Invalid IMM value"
+ );
+ };
+}
+
+#[allow(unused)]
+macro_rules! static_assert_rounding {
+ ($imm:ident) => {
+ let _ = $crate::core_arch::x86_64::macros::ValidateConstRound::<$imm>::VALID;
+ };
+}
+
+// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is
+// not a sae number.
+pub(crate) struct ValidateConstSae<const IMM: i32>;
+impl<const IMM: i32> ValidateConstSae<IMM> {
+ pub(crate) const VALID: () = {
+ assert!(IMM == 4 || IMM == 8, "Invalid IMM value");
+ };
+}
+
+#[allow(unused)]
+macro_rules! static_assert_sae {
+ ($imm:ident) => {
+ let _ = $crate::core_arch::x86_64::macros::ValidateConstSae::<$imm>::VALID;
+ };
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/mod.rs b/library/stdarch/crates/core_arch/src/x86_64/mod.rs
new file mode 100644
index 000000000..461874ece
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/mod.rs
@@ -0,0 +1,55 @@
+//! `x86_64` intrinsics
+
+#[macro_use]
+mod macros;
+
+mod fxsr;
+pub use self::fxsr::*;
+
+mod sse;
+pub use self::sse::*;
+
+mod sse2;
+pub use self::sse2::*;
+
+mod sse41;
+pub use self::sse41::*;
+
+mod sse42;
+pub use self::sse42::*;
+
+mod xsave;
+pub use self::xsave::*;
+
+mod abm;
+pub use self::abm::*;
+
+mod avx;
+pub use self::avx::*;
+
+mod bmi;
+pub use self::bmi::*;
+
+mod bmi2;
+pub use self::bmi2::*;
+
+mod avx2;
+pub use self::avx2::*;
+
+mod avx512f;
+pub use self::avx512f::*;
+
+mod bswap;
+pub use self::bswap::*;
+
+mod rdrand;
+pub use self::rdrand::*;
+
+mod cmpxchg16b;
+pub use self::cmpxchg16b::*;
+
+mod adx;
+pub use self::adx::*;
+
+mod bt;
+pub use self::bt::*;
diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs
new file mode 100644
index 000000000..e5ec933fb
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs
@@ -0,0 +1,44 @@
+//! RDRAND and RDSEED instructions for returning random numbers from an Intel
+//! on-chip hardware random number generator which has been seeded by an
+//! on-chip entropy source.
+
+#![allow(clippy::module_name_repetitions)]
+
+#[allow(improper_ctypes)]
+extern "unadjusted" {
+ #[link_name = "llvm.x86.rdrand.64"]
+ fn x86_rdrand64_step() -> (u64, i32);
+ #[link_name = "llvm.x86.rdseed.64"]
+ fn x86_rdseed64_step() -> (u64, i32);
+}
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Read a hardware generated 64-bit random value and store the result in val.
+/// Returns 1 if a random value was generated, and 0 otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step)
+#[inline]
+#[target_feature(enable = "rdrand")]
+#[cfg_attr(test, assert_instr(rdrand))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 {
+ let (v, flag) = x86_rdrand64_step();
+ *val = v;
+ flag
+}
+
+/// Read a 64-bit NIST SP800-90B and SP800-90C compliant random value and store
+/// in val. Return 1 if a random value was generated, and 0 otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed64_step)
+#[inline]
+#[target_feature(enable = "rdseed")]
+#[cfg_attr(test, assert_instr(rdseed))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _rdseed64_step(val: &mut u64) -> i32 {
+ let (v, flag) = x86_rdseed64_step();
+ *val = v;
+ flag
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse.rs b/library/stdarch/crates/core_arch/src/x86_64/sse.rs
new file mode 100644
index 000000000..ca6799c90
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/sse.rs
@@ -0,0 +1,148 @@
+//! `x86_64` Streaming SIMD Extensions (SSE)
+
+use crate::core_arch::x86::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.sse.cvtss2si64"]
+ fn cvtss2si64(a: __m128) -> i64;
+ #[link_name = "llvm.x86.sse.cvttss2si64"]
+ fn cvttss2si64(a: __m128) -> i64;
+ #[link_name = "llvm.x86.sse.cvtsi642ss"]
+ fn cvtsi642ss(a: __m128, b: i64) -> __m128;
+}
+
+/// Converts the lowest 32 bit float in the input vector to a 64 bit integer.
+///
+/// The result is rounded according to the current rounding mode. If the result
+/// cannot be represented as a 64 bit integer the result will be
+/// `0x8000_0000_0000_0000` (`i64::MIN`) or trigger an invalid operation
+/// floating point exception if unmasked (see
+/// [`_mm_setcsr`](fn._mm_setcsr.html)).
+///
+/// This corresponds to the `CVTSS2SI` instruction (with 64 bit output).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(cvtss2si))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 {
+ cvtss2si64(a)
+}
+
+/// Converts the lowest 32 bit float in the input vector to a 64 bit integer
+/// with truncation.
+///
+/// The result is rounded always using truncation (round towards zero). If the
+/// result cannot be represented as a 64 bit integer the result will be
+/// `0x8000_0000_0000_0000` (`i64::MIN`) or an invalid operation floating
+/// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)).
+///
+/// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(cvttss2si))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 {
+ cvttss2si64(a)
+}
+
+/// Converts a 64 bit integer to a 32 bit float. The result vector is the input
+/// vector `a` with the lowest 32 bit float replaced by the converted integer.
+///
+/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit
+/// input).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss)
+#[inline]
+#[target_feature(enable = "sse")]
+#[cfg_attr(test, assert_instr(cvtsi2ss))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 {
+ cvtsi642ss(a, b)
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::arch::x86_64::*;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "sse")]
+ unsafe fn test_mm_cvtss_si64() {
+ let inputs = &[
+ (42.0f32, 42i64),
+ (-31.4, -31),
+ (-33.5, -34),
+ (-34.5, -34),
+ (4.0e10, 40_000_000_000),
+ (4.0e-10, 0),
+ (f32::NAN, i64::MIN),
+ (2147483500.1, 2147483520),
+ (9.223371e18, 9223370937343148032),
+ ];
+ for i in 0..inputs.len() {
+ let (xi, e) = inputs[i];
+ let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
+ let r = _mm_cvtss_si64(x);
+ assert_eq!(
+ e, r,
+ "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}",
+ i, x, r, e
+ );
+ }
+ }
+
+ #[simd_test(enable = "sse")]
+ unsafe fn test_mm_cvttss_si64() {
+ let inputs = &[
+ (42.0f32, 42i64),
+ (-31.4, -31),
+ (-33.5, -33),
+ (-34.5, -34),
+ (10.999, 10),
+ (-5.99, -5),
+ (4.0e10, 40_000_000_000),
+ (4.0e-10, 0),
+ (f32::NAN, i64::MIN),
+ (2147483500.1, 2147483520),
+ (9.223371e18, 9223370937343148032),
+ (9.223372e18, i64::MIN),
+ ];
+ for i in 0..inputs.len() {
+ let (xi, e) = inputs[i];
+ let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0);
+ let r = _mm_cvttss_si64(x);
+ assert_eq!(
+ e, r,
+ "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}",
+ i, x, r, e
+ );
+ }
+ }
+
+ #[simd_test(enable = "sse")]
+ unsafe fn test_mm_cvtsi64_ss() {
+ let inputs = &[
+ (4555i64, 4555.0f32),
+ (322223333, 322223330.0),
+ (-432, -432.0),
+ (-322223333, -322223330.0),
+ (9223372036854775807, 9.223372e18),
+ (-9223372036854775808, -9.223372e18),
+ ];
+
+ for i in 0..inputs.len() {
+ let (x, f) = inputs[i];
+ let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
+ let r = _mm_cvtsi64_ss(a, x);
+ let e = _mm_setr_ps(f, 6.0, 7.0, 8.0);
+ assert_eq_m128(e, r);
+ }
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs
new file mode 100644
index 000000000..f487a067f
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs
@@ -0,0 +1,209 @@
+//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2)
+
+use crate::{
+ core_arch::{simd_llvm::*, x86::*},
+ intrinsics,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.sse2.cvtsd2si64"]
+ fn cvtsd2si64(a: __m128d) -> i64;
+ #[link_name = "llvm.x86.sse2.cvttsd2si64"]
+ fn cvttsd2si64(a: __m128d) -> i64;
+}
+
+/// Converts the lower double-precision (64-bit) floating-point element in a to
+/// a 64-bit integer.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtsd2si))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 {
+ cvtsd2si64(a)
+}
+
+/// Alias for `_mm_cvtsd_si64`
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtsd2si))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 {
+ _mm_cvtsd_si64(a)
+}
+
+/// Converts the lower double-precision (64-bit) floating-point element in `a`
+/// to a 64-bit integer with truncation.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvttsd2si))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 {
+ cvttsd2si64(a)
+}
+
+/// Alias for `_mm_cvttsd_si64`
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvttsd2si))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 {
+ _mm_cvttsd_si64(a)
+}
+
+/// Stores a 64-bit integer value in the specified memory location.
+/// To minimize caching, the data is flagged as non-temporal (unlikely to be
+/// used again soon).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(movnti))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) {
+ intrinsics::nontemporal_store(mem_addr, a);
+}
+
+/// Returns a vector whose lowest element is `a` and all higher elements are
+/// `0`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i {
+ _mm_set_epi64x(0, a)
+}
+
+/// Returns a vector whose lowest element is `a` and all higher elements are
+/// `0`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
+ _mm_cvtsi64_si128(a)
+}
+
+/// Returns the lowest element of `a`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
+ simd_extract(a.as_i64x2(), 0)
+}
+
+/// Returns the lowest element of `a`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
+ _mm_cvtsi128_si64(a)
+}
+
+/// Returns `a` with its lower element replaced by `b` after converting it to
+/// an `f64`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtsi2sd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
+ simd_insert(a, 0, b as f64)
+}
+
+/// Returns `a` with its lower element replaced by `b` after converting it to
+/// an `f64`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtsi2sd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d {
+ _mm_cvtsi64_sd(a, b)
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::arch::x86_64::*;
+ use std::boxed;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "sse2")]
+ unsafe fn test_mm_cvtsd_si64() {
+ let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0));
+ assert_eq!(r, -2_i64);
+
+ let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN));
+ assert_eq!(r, i64::MIN);
+ }
+
+ #[simd_test(enable = "sse2")]
+ unsafe fn test_mm_cvtsd_si64x() {
+ let r = _mm_cvtsd_si64x(_mm_setr_pd(f64::NAN, f64::NAN));
+ assert_eq!(r, i64::MIN);
+ }
+
+ #[simd_test(enable = "sse2")]
+ unsafe fn test_mm_cvttsd_si64() {
+ let a = _mm_setr_pd(-1.1, 2.2);
+ let r = _mm_cvttsd_si64(a);
+ assert_eq!(r, -1_i64);
+ }
+
+ #[simd_test(enable = "sse2")]
+ unsafe fn test_mm_cvttsd_si64x() {
+ let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
+ let r = _mm_cvttsd_si64x(a);
+ assert_eq!(r, i64::MIN);
+ }
+
+ #[simd_test(enable = "sse2")]
+ unsafe fn test_mm_stream_si64() {
+ let a: i64 = 7;
+ let mut mem = boxed::Box::<i64>::new(-1);
+ _mm_stream_si64(&mut *mem as *mut i64, a);
+ assert_eq!(a, *mem);
+ }
+
+ #[simd_test(enable = "sse2")]
+ unsafe fn test_mm_cvtsi64_si128() {
+ let r = _mm_cvtsi64_si128(5);
+ assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
+ }
+
+ #[simd_test(enable = "sse2")]
+ unsafe fn test_mm_cvtsi128_si64() {
+ let r = _mm_cvtsi128_si64(_mm_setr_epi64x(5, 0));
+ assert_eq!(r, 5);
+ }
+
+ #[simd_test(enable = "sse2")]
+ unsafe fn test_mm_cvtsi64_sd() {
+ let a = _mm_set1_pd(3.5);
+ let r = _mm_cvtsi64_sd(a, 5);
+ assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs
new file mode 100644
index 000000000..3d1ea0cf6
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs
@@ -0,0 +1,62 @@
+//! `i686`'s Streaming SIMD Extensions 4.1 (SSE4.1)
+
+use crate::{
+ core_arch::{simd_llvm::*, x86::*},
+ mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Extracts an 64-bit integer from `a` selected with `IMM1`
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64)
+#[inline]
+#[target_feature(enable = "sse4.1")]
+#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(pextrq, IMM1 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
+ static_assert_imm1!(IMM1);
+ simd_extract(a.as_i64x2(), IMM1 as u32)
+}
+
+/// Returns a copy of `a` with the 64-bit integer from `i` inserted at a
+/// location specified by `IMM1`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64)
+#[inline]
+#[target_feature(enable = "sse4.1")]
+#[cfg_attr(test, assert_instr(pinsrq, IMM1 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_insert_epi64<const IMM1: i32>(a: __m128i, i: i64) -> __m128i {
+ static_assert_imm1!(IMM1);
+ transmute(simd_insert(a.as_i64x2(), IMM1 as u32, i))
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::arch::x86_64::*;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "sse4.1")]
+ unsafe fn test_mm_extract_epi64() {
+ let a = _mm_setr_epi64x(0, 1);
+ let r = _mm_extract_epi64::<1>(a);
+ assert_eq!(r, 1);
+ let r = _mm_extract_epi64::<0>(a);
+ assert_eq!(r, 0);
+ }
+
+ #[simd_test(enable = "sse4.1")]
+ unsafe fn test_mm_insert_epi64() {
+ let a = _mm_set1_epi64x(0);
+ let e = _mm_setr_epi64x(0, 32);
+ let r = _mm_insert_epi64::<1>(a, 32);
+ assert_eq_m128i(r, e);
+ let e = _mm_setr_epi64x(32, 0);
+ let r = _mm_insert_epi64::<0>(a, 32);
+ assert_eq_m128i(r, e);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse42.rs b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs
new file mode 100644
index 000000000..6b5d087c1
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs
@@ -0,0 +1,37 @@
+//! `x86_64`'s Streaming SIMD Extensions 4.2 (SSE4.2)
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.sse42.crc32.64.64"]
+ fn crc32_64_64(crc: u64, v: u64) -> u64;
+}
+
+/// Starting with the initial value in `crc`, return the accumulated
+/// CRC32-C value for unsigned 64-bit integer `v`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u64)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(crc32))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 {
+ crc32_64_64(crc, v)
+}
+
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::arch::x86_64::*;
+
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_crc32_u64() {
+ let crc = 0x7819dccd3e824;
+ let v = 0x2a22b845fed;
+ let i = _mm_crc32_u64(crc, v);
+ assert_eq!(i, 0xbb6cdc6c);
+ }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs
new file mode 100644
index 000000000..2afd3e433
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs
@@ -0,0 +1,227 @@
+//! `x86_64`'s `xsave` and `xsaveopt` target feature intrinsics
+
+#![allow(clippy::module_name_repetitions)]
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.xsave64"]
+ fn xsave64(p: *mut u8, hi: u32, lo: u32);
+ #[link_name = "llvm.x86.xrstor64"]
+ fn xrstor64(p: *const u8, hi: u32, lo: u32);
+ #[link_name = "llvm.x86.xsaveopt64"]
+ fn xsaveopt64(p: *mut u8, hi: u32, lo: u32);
+ #[link_name = "llvm.x86.xsavec64"]
+ fn xsavec64(p: *mut u8, hi: u32, lo: u32);
+ #[link_name = "llvm.x86.xsaves64"]
+ fn xsaves64(p: *mut u8, hi: u32, lo: u32);
+ #[link_name = "llvm.x86.xrstors64"]
+ fn xrstors64(p: *const u8, hi: u32, lo: u32);
+}
+
+/// Performs a full or partial save of the enabled processor states to memory at
+/// `mem_addr`.
+///
+/// State is saved based on bits `[62:0]` in `save_mask` and XCR0.
+/// `mem_addr` must be aligned on a 64-byte boundary.
+///
+/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of
+/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave64)
+#[inline]
+#[target_feature(enable = "xsave")]
+#[cfg_attr(test, assert_instr(xsave64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) {
+ xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
+}
+
+/// Performs a full or partial restore of the enabled processor states using
+/// the state information stored in memory at `mem_addr`.
+///
+/// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and
+/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
+/// boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor64)
+#[inline]
+#[target_feature(enable = "xsave")]
+#[cfg_attr(test, assert_instr(xrstor64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) {
+ xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
+}
+
+/// Performs a full or partial save of the enabled processor states to memory at
+/// `mem_addr`.
+///
+/// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`.
+/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize
+/// the manner in which data is saved. The performance of this instruction will
+/// be equal to or better than using the `XSAVE64` instruction.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt64)
+#[inline]
+#[target_feature(enable = "xsave,xsaveopt")]
+#[cfg_attr(test, assert_instr(xsaveopt64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
+ xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
+}
+
+/// Performs a full or partial save of the enabled processor states to memory
+/// at `mem_addr`.
+///
+/// `xsavec` differs from `xsave` in that it uses compaction and that it may
+/// use init optimization. State is saved based on bits `[62:0]` in `save_mask`
+/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec64)
+#[inline]
+#[target_feature(enable = "xsave,xsavec")]
+#[cfg_attr(test, assert_instr(xsavec64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
+ xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
+}
+
+/// Performs a full or partial save of the enabled processor states to memory at
+/// `mem_addr`
+///
+/// `xsaves` differs from xsave in that it can save state components
+/// corresponding to bits set in `IA32_XSS` `MSR` and that it may use the
+/// modified optimization. State is saved based on bits `[62:0]` in `save_mask`
+/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves64)
+#[inline]
+#[target_feature(enable = "xsave,xsaves")]
+#[cfg_attr(test, assert_instr(xsaves64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
+ xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
+}
+
+/// Performs a full or partial restore of the enabled processor states using the
+/// state information stored in memory at `mem_addr`.
+///
+/// `xrstors` differs from `xrstor` in that it can restore state components
+/// corresponding to bits set in the `IA32_XSS` `MSR`; `xrstors` cannot restore
+/// from an `xsave` area in which the extended region is in the standard form.
+/// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and
+/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
+/// boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors64)
+#[inline]
+#[target_feature(enable = "xsave,xsaves")]
+#[cfg_attr(test, assert_instr(xrstors64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
+ xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
+}
+
+// FIXME: https://github.com/rust-lang/stdarch/issues/209
+// All these tests fail with Intel SDE.
+/*
+#[cfg(test)]
+mod tests {
+ use crate::core_arch::x86::x86_64::xsave;
+ use stdarch_test::simd_test;
+ use std::fmt;
+
+ // FIXME: https://github.com/rust-lang/stdarch/issues/209
+ #[repr(align(64))]
+ struct XsaveArea {
+ // max size for 256-bit registers is 800 bytes:
+ // see https://software.intel.com/en-us/node/682996
+ // max size for 512-bit registers is 2560 bytes:
+ // FIXME: add source
+ data: [u8; 2560],
+ }
+
+ impl XsaveArea {
+ fn new() -> XsaveArea {
+ XsaveArea { data: [0; 2560] }
+ }
+ fn ptr(&mut self) -> *mut u8 {
+ &mut self.data[0] as *mut _ as *mut u8
+ }
+ }
+
+ impl PartialEq<XsaveArea> for XsaveArea {
+ fn eq(&self, other: &XsaveArea) -> bool {
+ for i in 0..self.data.len() {
+ if self.data[i] != other.data[i] {
+ return false;
+ }
+ }
+ true
+ }
+ }
+
+ impl fmt::Debug for XsaveArea {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ write!(f, "[")?;
+ for i in 0..self.data.len() {
+ write!(f, "{}", self.data[i])?;
+ if i != self.data.len() - 1 {
+ write!(f, ", ")?;
+ }
+ }
+ write!(f, "]")
+ }
+ }
+
+ #[simd_test(enable = "xsave")]
+ unsafe fn xsave64() {
+ let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
+ let mut a = XsaveArea::new();
+ let mut b = XsaveArea::new();
+
+ xsave::_xsave64(a.ptr(), m);
+ xsave::_xrstor64(a.ptr(), m);
+ xsave::_xsave64(b.ptr(), m);
+ assert_eq!(a, b);
+ }
+
+ #[simd_test(enable = "xsave,xsaveopt")]
+ unsafe fn xsaveopt64() {
+ let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
+ let mut a = XsaveArea::new();
+ let mut b = XsaveArea::new();
+
+ xsave::_xsaveopt64(a.ptr(), m);
+ xsave::_xrstor64(a.ptr(), m);
+ xsave::_xsaveopt64(b.ptr(), m);
+ assert_eq!(a, b);
+ }
+
+ #[simd_test(enable = "xsave,xsavec")]
+ unsafe fn xsavec64() {
+ let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
+ let mut a = XsaveArea::new();
+ let mut b = XsaveArea::new();
+
+ xsave::_xsavec64(a.ptr(), m);
+ xsave::_xrstor64(a.ptr(), m);
+ xsave::_xsavec64(b.ptr(), m);
+ assert_eq!(a, b);
+ }
+
+ #[simd_test(enable = "xsave,xsaves")]
+ unsafe fn xsaves64() {
+ let m = 0xFFFFFFFFFFFFFFFF_u64; //< all registers
+ let mut a = XsaveArea::new();
+ let mut b = XsaveArea::new();
+
+ xsave::_xsaves64(a.ptr(), m);
+ xsave::_xrstors64(a.ptr(), m);
+ xsave::_xsaves64(b.ptr(), m);
+ assert_eq!(a, b);
+ }
+}
+*/