From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 17 Apr 2024 14:02:58 +0200
Subject: Adding upstream version 1.64.0+dfsg1.

Signed-off-by: Daniel Baumann
---
 library/stdarch/crates/core_arch/src/x86_64/abm.rs |    62 +
 library/stdarch/crates/core_arch/src/x86_64/adx.rs |   148 +
 library/stdarch/crates/core_arch/src/x86_64/avx.rs |    48 +
 .../stdarch/crates/core_arch/src/x86_64/avx2.rs    |    47 +
 .../stdarch/crates/core_arch/src/x86_64/avx512f.rs | 12346 +++++++++++++++++++
 library/stdarch/crates/core_arch/src/x86_64/bmi.rs |   183 +
 .../stdarch/crates/core_arch/src/x86_64/bmi2.rs    |   139 +
 .../stdarch/crates/core_arch/src/x86_64/bswap.rs   |    29 +
 library/stdarch/crates/core_arch/src/x86_64/bt.rs  |   135 +
 .../crates/core_arch/src/x86_64/cmpxchg16b.rs      |    73 +
 .../stdarch/crates/core_arch/src/x86_64/fxsr.rs    |   112 +
 .../stdarch/crates/core_arch/src/x86_64/macros.rs  |    36 +
 library/stdarch/crates/core_arch/src/x86_64/mod.rs |    55 +
 .../stdarch/crates/core_arch/src/x86_64/rdrand.rs  |    44 +
 library/stdarch/crates/core_arch/src/x86_64/sse.rs |   148 +
 .../stdarch/crates/core_arch/src/x86_64/sse2.rs    |   209 +
 .../stdarch/crates/core_arch/src/x86_64/sse41.rs   |    62 +
 .../stdarch/crates/core_arch/src/x86_64/sse42.rs   |    37 +
 .../stdarch/crates/core_arch/src/x86_64/xsave.rs   |   227 +
 19 files changed, 14140 insertions(+)
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/abm.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/adx.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/avx.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/avx2.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/bmi.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/bmi2.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/bswap.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/bt.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/fxsr.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/macros.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/mod.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/rdrand.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/sse.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/sse2.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/sse41.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/sse42.rs
 create mode 100644 library/stdarch/crates/core_arch/src/x86_64/xsave.rs

(limited to 'library/stdarch/crates/core_arch/src/x86_64')

diff --git a/library/stdarch/crates/core_arch/src/x86_64/abm.rs b/library/stdarch/crates/core_arch/src/x86_64/abm.rs
new file mode 100644
index 000000000..988074d67
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/abm.rs
@@ -0,0 +1,62 @@
+//! Advanced Bit Manipulation (ABM) instructions
+//!
+//! The POPCNT and LZCNT instructions have their own CPUID bits to indicate support.
+//!
+//! The references are:
+//!
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+//! Instruction Set Reference, A-Z][intel64_ref].
+//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+//! System Instructions][amd64_ref].
+//!
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
+//! available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [wikipedia_bmi]:
+//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Counts the leading (most significant) zero bits.
+///
+/// When the operand is zero, it returns its size in bits.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_lzcnt_u64)
+#[inline]
+#[target_feature(enable = "lzcnt")]
+#[cfg_attr(test, assert_instr(lzcnt))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _lzcnt_u64(x: u64) -> u64 {
+    x.leading_zeros() as u64
+}
+
+/// Counts the bits that are set.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_popcnt64)
+#[inline]
+#[target_feature(enable = "popcnt")]
+#[cfg_attr(test, assert_instr(popcnt))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _popcnt64(x: i64) -> i32 {
+    x.count_ones() as i32
+}
+
+#[cfg(test)]
+mod tests {
+    use stdarch_test::simd_test;
+
+    use crate::core_arch::arch::x86_64::*;
+
+    #[simd_test(enable = "lzcnt")]
+    unsafe fn test_lzcnt_u64() {
+        assert_eq!(_lzcnt_u64(0b0101_1010), 57);
+    }
+
+    #[simd_test(enable = "popcnt")]
+    unsafe fn test_popcnt64() {
+        assert_eq!(_popcnt64(0b0101_1010), 4);
+    }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/adx.rs b/library/stdarch/crates/core_arch/src/x86_64/adx.rs
new file mode 100644
index 000000000..a54d71136
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/adx.rs
@@ -0,0 +1,148 @@
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "unadjusted" {
+    #[link_name = "llvm.x86.addcarry.64"]
+    fn llvm_addcarry_u64(a: u8, b: u64, c: u64) -> (u8, u64);
+    #[link_name = "llvm.x86.addcarryx.u64"]
+    fn llvm_addcarryx_u64(a: u8, b: u64, c: u64, d: *mut u8) -> u8;
+    #[link_name = "llvm.x86.subborrow.64"]
+    fn llvm_subborrow_u64(a: u8, b: u64, c: u64) -> (u8, u64);
+}
+
+/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`
+/// (carry flag), stores the unsigned 64-bit result in `out`, and returns the
+/// carry-out (carry or overflow flag).
+#[inline]
+#[cfg_attr(test, assert_instr(adc))]
+#[stable(feature = "simd_x86_adx", since = "1.33.0")]
+pub unsafe fn _addcarry_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 {
+    let (a, b) = llvm_addcarry_u64(c_in, a, b);
+    *out = b;
+    a
+}
+
+/// Adds unsigned 64-bit integers `a` and `b` with unsigned 8-bit carry-in `c_in`
+/// (carry or overflow flag), stores the unsigned 64-bit result in `out`, and
+/// returns the carry-out (carry or overflow flag).
+#[inline]
+#[target_feature(enable = "adx")]
+#[cfg_attr(test, assert_instr(adc))]
+#[stable(feature = "simd_x86_adx", since = "1.33.0")]
+pub unsafe fn _addcarryx_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 {
+    llvm_addcarryx_u64(c_in, a, b, out as *mut _ as *mut u8)
+}
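These carry intrinsics are the building blocks of multi-precision arithmetic: the `u8` carry-out of one limb feeds the carry-in of the next. Below is a minimal sketch of a 128-bit addition built this way; the `add_u128` helper and its two-limb little-endian layout are illustrative, not part of this patch.

```rust
#[cfg(target_arch = "x86_64")]
unsafe fn add_u128(a: [u64; 2], b: [u64; 2]) -> ([u64; 2], u8) {
    use core::arch::x86_64::_addcarry_u64;
    let (mut lo, mut hi) = (0u64, 0u64);
    // Low limb first with carry-in 0; the returned flag threads into the
    // high limb, so an overflow out of `lo` propagates into `hi`.
    let c = _addcarry_u64(0, a[0], b[0], &mut lo);
    let c = _addcarry_u64(c, a[1], b[1], &mut hi);
    ([lo, hi], c) // the final carry-out reports overflow of the full 128 bits
}
```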
+/// Subtracts unsigned 64-bit integer `b` from `a` with unsigned 8-bit borrow-in
+/// `c_in` (carry or overflow flag), stores the unsigned 64-bit result in `out`,
+/// and returns the borrow-out (carry or overflow flag).
+#[inline]
+#[cfg_attr(test, assert_instr(sbb))]
+#[stable(feature = "simd_x86_adx", since = "1.33.0")]
+pub unsafe fn _subborrow_u64(c_in: u8, a: u64, b: u64, out: &mut u64) -> u8 {
+    let (a, b) = llvm_subborrow_u64(c_in, a, b);
+    *out = b;
+    a
+}
+
+#[cfg(test)]
+mod tests {
+    use stdarch_test::simd_test;
+
+    use crate::core_arch::x86_64::*;
+
+    #[test]
+    fn test_addcarry_u64() {
+        unsafe {
+            let a = u64::MAX;
+            let mut out = 0;
+
+            let r = _addcarry_u64(0, a, 1, &mut out);
+            assert_eq!(r, 1);
+            assert_eq!(out, 0);
+
+            let r = _addcarry_u64(0, a, 0, &mut out);
+            assert_eq!(r, 0);
+            assert_eq!(out, a);
+
+            let r = _addcarry_u64(1, a, 1, &mut out);
+            assert_eq!(r, 1);
+            assert_eq!(out, 1);
+
+            let r = _addcarry_u64(1, a, 0, &mut out);
+            assert_eq!(r, 1);
+            assert_eq!(out, 0);
+
+            let r = _addcarry_u64(0, 3, 4, &mut out);
+            assert_eq!(r, 0);
+            assert_eq!(out, 7);
+
+            let r = _addcarry_u64(1, 3, 4, &mut out);
+            assert_eq!(r, 0);
+            assert_eq!(out, 8);
+        }
+    }
+
+    #[simd_test(enable = "adx")]
+    unsafe fn test_addcarryx_u64() {
+        let a = u64::MAX;
+        let mut out = 0;
+
+        let r = _addcarryx_u64(0, a, 1, &mut out);
+        assert_eq!(r, 1);
+        assert_eq!(out, 0);
+
+        let r = _addcarryx_u64(0, a, 0, &mut out);
+        assert_eq!(r, 0);
+        assert_eq!(out, a);
+
+        let r = _addcarryx_u64(1, a, 1, &mut out);
+        assert_eq!(r, 1);
+        assert_eq!(out, 1);
+
+        let r = _addcarryx_u64(1, a, 0, &mut out);
+        assert_eq!(r, 1);
+        assert_eq!(out, 0);
+
+        let r = _addcarryx_u64(0, 3, 4, &mut out);
+        assert_eq!(r, 0);
+        assert_eq!(out, 7);
+
+        let r = _addcarryx_u64(1, 3, 4, &mut out);
+        assert_eq!(r, 0);
+        assert_eq!(out, 8);
+    }
+
+    #[test]
+    fn test_subborrow_u64() {
+        unsafe {
+            let a = u64::MAX;
+            let mut out = 0;
+
+            let r = _subborrow_u64(0, 0, 1, &mut out);
+            assert_eq!(r, 1);
+            assert_eq!(out, a);
+
+            let r = _subborrow_u64(0, 0, 0, &mut out);
+            assert_eq!(r, 0);
+            assert_eq!(out, 0);
+
+            let r = _subborrow_u64(1, 0, 1, &mut out);
+            assert_eq!(r, 1);
+            assert_eq!(out, a - 1);
+
+            let r = _subborrow_u64(1, 0, 0, &mut out);
+            assert_eq!(r, 1);
+            assert_eq!(out, a);
+
+            let r = _subborrow_u64(0, 7, 3, &mut out);
+            assert_eq!(r, 0);
+            assert_eq!(out, 4);
+
+            let r = _subborrow_u64(1, 7, 3, &mut out);
+            assert_eq!(r, 0);
+            assert_eq!(out, 3);
+        }
+    }
+}
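`_subborrow_u64` chains the same way for multi-word subtraction, with the flag now meaning "borrow": as the tests above show, a result of 1 signals that the subtrahend was larger. A sketch under the same assumptions as the addition example (the `sub_u128` helper is hypothetical):

```rust
#[cfg(target_arch = "x86_64")]
unsafe fn sub_u128(a: [u64; 2], b: [u64; 2]) -> ([u64; 2], u8) {
    use core::arch::x86_64::_subborrow_u64;
    let (mut lo, mut hi) = (0u64, 0u64);
    let borrow = _subborrow_u64(0, a[0], b[0], &mut lo); // low limb, borrow-in 0
    let borrow = _subborrow_u64(borrow, a[1], b[1], &mut hi); // propagate the borrow
    ([lo, hi], borrow) // 1 here means `b` exceeded `a` over the full 128 bits
}
```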
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx.rs b/library/stdarch/crates/core_arch/src/x86_64/avx.rs
new file mode 100644
index 000000000..7ba26371c
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx.rs
@@ -0,0 +1,48 @@
+//! Advanced Vector Extensions (AVX)
+//!
+//! The references are:
+//!
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+//! Instruction Set Reference, A-Z][intel64_ref].
+//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+//! System Instructions][amd64_ref].
+//!
+//! [Wikipedia][wiki] provides a quick overview of the instructions available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [wiki]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
+
+use crate::{
+    core_arch::{simd_llvm::*, x86::*},
+    mem::transmute,
+};
+
+/// Copies `a` to result, and inserts the 64-bit integer `i` into result
+/// at the location specified by `INDEX`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_insert_epi64)
+#[inline]
+#[rustc_legacy_const_generics(2)]
+#[target_feature(enable = "avx")]
+// This intrinsic has no corresponding instruction.
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_insert_epi64<const INDEX: i32>(a: __m256i, i: i64) -> __m256i {
+    static_assert_imm2!(INDEX);
+    transmute(simd_insert(a.as_i64x4(), INDEX as u32, i))
+}
+
+#[cfg(test)]
+mod tests {
+    use stdarch_test::simd_test;
+
+    use crate::core_arch::x86::*;
+
+    #[simd_test(enable = "avx")]
+    unsafe fn test_mm256_insert_epi64() {
+        let a = _mm256_setr_epi64x(1, 2, 3, 4);
+        let r = _mm256_insert_epi64::<3>(a, 0);
+        let e = _mm256_setr_epi64x(1, 2, 3, 0);
+        assert_eq_m256i(r, e);
+    }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx2.rs b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs
new file mode 100644
index 000000000..14447a137
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx2.rs
@@ -0,0 +1,47 @@
+//! Advanced Vector Extensions 2 (AVX2)
+//!
+//! AVX2 expands most AVX commands to 256-bit wide vector registers and
+//! adds [FMA](https://en.wikipedia.org/wiki/Fused_multiply-accumulate).
+//!
+//! The references are:
+//!
+//! - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2:
+//! Instruction Set Reference, A-Z][intel64_ref].
+//! - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and
+//! System Instructions][amd64_ref].
+//!
+//! Wikipedia's [AVX][wiki_avx] and [FMA][wiki_fma] pages provide a quick
+//! overview of the instructions available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf
+//! [wiki_avx]: https://en.wikipedia.org/wiki/Advanced_Vector_Extensions
+//! [wiki_fma]: https://en.wikipedia.org/wiki/Fused_multiply-accumulate

+use crate::core_arch::{simd_llvm::*, x86::*};
+
+/// Extracts a 64-bit integer from `a`, selected with `INDEX`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_extract_epi64)
+#[inline]
+#[target_feature(enable = "avx2")]
+#[rustc_legacy_const_generics(1)]
+// This intrinsic has no corresponding instruction.
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm256_extract_epi64<const INDEX: i32>(a: __m256i) -> i64 {
+    static_assert_imm2!(INDEX);
+    simd_extract(a.as_i64x4(), INDEX as u32)
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::arch::x86_64::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "avx2")]
+    unsafe fn test_mm256_extract_epi64() {
+        let a = _mm256_setr_epi64x(0, 1, 2, 3);
+        let r = _mm256_extract_epi64::<3>(a);
+        assert_eq!(r, 3);
+    }
+}
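Both lane accessors take the lane number as a const generic (`static_assert_imm2!` limits it to 0..=3 for the four 64-bit lanes), so an out-of-range index is rejected at compile time. A hedged usage sketch with a runtime feature check; the `replace_third_lane` helper is illustrative only:

```rust
#[cfg(target_arch = "x86_64")]
fn replace_third_lane(v: core::arch::x86_64::__m256i) -> Option<i64> {
    use core::arch::x86_64::{_mm256_extract_epi64, _mm256_insert_epi64};
    if !is_x86_feature_detected!("avx2") {
        return None; // extract needs AVX2; insert alone only needs AVX
    }
    unsafe {
        let patched = _mm256_insert_epi64::<2>(v, -1); // lane index as const generic
        Some(_mm256_extract_epi64::<2>(patched)) // reads back -1
    }
}
```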
diff --git a/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
new file mode 100644
index 000000000..5eed0502c
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/avx512f.rs
@@ -0,0 +1,12346 @@
+use crate::{
+    core_arch::{simd::*, simd_llvm::*, x86::*, x86_64::*},
+    mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_i64&expand=1792)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si))]
+pub unsafe fn _mm_cvtsd_i64(a: __m128d) -> i64 {
+    _mm_cvtsd_si64(a)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_i64&expand=1894)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si))]
+pub unsafe fn _mm_cvtss_i64(a: __m128) -> i64 {
+    _mm_cvtss_si64(a)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_u64&expand=1902)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi))]
+pub unsafe fn _mm_cvtss_u64(a: __m128) -> u64 {
+    transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_u64&expand=1800)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi))]
+pub unsafe fn _mm_cvtsd_u64(a: __m128d) -> u64 {
+    transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_ss&expand=1643)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss))]
+pub unsafe fn _mm_cvti64_ss(a: __m128, b: i64) -> __m128 {
+    let b = b as f32;
+    let r = simd_insert(a, 0, b);
+    transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti64_sd&expand=1644)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd))]
+pub unsafe fn _mm_cvti64_sd(a: __m128d, b: i64) -> __m128d {
+    let b = b as f64;
+    let r = simd_insert(a, 0, b);
+    transmute(r)
+}
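The conversions above use the current MXCSR rounding mode (round-to-nearest-even by default); explicit-rounding variants follow later in the file. A small sketch of the default behaviour, using the stable SSE2 spelling `_mm_cvtsd_si64` that `_mm_cvtsd_i64` delegates to:

```rust
#[cfg(target_arch = "x86_64")]
unsafe fn demo_default_rounding() {
    use core::arch::x86_64::{_mm_cvtsd_si64, _mm_set_sd};
    // Round-to-nearest-even: both 1.5 and 2.5 convert to the even integer 2.
    assert_eq!(_mm_cvtsd_si64(_mm_set_sd(1.5)), 2);
    assert_eq!(_mm_cvtsd_si64(_mm_set_sd(2.5)), 2);
}
```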
+/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_ss&expand=2035)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2ss))]
+pub unsafe fn _mm_cvtu64_ss(a: __m128, b: u64) -> __m128 {
+    let b = b as f32;
+    let r = simd_insert(a, 0, b);
+    transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu64_sd&expand=2034)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2sd))]
+pub unsafe fn _mm_cvtu64_sd(a: __m128d, b: u64) -> __m128d {
+    let b = b as f64;
+    let r = simd_insert(a, 0, b);
+    transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i64&expand=2016)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si))]
+pub unsafe fn _mm_cvttsd_i64(a: __m128d) -> i64 {
+    transmute(vcvtsd2si64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u64&expand=2021)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi))]
+pub unsafe fn _mm_cvttsd_u64(a: __m128d) -> u64 {
+    transmute(vcvtsd2usi64(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_i64&expand=2023)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si))]
+pub unsafe fn _mm_cvttss_i64(a: __m128) -> i64 {
+    transmute(vcvtss2si64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u64&expand=2027)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi))]
+pub unsafe fn _mm_cvttss_u64(a: __m128) -> u64 {
+    transmute(vcvtss2usi64(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
+}
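The `cvtt` variants truncate toward zero, which is the difference from the `cvt` forms above that follow MXCSR. A sketch of the contrast, again via the stable SSE2 spellings these wrappers mirror:

```rust
#[cfg(target_arch = "x86_64")]
unsafe fn demo_truncate_vs_round() {
    use core::arch::x86_64::{_mm_cvtsd_si64, _mm_cvttsd_si64, _mm_set_sd};
    let x = _mm_set_sd(-1.7);
    assert_eq!(_mm_cvttsd_si64(x), -1); // truncation drops the fraction
    assert_eq!(_mm_cvtsd_si64(x), -2); // default rounding goes to nearest
}
```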
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_sd&expand=1313)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f64x2();
+    let r = vcvtsi2sd64(a, b, ROUNDING);
+    transmute(r)
+}
+
+/// Convert the signed 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_sd&expand=1367)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2sd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundsi64_sd<const ROUNDING: i32>(a: __m128d, b: i64) -> __m128d {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f64x2();
+    let r = vcvtsi2sd64(a, b, ROUNDING);
+    transmute(r)
+}
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundi64_ss&expand=1314)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f32x4();
+    let r = vcvtsi2ss64(a, b, ROUNDING);
+    transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_sd&expand=1379)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2sd, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundu64_sd<const ROUNDING: i32>(a: __m128d, b: u64) -> __m128d {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f64x2();
+    let r = vcvtusi2sd64(a, b, ROUNDING);
+    transmute(r)
+}
+/// Convert the signed 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsi64_ss&expand=1368)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsi2ss, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundsi64_ss<const ROUNDING: i32>(a: __m128, b: i64) -> __m128 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f32x4();
+    let r = vcvtsi2ss64(a, b, ROUNDING);
+    transmute(r)
+}
+
+/// Convert the unsigned 64-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundu64_ss&expand=1380)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtusi2ss, ROUNDING = 8))]
+#[rustc_legacy_const_generics(2)]
+pub unsafe fn _mm_cvt_roundu64_ss<const ROUNDING: i32>(a: __m128, b: u64) -> __m128 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f32x4();
+    let r = vcvtusi2ss64(a, b, ROUNDING);
+    transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_si64&expand=1360)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundsd_si64<const ROUNDING: i32>(a: __m128d) -> i64 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f64x2();
+    let r = vcvtsd2si64(a, ROUNDING);
+    transmute(r)
+}
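With the `ROUNDING` const generic restored, the mode is picked per call site; note that an OR of two `_MM_FROUND_*` flags must be braced when used as a const argument. A sketch (these AVX-512 intrinsics were still unstable in this release, so treat it as illustrative):

```rust
#[cfg(target_arch = "x86_64")]
unsafe fn floor_convert(x: core::arch::x86_64::__m128d) -> i64 {
    use core::arch::x86_64::{_mm_cvt_roundsd_si64, _MM_FROUND_NO_EXC, _MM_FROUND_TO_NEG_INF};
    // Round toward negative infinity and suppress exceptions for this one
    // conversion, without touching the global MXCSR state.
    _mm_cvt_roundsd_si64::<{ _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC }>(x)
}
```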
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_i64&expand=1358)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundsd_i64<const ROUNDING: i32>(a: __m128d) -> i64 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f64x2();
+    let r = vcvtsd2si64(a, ROUNDING);
+    transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundsd_u64&expand=1365)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundsd_u64<const ROUNDING: i32>(a: __m128d) -> u64 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f64x2();
+    let r = vcvtsd2usi64(a, ROUNDING);
+    transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_si64&expand=1375)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundss_si64<const ROUNDING: i32>(a: __m128) -> i64 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f32x4();
+    let r = vcvtss2si64(a, ROUNDING);
+    transmute(r)
+}
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_i64&expand=1370)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundss_i64<const ROUNDING: i32>(a: __m128) -> i64 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f32x4();
+    let r = vcvtss2si64(a, ROUNDING);
+    transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer, and store the result in dst.\
+/// Rounding is done according to the rounding\[3:0\] parameter, which can be one of:\
+/// (_MM_FROUND_TO_NEAREST_INT |_MM_FROUND_NO_EXC) // round to nearest, and suppress exceptions\
+/// (_MM_FROUND_TO_NEG_INF |_MM_FROUND_NO_EXC) // round down, and suppress exceptions\
+/// (_MM_FROUND_TO_POS_INF |_MM_FROUND_NO_EXC) // round up, and suppress exceptions\
+/// (_MM_FROUND_TO_ZERO |_MM_FROUND_NO_EXC) // truncate, and suppress exceptions\
+/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvt_roundss_u64&expand=1377)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi, ROUNDING = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvt_roundss_u64<const ROUNDING: i32>(a: __m128) -> u64 {
+    static_assert_rounding!(ROUNDING);
+    let a = a.as_f32x4();
+    let r = vcvtss2usi64(a, ROUNDING);
+    transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si64&expand=1931)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundsd_si64<const SAE: i32>(a: __m128d) -> i64 {
+    static_assert_sae!(SAE);
+    let a = a.as_f64x2();
+    let r = vcvtsd2si64(a, SAE);
+    transmute(r)
+}
+
+/// Convert the lower double-precision (64-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i64&expand=1929)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundsd_i64<const SAE: i32>(a: __m128d) -> i64 {
+    static_assert_sae!(SAE);
+    let a = a.as_f64x2();
+    let r = vcvtsd2si64(a, SAE);
+    transmute(r)
+}
+/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u64&expand=1933)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtsd2usi, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundsd_u64<const SAE: i32>(a: __m128d) -> u64 {
+    static_assert_sae!(SAE);
+    let a = a.as_f64x2();
+    let r = vcvtsd2usi64(a, SAE);
+    transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i64&expand=1935)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundss_i64<const SAE: i32>(a: __m128) -> i64 {
+    static_assert_sae!(SAE);
+    let a = a.as_f32x4();
+    let r = vcvtss2si64(a, SAE);
+    transmute(r)
+}
+
+/// Convert the lower single-precision (32-bit) floating-point element in a to a 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si64&expand=1937)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2si, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundss_si64<const SAE: i32>(a: __m128) -> i64 {
+    static_assert_sae!(SAE);
+    let a = a.as_f32x4();
+    let r = vcvtss2si64(a, SAE);
+    transmute(r)
+}
+/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 64-bit integer with truncation, and store the result in dst.\
+/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u64&expand=1939)
+#[inline]
+#[target_feature(enable = "avx512f")]
+#[cfg_attr(test, assert_instr(vcvtss2usi, SAE = 8))]
+#[rustc_legacy_const_generics(1)]
+pub unsafe fn _mm_cvtt_roundss_u64<const SAE: i32>(a: __m128) -> u64 {
+    static_assert_sae!(SAE);
+    let a = a.as_f32x4();
+    let r = vcvtss2usi64(a, SAE);
+    transmute(r)
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.x86.avx512.vcvtss2si64"]
+    fn vcvtss2si64(a: f32x4, rounding: i32) -> i64;
+    #[link_name = "llvm.x86.avx512.vcvtss2usi64"]
+    fn vcvtss2usi64(a: f32x4, rounding: i32) -> u64;
+    #[link_name = "llvm.x86.avx512.vcvtsd2si64"]
+    fn vcvtsd2si64(a: f64x2, rounding: i32) -> i64;
+    #[link_name = "llvm.x86.avx512.vcvtsd2usi64"]
+    fn vcvtsd2usi64(a: f64x2, rounding: i32) -> u64;
+
+    #[link_name = "llvm.x86.avx512.cvtsi2ss64"]
+    fn vcvtsi2ss64(a: f32x4, b: i64, rounding: i32) -> f32x4;
+    #[link_name = "llvm.x86.avx512.cvtsi2sd64"]
+    fn vcvtsi2sd64(a: f64x2, b: i64, rounding: i32) -> f64x2;
+    #[link_name = "llvm.x86.avx512.cvtusi642ss"]
+    fn vcvtusi2ss64(a: f32x4, b: u64, rounding: i32) -> f32x4;
+    #[link_name = "llvm.x86.avx512.cvtusi642sd"]
+    fn vcvtusi2sd64(a: f64x2, b: u64, rounding: i32) -> f64x2;
+}
+
+#[cfg(test)]
+mod tests {
+
+    use stdarch_test::simd_test;
+
+    use crate::core_arch::x86::*;
+    use crate::core_arch::x86_64::*;
+    use crate::hint::black_box;
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_abs_epi64() {
+        let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let r = _mm512_abs_epi64(a);
+        let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MAX.wrapping_add(1), 100, 100, 32);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_abs_epi64() {
+        let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let r = _mm512_mask_abs_epi64(a, 0, a);
+        assert_eq_m512i(r, a);
+        let r = _mm512_mask_abs_epi64(a, 0b11111111, a);
+        let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, 100, 32);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_abs_epi64() {
+        let a = _mm512_set_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32);
+        let r = _mm512_maskz_abs_epi64(0, a);
+        assert_eq_m512i(r, _mm512_setzero_si512());
+        let r = _mm512_maskz_abs_epi64(0b11111111, a);
+        let e = _mm512_set_epi64(0, 1, 1, i64::MAX, i64::MIN, 100, 100, 32);
+        assert_eq_m512i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_abs_epi64() {
+        let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+        let r = _mm256_abs_epi64(a);
+        let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_mask_abs_epi64() {
+        let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+        let r = _mm256_mask_abs_epi64(a, 0, a);
+        assert_eq_m256i(r, a);
+        let r = _mm256_mask_abs_epi64(a, 0b00001111, a);
+        let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f,avx512vl")]
+    unsafe fn test_mm256_maskz_abs_epi64() {
+        let a = _mm256_set_epi64x(i64::MAX, i64::MIN, 100, -100);
+        let r = _mm256_maskz_abs_epi64(0, a);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm256_maskz_abs_epi64(0b00001111, a);
+        let e = _mm256_set_epi64x(i64::MAX, i64::MAX.wrapping_add(1), 100, 100);
+        assert_eq_m256i(r, e);
+    }
+
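The masked tests here and below all follow one convention: bit i of the mask selects lane i, and masked-off lanes keep the `src` operand (`mask_`) or become zero (`maskz_`). Keep the constructor ordering in mind when reading the expected values: `_mm512_setr_epi64` lists lanes low to high, while `_mm512_set_epi64` lists them high to low. A scalar model of the `mask_abs` case, for orientation only and not the intrinsic itself:

```rust
// Scalar sketch of the masked-lane convention used by these intrinsics.
fn mask_abs_epi64_model(src: [i64; 8], k: u8, a: [i64; 8]) -> [i64; 8] {
    let mut out = [0i64; 8];
    for i in 0..8 {
        // Bit i set: take the computed |a[i]| (wrapping, so i64::MIN stays
        // i64::MIN). Bit i clear: pass the source lane through unchanged.
        out[i] = if k & (1 << i) != 0 { a[i].wrapping_abs() } else { src[i] };
    }
    out
}
```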
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_abs_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let r = _mm512_abs_pd(a); + let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MAX, 100., 100., 32.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_abs_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let r = _mm512_mask_abs_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_abs_pd(a, 0b00001111, a); + let e = _mm512_setr_pd(0., 1., 1., f64::MAX, f64::MIN, 100., -100., -32.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mov_epi64() { + let src = _mm512_set1_epi64(1); + let a = _mm512_set1_epi64(2); + let r = _mm512_mask_mov_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_mov_epi64(src, 0b11111111, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mov_epi64() { + let a = _mm512_set1_epi64(2); + let r = _mm512_maskz_mov_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mov_epi64(0b11111111, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mov_epi64() { + let src = _mm256_set1_epi64x(1); + let a = _mm256_set1_epi64x(2); + let r = _mm256_mask_mov_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_mov_epi64(src, 0b00001111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mov_epi64() { + let a = _mm256_set1_epi64x(2); + let r = _mm256_maskz_mov_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mov_epi64(0b00001111, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mov_epi64() { + let src = _mm_set1_epi64x(1); + let a = _mm_set1_epi64x(2); + let r = _mm_mask_mov_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_mov_epi64(src, 0b00000011, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mov_epi64() { + let a = _mm_set1_epi64x(2); + let r = _mm_maskz_mov_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mov_epi64(0b00000011, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mov_pd() { + let src = _mm512_set1_pd(1.); + let a = _mm512_set1_pd(2.); + let r = _mm512_mask_mov_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_mov_pd(src, 0b11111111, a); + assert_eq_m512d(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mov_pd() { + let a = _mm512_set1_pd(2.); + let r = _mm512_maskz_mov_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_mov_pd(0b11111111, a); + assert_eq_m512d(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mov_pd() { + let src = _mm256_set1_pd(1.); + let a = _mm256_set1_pd(2.); + let r = _mm256_mask_mov_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_mov_pd(src, 0b00001111, a); + assert_eq_m256d(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mov_pd() { + let a = _mm256_set1_pd(2.); + let r = _mm256_maskz_mov_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_mov_pd(0b00001111, a); + assert_eq_m256d(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm_mask_mov_pd() { + let src = _mm_set1_pd(1.); + let a = _mm_set1_pd(2.); + let r = _mm_mask_mov_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_mov_pd(src, 0b00000011, a); + assert_eq_m128d(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mov_pd() { + let a = _mm_set1_pd(2.); + let r = _mm_maskz_mov_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_mov_pd(0b00000011, a); + assert_eq_m128d(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_add_epi64(a, b); + let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN + 1, 101, -99, -31); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_mask_add_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_add_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, i64::MIN, 100, -100, -32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_maskz_add_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_add_epi64(0b00001111, a, b); + let e = _mm512_setr_epi64(1, 2, 0, i64::MIN, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_add_epi64() { + let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN); + let b = _mm256_set1_epi64x(1); + let r = _mm256_mask_add_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_add_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(2, 0, i64::MIN, i64::MIN + 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_add_epi64() { + let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN); + let b = _mm256_set1_epi64x(1); + let r = _mm256_maskz_add_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_add_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(2, 0, i64::MIN, i64::MIN + 1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_add_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let b = _mm_set1_epi64x(1); + let r = _mm_mask_add_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_add_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(i64::MIN, i64::MIN + 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_add_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let b = _mm_set1_epi64x(1); + let r = _mm_maskz_add_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_add_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(i64::MIN, i64::MIN + 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_add_pd(a, b); + let e = _mm512_setr_pd(1., 2., 0., f64::MAX, f64::MIN + 1., 101., -99., -31.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_pd() { + 
let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_mask_add_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_add_pd(a, 0b00001111, a, b); + let e = _mm512_setr_pd(1., 2., 0., f64::MAX, f64::MIN, 100., -100., -32.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_maskz_add_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_add_pd(0b00001111, a, b); + let e = _mm512_setr_pd(1., 2., 0., f64::MAX, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_add_pd() { + let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(1.); + let r = _mm256_mask_add_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_add_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(2., 0., f64::MAX, f64::MIN + 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_add_pd() { + let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(1.); + let r = _mm256_maskz_add_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_add_pd(0b00001111, a, b); + let e = _mm256_set_pd(2., 0., f64::MAX, f64::MIN + 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_add_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(1.); + let r = _mm_mask_add_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_add_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(f64::MAX, f64::MIN + 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_add_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(1.); + let r = _mm_maskz_add_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_add_pd(0b00000011, a, b); + let e = _mm_set_pd(f64::MAX, f64::MIN + 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_sub_epi64(a, b); + let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, i64::MAX, 99, -101, -33); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_mask_sub_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_sub_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, i64::MIN, 100, -100, -32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_epi64() { + let a = _mm512_setr_epi64(0, 1, -1, i64::MAX, i64::MIN, 100, -100, -32); + let b = _mm512_set1_epi64(1); + let r = _mm512_maskz_sub_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sub_epi64(0b00001111, a, b); + let e = _mm512_setr_epi64(-1, 0, -2, i64::MAX - 1, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sub_epi64() { + let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN); + let b = _mm256_set1_epi64x(1); + let r = 
_mm256_mask_sub_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_sub_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(0, -2, i64::MAX - 1, i64::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sub_epi64() { + let a = _mm256_set_epi64x(1, -1, i64::MAX, i64::MIN); + let b = _mm256_set1_epi64x(1); + let r = _mm256_maskz_sub_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sub_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(0, -2, i64::MAX - 1, i64::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sub_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let b = _mm_set1_epi64x(1); + let r = _mm_mask_sub_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_sub_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(i64::MAX - 1, i64::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sub_epi64() { + let a = _mm_set_epi64x(i64::MAX, i64::MIN); + let b = _mm_set1_epi64x(1); + let r = _mm_maskz_sub_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sub_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(i64::MAX - 1, i64::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_sub_pd(a, b); + let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., f64::MIN, 99., -101., -33.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_mask_sub_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_sub_pd(a, 0b00001111, a, b); + let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., f64::MIN, 100., -100., -32.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_pd() { + let a = _mm512_setr_pd(0., 1., -1., f64::MAX, f64::MIN, 100., -100., -32.); + let b = _mm512_set1_pd(1.); + let r = _mm512_maskz_sub_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_sub_pd(0b00001111, a, b); + let e = _mm512_setr_pd(-1., 0., -2., f64::MAX - 1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sub_pd() { + let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(1.); + let r = _mm256_mask_sub_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_sub_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(0., -2., f64::MAX - 1., f64::MIN); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sub_pd() { + let a = _mm256_set_pd(1., -1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(1.); + let r = _mm256_maskz_sub_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_sub_pd(0b00001111, a, b); + let e = _mm256_set_pd(0., -2., f64::MAX - 1., f64::MIN); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sub_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(1.); + let r = _mm_mask_sub_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_sub_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(f64::MAX 
- 1., f64::MIN); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sub_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(1.); + let r = _mm_maskz_sub_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_sub_pd(0b00000011, a, b); + let e = _mm_set_pd(f64::MAX - 1., f64::MIN); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mul_epi32(a, b); + let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_mul_epi32(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mul_epi32(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, + 7, 5, 3, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_epi32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_mul_epi32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mul_epi32(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 7, 5, 3, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mul_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_mask_mul_epi32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mul_epi32(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(2, 4, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mul_epi32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_mul_epi32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mul_epi32(0b00001111, a, b); + let e = _mm256_set_epi64x(2, 4, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mul_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_mask_mul_epi32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_mul_epi32(a, 0b00000011, a, b); + let e = _mm_set_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mul_epi32() { + let a = _mm_set1_epi32(1); + let b = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_maskz_mul_epi32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mul_epi32(0b00000011, a, b); + let e = _mm_set_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_epu32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mul_epu32(a, b); + let e = _mm512_set_epi64(15, 13, 11, 9, 7, 5, 3, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_epu32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_mask_mul_epu32(a, 
0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mul_epu32(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, 1 | 1 << 32, + 7, 5, 3, 1, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_epu32() { + let a = _mm512_set1_epi32(1); + let b = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16); + let r = _mm512_maskz_mul_epu32(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_mul_epu32(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 7, 5, 3, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mul_epu32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_mask_mul_epu32(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_mul_epu32(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(2, 4, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mul_epu32() { + let a = _mm256_set1_epi32(1); + let b = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm256_maskz_mul_epu32(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_mul_epu32(0b00001111, a, b); + let e = _mm256_set_epi64x(2, 4, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mul_epu32() { + let a = _mm_set1_epi32(1); + let b = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_mask_mul_epu32(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_mul_epu32(a, 0b00000011, a, b); + let e = _mm_set_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mul_epu32() { + let a = _mm_set1_epi32(1); + let b = _mm_set_epi32(1, 2, 3, 4); + let r = _mm_maskz_mul_epu32(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_mul_epu32(0b00000011, a, b); + let e = _mm_set_epi64x(2, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mullox_epi64() { + let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32); + let b = _mm512_set1_epi64(2); + let r = _mm512_mullox_epi64(a, b); + let e = _mm512_setr_epi64(0, 2, -2, 0, -2, 200, -200, -64); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mullox_epi64() { + let a = _mm512_setr_epi64(0, 1, i64::MAX, i64::MIN, i64::MAX, 100, -100, -32); + let b = _mm512_set1_epi64(2); + let r = _mm512_mask_mullox_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_mullox_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(0, 2, -2, 0, i64::MAX, 100, -100, -32); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_set1_pd(2.); + let r = _mm512_mul_pd(a, b); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 2., f64::INFINITY, f64::NEG_INFINITY, + f64::INFINITY, f64::NEG_INFINITY, -200., -64., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_set1_pd(2.); + let r = _mm512_mask_mul_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_mul_pd(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = 
_mm512_setr_pd( + 0., 2., f64::INFINITY, f64::NEG_INFINITY, + f64::MAX, f64::MIN, -100., -32., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_set1_pd(2.); + let r = _mm512_maskz_mul_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_mul_pd(0b00001111, a, b); + let e = _mm512_setr_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_mul_pd() { + let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(2.); + let r = _mm256_mask_mul_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_mul_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_mul_pd() { + let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN); + let b = _mm256_set1_pd(2.); + let r = _mm256_maskz_mul_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_mul_pd(0b00001111, a, b); + let e = _mm256_set_pd(0., 2., f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_mul_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(2.); + let r = _mm_mask_mul_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_mul_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_mul_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set1_pd(2.); + let r = _mm_maskz_mul_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_mul_pd(0b00000011, a, b); + let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_div_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.); + let r = _mm512_div_pd(a, b); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 0.5, f64::INFINITY, f64::NEG_INFINITY, + f64::INFINITY, f64::NEG_INFINITY, -50., -16., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_div_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.); + let r = _mm512_mask_div_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_div_pd(a, 0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 0.5, f64::INFINITY, f64::NEG_INFINITY, + f64::MAX, f64::MIN, -100., -32., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_div_pd() { + let a = _mm512_setr_pd(0., 1., f64::MAX, f64::MIN, f64::MAX, f64::MIN, -100., -32.); + let b = _mm512_setr_pd(2., 2., 0., 0., 0., 0., 2., 2.); + let r = _mm512_maskz_div_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_div_pd(0b00001111, a, b); + let e = _mm512_setr_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_div_pd() { + let a = _mm256_set_pd(0., 
1., f64::MAX, f64::MIN); + let b = _mm256_set_pd(2., 2., 0., 0.); + let r = _mm256_mask_div_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_div_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_div_pd() { + let a = _mm256_set_pd(0., 1., f64::MAX, f64::MIN); + let b = _mm256_set_pd(2., 2., 0., 0.); + let r = _mm256_maskz_div_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_div_pd(0b00001111, a, b); + let e = _mm256_set_pd(0., 0.5, f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_div_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set_pd(0., 0.); + let r = _mm_mask_div_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_div_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_div_pd() { + let a = _mm_set_pd(f64::MAX, f64::MIN); + let b = _mm_set_pd(0., 0.); + let r = _mm_maskz_div_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_div_pd(0b00000011, a, b); + let e = _mm_set_pd(f64::INFINITY, f64::NEG_INFINITY); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_max_epi64(a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epi64(0b00001111, a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_max_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_max_epi64(a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_max_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_max_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm_max_epi64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_max_epi64(a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epi64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_mask_max_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epi64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_maskz_max_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_max_pd(a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_mask_max_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_max_pd(a, 0b00001111, a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_maskz_max_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_max_pd(0b00001111, a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_mask_max_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_max_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(3., 2., 2., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_maskz_max_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_max_pd(0b00001111, a, b); + let e = _mm256_set_pd(3., 2., 2., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_pd() { + let a = _mm_set_pd(2., 3.); + let b = _mm_set_pd(3., 2.); + let r = _mm_mask_max_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_max_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(3., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_pd() { + let a = _mm_set_pd(2., 3.); + let b = _mm_set_pd(3., 2.); + let r = _mm_maskz_max_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_max_pd(0b00000011, a, b); + let e = _mm_set_pd(3., 3.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = 
_mm512_max_epu64(a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_max_epu64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_max_epu64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_max_epu64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_max_epu64(0b00001111, a, b); + let e = _mm512_setr_epi64(7, 6, 5, 4, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_max_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_max_epu64(a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_max_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_max_epu64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_max_epu64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_max_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_max_epu64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_max_epu64(0b00001111, a, b); + let e = _mm256_set_epi64x(3, 2, 2, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_max_epu64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_max_epu64(a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_max_epu64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_mask_max_epu64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_max_epu64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_max_epu64() { + let a = _mm_set_epi64x(2, 3); + let b = _mm_set_epi64x(3, 2); + let r = _mm_maskz_max_epu64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_max_epu64(0b00000011, a, b); + let e = _mm_set_epi64x(3, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epi64(a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epi64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + 
unsafe fn test_mm512_maskz_min_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epi64(0b00001111, a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_min_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_min_epi64(a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_min_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_min_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_min_pd(a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 3., 2., 1., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_mask_min_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_min_pd(a, 0b00001111, a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_maskz_min_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_min_pd(0b00001111, a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_mask_min_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_min_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(0., 1., 1., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_maskz_min_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_min_pd(0b00001111, a, b); + let e = _mm256_set_pd(0., 1., 1., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_pd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(1., 0.); + let r = _mm_mask_min_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_min_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + } + + 
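+    // A minimal illustrative test, assuming only the helpers already used by
+    // the surrounding suite (the name `test_maskz_selects_lane_zero` is
+    // hypothetical, not upstream): writemask bit `i` governs lane `i` as laid
+    // out by the `setr` constructors, and `maskz` variants zero unselected lanes.
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_maskz_selects_lane_zero() {
+        let a = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
+        let b = _mm512_setr_pd(17., 16., 15., 14., 13., 12., 11., 10.);
+        // Only bit 0 is set: lane 0 keeps min(10., 17.) = 10.; the rest are zeroed.
+        let r = _mm512_maskz_min_pd(0b00000001, a, b);
+        assert_eq_m512d(r, _mm512_setr_pd(10., 0., 0., 0., 0., 0., 0., 0.));
+    }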
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_pd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set_pd(1., 0.); + let r = _mm_maskz_min_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_min_pd(0b00000011, a, b); + let e = _mm_set_pd(0., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_min_epu64(a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 3, 2, 1, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_mask_min_epu64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_min_epu64(a, 0b00001111, a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_epu64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let b = _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0); + let r = _mm512_maskz_min_epu64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_min_epu64(0b00001111, a, b); + let e = _mm512_setr_epi64(0, 1, 2, 3, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_min_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_min_epu64(a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_min_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_mask_min_epu64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_min_epu64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_min_epu64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set_epi64x(3, 2, 1, 0); + let r = _mm256_maskz_min_epu64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_min_epu64(0b00001111, a, b); + let e = _mm256_set_epi64x(0, 1, 1, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_min_epu64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(1, 0); + let r = _mm_min_epu64(a, b); + let e = _mm_set_epi64x(0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_min_epu64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(1, 0); + let r = _mm_mask_min_epu64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_min_epu64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_min_epu64() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(1, 0); + let r = _mm_maskz_min_epu64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_min_epu64(0b00000011, a, b); + let e = _mm_set_epi64x(0, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sqrt_pd() { + let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.); + let r = _mm512_sqrt_pd(a); + let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 
7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sqrt_pd() { + let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.); + let r = _mm512_mask_sqrt_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_sqrt_pd(a, 0b00001111, a); + let e = _mm512_setr_pd(0., 1., 2., 3., 16., 25., 36., 49.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sqrt_pd() { + let a = _mm512_setr_pd(0., 1., 4., 9., 16., 25., 36., 49.); + let r = _mm512_maskz_sqrt_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_sqrt_pd(0b00001111, a); + let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sqrt_pd() { + let a = _mm256_set_pd(0., 1., 4., 9.); + let r = _mm256_mask_sqrt_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_sqrt_pd(a, 0b00001111, a); + let e = _mm256_set_pd(0., 1., 2., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sqrt_pd() { + let a = _mm256_set_pd(0., 1., 4., 9.); + let r = _mm256_maskz_sqrt_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_sqrt_pd(0b00001111, a); + let e = _mm256_set_pd(0., 1., 2., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sqrt_pd() { + let a = _mm_set_pd(0., 1.); + let r = _mm_mask_sqrt_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_sqrt_pd(a, 0b00000011, a); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sqrt_pd() { + let a = _mm_set_pd(0., 1.); + let r = _mm_maskz_sqrt_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_sqrt_pd(0b00000011, a); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmadd_pd() { + let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let r = _mm512_fmadd_pd(a, b, c); + let e = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmadd_pd() { + let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let r = _mm512_mask_fmadd_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmadd_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(1., 2., 3., 4., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmadd_pd() { + let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let r = _mm512_maskz_fmadd_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmadd_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(1., 2., 3., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmadd_pd() { + let a = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fmadd_pd(a, b, c, 0); + 
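+        // With an all-zero mask, the `mask3` variants pass the third operand
+        // `c` through unchanged, which the next assertion checks.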
assert_eq_m512d(r, c); + let r = _mm512_mask3_fmadd_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(1., 2., 3., 4., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fmadd_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fmadd_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fmadd_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fmadd_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fmadd_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fmadd_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(1., 2., 3., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fmadd_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmadd_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fmadd_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fmadd_pd(0b00000011, a, b, c); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fmadd_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmadd_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fmsub_pd(a, b, c); + let e = _mm512_setr_pd(-1., 0., 1., 2., 3., 4., 5., 6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fmsub_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmsub_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(-1., 0., 1., 2., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fmsub_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmsub_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(-1., 0., 1., 2., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = 
"avx512f")] + unsafe fn test_mm512_mask3_fmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fmsub_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmsub_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(-1., 0., 1., 2., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fmsub_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fmsub_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(-1., 0., 1., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fmsub_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fmsub_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(-1., 0., 1., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fmsub_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fmsub_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(-1., 0., 1., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fmsub_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmsub_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(-1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fmsub_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fmsub_pd(0b00000011, a, b, c); + let e = _mm_set_pd(-1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fmsub_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmsub_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(-1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmaddsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fmaddsub_pd(a, b, c); + let e = _mm512_setr_pd(-1., 2., 1., 4., 3., 6., 5., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmaddsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fmaddsub_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmaddsub_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(-1., 2., 1., 4., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmaddsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = 
_mm512_set1_pd(1.); + let r = _mm512_maskz_fmaddsub_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmaddsub_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(-1., 2., 1., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmaddsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmaddsub_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(-1., 2., 1., 4., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmaddsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fmaddsub_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fmaddsub_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(1., 0., 3., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmaddsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fmaddsub_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fmaddsub_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(1., 0., 3., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmaddsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fmaddsub_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fmaddsub_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(1., 0., 3., 2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmaddsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fmaddsub_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmaddsub_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmaddsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fmaddsub_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fmaddsub_pd(0b00000011, a, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmaddsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fmaddsub_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmaddsub_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsubadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fmsubadd_pd(a, b, c); + let e = _mm512_setr_pd(1., 0., 3., 2., 5., 4., 7., 6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsubadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fmsubadd_pd(a, 0, b, c); + 
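+        // A zero mask leaves every lane equal to the `src` operand `a`.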
assert_eq_m512d(r, a); + let r = _mm512_mask_fmsubadd_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(1., 0., 3., 2., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsubadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fmsubadd_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmsubadd_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(1., 0., 3., 2., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsubadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fmsubadd_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmsubadd_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(1., 0., 3., 2., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fmsubadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fmsubadd_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fmsubadd_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(-1., 2., 1., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fmsubadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fmsubadd_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fmsubadd_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(-1., 2., 1., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fmsubadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fmsubadd_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fmsubadd_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(-1., 2., 1., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fmsubadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fmsubadd_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fmsubadd_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(-1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fmsubadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fmsubadd_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fmsubadd_pd(0b00000011, a, b, c); + let e = _mm_set_pd(-1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fmsubadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fmsubadd_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fmsubadd_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(-1., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fnmadd_pd(a, b, c); + let 
e = _mm512_setr_pd(1., 0., -1., -2., -3., -4., -5., -6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fnmadd_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fnmadd_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(1., 0., -1., -2., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fnmadd_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fnmadd_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(1., 0., -1., -2., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmadd_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fnmadd_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fnmadd_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(1., 0., -1., -2., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fnmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fnmadd_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fnmadd_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(1., 0., -1., -2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fnmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fnmadd_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fnmadd_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(1., 0., -1., -2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fnmadd_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fnmadd_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fnmadd_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(1., 0., -1., -2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fnmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fnmadd_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fnmadd_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fnmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fnmadd_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fnmadd_pd(0b00000011, a, b, c); + let e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fnmadd_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fnmadd_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fnmadd_pd(a, b, c, 0b00000011); + let 
e = _mm_set_pd(1., 0.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fnmsub_pd(a, b, c); + let e = _mm512_setr_pd(-1., -2., -3., -4., -5., -6., -7., -8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fnmsub_pd(a, 0, b, c); + assert_eq_m512d(r, a); + let r = _mm512_mask_fnmsub_pd(a, 0b00001111, b, c); + let e = _mm512_setr_pd(-1., -2., -3., -4., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fnmsub_pd(0, a, b, c); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fnmsub_pd(0b00001111, a, b, c); + let e = _mm512_setr_pd(-1., -2., -3., -4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmsub_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let c = _mm512_setr_pd(1., 1., 1., 1., 2., 2., 2., 2.); + let r = _mm512_mask3_fnmsub_pd(a, b, c, 0); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fnmsub_pd(a, b, c, 0b00001111); + let e = _mm512_setr_pd(-1., -2., -3., -4., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fnmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask_fnmsub_pd(a, 0, b, c); + assert_eq_m256d(r, a); + let r = _mm256_mask_fnmsub_pd(a, 0b00001111, b, c); + let e = _mm256_set_pd(-1., -2., -3., -4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fnmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_maskz_fnmsub_pd(0, a, b, c); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_fnmsub_pd(0b00001111, a, b, c); + let e = _mm256_set_pd(-1., -2., -3., -4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask3_fnmsub_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set_pd(0., 1., 2., 3.); + let c = _mm256_set1_pd(1.); + let r = _mm256_mask3_fnmsub_pd(a, b, c, 0); + assert_eq_m256d(r, c); + let r = _mm256_mask3_fnmsub_pd(a, b, c, 0b00001111); + let e = _mm256_set_pd(-1., -2., -3., -4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fnmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask_fnmsub_pd(a, 0, b, c); + assert_eq_m128d(r, a); + let r = _mm_mask_fnmsub_pd(a, 0b00000011, b, c); + let e = _mm_set_pd(-1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fnmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_maskz_fnmsub_pd(0, a, b, c); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_fnmsub_pd(0b00000011, a, b, c); + let e = _mm_set_pd(-1., -2.); + 
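+        // Per lane, fnmsub computes -(a * b) - c: -(1. * 1.) - 1. = -2. in
+        // lane 0 and -(1. * 0.) - 1. = -1. in lane 1.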
assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask3_fnmsub_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set_pd(0., 1.); + let c = _mm_set1_pd(1.); + let r = _mm_mask3_fnmsub_pd(a, b, c, 0); + assert_eq_m128d(r, c); + let r = _mm_mask3_fnmsub_pd(a, b, c, 0b00000011); + let e = _mm_set_pd(-1., -2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rcp14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_rcp14_pd(a); + let e = _mm512_set1_pd(0.3333320617675781); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rcp14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_mask_rcp14_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_rcp14_pd(a, 0b11110000, a); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 3., 3., 3., 3., + 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rcp14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_maskz_rcp14_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_rcp14_pd(0b11110000, a); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 0., 0., 0., + 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, 0.3333320617675781, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rcp14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_rcp14_pd(a); + let e = _mm256_set1_pd(0.3333320617675781); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rcp14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_mask_rcp14_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_rcp14_pd(a, 0b00001111, a); + let e = _mm256_set1_pd(0.3333320617675781); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rcp14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_maskz_rcp14_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_rcp14_pd(0b00001111, a); + let e = _mm256_set1_pd(0.3333320617675781); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rcp14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_rcp14_pd(a); + let e = _mm_set1_pd(0.3333320617675781); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rcp14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_mask_rcp14_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_rcp14_pd(a, 0b00000011, a); + let e = _mm_set1_pd(0.3333320617675781); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rcp14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_maskz_rcp14_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_rcp14_pd(0b00000011, a); + let e = _mm_set1_pd(0.3333320617675781); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rsqrt14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_rsqrt14_pd(a); + let e = _mm512_set1_pd(0.5773391723632813); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rsqrt14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_mask_rsqrt14_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_rsqrt14_pd(a, 0b11110000, a); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 3., 3., 
3., 3., + 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rsqrt14_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_maskz_rsqrt14_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_rsqrt14_pd(0b11110000, a); + #[rustfmt::skip] + let e = _mm512_setr_pd( + 0., 0., 0., 0., + 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, 0.5773391723632813, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rsqrt14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_mask_rsqrt14_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_rsqrt14_pd(a, 0b00001111, a); + let e = _mm256_set1_pd(0.5773391723632813); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rsqrt14_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_maskz_rsqrt14_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_rsqrt14_pd(0b00001111, a); + let e = _mm256_set1_pd(0.5773391723632813); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rsqrt14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_mask_rsqrt14_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_rsqrt14_pd(a, 0b00000011, a); + let e = _mm_set1_pd(0.5773391723632813); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rsqrt14_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_maskz_rsqrt14_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_rsqrt14_pd(0b00000011, a); + let e = _mm_set1_pd(0.5773391723632813); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getexp_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_getexp_pd(a); + let e = _mm512_set1_pd(1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getexp_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_mask_getexp_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_getexp_pd(a, 0b11110000, a); + let e = _mm512_setr_pd(3., 3., 3., 3., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getexp_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_maskz_getexp_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_getexp_pd(0b11110000, a); + let e = _mm512_setr_pd(0., 0., 0., 0., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_getexp_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_getexp_pd(a); + let e = _mm256_set1_pd(1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_getexp_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_mask_getexp_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_getexp_pd(a, 0b00001111, a); + let e = _mm256_set1_pd(1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_getexp_pd() { + let a = _mm256_set1_pd(3.); + let r = _mm256_maskz_getexp_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_getexp_pd(0b00001111, a); + let e = _mm256_set1_pd(1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_getexp_pd() { + let a = _mm_set1_pd(3.); + 
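+        // getexp extracts the unbiased exponent, floor(log2(|x|)), as an f64;
+        // for 3.0 that is 1.0.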
let r = _mm_getexp_pd(a); + let e = _mm_set1_pd(1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_getexp_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_mask_getexp_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_getexp_pd(a, 0b00000011, a); + let e = _mm_set1_pd(1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_getexp_pd() { + let a = _mm_set1_pd(3.); + let r = _mm_maskz_getexp_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_getexp_pd(0b00000011, a); + let e = _mm_set1_pd(1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_roundscale_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_roundscale_pd::<0b00_00_00_00>(a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_roundscale_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); + let e = _mm512_set1_pd(1.1); + assert_eq_m512d(r, e); + let r = _mm512_mask_roundscale_pd::<0b00_00_00_00>(a, 0b11111111, a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_roundscale_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_roundscale_pd::<0b00_00_00_00>(0b11111111, a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_roundscale_pd() { + let a = _mm256_set1_pd(1.1); + let r = _mm256_roundscale_pd::<0b00_00_00_00>(a); + let e = _mm256_set1_pd(1.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_roundscale_pd() { + let a = _mm256_set1_pd(1.1); + let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00001111, a); + let e = _mm256_set1_pd(1.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_roundscale_pd() { + let a = _mm256_set1_pd(1.1); + let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_roundscale_pd::<0b00_00_00_00>(0b00001111, a); + let e = _mm256_set1_pd(1.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_roundscale_pd() { + let a = _mm_set1_pd(1.1); + let r = _mm_roundscale_pd::<0b00_00_00_00>(a); + let e = _mm_set1_pd(1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_roundscale_pd() { + let a = _mm_set1_pd(1.1); + let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0, a); + let e = _mm_set1_pd(1.1); + assert_eq_m128d(r, e); + let r = _mm_mask_roundscale_pd::<0b00_00_00_00>(a, 0b00000011, a); + let e = _mm_set1_pd(1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_roundscale_pd() { + let a = _mm_set1_pd(1.1); + let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_roundscale_pd::<0b00_00_00_00>(0b00000011, a); + let e = _mm_set1_pd(1.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_scalef_pd() { + let a = _mm512_set1_pd(1.); + let b = 
_mm512_set1_pd(3.); + let r = _mm512_scalef_pd(a, b); + let e = _mm512_set1_pd(8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_scalef_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_mask_scalef_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_scalef_pd(a, 0b11110000, a, b); + let e = _mm512_set_pd(8., 8., 8., 8., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_scalef_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_maskz_scalef_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_scalef_pd(0b11110000, a, b); + let e = _mm512_set_pd(8., 8., 8., 8., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_scalef_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(3.); + let r = _mm256_scalef_pd(a, b); + let e = _mm256_set1_pd(8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_scalef_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(3.); + let r = _mm256_mask_scalef_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_scalef_pd(a, 0b00001111, a, b); + let e = _mm256_set1_pd(8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_scalef_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(3.); + let r = _mm256_maskz_scalef_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_scalef_pd(0b00001111, a, b); + let e = _mm256_set1_pd(8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_scalef_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_scalef_pd(a, b); + let e = _mm_set1_pd(8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_scalef_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_mask_scalef_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_scalef_pd(a, 0b00000011, a, b); + let e = _mm_set1_pd(8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_scalef_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(3.); + let r = _mm_maskz_scalef_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_scalef_pd(0b00000011, a, b); + let e = _mm_set1_pd(8.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fixupimm_pd() { + let a = _mm512_set1_pd(f64::NAN); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_fixupimm_pd::<5>(a, b, c); + let e = _mm512_set1_pd(0.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fixupimm_pd() { + let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_mask_fixupimm_pd::<5>(a, 0b11110000, b, c); + let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fixupimm_pd() { + let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = 
_mm512_maskz_fixupimm_pd::<5>(0b11110000, a, b, c); + let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_fixupimm_pd() { + let a = _mm256_set1_pd(f64::NAN); + let b = _mm256_set1_pd(f64::MAX); + let c = _mm256_set1_epi64x(i32::MAX as i64); + let r = _mm256_fixupimm_pd::<5>(a, b, c); + let e = _mm256_set1_pd(0.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_fixupimm_pd() { + let a = _mm256_set1_pd(f64::NAN); + let b = _mm256_set1_pd(f64::MAX); + let c = _mm256_set1_epi64x(i32::MAX as i64); + let r = _mm256_mask_fixupimm_pd::<5>(a, 0b00001111, b, c); + let e = _mm256_set1_pd(0.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_fixupimm_pd() { + let a = _mm256_set1_pd(f64::NAN); + let b = _mm256_set1_pd(f64::MAX); + let c = _mm256_set1_epi64x(i32::MAX as i64); + let r = _mm256_maskz_fixupimm_pd::<5>(0b00001111, a, b, c); + let e = _mm256_set1_pd(0.0); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_fixupimm_pd() { + let a = _mm_set1_pd(f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_fixupimm_pd::<5>(a, b, c); + let e = _mm_set1_pd(0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_fixupimm_pd() { + let a = _mm_set1_pd(f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_mask_fixupimm_pd::<5>(a, 0b00000011, b, c); + let e = _mm_set1_pd(0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_fixupimm_pd() { + let a = _mm_set1_pd(f64::NAN); + let b = _mm_set1_pd(f64::MAX); + let c = _mm_set1_epi64x(i32::MAX as i64); + let r = _mm_maskz_fixupimm_pd::<5>(0b00000011, a, b, c); + let e = _mm_set1_pd(0.0); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ternarylogic_epi64() { + let a = _mm512_set1_epi64(1 << 2); + let b = _mm512_set1_epi64(1 << 1); + let c = _mm512_set1_epi64(1 << 0); + let r = _mm512_ternarylogic_epi64::<8>(a, b, c); + let e = _mm512_set1_epi64(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ternarylogic_epi64() { + let src = _mm512_set1_epi64(1 << 2); + let a = _mm512_set1_epi64(1 << 1); + let b = _mm512_set1_epi64(1 << 0); + let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0, a, b); + assert_eq_m512i(r, src); + let r = _mm512_mask_ternarylogic_epi64::<8>(src, 0b11111111, a, b); + let e = _mm512_set1_epi64(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ternarylogic_epi64() { + let a = _mm512_set1_epi64(1 << 2); + let b = _mm512_set1_epi64(1 << 1); + let c = _mm512_set1_epi64(1 << 0); + let r = _mm512_maskz_ternarylogic_epi64::<8>(0, a, b, c); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_ternarylogic_epi64::<8>(0b11111111, a, b, c); + let e = _mm512_set1_epi64(0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_ternarylogic_epi64() { + let a = _mm256_set1_epi64x(1 << 2); + let b = _mm256_set1_epi64x(1 << 1); + let c = _mm256_set1_epi64x(1 << 0); + let r = _mm256_ternarylogic_epi64::<8>(a, b, c); + let e = _mm256_set1_epi64x(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + 
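// VPTERNLOG treats imm8 as a truth table indexed by (a<<2)|(b<<1)|c; 8 = 0b0000_1000 selects only !a & b & c, which never occurs for these inputs +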
unsafe fn test_mm256_mask_ternarylogic_epi64() { + let src = _mm256_set1_epi64x(1 << 2); + let a = _mm256_set1_epi64x(1 << 1); + let b = _mm256_set1_epi64x(1 << 0); + let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0, a, b); + assert_eq_m256i(r, src); + let r = _mm256_mask_ternarylogic_epi64::<8>(src, 0b00001111, a, b); + let e = _mm256_set1_epi64x(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_ternarylogic_epi64() { + let a = _mm256_set1_epi64x(1 << 2); + let b = _mm256_set1_epi64x(1 << 1); + let c = _mm256_set1_epi64x(1 << 0); + let r = _mm256_maskz_ternarylogic_epi64::<9>(0, a, b, c); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_ternarylogic_epi64::<8>(0b00001111, a, b, c); + let e = _mm256_set1_epi64x(0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_ternarylogic_epi64() { + let a = _mm_set1_epi64x(1 << 2); + let b = _mm_set1_epi64x(1 << 1); + let c = _mm_set1_epi64x(1 << 0); + let r = _mm_ternarylogic_epi64::<8>(a, b, c); + let e = _mm_set1_epi64x(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_ternarylogic_epi64() { + let src = _mm_set1_epi64x(1 << 2); + let a = _mm_set1_epi64x(1 << 1); + let b = _mm_set1_epi64x(1 << 0); + let r = _mm_mask_ternarylogic_epi64::<8>(src, 0, a, b); + assert_eq_m128i(r, src); + let r = _mm_mask_ternarylogic_epi64::<8>(src, 0b00000011, a, b); + let e = _mm_set1_epi64x(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_ternarylogic_epi64() { + let a = _mm_set1_epi64x(1 << 2); + let b = _mm_set1_epi64x(1 << 1); + let c = _mm_set1_epi64x(1 << 0); + let r = _mm_maskz_ternarylogic_epi64::<9>(0, a, b, c); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_ternarylogic_epi64::<8>(0b00000011, a, b, c); + let e = _mm_set1_epi64x(0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getmant_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a); + let e = _mm512_set1_pd(1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getmant_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b11110000, a); + let e = _mm512_setr_pd(10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getmant_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b11110000, a); + let e = _mm512_setr_pd(0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_getmant_pd() { + let a = _mm256_set1_pd(10.); + let r = _mm256_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a); + let e = _mm256_set1_pd(1.25); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_getmant_pd() { + let a = _mm256_set1_pd(10.); + let r = _mm256_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m256d(r, a); + let r = 
_mm256_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00001111, a); + let e = _mm256_set1_pd(1.25); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_getmant_pd() { + let a = _mm256_set1_pd(10.); + let r = _mm256_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00001111, a); + let e = _mm256_set1_pd(1.25); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_getmant_pd() { + let a = _mm_set1_pd(10.); + let r = _mm_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a); + let e = _mm_set1_pd(1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_getmant_pd() { + let a = _mm_set1_pd(10.); + let r = _mm_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(a, 0b00000011, a); + let e = _mm_set1_pd(1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_getmant_pd() { + let a = _mm_set1_pd(10.); + let r = _mm_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_getmant_pd::<_MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC>(0b00000011, a); + let e = _mm_set1_pd(1.25); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtps_pd(a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm512_set1_pd(0.); + let r = _mm512_mask_cvtps_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtps_pd(src, 0b00001111, a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtps_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_cvtps_pd(0b00001111, a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpslo_pd() { + let v2 = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 100., 100., 100., 100., 100., 100., 100., 100., + ); + let r = _mm512_cvtpslo_pd(v2); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpslo_pd() { + let v2 = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 100., 100., 100., 100., 100., 100., 100., 100., + ); + let src = _mm512_set1_pd(0.); + let r = _mm512_mask_cvtpslo_pd(src, 0, v2); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtpslo_pd(src, 0b00001111, v2); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtpd_ps(a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + 
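// every value here is exactly representable in f32, so the narrowing conversion is lossless +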
assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_ps(0.); + let r = _mm512_mask_cvtpd_ps(src, 0, a); + assert_eq_m256(r, src); + let r = _mm512_mask_cvtpd_ps(src, 0b00001111, a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtpd_ps(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm512_maskz_cvtpd_ps(0b00001111, a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_ps() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let src = _mm_set1_ps(0.); + let r = _mm256_mask_cvtpd_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm256_mask_cvtpd_ps(src, 0b00001111, a); + let e = _mm_set_ps(4., -5.5, 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_ps() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let r = _mm256_maskz_cvtpd_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm256_maskz_cvtpd_ps(0b00001111, a); + let e = _mm_set_ps(4., -5.5, 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_ps() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_ps(0.); + let r = _mm_mask_cvtpd_ps(src, 0, a); + assert_eq_m128(r, src); + let r = _mm_mask_cvtpd_ps(src, 0b00000011, a); + let e = _mm_set_ps(0., 0., 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_ps() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvtpd_ps(0, a); + assert_eq_m128(r, _mm_setzero_ps()); + let r = _mm_maskz_cvtpd_ps(0b00000011, a); + let e = _mm_set_ps(0., 0., 6., -7.5); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtpd_epi32(a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtpd_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtpd_epi32(src, 0b11111111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtpd_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtpd_epi32(0b11111111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_epi32() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtpd_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, -6, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_maskz_cvtpd_epi32() { + let a = _mm256_set_pd(4., -5.5, 6., -7.5); + let r = _mm256_maskz_cvtpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtpd_epi32(0b00001111, a); + let e = _mm_set_epi32(4, -6, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtpd_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvtpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtpd_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let r = _mm512_cvtpd_epu32(a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtpd_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtpd_epu32(src, 0b11111111, a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtpd_epu32() { + let a = _mm512_setr_pd(0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5); + let r = _mm512_maskz_cvtpd_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtpd_epu32(0b11111111, a); + let e = _mm256_setr_epi32(0, 2, 2, 4, 4, 6, 6, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_cvtpd_epu32(a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtpd_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_maskz_cvtpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtpd_epu32(0b00001111, a); + let e = _mm_set_epi32(4, 6, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_cvtpd_epu32(a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtpd_epu32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = 
_mm_maskz_cvtpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtpd_epu32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtpd_pslo() { + let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtpd_pslo(v2); + let e = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtpd_pslo() { + let v2 = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm512_set1_ps(0.); + let r = _mm512_mask_cvtpd_pslo(src, 0, v2); + assert_eq_m512(r, src); + let r = _mm512_mask_cvtpd_pslo(src, 0b00001111, v2); + let e = _mm512_setr_ps( + 0., -1.5, 2., -3.5, 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi8_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepi8_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi8_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi8_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepi8_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepi8_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi8_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepi8_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi8_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepi8_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi8_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepi8_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi8_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + 
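// mask 0b11 keeps both lanes: the two lowest source bytes (15 and 14), sign-extended to i64 +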
assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu8_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepu8_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepu8_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu8_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepu8_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu8_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu8_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu8_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu8_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu8_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu8_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu8_epi64() { + let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu8_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu8_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi16_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepi16_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi16_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi16_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + 
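// mask bit i controls lane i: 0b00001111 keeps lanes 0-3 (15, 14, 13, 12) and zeroes lanes 4-7 +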
let r = _mm512_maskz_cvtepi16_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepi16_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi16_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepi16_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi16_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepi16_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi16_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepi16_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi16_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu16_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepu16_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepu16_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu16_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepu16_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu16_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu16_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm256_maskz_cvtepu16_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu16_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let src = 
_mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu16_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu16_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu16_epi64() { + let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm_maskz_cvtepu16_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu16_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepi32_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepi32_epi64(src, 0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi32_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepi32_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_epi64() { + let a = _mm_set_epi32(8, 9, 10, 11); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepi32_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepi32_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(8, 9, 10, 11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi32_epi64() { + let a = _mm_set_epi32(8, 9, 10, 11); + let r = _mm256_maskz_cvtepi32_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepi32_epi64(0b00001111, a); + let e = _mm256_set_epi64x(8, 9, 10, 11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_epi64() { + let a = _mm_set_epi32(8, 9, 10, 11); + let src = _mm_set1_epi64x(0); + let r = _mm_mask_cvtepi32_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi32_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(10, 11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi32_epi64() { + let a = _mm_set_epi32(8, 9, 10, 11); + let r = _mm_maskz_cvtepi32_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi32_epi64(0b00000011, a); + let e = _mm_set_epi64x(10, 11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu32_epi64(a); + let e = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_epi64(-1); + let r = _mm512_mask_cvtepu32_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_cvtepu32_epi64(src, 
0b00001111, a); + let e = _mm512_set_epi64(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu32_epi64() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu32_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_cvtepu32_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm256_set1_epi64x(-1); + let r = _mm256_mask_cvtepu32_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_cvtepu32_epi64(src, 0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_maskz_cvtepu32_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_cvtepu32_epi64(0b00001111, a); + let e = _mm256_set_epi64x(12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm_set1_epi64x(-1); + let r = _mm_mask_cvtepu32_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepu32_epi64(src, 0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu32_epi64() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_maskz_cvtepu32_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepu32_epi64(0b00000011, a); + let e = _mm_set_epi64x(14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32_pd(a); + let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_pd(-1.); + let r = _mm512_mask_cvtepi32_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtepi32_pd(src, 0b00001111, a); + let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi32_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_cvtepi32_pd(0b00001111, a); + let e = _mm512_set_pd(0., 0., 0., 0., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm256_set1_pd(-1.); + let r = _mm256_mask_cvtepi32_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_cvtepi32_pd(src, 0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_maskz_cvtepi32_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_cvtepi32_pd(0b00001111, a); + let 
e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm_set1_pd(-1.); + let r = _mm_mask_cvtepi32_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_cvtepi32_pd(src, 0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_maskz_cvtepi32_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_cvtepi32_pd(0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu32_pd(a); + let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_pd(-1.); + let r = _mm512_mask_cvtepu32_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtepu32_pd(src, 0b00001111, a); + let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepu32_pd() { + let a = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepu32_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_cvtepu32_pd(0b00001111, a); + let e = _mm512_set_pd(0., 0., 0., 0., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_cvtepu32_pd(a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm256_set1_pd(-1.); + let r = _mm256_mask_cvtepu32_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_cvtepu32_pd(src, 0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm256_maskz_cvtepu32_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_cvtepu32_pd(0b00001111, a); + let e = _mm256_set_pd(12., 13., 14., 15.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_cvtepu32_pd(a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let src = _mm_set1_pd(-1.); + let r = _mm_mask_cvtepu32_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_cvtepu32_pd(src, 0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepu32_pd() { + let a = _mm_set_epi32(12, 13, 14, 15); + let r = _mm_maskz_cvtepu32_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_cvtepu32_pd(0b00000011, a); + let e = _mm_set_pd(14., 15.); + assert_eq_m128d(r, e); 
+ } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi32lo_pd() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi32lo_pd(a); + let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi32lo_pd() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_pd(-1.); + let r = _mm512_mask_cvtepi32lo_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtepi32lo_pd(src, 0b00001111, a); + let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepu32lo_pd() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepu32lo_pd(a); + let e = _mm512_set_pd(8., 9., 10., 11., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepu32lo_pd() { + let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm512_set1_pd(-1.); + let r = _mm512_mask_cvtepu32lo_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvtepu32lo_pd(src, 0b00001111, a); + let e = _mm512_set_pd(-1., -1., -1., -1., 12., 13., 14., 15.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi64_epi32() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi64_epi32(a); + let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_epi32() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm256_set1_epi32(-1); + let r = _mm512_mask_cvtepi64_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtepi64_epi32(src, 0b00001111, a); + let e = _mm256_set_epi32(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi64_epi32() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi64_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtepi64_epi32(0b00001111, a); + let e = _mm256_set_epi32(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepi64_epi32() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_cvtepi64_epi32(a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_epi32() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi64_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi64_epi32() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let r = _mm256_maskz_cvtepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi64_epi32(0b00001111, a); + let e = _mm_set_epi32(1, 2, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepi64_epi32() { + let a = _mm_set_epi64x(3, 4); + let r = _mm_cvtepi64_epi32(a); + let 
e = _mm_set_epi32(0, 0, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_epi32() { + let a = _mm_set_epi64x(3, 4); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi64_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_epi32() { + let a = _mm_set_epi64x(3, 4); + let r = _mm_maskz_cvtepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi64_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi64_epi16() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi64_epi16(a); + let e = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_epi16() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set1_epi16(-1); + let r = _mm512_mask_cvtepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(-1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi64_epi16() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepi64_epi16() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let r = _mm256_cvtepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_epi16() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi64_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi64_epi16() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let r = _mm256_maskz_cvtepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi64_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepi64_epi16() { + let a = _mm_set_epi64x(14, 15); + let r = _mm_cvtepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_epi16() { + let a = _mm_set_epi64x(14, 15); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi64_epi16(src, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_epi16() { + let a = _mm_set_epi64x(14, 15); + let r = _mm_maskz_cvtepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); 
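+ // truncation keeps the low 16 bits of each i64 lane; with only two source lanes, the upper six i16 lanes are zeroed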
+ let r = _mm_maskz_cvtepi64_epi16(0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtepi64_epi8() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_cvtepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_epi8() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_mask_cvtepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtepi64_epi8() { + let a = _mm512_set_epi64(8, 9, 10, 11, 12, 13, 14, 15); + let r = _mm512_maskz_cvtepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtepi64_epi8() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let r = _mm256_cvtepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_epi8() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtepi64_epi8() { + let a = _mm256_set_epi64x(12, 13, 14, 15); + let r = _mm256_maskz_cvtepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 13, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtepi64_epi8() { + let a = _mm_set_epi64x(14, 15); + let r = _mm_cvtepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_epi8() { + let a = _mm_set_epi64x(14, 15); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtepi64_epi8(src, 0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtepi64_epi8() { + let a = _mm_set_epi64x(14, 15); + let r = _mm_maskz_cvtepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtepi64_epi8(0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 14, 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_cvtsepi64_epi32(a); + let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, i32::MIN, 
i32::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let src = _mm256_set1_epi32(-1); + let r = _mm512_mask_cvtsepi64_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtsepi64_epi32(src, 0b00001111, a); + let e = _mm256_set_epi32(-1, -1, -1, -1, 4, 5, i32::MIN, i32::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtsepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_maskz_cvtsepi64_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtsepi64_epi32(0b00001111, a); + let e = _mm256_set_epi32(0, 0, 0, 0, 4, 5, i32::MIN, i32::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi32(a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi32(-1); + let r = _mm256_mask_cvtsepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi32(0b00001111, a); + let e = _mm_set_epi32(4, 5, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi32(a); + let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtsepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi32() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi64_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, i32::MIN, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_cvtsepi64_epi16(a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi16(-1); + let r = _mm512_mask_cvtsepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtsepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(-1, -1, -1, -1, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + 
} + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtsepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_maskz_cvtsepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtsepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtsepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi16() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi16() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtsepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi16(src, 0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi16() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi64_epi16(0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MIN, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtsepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_cvtsepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_mask_cvtsepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtsepi64_epi8(src, 0b00001111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + -1, -1, -1, -1, + 4, 5, i8::MIN, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtsepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MAX); + let r = _mm512_maskz_cvtsepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = 
_mm512_maskz_cvtsepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_cvtsepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtsepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtsepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtsepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, i64::MIN, i64::MAX); + let r = _mm256_maskz_cvtsepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtsepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_cvtsepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtsepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtsepi64_epi8(src, 0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtsepi64_epi8() { + let a = _mm_set_epi64x(i64::MIN, i64::MAX); + let r = _mm_maskz_cvtsepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtsepi64_epi8(0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MIN, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtusepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_cvtusepi64_epi32(a); + let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let src = _mm256_set1_epi32(-1); + let r = _mm512_mask_cvtusepi64_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtusepi64_epi32(src, 0b00001111, a); + let e = _mm256_set_epi32(-1, -1, -1, -1, 4, 5, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtusepi64_epi32() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_maskz_cvtusepi64_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtusepi64_epi32(0b00001111, a); + let e = _mm256_set_epi32(0, 0, 0, 0, 4, 5, -1, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + 
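// unsigned saturation: i64::MAX is larger than u32::MAX, so that lane
+        // clamps to u32::MAX (all ones, i.e. -1 when reinterpreted as i32)
+        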
let r = _mm256_cvtusepi64_epi32(a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvtusepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi32() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_maskz_cvtusepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi64_epi32(0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi32(a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvtusepi64_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi32() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi64_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtusepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_cvtusepi64_epi16(a); + let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let src = _mm_set1_epi16(-1); + let r = _mm512_mask_cvtusepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtusepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(-1, -1, -1, -1, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtusepi64_epi16() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_maskz_cvtusepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtusepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_cvtusepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm256_mask_cvtusepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi16(src, 0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16); + 
assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi16() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_maskz_cvtusepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvtusepi64_epi16(0b00001111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi16(a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi16(0); + let r = _mm_mask_cvtusepi64_epi16(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi16(src, 0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi16() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi16(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi64_epi16(0b00000011, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 6, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtusepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_cvtusepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let src = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, -1, -1, -1, -1); + let r = _mm512_mask_cvtusepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm512_mask_cvtusepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -1, -1, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtusepi64_epi8() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, i64::MIN, i64::MIN); + let r = _mm512_maskz_cvtusepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm512_maskz_cvtusepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, -1, -1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_cvtusepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm256_mask_cvtusepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvtusepi64_epi8(src, 0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvtusepi64_epi8() { + let a = _mm256_set_epi64x(4, 5, 6, i64::MAX); + let r = _mm256_maskz_cvtusepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = 
_mm256_maskz_cvtusepi64_epi8(0b00001111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_cvtusepi64_epi8(a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let src = _mm_set1_epi8(0); + let r = _mm_mask_cvtusepi64_epi8(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvtusepi64_epi8(src, 0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvtusepi64_epi8() { + let a = _mm_set_epi64x(6, i64::MAX); + let r = _mm_maskz_cvtusepi64_epi8(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvtusepi64_epi8(0b00000011, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, u8::MAX as i8); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtt_roundpd_epi32::<_MM_FROUND_NO_EXC>(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvtt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvtt_roundpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvtt_roundpd_epu32::<_MM_FROUND_NO_EXC>(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 
0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvttpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvttpd_epi32(a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvttpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvttpd_epi32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvttpd_epi32(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvttpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvttpd_epi32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvttpd_epi32(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -3, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttpd_epi32() { + let a = _mm256_setr_pd(4., -5.5, 6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvttpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvttpd_epi32(src, 0b00001111, a); + let e = _mm_setr_epi32(4, -5, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttpd_epi32() { + let a = _mm256_setr_pd(4., -5.5, 6., -7.5); + let r = _mm256_maskz_cvttpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvttpd_epi32(0b00001111, a); + let e = _mm_setr_epi32(4, -5, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttpd_epi32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttpd_epi32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttpd_epi32() { + let a = _mm_set_pd(6., -7.5); + let r = _mm_maskz_cvttpd_epi32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttpd_epi32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, -7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvttpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvttpd_epu32(a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvttpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvttpd_epu32(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvttpd_epu32(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvttpd_epu32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvttpd_epu32(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_cvttpd_epu32(0b00001111, a); + let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn 
test_mm256_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_cvttpd_epu32(a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm256_mask_cvttpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm256_mask_cvttpd_epu32(src, 0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_cvttpd_epu32() { + let a = _mm256_set_pd(4., 5.5, 6., 7.5); + let r = _mm256_maskz_cvttpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm256_maskz_cvttpd_epu32(0b00001111, a); + let e = _mm_set_epi32(4, 5, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cvttpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_cvttpd_epu32(a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvttpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let src = _mm_set1_epi32(0); + let r = _mm_mask_cvttpd_epu32(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_cvttpd_epu32(src, 0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_cvttpd_epu32() { + let a = _mm_set_pd(6., 7.5); + let r = _mm_maskz_cvttpd_epu32(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_cvttpd_epu32(0b00000011, a); + let e = _mm_set_epi32(0, 0, 6, 7); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_add_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(-1.); + let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + let r = _mm512_add_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_add_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(-1.); + let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_add_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(-1.); + let r = + _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_add_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sub_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 
0.000000000000000007); + let b = _mm512_set1_pd(1.); + let r = _mm512_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + let r = _mm512_sub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sub_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(1.); + let r = _mm512_mask_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_setr_pd(8., 9.5, 10., 11.5, 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sub_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.000000000000000007); + let b = _mm512_set1_pd(1.); + let r = + _mm512_maskz_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_sub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_setr_pd(0., 0., 0., 0., 11., 12.5, 13., -1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mul_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.); + let b = _mm512_set1_pd(0.1); + let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd( + 0.8, + 0.9500000000000001, + 1., + 1.1500000000000001, + 1.2000000000000002, + 1.35, + 1.4000000000000001, + 0., + ); + assert_eq_m512d(r, e); + let r = _mm512_mul_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_setr_pd(0.8, 0.95, 1.0, 1.15, 1.2, 1.3499999999999999, 1.4, 0.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_mul_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.); + let b = _mm512_set1_pd(0.1); + let r = _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_setr_pd( + 8., + 9.5, + 10., + 11.5, + 1.2000000000000002, + 1.35, + 1.4000000000000001, + 0., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_mul_round_pd() { + let a = _mm512_setr_pd(8., 9.5, 10., 11.5, 12., 13.5, 14., 0.); + let b = _mm512_set1_pd(0.1); + let r = + _mm512_maskz_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_mul_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_setr_pd( + 0., + 0., + 0., + 0., + 1.2000000000000002, + 1.35, + 1.4000000000000001, + 0., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_div_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = 
_mm512_set1_pd(0.3333333333333333); + assert_eq_m512d(r, e); + let r = _mm512_div_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pd(0.3333333333333333); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_div_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_mask_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_setr_pd( + 1., + 1., + 1., + 1., + 0.3333333333333333, + 0.3333333333333333, + 0.3333333333333333, + 0.3333333333333333, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_div_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = + _mm512_maskz_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_div_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_setr_pd( + 0., + 0., + 0., + 0., + 0.3333333333333333, + 0.3333333333333333, + 0.3333333333333333, + 0.3333333333333333, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sqrt_round_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set1_pd(1.7320508075688772); + assert_eq_m512d(r, e); + let r = _mm512_sqrt_round_pd::<{ _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC }>(a); + let e = _mm512_set1_pd(1.7320508075688774); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sqrt_round_pd() { + let a = _mm512_set1_pd(3.); + let r = + _mm512_mask_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, + ); + let e = _mm512_setr_pd( + 3., + 3., + 3., + 3., + 1.7320508075688772, + 1.7320508075688772, + 1.7320508075688772, + 1.7320508075688772, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sqrt_round_pd() { + let a = _mm512_set1_pd(3.); + let r = + _mm512_maskz_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_sqrt_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, + ); + let e = _mm512_setr_pd( + 0., + 0., + 0., + 0., + 1.7320508075688772, + 1.7320508075688772, + 1.7320508075688772, + 1.7320508075688772, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(-1.); + assert_eq_m512d(r, e); + let r = _mm512_fmadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(-0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r 
= _mm512_mask_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + -1., + -1., + -1., + -1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_maskz_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(-1., -1., -1., -1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask3_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(-1., -1., -1., -1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(-1.); + assert_eq_m512d(r, e); + let r = _mm512_fmsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(-0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + -1., + -1., + -1., + -1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(-1., -1., -1., -1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask3_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + 
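// mask3 variants blend into c: lanes with a set mask bit take the fused
+        // result, lanes with a clear bit keep the corresponding lane of c
+        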
let r = _mm512_mask3_fmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(-1., -1., -1., -1., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmaddsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = + _mm512_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_pd(1., -1., 1., -1., 1., -1., 1., -1.); + assert_eq_m512d(r, e); + let r = _mm512_fmaddsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_pd( + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmaddsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + 1., + -1., + 1., + -1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmaddsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_maskz_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(1., -1., 1., -1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmaddsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask3_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmaddsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(1., -1., 1., -1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fmsubadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = + _mm512_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_pd(-1., 1., -1., 1., -1., 1., -1., 1.); + assert_eq_m512d(r, e); + let r = _mm512_fmsubadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_setr_pd( + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + -0.9999999999999999, + 1., + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fmsubadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, 
b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + -1., + 1., + -1., + 1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fmsubadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_maskz_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(-1., 1., -1., 1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fmsubadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask3_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fmsubadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(-1., 1., -1., 1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = + _mm512_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(1.); + assert_eq_m512d(r, e); + let r = _mm512_fnmadd_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + 1., + 1., + 1., + 1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_maskz_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(1., 1., 1., 1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmadd_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(1.); + let r = _mm512_mask3_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fnmadd_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | 
_MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(1., 1., 1., 1., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fnmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = + _mm512_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(1.); + assert_eq_m512d(r, e); + let r = _mm512_fnmsub_round_pd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b, c); + let e = _mm512_set1_pd(0.9999999999999999); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fnmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, b, c, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b00001111, b, c, + ); + let e = _mm512_setr_pd( + 1., + 1., + 1., + 1., + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + 0.000000000000000007, + ); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fnmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_maskz_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, c, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b00001111, a, b, c, + ); + let e = _mm512_setr_pd(1., 1., 1., 1., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask3_fnmsub_round_pd() { + let a = _mm512_set1_pd(0.000000000000000007); + let b = _mm512_set1_pd(1.); + let c = _mm512_set1_pd(-1.); + let r = _mm512_mask3_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0, + ); + assert_eq_m512d(r, c); + let r = _mm512_mask3_fnmsub_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, b, c, 0b00001111, + ); + let e = _mm512_setr_pd(1., 1., 1., 1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_max_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_max_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_mask_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b00001111, a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_max_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_maskz_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = 
_mm512_maskz_max_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a, b); + let e = _mm512_setr_pd(7., 6., 5., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_min_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 3., 2., 1., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_min_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_mask_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b00001111, a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_min_round_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.); + let r = _mm512_maskz_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_min_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a, b); + let e = _mm512_setr_pd(0., 1., 2., 3., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getexp_round_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm512_set1_pd(1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getexp_round_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_mask_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(a, 0b11110000, a); + let e = _mm512_setr_pd(3., 3., 3., 3., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getexp_round_pd() { + let a = _mm512_set1_pd(3.); + let r = _mm512_maskz_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_getexp_round_pd::<_MM_FROUND_CUR_DIRECTION>(0b11110000, a); + let e = _mm512_setr_pd(0., 0., 0., 0., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_roundscale_round_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_roundscale_round_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_mask_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0, a); + let e = _mm512_set1_pd(1.1); + assert_eq_m512d(r, e); + let r = _mm512_mask_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(a, 0b11111111, a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_roundscale_round_pd() { + let a = _mm512_set1_pd(1.1); + let r = _mm512_maskz_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_roundscale_round_pd::<0, _MM_FROUND_CUR_DIRECTION>(0b11111111, a); + let e = _mm512_set1_pd(1.0); + assert_eq_m512d(r, e); + } + + 
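// scalef multiplies each lane of a by 2^floor(b): here 1.0 * 2^3 = 8.0
+    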
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_scalef_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm512_set1_pd(8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_scalef_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_mask_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0, a, b, + ); + assert_eq_m512d(r, a); + let r = _mm512_mask_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + a, 0b11110000, a, b, + ); + let e = _mm512_set_pd(8., 8., 8., 8., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_scalef_round_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(3.); + let r = _mm512_maskz_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0, a, b, + ); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_scalef_round_pd::<{ _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC }>( + 0b11110000, a, b, + ); + let e = _mm512_set_pd(8., 8., 8., 8., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_fixupimm_round_pd() { + let a = _mm512_set1_pd(f64::NAN); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(a, b, c); + let e = _mm512_set1_pd(0.0); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_fixupimm_round_pd() { + let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_mask_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(a, 0b11110000, b, c); + let e = _mm512_set_pd(0., 0., 0., 0., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_fixupimm_round_pd() { + let a = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, 1., 1., 1., 1.); + let b = _mm512_set1_pd(f64::MAX); + let c = _mm512_set1_epi64(i32::MAX as i64); + let r = _mm512_maskz_fixupimm_round_pd::<5, _MM_FROUND_CUR_DIRECTION>(0b11110000, a, b, c); + let e = _mm512_set_pd(0., 0., 0., 0., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_getmant_round_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a); + let e = _mm512_set1_pd(1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_getmant_round_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_mask_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(a, 0b11110000, a); + let e = _mm512_setr_pd(10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_getmant_round_pd() { + let a = _mm512_set1_pd(10.); + let r = _mm512_maskz_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let 
r = _mm512_maskz_getmant_round_pd::< + _MM_MANT_NORM_1_2, + _MM_MANT_SIGN_SRC, + _MM_FROUND_CUR_DIRECTION, + >(0b11110000, a); + let e = _mm512_setr_pd(0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm512_set1_pd(0.); + let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundps_pd() { + let a = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_cvt_roundps_pd::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); + let e = _mm512_setr_pd(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_ps(0.); + let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); + assert_eq_m256(r, src); + let r = _mm512_mask_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundpd_ps() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0, a); + assert_eq_m256(r, _mm256_setzero_ps()); + let r = _mm512_maskz_cvt_roundpd_ps::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a); + let e = _mm256_setr_ps(0., -1.5, 2., -3.5, 0., 0., 0., 0.); + assert_eq_m256(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cvt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let r = _mm512_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + let src = _mm256_set1_epi32(0); + let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a); + let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_cvt_roundpd_epi32() { + let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5); + 
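// maskz (zero-masking): lanes whose mask bit is clear are zeroed in the result
+        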
let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0, a);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm512_maskz_cvt_roundpd_epi32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a);
+        let e = _mm256_setr_epi32(0, -2, 2, -4, 0, 0, 0, 0);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cvt_roundpd_epu32() {
+        let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+        let r = _mm512_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(a);
+        let e = _mm256_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cvt_roundpd_epu32() {
+        let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+        let src = _mm256_set1_epi32(0);
+        let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0, a);
+        assert_eq_m256i(r, src);
+        let r = _mm512_mask_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(src, 0b00001111, a);
+        let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_maskz_cvt_roundpd_epu32() {
+        let a = _mm512_setr_pd(0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5);
+        let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0, a);
+        assert_eq_m256i(r, _mm256_setzero_si256());
+        let r = _mm512_maskz_cvt_roundpd_epu32::<_MM_FROUND_CUR_DIRECTION>(0b00001111, a);
+        let e = _mm256_setr_epi32(0, -1, 2, -1, 0, 0, 0, 0);
+        assert_eq_m256i(r, e);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_setzero_pd() {
+        assert_eq_m512d(_mm512_setzero_pd(), _mm512_set1_pd(0.));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_set1_epi64() {
+        let r = _mm512_set_epi64(2, 2, 2, 2, 2, 2, 2, 2);
+        assert_eq_m512i(r, _mm512_set1_epi64(2));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_set1_pd() {
+        let expected = _mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.);
+        assert_eq_m512d(expected, _mm512_set1_pd(2.));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_set4_epi64() {
+        let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
+        assert_eq_m512i(r, _mm512_set4_epi64(4, 3, 2, 1));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_set4_pd() {
+        let r = _mm512_set_pd(4., 3., 2., 1., 4., 3., 2., 1.);
+        assert_eq_m512d(r, _mm512_set4_pd(4., 3., 2., 1.));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_setr4_epi64() {
+        let r = _mm512_set_epi64(4, 3, 2, 1, 4, 3, 2, 1);
+        assert_eq_m512i(r, _mm512_setr4_epi64(1, 2, 3, 4));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_setr4_pd() {
+        let r = _mm512_set_pd(4., 3., 2., 1., 4., 3., 2., 1.);
+        assert_eq_m512d(r, _mm512_setr4_pd(1., 2., 3., 4.));
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmplt_pd_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+        let b = _mm512_set1_pd(-1.);
+        let m = _mm512_cmplt_pd_mask(a, b);
+        assert_eq!(m, 0b00000101);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_mask_cmplt_pd_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+        let b = _mm512_set1_pd(-1.);
+        let mask = 0b01100110;
+        let r = _mm512_mask_cmplt_pd_mask(mask, a, b);
+        assert_eq!(r, 0b00000100);
+    }
+
+    #[simd_test(enable = "avx512f")]
+    unsafe fn test_mm512_cmpnlt_pd_mask() {
+        #[rustfmt::skip]
+        let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.);
+        let b = _mm512_set1_pd(-1.);
+        assert_eq!(_mm512_cmpnlt_pd_mask(a, b), !_mm512_cmplt_pd_mask(a, b));
+    }
+
+    
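// NLT/NLE are unordered compare predicates: a lane involving NaN compares
+    // true, which is why the NaN lane stays set in the masked results below
+    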
#[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnlt_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01111010; + assert_eq!(_mm512_mask_cmpnlt_pd_mask(mask, a, b), 0b01111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + assert_eq!(_mm512_cmple_pd_mask(a, b), 0b00100101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01111010; + assert_eq!(_mm512_mask_cmple_pd_mask(mask, a, b), 0b00100000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpnle_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let m = _mm512_cmpnle_pd_mask(b, a); + assert_eq!(m, 0b00001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpnle_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., f64::MAX, f64::NAN, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01100110; + let r = _mm512_mask_cmpnle_pd_mask(mask, b, a); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.); + let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.); + let m = _mm512_cmpeq_pd_mask(b, a); + assert_eq!(m, 0b11001101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.); + let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.); + let mask = 0b01111010; + let r = _mm512_mask_cmpeq_pd_mask(mask, b, a); + assert_eq!(r, 0b01001000); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.); + let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.); + let m = _mm512_cmpneq_pd_mask(b, a); + assert_eq!(m, 0b00110010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, f64::NAN, -100.); + let b = _mm512_set_pd(0., 1., 13., 42., f64::MAX, f64::MIN, f64::NAN, -100.); + let mask = 0b01111010; + let r = _mm512_mask_cmpneq_pd_mask(mask, b, a); + assert_eq!(r, 0b00110010) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let m = _mm512_cmp_pd_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_pd_mask() { + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01100110; + let r = _mm512_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_pd_mask() { + let a = _mm256_set_pd(0., 1., -1., 13.); + let b = _mm256_set1_pd(1.); + let m = 
_mm256_cmp_pd_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_pd_mask() { + let a = _mm256_set_pd(0., 1., -1., 13.); + let b = _mm256_set1_pd(1.); + let mask = 0b11111111; + let r = _mm256_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_pd_mask() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set1_pd(1.); + let m = _mm_cmp_pd_mask::<_CMP_LT_OQ>(a, b); + assert_eq!(m, 0b00000010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_pd_mask() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set1_pd(1.); + let mask = 0b11111111; + let r = _mm_mask_cmp_pd_mask::<_CMP_LT_OQ>(mask, a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_round_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let m = _mm512_cmp_round_pd_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_round_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(0., 1., -1., 13., f64::MAX, f64::MIN, 100., -100.); + let b = _mm512_set1_pd(-1.); + let mask = 0b01100110; + let r = _mm512_mask_cmp_round_pd_mask::<_CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION>(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpord_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.); + #[rustfmt::skip] + let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.); + let m = _mm512_cmpord_pd_mask(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpord_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.); + #[rustfmt::skip] + let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.); + let mask = 0b11000011; + let m = _mm512_mask_cmpord_pd_mask(mask, a, b); + assert_eq!(m, 0b00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpunord_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.); + #[rustfmt::skip] + let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.); + let m = _mm512_cmpunord_pd_mask(a, b); + + assert_eq!(m, 0b11111010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpunord_pd_mask() { + #[rustfmt::skip] + let a = _mm512_set_pd(f64::NAN, f64::MAX, f64::NAN, f64::MIN, f64::NAN, -1., f64::NAN, 0.); + #[rustfmt::skip] + let b = _mm512_set_pd(f64::NAN, f64::NAN, f64::NAN, f64::NAN, f64::MIN, f64::MAX, -1., 0.); + let mask = 0b00001111; + let m = _mm512_mask_cmpunord_pd_mask(mask, a, b); + assert_eq!(m, 0b000001010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmplt_epu64_mask(a, b); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, 
i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmplt_epu64_mask(mask, a, b); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmplt_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 100); + let b = _mm256_set1_epi64x(2); + let r = _mm256_cmplt_epu64_mask(a, b); + assert_eq!(r, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 100); + let b = _mm256_set1_epi64x(2); + let mask = 0b11111111; + let r = _mm256_mask_cmplt_epu64_mask(mask, a, b); + assert_eq!(r, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmplt_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(2); + let r = _mm_cmplt_epu64_mask(a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmplt_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(2); + let mask = 0b11111111; + let r = _mm_mask_cmplt_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmpgt_epu64_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmpgt_epu64_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpgt_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set1_epi64x(1); + let r = _mm256_cmpgt_epu64_mask(a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmpgt_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpgt_epu64_mask() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set1_epi64x(1); + let r = _mm_cmpgt_epu64_mask(a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epu64_mask() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmpgt_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!( + _mm512_cmple_epu64_mask(a, b), + !_mm512_cmpgt_epu64_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + assert_eq!(_mm512_mask_cmple_epu64_mask(mask, a, b), 0b01111010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmple_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 1); + let b = _mm256_set1_epi64x(1); + let r = _mm256_cmple_epu64_mask(a, b); + 
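// `_mm256_set_epi64x` takes lanes from highest to lowest, so read from
+ // lane 0 upwards a is [1, 2, 1, 0]; compared as unsigned against
+ // b = [1, 1, 1, 1], only lane 1 (value 2) exceeds the bound, which
+ // clears bit 1 and yields the expected mask 0b1101.
+ 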
assert_eq!(r, 0b00001101) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmple_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, 1); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmple_epu64_mask(mask, a, b); + assert_eq!(r, 0b00001101) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmple_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let r = _mm_cmple_epu64_mask(a, b); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmple_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmple_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpge_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!( + _mm512_cmpge_epu64_mask(a, b), + !_mm512_cmplt_epu64_mask(a, b) + ); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b11111111; + let r = _mm512_mask_cmpge_epu64_mask(mask, a, b); + assert_eq!(r, 0b00110000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpge_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, u64::MAX as i64); + let b = _mm256_set1_epi64x(1); + let r = _mm256_cmpge_epu64_mask(a, b); + assert_eq!(r, 0b00000111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, 2, u64::MAX as i64); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmpge_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpge_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let r = _mm_cmpge_epu64_mask(a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpge_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmpge_epu64_mask(mask, a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpeq_epu64_mask(b, a); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpeq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpeq_epu64_mask(mask, b, a); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpeq_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let m = _mm256_cmpeq_epu64_mask(b, a); + assert_eq!(m, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpeq_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let mask 
= 0b11111111; + let r = _mm256_mask_cmpeq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00001100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpeq_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(0, 1); + let m = _mm_cmpeq_epu64_mask(b, a); + assert_eq!(m, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpeq_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set_epi64x(0, 1); + let mask = 0b11111111; + let r = _mm_mask_cmpeq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpneq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m = _mm512_cmpneq_epu64_mask(b, a); + assert_eq!(m, !_mm512_cmpeq_epu64_mask(b, a)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpneq_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, -100, 100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let mask = 0b01111010; + let r = _mm512_mask_cmpneq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00110010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpneq_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let r = _mm256_cmpneq_epu64_mask(b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpneq_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, u64::MAX as i64); + let b = _mm256_set_epi64x(0, 1, 13, 42); + let mask = 0b11111111; + let r = _mm256_mask_cmpneq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpneq_epu64_mask() { + let a = _mm_set_epi64x(-1, u64::MAX as i64); + let b = _mm_set_epi64x(13, 42); + let r = _mm_cmpneq_epu64_mask(b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpneq_epu64_mask() { + let a = _mm_set_epi64x(-1, u64::MAX as i64); + let b = _mm_set_epi64x(13, 42); + let mask = 0b11111111; + let r = _mm_mask_cmpneq_epu64_mask(mask, b, a); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmp_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b11001111); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_epu64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + let r = _mm512_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b01001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 100); + let b = _mm256_set1_epi64x(1); + let m = _mm256_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00001000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_epu64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 100); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00001000); + } + + 
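// The `_MM_CMPINT_*` immediate generalizes the named comparisons above:
+ // `_MM_CMPINT_LT` reproduces `cmplt`, `_MM_CMPINT_NLT` reproduces `cmpge`,
+ // and so on. A minimal sketch of that equivalence (hypothetical test, not
+ // part of the suite here, reusing the data of `test_mm256_cmp_epu64_mask`):
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_cmp_epu64_mask_matches_named_predicates() {
+ // Hypothetical: checks the generic intrinsic against the named ones.
+ let a = _mm256_set_epi64x(0, 1, -1, 100);
+ let b = _mm256_set1_epi64x(1);
+ assert_eq!(
+ _mm256_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b),
+ _mm256_cmplt_epu64_mask(a, b)
+ );
+ assert_eq!(
+ _mm256_cmp_epu64_mask::<_MM_CMPINT_NLT>(a, b),
+ _mm256_cmpge_epu64_mask(a, b)
+ );
+ }
+
+ 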
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let m = _mm_cmp_epu64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00000010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_epu64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmp_epu64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmplt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmplt_epi64_mask(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmplt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmplt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmplt_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, -13); + let b = _mm256_set1_epi64x(-1); + let r = _mm256_cmplt_epi64_mask(a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmplt_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, -13); + let b = _mm256_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmplt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmplt_epi64_mask() { + let a = _mm_set_epi64x(-1, -13); + let b = _mm_set1_epi64x(-1); + let r = _mm_cmplt_epi64_mask(a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmplt_epi64_mask() { + let a = _mm_set_epi64x(-1, -13); + let b = _mm_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm_mask_cmplt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000001); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpgt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmpgt_epi64_mask(b, a); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpgt_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmpgt_epi64_mask(mask, b, a); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpgt_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set1_epi64x(-1); + let r = _mm256_cmpgt_epi64_mask(a, b); + assert_eq!(r, 0b00001101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpgt_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmpgt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00001101); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpgt_epi64_mask() { + let a = _mm_set_epi64x(0, -1); + let b = _mm_set1_epi64x(-1); + let r = _mm_cmpgt_epi64_mask(a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpgt_epi64_mask() { + let a = _mm_set_epi64x(0, -1); + let b = _mm_set1_epi64x(-1); + let mask 
= 0b11111111; + let r = _mm_mask_cmpgt_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmple_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!( + _mm512_cmple_epi64_mask(a, b), + !_mm512_cmpgt_epi64_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmple_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01111010; + assert_eq!(_mm512_mask_cmple_epi64_mask(mask, a, b), 0b00110000); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmple_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, i64::MAX); + let b = _mm256_set1_epi64x(-1); + let r = _mm256_cmple_epi64_mask(a, b); + assert_eq!(r, 0b00000010) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmple_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, i64::MAX); + let b = _mm256_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmple_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000010) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmple_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let r = _mm_cmple_epi64_mask(a, b); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmple_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmple_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000011) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpge_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + assert_eq!( + _mm512_cmpge_epi64_mask(a, b), + !_mm512_cmplt_epi64_mask(a, b) + ) + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmpge_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, u64::MAX as i64, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b11111111; + let r = _mm512_mask_cmpge_epi64_mask(mask, a, b); + assert_eq!(r, 0b11111010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmpge_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, i64::MAX); + let b = _mm256_set1_epi64x(-1); + let r = _mm256_cmpge_epi64_mask(a, b); + assert_eq!(r, 0b00001111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmpge_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, i64::MAX); + let b = _mm256_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm256_mask_cmpge_epi64_mask(mask, a, b); + assert_eq!(r, 0b00001111); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmpge_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(-1); + let r = _mm_cmpge_epi64_mask(a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmpge_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(-1); + let mask = 0b11111111; + let r = _mm_mask_cmpge_epi64_mask(mask, a, b); + assert_eq!(r, 0b00000011); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_cmpeq_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100); + let m 
= _mm512_cmpeq_epi64_mask(b, a);
+ assert_eq!(m, 0b11001111);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpeq_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpeq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b01001010);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpeq_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let m = _mm256_cmpeq_epi64_mask(b, a);
+ assert_eq!(m, 0b00001100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpeq_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpeq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00001100);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpeq_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(0, 1);
+ let m = _mm_cmpeq_epi64_mask(b, a);
+ assert_eq!(m, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpeq_epi64_mask() {
+ let a = _mm_set_epi64x(0, 1);
+ let b = _mm_set_epi64x(0, 1);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpeq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_set_epi64() {
+ let r = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m512i(r, _mm512_set_epi64(7, 6, 5, 4, 3, 2, 1, 0))
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_setr_epi64() {
+ let r = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ assert_eq_m512i(r, _mm512_setr_epi64(7, 6, 5, 4, 3, 2, 1, 0))
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_cmpneq_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let m = _mm512_cmpneq_epi64_mask(b, a);
+ assert_eq!(m, !_mm512_cmpeq_epi64_mask(b, a));
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_cmpneq_epi64_mask() {
+ let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, -100, 100);
+ let b = _mm512_set_epi64(0, 1, 13, 42, i64::MAX, i64::MIN, 100, -100);
+ let mask = 0b01111010;
+ let r = _mm512_mask_cmpneq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00110010)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_cmpneq_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let r = _mm256_cmpneq_epi64_mask(b, a);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_cmpneq_epi64_mask() {
+ let a = _mm256_set_epi64x(0, 1, -1, 13);
+ let b = _mm256_set_epi64x(0, 1, 13, 42);
+ let mask = 0b11111111;
+ let r = _mm256_mask_cmpneq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_cmpneq_epi64_mask() {
+ let a = _mm_set_epi64x(-1, 13);
+ let b = _mm_set_epi64x(13, 42);
+ let r = _mm_cmpneq_epi64_mask(b, a);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm_mask_cmpneq_epi64_mask() {
+ let a = _mm_set_epi64x(-1, 13);
+ let b = _mm_set_epi64x(13, 42);
+ let mask = 0b11111111;
+ let r = _mm_mask_cmpneq_epi64_mask(mask, b, a);
+ assert_eq!(r, 0b00000011)
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn 
test_mm512_cmp_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let m = _mm512_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00000101); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cmp_epi64_mask() { + let a = _mm512_set_epi64(0, 1, -1, 13, i64::MAX, i64::MIN, 100, -100); + let b = _mm512_set1_epi64(-1); + let mask = 0b01100110; + let r = _mm512_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00000100); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_cmp_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set1_epi64x(1); + let m = _mm256_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cmp_epi64_mask() { + let a = _mm256_set_epi64x(0, 1, -1, 13); + let b = _mm256_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm256_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00001010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_cmp_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let m = _mm_cmp_epi64_mask::<_MM_CMPINT_LT>(a, b); + assert_eq!(m, 0b00000010); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cmp_epi64_mask() { + let a = _mm_set_epi64x(0, 1); + let b = _mm_set1_epi64x(1); + let mask = 0b11111111; + let r = _mm_mask_cmp_epi64_mask::<_MM_CMPINT_LT>(mask, a, b); + assert_eq!(r, 0b00000010); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_pd() { + let mut arr = [0f64; 128]; + for i in 0..128 { + arr[i] = i as f64; + } + // A multiplier of 8 is word-addressing + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i32gather_pd::<8>(index, arr.as_ptr() as *const u8); + assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_pd() { + let mut arr = [0f64; 128]; + for i in 0..128 { + arr[i] = i as f64; + } + let src = _mm512_set1_pd(2.); + let mask = 0b10101010; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i32gather_pd::<8>(src, mask, index, arr.as_ptr() as *const u8); + assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64gather_pd() { + let mut arr = [0f64; 128]; + for i in 0..128 { + arr[i] = i as f64; + } + // A multiplier of 8 is word-addressing + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i64gather_pd::<8>(index, arr.as_ptr() as *const u8); + assert_eq_m512d(r, _mm512_setr_pd(0., 16., 32., 48., 64., 80., 96., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64gather_pd() { + let mut arr = [0f64; 128]; + for i in 0..128 { + arr[i] = i as f64; + } + let src = _mm512_set1_pd(2.); + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i64gather_pd::<8>(src, mask, index, arr.as_ptr() as *const u8); + assert_eq_m512d(r, _mm512_setr_pd(2., 16., 2., 48., 2., 80., 2., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64gather_ps() { + let mut arr = [0f32; 128]; + for i in 0..128 { + arr[i] = i as f32; + } + // A multiplier 
of 4 is word-addressing + #[rustfmt::skip] + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i64gather_ps::<4>(index, arr.as_ptr() as *const u8); + assert_eq_m256(r, _mm256_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64gather_ps() { + let mut arr = [0f32; 128]; + for i in 0..128 { + arr[i] = i as f32; + } + let src = _mm256_set1_ps(2.); + let mask = 0b10101010; + #[rustfmt::skip] + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 4 is word-addressing + let r = _mm512_mask_i64gather_ps::<4>(src, mask, index, arr.as_ptr() as *const u8); + assert_eq_m256(r, _mm256_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32gather_epi64() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + // A multiplier of 8 is word-addressing + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i32gather_epi64::<8>(index, arr.as_ptr() as *const u8); + assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32gather_epi64() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + let src = _mm512_set1_epi64(2); + let mask = 0b10101010; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i32gather_epi64::<8>(src, mask, index, arr.as_ptr() as *const u8); + assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64gather_epi64() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + // A multiplier of 8 is word-addressing + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i64gather_epi64::<8>(index, arr.as_ptr() as *const u8); + assert_eq_m512i(r, _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64gather_epi64() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + let src = _mm512_set1_epi64(2); + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i64gather_epi64::<8>(src, mask, index, arr.as_ptr() as *const u8); + assert_eq_m512i(r, _mm512_setr_epi64(2, 16, 2, 48, 2, 80, 2, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64gather_epi32() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + // A multiplier of 8 is word-addressing + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let r = _mm512_i64gather_epi32::<8>(index, arr.as_ptr() as *const u8); + assert_eq_m256i(r, _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64gather_epi32() { + let mut arr = [0i64; 128]; + for i in 0..128i64 { + arr[i as usize] = i; + } + let src = _mm256_set1_epi32(2); + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + // A multiplier of 8 is word-addressing + let r = _mm512_mask_i64gather_epi32::<8>(src, mask, index, arr.as_ptr() as *const u8); + assert_eq_m256i(r, _mm256_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112)); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_i32scatter_pd() { + let mut arr = [0f64; 128]; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 8 is word-addressing + _mm512_i32scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, index, src); + let mut expected = [0f64; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as f64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_pd() { + let mut arr = [0f64; 128]; + let mask = 0b10101010; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 8 is word-addressing + _mm512_mask_i32scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src); + let mut expected = [0f64; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2. * (i + 1) as f64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64scatter_pd() { + let mut arr = [0f64; 128]; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 8 is word-addressing + _mm512_i64scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, index, src); + let mut expected = [0f64; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as f64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64scatter_pd() { + let mut arr = [0f64; 128]; + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 8 is word-addressing + _mm512_mask_i64scatter_pd::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src); + let mut expected = [0f64; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2. * (i + 1) as f64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64scatter_ps() { + let mut arr = [0f32; 128]; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 4 is word-addressing + _mm512_i64scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, index, src); + let mut expected = [0f32; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64scatter_ps() { + let mut arr = [0f32; 128]; + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm256_setr_ps(1., 2., 3., 4., 5., 6., 7., 8.); + // A multiplier of 4 is word-addressing + _mm512_mask_i64scatter_ps::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src); + let mut expected = [0f32; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2. 
* (i + 1) as f32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i32scatter_epi64() { + let mut arr = [0i64; 128]; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 8 is word-addressing + _mm512_i32scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, index, src); + let mut expected = [0i64; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as i64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i32scatter_epi64() { + let mut arr = [0i64; 128]; + let mask = 0b10101010; + let index = _mm256_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 8 is word-addressing + _mm512_mask_i32scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src); + let mut expected = [0i64; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2 * (i + 1) as i64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64scatter_epi64() { + let mut arr = [0i64; 128]; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 8 is word-addressing + _mm512_i64scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, index, src); + let mut expected = [0i64; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as i64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64scatter_epi64() { + let mut arr = [0i64; 128]; + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 8 is word-addressing + _mm512_mask_i64scatter_epi64::<8>(arr.as_mut_ptr() as *mut u8, mask, index, src); + let mut expected = [0i64; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2 * (i + 1) as i64; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_i64scatter_epi32() { + let mut arr = [0i32; 128]; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 4 is word-addressing + _mm512_i64scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, index, src); + let mut expected = [0i32; 128]; + for i in 0..8 { + expected[i * 16] = (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_i64scatter_epi32() { + let mut arr = [0i32; 128]; + let mask = 0b10101010; + let index = _mm512_setr_epi64(0, 16, 32, 48, 64, 80, 96, 112); + let src = _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8); + // A multiplier of 4 is word-addressing + _mm512_mask_i64scatter_epi32::<4>(arr.as_mut_ptr() as *mut u8, mask, index, src); + let mut expected = [0i32; 128]; + for i in 0..4 { + expected[i * 32 + 16] = 2 * (i + 1) as i32; + } + assert_eq!(&arr[..], &expected[..],); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rol_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_rol_epi64::<1>(a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_mask_rol_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_mask_rol_epi64::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_rol_epi64::<1>(a, 0b11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rol_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 63, + ); + let r = _mm512_maskz_rol_epi64::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rol_epi64::<1>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rol_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_rol_epi64::<1>(a); + let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rol_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_mask_rol_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_rol_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rol_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_maskz_rol_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rol_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(1 << 0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rol_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_rol_epi64::<1>(a); + let e = _mm_set_epi64x(1 << 0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rol_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_mask_rol_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_rol_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rol_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_maskz_rol_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rol_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(1 << 0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_ror_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_ror_epi64::<1>(a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 63, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_ror_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_mask_ror_epi64::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_ror_epi64::<1>(a, 
0b11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 63, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_ror_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let r = _mm512_maskz_ror_epi64::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_ror_epi64::<1>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 1 << 63); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_ror_epi64() { + let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_ror_epi64::<1>(a); + let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_ror_epi64() { + let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_mask_ror_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_ror_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_ror_epi64() { + let a = _mm256_set_epi64x(1 << 0, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_maskz_ror_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_ror_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(1 << 63, 1 << 31, 1 << 31, 1 << 31); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_ror_epi64() { + let a = _mm_set_epi64x(1 << 0, 1 << 32); + let r = _mm_ror_epi64::<1>(a); + let e = _mm_set_epi64x(1 << 63, 1 << 31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_ror_epi64() { + let a = _mm_set_epi64x(1 << 0, 1 << 32); + let r = _mm_mask_ror_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_ror_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 63, 1 << 31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_ror_epi64() { + let a = _mm_set_epi64x(1 << 0, 1 << 32); + let r = _mm_maskz_ror_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_ror_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(1 << 63, 1 << 31); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_slli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_slli_epi64::<1>(a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_slli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_mask_slli_epi64::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_slli_epi64::<1>(a, 0b11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_slli_epi64() { + #[rustfmt::skip] + let a = 
_mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 63, + ); + let r = _mm512_maskz_slli_epi64::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_slli_epi64::<1>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_slli_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_mask_slli_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_slli_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_slli_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let r = _mm256_maskz_slli_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_slli_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_slli_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_mask_slli_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_slli_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_slli_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let r = _mm_maskz_slli_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_slli_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_srli_epi64::<1>(a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let r = _mm512_mask_srli_epi64::<1>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_srli_epi64::<1>(a, 0b11111111, a); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srli_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let r = _mm512_maskz_srli_epi64::<1>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srli_epi64::<1>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srli_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_mask_srli_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_srli_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srli_epi64() { + let a = 
_mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_maskz_srli_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srli_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srli_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_mask_srli_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_srli_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srli_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_maskz_srli_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srli_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rolv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_rolv_epi64(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 34, 1 << 35, + 1 << 36, 1 << 37, 1 << 38, 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rolv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_rolv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_rolv_epi64(a, 0b11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 34, 1 << 35, + 1 << 36, 1 << 37, 1 << 38, 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rolv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 62, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); + let r = _mm512_maskz_rolv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rolv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 1 << 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rolv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_rolv_epi64(a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rolv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_rolv_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_rolv_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rolv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_rolv_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rolv_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 34, 1 << 35); + assert_eq_m256i(r, e); 
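+ // Rotation wraps rather than shifting out: the `1 << 63` lane rotated
+ // left by 1 re-enters at bit 0 (`1 << 0`), unlike `sllv` below, where
+ // the top bit is simply discarded.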
+ } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rolv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 63); + let b = _mm_set_epi64x(0, 1); + let r = _mm_rolv_epi64(a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rolv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 63); + let b = _mm_set_epi64x(0, 1); + let r = _mm_mask_rolv_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_rolv_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rolv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 63); + let b = _mm_set_epi64x(0, 1); + let r = _mm_maskz_rolv_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rolv_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_rorv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_rorv_epi64(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 30, 1 << 29, + 1 << 28, 1 << 27, 1 << 26, 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_rorv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_rorv_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_rorv_epi64(a, 0b11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 30, 1 << 29, + 1 << 28, 1 << 27, 1 << 26, 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_rorv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let b = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 2); + let r = _mm512_maskz_rorv_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_rorv_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 1 << 62); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_rorv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_rorv_epi64(a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_rorv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_rorv_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_rorv_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_rorv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 0, 1 << 32, 1 << 32); + let b = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_rorv_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_rorv_epi64(0b00001111, a, b); 
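+ // With all four mask bits set, the maskz variant matches plain `rorv`:
+ // rotating `1 << 0` right by one wraps around to `1 << 63`.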
+ let e = _mm256_set_epi64x(1 << 32, 1 << 63, 1 << 30, 1 << 29); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_rorv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 0); + let b = _mm_set_epi64x(0, 1); + let r = _mm_rorv_epi64(a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 63); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_rorv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 0); + let b = _mm_set_epi64x(0, 1); + let r = _mm_mask_rorv_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_rorv_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 63); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_rorv_epi64() { + let a = _mm_set_epi64x(1 << 32, 1 << 0); + let b = _mm_set_epi64x(0, 1); + let r = _mm_maskz_rorv_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_rorv_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(1 << 32, 1 << 63); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sllv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 63, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm512_set_epi64(0, 2, 2, 3, 4, 5, 6, 7); + let r = _mm512_sllv_epi64(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 0, 1 << 34, 1 << 35, + 1 << 36, 1 << 37, 1 << 38, 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sllv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 63, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_sllv_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sllv_epi64(a, 0b11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 1 << 33, 0, 1 << 35, + 1 << 36, 1 << 37, 1 << 38, 1 << 39, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sllv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 63, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 1); + let r = _mm512_maskz_sllv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sllv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 36, 1 << 37, 1 << 38, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sllv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 32, 1 << 63, 1 << 32); + let count = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_sllv_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sllv_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 32, 1 << 33, 0, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sllv_epi64() { + let a = _mm256_set_epi64x(1 << 32, 1 << 32, 1 << 63, 1 << 32); + let count = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_sllv_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sllv_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 32, 1 << 33, 0, 1 << 35); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sllv_epi64() { + let a = 
_mm_set_epi64x(1 << 63, 1 << 32); + let count = _mm_set_epi64x(2, 3); + let r = _mm_mask_sllv_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sllv_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(0, 1 << 35); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sllv_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let count = _mm_set_epi64x(2, 3); + let r = _mm_maskz_sllv_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sllv_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(0, 1 << 35); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srlv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_srlv_epi64(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 0, 1 << 30, 1 << 29, + 1 << 28, 1 << 27, 1 << 26, 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srlv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 0, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_srlv_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srlv_epi64(a, 0b11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 32, 0, 1 << 30, 1 << 29, + 1 << 28, 1 << 27, 1 << 26, 1 << 25, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srlv_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let count = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_srlv_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srlv_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 28, 1 << 27, 1 << 26, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srlv_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_mask_srlv_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srlv_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srlv_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_maskz_srlv_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srlv_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srlv_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_mask_srlv_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srlv_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srlv_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_maskz_srlv_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = 
_mm_maskz_srlv_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sll_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_sll_epi64(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + let count = _mm_set_epi64x(1, 0); + let r = _mm512_sll_epi64(a, count); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sll_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 63, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_sll_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sll_epi64(a, 0b11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 33, 1 << 33, 1 << 33, + 1 << 33, 1 << 33, 1 << 33, 1 << 33, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sll_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 63, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_sll_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_sll_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 33, 1 << 33, 1 << 33, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sll_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_mask_sll_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sll_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sll_epi64() { + let a = _mm256_set_epi64x(1 << 63, 1 << 32, 1 << 32, 1 << 32); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_maskz_sll_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sll_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(0, 1 << 33, 1 << 33, 1 << 33); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sll_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let count = _mm_set_epi64x(0, 1); + let r = _mm_mask_sll_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sll_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sll_epi64() { + let a = _mm_set_epi64x(1 << 63, 1 << 32); + let count = _mm_set_epi64x(0, 1); + let r = _mm_maskz_sll_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sll_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(0, 1 << 33); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srl_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_srl_epi64(a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 
31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srl_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 0, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_mask_srl_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srl_epi64(a, 0b11111111, a, count); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 1 << 31, 1 << 31, 1 << 31, + 1 << 31, 1 << 31, 1 << 31, 1 << 31, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srl_epi64() { + #[rustfmt::skip] + let a = _mm512_set_epi64( + 1 << 32, 1 << 32, 1 << 32, 1 << 32, + 1 << 32, 1 << 32, 1 << 32, 1 << 0, + ); + let count = _mm_set_epi64x(0, 1); + let r = _mm512_maskz_srl_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srl_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 1 << 31, 1 << 31, 1 << 31, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srl_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_mask_srl_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srl_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srl_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_maskz_srl_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srl_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srl_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_mask_srl_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srl_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srl_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_maskz_srl_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srl_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_sra_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_mask_sra_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_sra_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_sra_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm_set_epi64x(0, 2); + let r = _mm512_maskz_sra_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = 
_mm512_maskz_sra_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_sra_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_sra_epi64(a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_sra_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_mask_sra_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_sra_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_sra_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm256_maskz_sra_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_sra_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_sra_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_sra_epi64(a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_sra_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_mask_sra_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_sra_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_sra_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set_epi64x(0, 1); + let r = _mm_maskz_sra_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_sra_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_srav_epi64(a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_mask_srav_epi64(a, 0, a, count); + assert_eq_m512i(r, a); + let r = _mm512_mask_srav_epi64(a, 0b11111111, a, count); + let e = _mm512_set_epi64(0, -2, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srav_epi64() { + let a = _mm512_set_epi64(1, -8, 0, 0, 0, 0, 15, -16); + let count = _mm512_set_epi64(2, 2, 0, 0, 0, 0, 2, 1); + let r = _mm512_maskz_srav_epi64(0, a, count); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srav_epi64(0b00001111, a, count); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 3, -8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_srav_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_srav_epi64(a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + 
assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srav_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_mask_srav_epi64(a, 0, a, count); + assert_eq_m256i(r, a); + let r = _mm256_mask_srav_epi64(a, 0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srav_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let count = _mm256_set1_epi64x(1); + let r = _mm256_maskz_srav_epi64(0, a, count); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_srav_epi64(0b00001111, a, count); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_srav_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_srav_epi64(a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srav_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_mask_srav_epi64(a, 0, a, count); + assert_eq_m128i(r, a); + let r = _mm_mask_srav_epi64(a, 0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srav_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let count = _mm_set1_epi64x(1); + let r = _mm_maskz_srav_epi64(0, a, count); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srav_epi64(0b00000011, a, count); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_srai_epi64::<2>(a); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_mask_srai_epi64::<2>(a, 0, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_srai_epi64::<2>(a, 0b11111111, a); + let e = _mm512_set_epi64(0, -1, 3, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_srai_epi64() { + let a = _mm512_set_epi64(1, -4, 15, 0, 0, 0, 0, -16); + let r = _mm512_maskz_srai_epi64::<2>(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_srai_epi64::<2>(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, -4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_srai_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_srai_epi64::<1>(a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_srai_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_mask_srai_epi64::<1>(a, 0, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_srai_epi64::<1>(a, 0b00001111, a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_srai_epi64() { + let a = _mm256_set_epi64x(1 << 5, 0, 0, 0); + let r = _mm256_maskz_srai_epi64::<1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); 
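+ // the shift count is the const generic IMM8 (1 here); with all mask bits set, each lane is shifted right arithmetically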
+ let r = _mm256_maskz_srai_epi64::<1>(0b00001111, a); + let e = _mm256_set_epi64x(1 << 4, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_srai_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_srai_epi64::<1>(a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_srai_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_mask_srai_epi64::<1>(a, 0, a); + assert_eq_m128i(r, a); + let r = _mm_mask_srai_epi64::<1>(a, 0b00000011, a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_srai_epi64() { + let a = _mm_set_epi64x(1 << 5, 0); + let r = _mm_maskz_srai_epi64::<1>(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_srai_epi64::<1>(0b00000011, a); + let e = _mm_set_epi64x(1 << 4, 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permute_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_permute_pd::<0b11_11_11_11>(a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permute_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_mask_permute_pd::<0b11_11_11_11>(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_permute_pd::<0b11_11_11_11>(a, 0b11111111, a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permute_pd() { + let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_permute_pd::<0b11_11_11_11>(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_permute_pd::<0b11_11_11_11>(0b11111111, a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permute_pd() { + let a = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_mask_permute_pd::<0b11_11>(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_permute_pd::<0b11_11>(a, 0b00001111, a); + let e = _mm256_set_pd(3., 3., 1., 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permute_pd() { + let a = _mm256_set_pd(3., 2., 1., 0.); + let r = _mm256_maskz_permute_pd::<0b11_11>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permute_pd::<0b11_11>(0b00001111, a); + let e = _mm256_set_pd(3., 3., 1., 1.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permute_pd() { + let a = _mm_set_pd(1., 0.); + let r = _mm_mask_permute_pd::<0b11>(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_permute_pd::<0b11>(a, 0b00000011, a); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permute_pd() { + let a = _mm_set_pd(1., 0.); + let r = _mm_maskz_permute_pd::<0b11>(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_permute_pd::<0b11>(0b00000011, a); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutex_epi64() { + let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_permutex_epi64::<0b11_11_11_11>(a); + let e = 
_mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutex_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_mask_permutex_epi64::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_permutex_epi64::<0b11_11_11_11>(a, 0b11111111, a);
+ let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutex_epi64() {
+ let a = _mm512_setr_epi64(0, 1, 2, 3, 4, 5, 6, 7);
+ let r = _mm512_maskz_permutex_epi64::<0b11_11_11_11>(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_permutex_epi64::<0b11_11_11_11>(0b11111111, a);
+ let e = _mm512_setr_epi64(3, 3, 3, 3, 7, 7, 7, 7);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_permutex_epi64() {
+ let a = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_permutex_epi64::<0b11_11_11_11>(a);
+ let e = _mm256_set_epi64x(3, 3, 3, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutex_epi64() {
+ let a = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_mask_permutex_epi64::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_permutex_epi64::<0b11_11_11_11>(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(3, 3, 3, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_maskz_permutex_epi64() {
+ let a = _mm256_set_epi64x(3, 2, 1, 0);
+ let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_permutex_epi64::<0b11_11_11_11>(0b00001111, a);
+ let e = _mm256_set_epi64x(3, 3, 3, 3);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_permutex_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_permutex_pd::<0b11_11_11_11>(a);
+ let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_mask_permutex_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_mask_permutex_pd::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m512d(r, a);
+ let r = _mm512_mask_permutex_pd::<0b11_11_11_11>(a, 0b11111111, a);
+ let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f")]
+ unsafe fn test_mm512_maskz_permutex_pd() {
+ let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
+ let r = _mm512_maskz_permutex_pd::<0b11_11_11_11>(0, a);
+ assert_eq_m512d(r, _mm512_setzero_pd());
+ let r = _mm512_maskz_permutex_pd::<0b11_11_11_11>(0b11111111, a);
+ let e = _mm512_setr_pd(3., 3., 3., 3., 7., 7., 7., 7.);
+ assert_eq_m512d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_permutex_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_permutex_pd::<0b11_11_11_11>(a);
+ let e = _mm256_set_pd(0., 0., 0., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe fn test_mm256_mask_permutex_pd() {
+ let a = _mm256_set_pd(0., 1., 2., 3.);
+ let r = _mm256_mask_permutex_pd::<0b11_11_11_11>(a, 0, a);
+ assert_eq_m256d(r, a);
+ let r = _mm256_mask_permutex_pd::<0b11_11_11_11>(a, 0b00001111, a);
+ let e = _mm256_set_pd(0., 0., 0., 0.);
+ assert_eq_m256d(r, e);
+ }
+
+ #[simd_test(enable = "avx512f,avx512vl")]
+ unsafe 
fn test_mm256_maskz_permutex_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_maskz_permutex_pd::<0b11_11_11_11>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permutex_pd::<0b11_11_11_11>(0b00001111, a); + let e = _mm256_set_pd(0., 0., 0., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutevar_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_set1_epi64(0b1); + let r = _mm512_permutevar_pd(a, b); + let e = _mm512_set_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutevar_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_set1_epi64(0b1); + let r = _mm512_mask_permutevar_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_permutevar_pd(a, 0b11111111, a, b); + let e = _mm512_set_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutevar_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let b = _mm512_set1_epi64(0b1); + let r = _mm512_maskz_permutevar_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_permutevar_pd(0b00001111, a, b); + let e = _mm512_set_pd(0., 0., 0., 0., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutevar_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set1_epi64x(0b1); + let r = _mm256_mask_permutevar_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_permutevar_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(1., 1., 3., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutevar_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let b = _mm256_set1_epi64x(0b1); + let r = _mm256_maskz_permutevar_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permutevar_pd(0b00001111, a, b); + let e = _mm256_set_pd(1., 1., 3., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutevar_pd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set1_epi64x(0b1); + let r = _mm_mask_permutevar_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_permutevar_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutevar_pd() { + let a = _mm_set_pd(0., 1.); + let b = _mm_set1_epi64x(0b1); + let r = _mm_maskz_permutevar_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_permutevar_pd(0b00000011, a, b); + let e = _mm_set_pd(1., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutexvar_epi64() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_permutexvar_epi64(idx, a); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutexvar_epi64() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_permutexvar_epi64(a, 0, idx, a); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutexvar_epi64(a, 0b11111111, idx, a); + let e = _mm512_set1_epi64(6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_maskz_permutexvar_epi64() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_permutexvar_epi64(0, idx, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutexvar_epi64(0b00001111, idx, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 6, 6, 6, 6); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutexvar_epi64() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_permutexvar_epi64(idx, a); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutexvar_epi64() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_permutexvar_epi64(a, 0, idx, a); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutexvar_epi64(a, 0b00001111, idx, a); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutexvar_epi64() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_permutexvar_epi64(0, idx, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutexvar_epi64(0b00001111, idx, a); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutexvar_pd() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_permutexvar_pd(idx, a); + let e = _mm512_set1_pd(6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutexvar_pd() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_mask_permutexvar_pd(a, 0, idx, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_permutexvar_pd(a, 0b11111111, idx, a); + let e = _mm512_set1_pd(6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutexvar_pd() { + let idx = _mm512_set1_epi64(1); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_permutexvar_pd(0, idx, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_permutexvar_pd(0b00001111, idx, a); + let e = _mm512_set_pd(0., 0., 0., 0., 6., 6., 6., 6.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutexvar_pd() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_permutexvar_pd(idx, a); + let e = _mm256_set1_pd(2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutexvar_pd() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_mask_permutexvar_pd(a, 0, idx, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_permutexvar_pd(a, 0b00001111, idx, a); + let e = _mm256_set1_pd(2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutexvar_pd() { + let idx = _mm256_set1_epi64x(1); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_maskz_permutexvar_pd(0, idx, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permutexvar_pd(0b00001111, idx, a); + let e = _mm256_set1_pd(2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_permutex2var_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_epi64(100); + let r = _mm512_permutex2var_epi64(a, idx, b); + let e = _mm512_set_epi64(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutex2var_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_epi64(100); + let r = _mm512_mask_permutex2var_epi64(a, 0, idx, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_permutex2var_epi64(a, 0b11111111, idx, b); + let e = _mm512_set_epi64(6, 100, 5, 100, 4, 100, 3, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutex2var_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_epi64(100); + let r = _mm512_maskz_permutex2var_epi64(0, a, idx, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_permutex2var_epi64(0b00001111, a, idx, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 4, 100, 3, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask2_permutex2var_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let idx = _mm512_set_epi64(1000, 1 << 3, 2000, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_epi64(100); + let r = _mm512_mask2_permutex2var_epi64(a, idx, 0, b); + assert_eq_m512i(r, idx); + let r = _mm512_mask2_permutex2var_epi64(a, idx, 0b00001111, b); + let e = _mm512_set_epi64(1000, 1 << 3, 2000, 1 << 3, 4, 100, 3, 100); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutex2var_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_epi64x(100); + let r = _mm256_permutex2var_epi64(a, idx, b); + let e = _mm256_set_epi64x(2, 100, 1, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutex2var_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_epi64x(100); + let r = _mm256_mask_permutex2var_epi64(a, 0, idx, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_permutex2var_epi64(a, 0b00001111, idx, b); + let e = _mm256_set_epi64x(2, 100, 1, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutex2var_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_epi64x(100); + let r = _mm256_maskz_permutex2var_epi64(0, a, idx, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_permutex2var_epi64(0b00001111, a, idx, b); + let e = _mm256_set_epi64x(2, 100, 1, 100); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask2_permutex2var_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_epi64x(100); + let r = _mm256_mask2_permutex2var_epi64(a, idx, 0, b); + assert_eq_m256i(r, idx); + let r = _mm256_mask2_permutex2var_epi64(a, idx, 0b00001111, b); + let e = _mm256_set_epi64x(2, 100, 1, 100); + assert_eq_m256i(r, e); + } + + 
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_permutex2var_epi64() { + let a = _mm_set_epi64x(0, 1); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_epi64x(100); + let r = _mm_permutex2var_epi64(a, idx, b); + let e = _mm_set_epi64x(0, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutex2var_epi64() { + let a = _mm_set_epi64x(0, 1); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_epi64x(100); + let r = _mm_mask_permutex2var_epi64(a, 0, idx, b); + assert_eq_m128i(r, a); + let r = _mm_mask_permutex2var_epi64(a, 0b00000011, idx, b); + let e = _mm_set_epi64x(0, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutex2var_epi64() { + let a = _mm_set_epi64x(0, 1); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_epi64x(100); + let r = _mm_maskz_permutex2var_epi64(0, a, idx, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_permutex2var_epi64(0b00000011, a, idx, b); + let e = _mm_set_epi64x(0, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask2_permutex2var_epi64() { + let a = _mm_set_epi64x(0, 1); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_epi64x(100); + let r = _mm_mask2_permutex2var_epi64(a, idx, 0, b); + assert_eq_m128i(r, idx); + let r = _mm_mask2_permutex2var_epi64(a, idx, 0b00000011, b); + let e = _mm_set_epi64x(0, 100); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_permutex2var_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_pd(100.); + let r = _mm512_permutex2var_pd(a, idx, b); + let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_permutex2var_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_pd(100.); + let r = _mm512_mask_permutex2var_pd(a, 0, idx, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_permutex2var_pd(a, 0b11111111, idx, b); + let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_permutex2var_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_pd(100.); + let r = _mm512_maskz_permutex2var_pd(0, a, idx, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_permutex2var_pd(0b00001111, a, idx, b); + let e = _mm512_set_pd(0., 0., 0., 0., 4., 100., 3., 100.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask2_permutex2var_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let idx = _mm512_set_epi64(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3); + let b = _mm512_set1_pd(100.); + let r = _mm512_mask2_permutex2var_pd(a, idx, 0, b); + assert_eq_m512d(r, _mm512_castsi512_pd(idx)); + let r = _mm512_mask2_permutex2var_pd(a, idx, 0b11111111, b); + let e = _mm512_set_pd(6., 100., 5., 100., 4., 100., 3., 100.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_permutex2var_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let idx = 
_mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_pd(100.); + let r = _mm256_permutex2var_pd(a, idx, b); + let e = _mm256_set_pd(2., 100., 1., 100.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_permutex2var_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_pd(100.); + let r = _mm256_mask_permutex2var_pd(a, 0, idx, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_permutex2var_pd(a, 0b00001111, idx, b); + let e = _mm256_set_pd(2., 100., 1., 100.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_permutex2var_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_pd(100.); + let r = _mm256_maskz_permutex2var_pd(0, a, idx, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_permutex2var_pd(0b00001111, a, idx, b); + let e = _mm256_set_pd(2., 100., 1., 100.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask2_permutex2var_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let idx = _mm256_set_epi64x(1, 1 << 2, 2, 1 << 2); + let b = _mm256_set1_pd(100.); + let r = _mm256_mask2_permutex2var_pd(a, idx, 0, b); + assert_eq_m256d(r, _mm256_castsi256_pd(idx)); + let r = _mm256_mask2_permutex2var_pd(a, idx, 0b00001111, b); + let e = _mm256_set_pd(2., 100., 1., 100.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_permutex2var_pd() { + let a = _mm_set_pd(0., 1.); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_pd(100.); + let r = _mm_permutex2var_pd(a, idx, b); + let e = _mm_set_pd(0., 100.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_permutex2var_pd() { + let a = _mm_set_pd(0., 1.); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_pd(100.); + let r = _mm_mask_permutex2var_pd(a, 0, idx, b); + assert_eq_m128d(r, a); + let r = _mm_mask_permutex2var_pd(a, 0b00000011, idx, b); + let e = _mm_set_pd(0., 100.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_permutex2var_pd() { + let a = _mm_set_pd(0., 1.); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_pd(100.); + let r = _mm_maskz_permutex2var_pd(0, a, idx, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_permutex2var_pd(0b00000011, a, idx, b); + let e = _mm_set_pd(0., 100.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask2_permutex2var_pd() { + let a = _mm_set_pd(0., 1.); + let idx = _mm_set_epi64x(1, 1 << 1); + let b = _mm_set1_pd(100.); + let r = _mm_mask2_permutex2var_pd(a, idx, 0, b); + assert_eq_m128d(r, _mm_castsi128_pd(idx)); + let r = _mm_mask2_permutex2var_pd(a, idx, 0b00000011, b); + let e = _mm_set_pd(0., 100.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_pd() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00001111, a, b); + let e = _mm256_set_pd(2., 1., 6., 5.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_pd() { + let a = _mm256_set_pd(1., 4., 5., 8.); + 
let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_shuffle_pd::<0b11_11_11_11>(0b00001111, a, b); + let e = _mm256_set_pd(2., 1., 6., 5.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_shuffle_pd() { + let a = _mm_set_pd(1., 4.); + let b = _mm_set_pd(2., 3.); + let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_shuffle_pd::<0b11_11_11_11>(a, 0b00000011, a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_shuffle_pd() { + let a = _mm_set_pd(1., 4.); + let b = _mm_set_pd(2., 3.); + let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_shuffle_pd::<0b11_11_11_11>(0b00000011, a, b); + let e = _mm_set_pd(2., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_i64x2() { + let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_shuffle_i64x2::<0b00_00_00_00>(a, b); + let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_i64x2() { + let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_shuffle_i64x2::<0b00_00_00_00>(a, 0b11111111, a, b); + let e = _mm512_setr_epi64(1, 4, 1, 4, 2, 3, 2, 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_i64x2() { + let a = _mm512_setr_epi64(1, 4, 5, 8, 9, 12, 13, 16); + let b = _mm512_setr_epi64(2, 3, 6, 7, 10, 11, 14, 15); + let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_shuffle_i64x2::<0b00_00_00_00>(0b00001111, a, b); + let e = _mm512_setr_epi64(1, 4, 1, 4, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_shuffle_i64x2() { + let a = _mm256_set_epi64x(1, 4, 5, 8); + let b = _mm256_set_epi64x(2, 3, 6, 7); + let r = _mm256_shuffle_i64x2::<0b00>(a, b); + let e = _mm256_set_epi64x(6, 7, 5, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_i64x2() { + let a = _mm256_set_epi64x(1, 4, 5, 8); + let b = _mm256_set_epi64x(2, 3, 6, 7); + let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_shuffle_i64x2::<0b00>(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(6, 7, 5, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_i64x2() { + let a = _mm256_set_epi64x(1, 4, 5, 8); + let b = _mm256_set_epi64x(2, 3, 6, 7); + let r = _mm256_maskz_shuffle_i64x2::<0b00>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_shuffle_i64x2::<0b00>(0b00001111, a, b); + let e = _mm256_set_epi64x(6, 7, 5, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_shuffle_f64x2() { + let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); + let r = 
_mm512_shuffle_f64x2::<0b00_00_00_00>(a, b); + let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_shuffle_f64x2() { + let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_shuffle_f64x2::<0b00_00_00_00>(a, 0b11111111, a, b); + let e = _mm512_setr_pd(1., 4., 1., 4., 2., 3., 2., 3.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_shuffle_f64x2() { + let a = _mm512_setr_pd(1., 4., 5., 8., 9., 12., 13., 16.); + let b = _mm512_setr_pd(2., 3., 6., 7., 10., 11., 14., 15.); + let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_shuffle_f64x2::<0b00_00_00_00>(0b00001111, a, b); + let e = _mm512_setr_pd(1., 4., 1., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_shuffle_f64x2() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_shuffle_f64x2::<0b00>(a, b); + let e = _mm256_set_pd(6., 7., 5., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_shuffle_f64x2() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_shuffle_f64x2::<0b00>(a, 0b00001111, a, b); + let e = _mm256_set_pd(6., 7., 5., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_shuffle_f64x2() { + let a = _mm256_set_pd(1., 4., 5., 8.); + let b = _mm256_set_pd(2., 3., 6., 7.); + let r = _mm256_maskz_shuffle_f64x2::<0b00>(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_shuffle_f64x2::<0b00>(0b00001111, a, b); + let e = _mm256_set_pd(6., 7., 5., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_movedup_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_movedup_pd(a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_movedup_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_mask_movedup_pd(a, 0, a); + assert_eq_m512d(r, a); + let r = _mm512_mask_movedup_pd(a, 0b11111111, a); + let e = _mm512_setr_pd(1., 1., 3., 3., 5., 5., 7., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_movedup_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_movedup_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_movedup_pd(0b00001111, a); + let e = _mm512_setr_pd(1., 1., 3., 3., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_movedup_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let r = _mm256_mask_movedup_pd(a, 0, a); + assert_eq_m256d(r, a); + let r = _mm256_mask_movedup_pd(a, 0b00001111, a); + let e = _mm256_set_pd(2., 2., 4., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_movedup_pd() { + let a = _mm256_set_pd(1., 2., 3., 
4.); + let r = _mm256_maskz_movedup_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_movedup_pd(0b00001111, a); + let e = _mm256_set_pd(2., 2., 4., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_movedup_pd() { + let a = _mm_set_pd(1., 2.); + let r = _mm_mask_movedup_pd(a, 0, a); + assert_eq_m128d(r, a); + let r = _mm_mask_movedup_pd(a, 0b00000011, a); + let e = _mm_set_pd(2., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_movedup_pd() { + let a = _mm_set_pd(1., 2.); + let r = _mm_maskz_movedup_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_movedup_pd(0b00000011, a); + let e = _mm_set_pd(2., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_inserti64x4() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_inserti64x4::<1>(a, b); + let e = _mm512_setr_epi64(1, 2, 3, 4, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_inserti64x4() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_mask_inserti64x4::<1>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_inserti64x4::<1>(a, 0b11111111, a, b); + let e = _mm512_setr_epi64(1, 2, 3, 4, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_inserti64x4() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_maskz_inserti64x4::<1>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_inserti64x4::<1>(0b00001111, a, b); + let e = _mm512_setr_epi64(1, 2, 3, 4, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_insertf64x4() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_insertf64x4::<1>(a, b); + let e = _mm512_setr_pd(1., 2., 3., 4., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_insertf64x4() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_mask_insertf64x4::<1>(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_insertf64x4::<1>(a, 0b11111111, a, b); + let e = _mm512_setr_pd(1., 2., 3., 4., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_insertf64x4() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_maskz_insertf64x4::<1>(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_insertf64x4::<1>(0b00001111, a, b); + let e = _mm512_setr_pd(1., 2., 3., 4., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd128_pd512() { + let a = _mm_setr_pd(17., 18.); + let r = _mm512_castpd128_pd512(a); + let e = _mm512_setr_pd(17., 18., -1., -1., -1., -1., -1., -1.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd256_pd512() { + let a = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_castpd256_pd512(a); + let e = _mm512_setr_pd(17., 18., 19., 20., -1., -1., -1., -1.); + 
assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextpd128_pd512() { + let a = _mm_setr_pd(17., 18.); + let r = _mm512_zextpd128_pd512(a); + let e = _mm512_setr_pd(17., 18., 0., 0., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextpd256_pd512() { + let a = _mm256_setr_pd(17., 18., 19., 20.); + let r = _mm512_zextpd256_pd512(a); + let e = _mm512_setr_pd(17., 18., 19., 20., 0., 0., 0., 0.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd512_pd128() { + let a = _mm512_setr_pd(17., 18., -1., -1., -1., -1., -1., -1.); + let r = _mm512_castpd512_pd128(a); + let e = _mm_setr_pd(17., 18.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd512_pd256() { + let a = _mm512_setr_pd(17., 18., 19., 20., -1., -1., -1., -1.); + let r = _mm512_castpd512_pd256(a); + let e = _mm256_setr_pd(17., 18., 19., 20.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd_ps() { + let a = _mm512_set1_pd(1.); + let r = _mm512_castpd_ps(a); + let e = _mm512_set_ps( + 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, 1.875, 0.0, + 1.875, 0.0, + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castpd_si512() { + let a = _mm512_set1_pd(1.); + let r = _mm512_castpd_si512(a); + let e = _mm512_set_epi32( + 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248, 0, 1072693248, + 0, 1072693248, 0, 1072693248, 0, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi128_si512() { + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_castsi128_si512(a); + let e = _mm512_setr_epi64(17, 18, -1, -1, -1, -1, -1, -1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi256_si512() { + let a = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_castsi256_si512(a); + let e = _mm512_setr_epi64(17, 18, 19, 20, -1, -1, -1, -1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextsi128_si512() { + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_zextsi128_si512(a); + let e = _mm512_setr_epi64(17, 18, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_zextsi256_si512() { + let a = _mm256_setr_epi64x(17, 18, 19, 20); + let r = _mm512_zextsi256_si512(a); + let e = _mm512_setr_epi64(17, 18, 19, 20, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi512_si128() { + let a = _mm512_setr_epi64(17, 18, -1, -1, -1, -1, -1, -1); + let r = _mm512_castsi512_si128(a); + let e = _mm_setr_epi64x(17, 18); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi512_si256() { + let a = _mm512_setr_epi64(17, 18, 19, 20, -1, -1, -1, -1); + let r = _mm512_castsi512_si256(a); + let e = _mm256_setr_epi64x(17, 18, 19, 20); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi512_ps() { + let a = _mm512_set1_epi64(1 << 62); + let r = _mm512_castsi512_ps(a); + let e = _mm512_set_ps( + 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., 2., 0., + ); + assert_eq_m512(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_castsi512_pd() { + let a = _mm512_set1_epi64(1 << 62); + let r = _mm512_castsi512_pd(a); + let e = 
_mm512_set_pd(2., 2., 2., 2., 2., 2., 2., 2.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcastq_epi64() { + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_broadcastq_epi64(a); + let e = _mm512_set1_epi64(17); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcastq_epi64() { + let src = _mm512_set1_epi64(18); + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_mask_broadcastq_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_broadcastq_epi64(src, 0b11111111, a); + let e = _mm512_set1_epi64(17); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcastq_epi64() { + let a = _mm_setr_epi64x(17, 18); + let r = _mm512_maskz_broadcastq_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_broadcastq_epi64(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 17, 17, 17, 17); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_broadcastq_epi64() { + let src = _mm256_set1_epi64x(18); + let a = _mm_set_epi64x(17, 18); + let r = _mm256_mask_broadcastq_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_broadcastq_epi64(src, 0b00001111, a); + let e = _mm256_set1_epi64x(18); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_broadcastq_epi64() { + let a = _mm_set_epi64x(17, 18); + let r = _mm256_maskz_broadcastq_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_broadcastq_epi64(0b00001111, a); + let e = _mm256_set1_epi64x(18); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_broadcastq_epi64() { + let src = _mm_set1_epi64x(18); + let a = _mm_set_epi64x(17, 18); + let r = _mm_mask_broadcastq_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_broadcastq_epi64(src, 0b00000011, a); + let e = _mm_set1_epi64x(18); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_broadcastq_epi64() { + let a = _mm_set_epi64x(17, 18); + let r = _mm_maskz_broadcastq_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_broadcastq_epi64(0b00000011, a); + let e = _mm_set1_epi64x(18); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcastsd_pd() { + let a = _mm_set_pd(17., 18.); + let r = _mm512_broadcastsd_pd(a); + let e = _mm512_set1_pd(18.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcastsd_pd() { + let src = _mm512_set1_pd(18.); + let a = _mm_set_pd(17., 18.); + let r = _mm512_mask_broadcastsd_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_broadcastsd_pd(src, 0b11111111, a); + let e = _mm512_set1_pd(18.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcastsd_pd() { + let a = _mm_set_pd(17., 18.); + let r = _mm512_maskz_broadcastsd_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_broadcastsd_pd(0b00001111, a); + let e = _mm512_set_pd(0., 0., 0., 0., 18., 18., 18., 18.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_broadcastsd_pd() { + let src = _mm256_set1_pd(18.); + let a = _mm_set_pd(17., 18.); + let r = _mm256_mask_broadcastsd_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = 
_mm256_mask_broadcastsd_pd(src, 0b00001111, a); + let e = _mm256_set1_pd(18.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_broadcastsd_pd() { + let a = _mm_set_pd(17., 18.); + let r = _mm256_maskz_broadcastsd_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_broadcastsd_pd(0b00001111, a); + let e = _mm256_set1_pd(18.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcast_i64x4() { + let a = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm512_broadcast_i64x4(a); + let e = _mm512_set_epi64(17, 18, 19, 20, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcast_i64x4() { + let src = _mm512_set1_epi64(18); + let a = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm512_mask_broadcast_i64x4(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_broadcast_i64x4(src, 0b11111111, a); + let e = _mm512_set_epi64(17, 18, 19, 20, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcast_i64x4() { + let a = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm512_maskz_broadcast_i64x4(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_broadcast_i64x4(0b00001111, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 17, 18, 19, 20); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_broadcast_f64x4() { + let a = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm512_broadcast_f64x4(a); + let e = _mm512_set_pd(17., 18., 19., 20., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_broadcast_f64x4() { + let src = _mm512_set1_pd(18.); + let a = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm512_mask_broadcast_f64x4(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_broadcast_f64x4(src, 0b11111111, a); + let e = _mm512_set_pd(17., 18., 19., 20., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_broadcast_f64x4() { + let a = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm512_maskz_broadcast_f64x4(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_broadcast_f64x4(0b00001111, a); + let e = _mm512_set_pd(0., 0., 0., 0., 17., 18., 19., 20.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_blend_epi64() { + let a = _mm512_set1_epi64(1); + let b = _mm512_set1_epi64(2); + let r = _mm512_mask_blend_epi64(0b11110000, a, b); + let e = _mm512_set_epi64(2, 2, 2, 2, 1, 1, 1, 1); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_blend_epi64() { + let a = _mm256_set1_epi64x(1); + let b = _mm256_set1_epi64x(2); + let r = _mm256_mask_blend_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(2); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_blend_epi64() { + let a = _mm_set1_epi64x(1); + let b = _mm_set1_epi64x(2); + let r = _mm_mask_blend_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_blend_pd() { + let a = _mm512_set1_pd(1.); + let b = _mm512_set1_pd(2.); + let r = _mm512_mask_blend_pd(0b11110000, a, b); + let e = _mm512_set_pd(2., 2., 2., 2., 1., 1., 1., 1.); + assert_eq_m512d(r, e); + } + + 
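// Note on mask/lane numbering for the masked tests in this module: bit i of a + // __mmask8 controls lane i, so the least-significant mask bit selects the + // lowest lane, i.e. the *last* argument of _mm*_set_* and the *first* argument + // of _mm*_setr_*. +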
#[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_blend_pd() { + let a = _mm256_set1_pd(1.); + let b = _mm256_set1_pd(2.); + let r = _mm256_mask_blend_pd(0b00001111, a, b); + let e = _mm256_set1_pd(2.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_blend_pd() { + let a = _mm_set1_pd(1.); + let b = _mm_set1_pd(2.); + let r = _mm_mask_blend_pd(0b00000011, a, b); + let e = _mm_set1_pd(2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpackhi_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_unpackhi_epi64(a, b); + let e = _mm512_set_epi64(17, 1, 19, 3, 21, 5, 23, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpackhi_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_mask_unpackhi_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpackhi_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64(17, 1, 19, 3, 21, 5, 23, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpackhi_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_maskz_unpackhi_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpackhi_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 21, 5, 23, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpackhi_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm256_mask_unpackhi_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpackhi_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(17, 1, 19, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpackhi_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm256_maskz_unpackhi_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpackhi_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(17, 1, 19, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpackhi_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(17, 18); + let r = _mm_mask_unpackhi_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpackhi_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(17, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpackhi_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(17, 18); + let r = _mm_maskz_unpackhi_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpackhi_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(17, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpackhi_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_unpackhi_pd(a, b); + let e = _mm512_set_pd(17., 1., 19., 3., 21., 5., 23., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpackhi_pd() { 
+ let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_mask_unpackhi_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_unpackhi_pd(a, 0b11111111, a, b); + let e = _mm512_set_pd(17., 1., 19., 3., 21., 5., 23., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpackhi_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_maskz_unpackhi_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_unpackhi_pd(0b00001111, a, b); + let e = _mm512_set_pd(0., 0., 0., 0., 21., 5., 23., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpackhi_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm256_mask_unpackhi_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_unpackhi_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(17., 1., 19., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpackhi_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm256_maskz_unpackhi_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_unpackhi_pd(0b00001111, a, b); + let e = _mm256_set_pd(17., 1., 19., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpackhi_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(17., 18.); + let r = _mm_mask_unpackhi_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_unpackhi_pd(a, 0b00000011, a, b); + let e = _mm_set_pd(17., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpackhi_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(17., 18.); + let r = _mm_maskz_unpackhi_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_unpackhi_pd(0b00000011, a, b); + let e = _mm_set_pd(17., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpacklo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_unpacklo_epi64(a, b); + let e = _mm512_set_epi64(18, 2, 20, 4, 22, 6, 24, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpacklo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_mask_unpacklo_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_unpacklo_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64(18, 2, 20, 4, 22, 6, 24, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpacklo_epi64() { + let a = _mm512_set_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let b = _mm512_set_epi64(17, 18, 19, 20, 21, 22, 23, 24); + let r = _mm512_maskz_unpacklo_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_unpacklo_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 22, 6, 24, 8); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpacklo_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(17, 18, 19, 20); 
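+ // A zero mask keeps the destination operand (here `a`) unchanged, while the + // 0b1111 mask enables all four lanes and yields the plain unpackhi result.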
+ let r = _mm256_mask_unpacklo_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_unpacklo_epi64(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(18, 2, 20, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpacklo_epi64() { + let a = _mm256_set_epi64x(1, 2, 3, 4); + let b = _mm256_set_epi64x(17, 18, 19, 20); + let r = _mm256_maskz_unpacklo_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_unpacklo_epi64(0b00001111, a, b); + let e = _mm256_set_epi64x(18, 2, 20, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpacklo_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(17, 18); + let r = _mm_mask_unpacklo_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_unpacklo_epi64(a, 0b00000011, a, b); + let e = _mm_set_epi64x(18, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpacklo_epi64() { + let a = _mm_set_epi64x(1, 2); + let b = _mm_set_epi64x(17, 18); + let r = _mm_maskz_unpacklo_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_unpacklo_epi64(0b00000011, a, b); + let e = _mm_set_epi64x(18, 2); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_unpacklo_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_unpacklo_pd(a, b); + let e = _mm512_set_pd(18., 2., 20., 4., 22., 6., 24., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_unpacklo_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_mask_unpacklo_pd(a, 0, a, b); + assert_eq_m512d(r, a); + let r = _mm512_mask_unpacklo_pd(a, 0b11111111, a, b); + let e = _mm512_set_pd(18., 2., 20., 4., 22., 6., 24., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_unpacklo_pd() { + let a = _mm512_set_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let b = _mm512_set_pd(17., 18., 19., 20., 21., 22., 23., 24.); + let r = _mm512_maskz_unpacklo_pd(0, a, b); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_unpacklo_pd(0b00001111, a, b); + let e = _mm512_set_pd(0., 0., 0., 0., 22., 6., 24., 8.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_unpacklo_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm256_mask_unpacklo_pd(a, 0, a, b); + assert_eq_m256d(r, a); + let r = _mm256_mask_unpacklo_pd(a, 0b00001111, a, b); + let e = _mm256_set_pd(18., 2., 20., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_unpacklo_pd() { + let a = _mm256_set_pd(1., 2., 3., 4.); + let b = _mm256_set_pd(17., 18., 19., 20.); + let r = _mm256_maskz_unpacklo_pd(0, a, b); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_unpacklo_pd(0b00001111, a, b); + let e = _mm256_set_pd(18., 2., 20., 4.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_unpacklo_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(17., 18.); + let r = _mm_mask_unpacklo_pd(a, 0, a, b); + assert_eq_m128d(r, a); + let r = _mm_mask_unpacklo_pd(a, 0b00000011, a, b); + let e = 
_mm_set_pd(18., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_unpacklo_pd() { + let a = _mm_set_pd(1., 2.); + let b = _mm_set_pd(17., 18.); + let r = _mm_maskz_unpacklo_pd(0, a, b); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_unpacklo_pd(0b00000011, a, b); + let e = _mm_set_pd(18., 2.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_alignr_epi64() { + let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1); + let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9); + let r = _mm512_alignr_epi64::<0>(a, b); + assert_eq_m512i(r, b); + let r = _mm512_alignr_epi64::<8>(a, b); + assert_eq_m512i(r, b); + let r = _mm512_alignr_epi64::<1>(a, b); + let e = _mm512_set_epi64(1, 16, 15, 14, 13, 12, 11, 10); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_alignr_epi64() { + let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1); + let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9); + let r = _mm512_mask_alignr_epi64::<1>(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_alignr_epi64::<1>(a, 0b11111111, a, b); + let e = _mm512_set_epi64(1, 16, 15, 14, 13, 12, 11, 10); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_alignr_epi64() { + let a = _mm512_set_epi64(8, 7, 6, 5, 4, 3, 2, 1); + let b = _mm512_set_epi64(16, 15, 14, 13, 12, 11, 10, 9); + let r = _mm512_maskz_alignr_epi64::<1>(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_alignr_epi64::<1>(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 13, 12, 11, 10); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_alignr_epi64() { + let a = _mm256_set_epi64x(4, 3, 2, 1); + let b = _mm256_set_epi64x(8, 7, 6, 5); + let r = _mm256_alignr_epi64::<0>(a, b); + let e = _mm256_set_epi64x(8, 7, 6, 5); + assert_eq_m256i(r, e); + let r = _mm256_alignr_epi64::<6>(a, b); + let e = _mm256_set_epi64x(6, 5, 4, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_alignr_epi64() { + let a = _mm256_set_epi64x(4, 3, 2, 1); + let b = _mm256_set_epi64x(8, 7, 6, 5); + let r = _mm256_mask_alignr_epi64::<1>(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_alignr_epi64::<0>(a, 0b00001111, a, b); + let e = _mm256_set_epi64x(8, 7, 6, 5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_alignr_epi64() { + let a = _mm256_set_epi64x(4, 3, 2, 1); + let b = _mm256_set_epi64x(8, 7, 6, 5); + let r = _mm256_maskz_alignr_epi64::<1>(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_alignr_epi64::<0>(0b00001111, a, b); + let e = _mm256_set_epi64x(8, 7, 6, 5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_alignr_epi64() { + let a = _mm_set_epi64x(2, 1); + let b = _mm_set_epi64x(4, 3); + let r = _mm_alignr_epi64::<0>(a, b); + let e = _mm_set_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_alignr_epi64() { + let a = _mm_set_epi64x(2, 1); + let b = _mm_set_epi64x(4, 3); + let r = _mm_mask_alignr_epi64::<1>(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_alignr_epi64::<0>(a, 0b00000011, a, b); + let e = _mm_set_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_alignr_epi64() 
{ + let a = _mm_set_epi64x(2, 1); + let b = _mm_set_epi64x(4, 3); + let r = _mm_maskz_alignr_epi64::<1>(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_alignr_epi64::<0>(0b00000011, a, b); + let e = _mm_set_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_and_epi64(a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_and_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_mask_and_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_and_epi64(a, 0b01111111, a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_and_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_maskz_and_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_and_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_and_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 0); + let r = _mm256_mask_and_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_and_epi64(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_and_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 0); + let r = _mm256_maskz_and_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_and_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_and_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 0); + let r = _mm_mask_and_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_and_epi64(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_and_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 0); + let r = _mm_maskz_and_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_and_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_and_si512() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_and_si512(a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_or_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_or_epi64(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, + 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_or_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_mask_or_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_or_epi64(a, 0b11111111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, + 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_or_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_maskz_or_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_or_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_or_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_or_epi64(a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_or_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_mask_or_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_or_epi64(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_or_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_maskz_or_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_or_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_or_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_or_epi64(a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_or_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_mask_or_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_or_epi64(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_or_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_maskz_or_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_or_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_or_si512() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_or_si512(a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, + 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_xor_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_xor_epi64(a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_xor_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_mask_xor_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_xor_epi64(a, 0b11111111, a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_xor_epi64() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_maskz_xor_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_xor_epi64(0b00001111, a, b); + let e = _mm512_set_epi64(0, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_xor_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_xor_epi64(a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_xor_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_mask_xor_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_xor_epi64(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_xor_epi64() { + let a = _mm256_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm256_set1_epi64x(1 << 13); + let r = _mm256_maskz_xor_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_xor_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_xor_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_xor_epi64(a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_xor_epi64() { + let a = _mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_mask_xor_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_xor_epi64(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_xor_epi64() { + let a = 
_mm_set1_epi64x(1 << 0 | 1 << 15); + let b = _mm_set1_epi64x(1 << 13); + let r = _mm_maskz_xor_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_xor_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 0 | 1 << 13 | 1 << 15); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_xor_si512() { + let a = _mm512_set_epi64(1 << 0 | 1 << 15, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let b = _mm512_set_epi64(1 << 13, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 2 | 1 << 3); + let r = _mm512_xor_si512(a, b); + let e = _mm512_set_epi64(1 << 0 | 1 << 13 | 1 << 15, 0, 0, 0, 0, 0, 0, 0); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_andnot_epi64() { + let a = _mm512_set1_epi64(0); + let b = _mm512_set1_epi64(1 << 3 | 1 << 4); + let r = _mm512_andnot_epi64(a, b); + let e = _mm512_set1_epi64(1 << 3 | 1 << 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_andnot_epi64() { + let a = _mm512_set1_epi64(1 << 1 | 1 << 2); + let b = _mm512_set1_epi64(1 << 3 | 1 << 4); + let r = _mm512_mask_andnot_epi64(a, 0, a, b); + assert_eq_m512i(r, a); + let r = _mm512_mask_andnot_epi64(a, 0b11111111, a, b); + let e = _mm512_set1_epi64(1 << 3 | 1 << 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_andnot_epi64() { + let a = _mm512_set1_epi64(1 << 1 | 1 << 2); + let b = _mm512_set1_epi64(1 << 3 | 1 << 4); + let r = _mm512_maskz_andnot_epi64(0, a, b); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_andnot_epi64(0b00001111, a, b); + #[rustfmt::skip] + let e = _mm512_set_epi64( + 0, 0, 0, 0, + 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, + ); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_andnot_epi64() { + let a = _mm256_set1_epi64x(1 << 1 | 1 << 2); + let b = _mm256_set1_epi64x(1 << 3 | 1 << 4); + let r = _mm256_mask_andnot_epi64(a, 0, a, b); + assert_eq_m256i(r, a); + let r = _mm256_mask_andnot_epi64(a, 0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 3 | 1 << 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_andnot_epi64() { + let a = _mm256_set1_epi64x(1 << 1 | 1 << 2); + let b = _mm256_set1_epi64x(1 << 3 | 1 << 4); + let r = _mm256_maskz_andnot_epi64(0, a, b); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_andnot_epi64(0b00001111, a, b); + let e = _mm256_set1_epi64x(1 << 3 | 1 << 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_andnot_epi64() { + let a = _mm_set1_epi64x(1 << 1 | 1 << 2); + let b = _mm_set1_epi64x(1 << 3 | 1 << 4); + let r = _mm_mask_andnot_epi64(a, 0, a, b); + assert_eq_m128i(r, a); + let r = _mm_mask_andnot_epi64(a, 0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 3 | 1 << 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_andnot_epi64() { + let a = _mm_set1_epi64x(1 << 1 | 1 << 2); + let b = _mm_set1_epi64x(1 << 3 | 1 << 4); + let r = _mm_maskz_andnot_epi64(0, a, b); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_andnot_epi64(0b00000011, a, b); + let e = _mm_set1_epi64x(1 << 3 | 1 << 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_andnot_si512() { + let a = _mm512_set1_epi64(0); + let b = _mm512_set1_epi64(1 << 3 | 1 << 4); + let r = _mm512_andnot_si512(a, 
b); + let e = _mm512_set1_epi64(1 << 3 | 1 << 4); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_add_epi64() { + let a = _mm512_set1_epi64(1); + let e: i64 = _mm512_reduce_add_epi64(a); + assert_eq!(8, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_add_epi64() { + let a = _mm512_set1_epi64(1); + let e: i64 = _mm512_mask_reduce_add_epi64(0b11110000, a); + assert_eq!(4, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_add_pd() { + let a = _mm512_set1_pd(1.); + let e: f64 = _mm512_reduce_add_pd(a); + assert_eq!(8., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_add_pd() { + let a = _mm512_set1_pd(1.); + let e: f64 = _mm512_mask_reduce_add_pd(0b11110000, a); + assert_eq!(4., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_mul_epi64() { + let a = _mm512_set1_epi64(2); + let e: i64 = _mm512_reduce_mul_epi64(a); + assert_eq!(256, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_mul_epi64() { + let a = _mm512_set1_epi64(2); + let e: i64 = _mm512_mask_reduce_mul_epi64(0b11110000, a); + assert_eq!(16, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_mul_pd() { + let a = _mm512_set1_pd(2.); + let e: f64 = _mm512_reduce_mul_pd(a); + assert_eq!(256., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_mul_pd() { + let a = _mm512_set1_pd(2.); + let e: f64 = _mm512_mask_reduce_mul_pd(0b11110000, a); + assert_eq!(16., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: i64 = _mm512_reduce_max_epi64(a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: i64 = _mm512_mask_reduce_max_epi64(0b11110000, a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_epu64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: u64 = _mm512_reduce_max_epu64(a); + assert_eq!(7, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_epu64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: u64 = _mm512_mask_reduce_max_epu64(0b11110000, a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_max_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let e: f64 = _mm512_reduce_max_pd(a); + assert_eq!(7., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_max_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let e: f64 = _mm512_mask_reduce_max_pd(0b11110000, a); + assert_eq!(3., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: i64 = _mm512_reduce_min_epi64(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_min_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: i64 = _mm512_mask_reduce_min_epi64(0b11110000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_epu64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: u64 = _mm512_reduce_min_epu64(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_mask_reduce_min_epu64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let e: u64 = _mm512_mask_reduce_min_epu64(0b11110000, a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_min_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let e: f64 = _mm512_reduce_min_pd(a); + assert_eq!(0., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_min_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let e: f64 = _mm512_mask_reduce_min_pd(0b11110000, a); + assert_eq!(0., e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_and_epi64() { + let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2); + let e: i64 = _mm512_reduce_and_epi64(a); + assert_eq!(0, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_and_epi64() { + let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2); + let e: i64 = _mm512_mask_reduce_and_epi64(0b11110000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_reduce_or_epi64() { + let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2); + let e: i64 = _mm512_reduce_or_epi64(a); + assert_eq!(3, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_reduce_or_epi64() { + let a = _mm512_set_epi64(1, 1, 1, 1, 2, 2, 2, 2); + let e: i64 = _mm512_mask_reduce_or_epi64(0b11110000, a); + assert_eq!(1, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_extractf64x4_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_extractf64x4_pd::<1>(a); + let e = _mm256_setr_pd(5., 6., 7., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_extractf64x4_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let src = _mm256_set1_pd(100.); + let r = _mm512_mask_extractf64x4_pd::<1>(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm512_mask_extractf64x4_pd::<1>(src, 0b11111111, a); + let e = _mm256_setr_pd(5., 6., 7., 8.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_extractf64x4_pd() { + let a = _mm512_setr_pd(1., 2., 3., 4., 5., 6., 7., 8.); + let r = _mm512_maskz_extractf64x4_pd::<1>(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm512_maskz_extractf64x4_pd::<1>(0b00000001, a); + let e = _mm256_setr_pd(5., 0., 0., 0.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_extracti64x4_epi64() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_extracti64x4_epi64::<0x1>(a); + let e = _mm256_setr_epi64x(5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_extracti64x4_epi64() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let src = _mm256_set1_epi64x(100); + let r = _mm512_mask_extracti64x4_epi64::<0x1>(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm512_mask_extracti64x4_epi64::<0x1>(src, 0b11111111, a); + let e = _mm256_setr_epi64x(5, 6, 7, 8); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_extracti64x4_epi64() { + let a = _mm512_setr_epi64(1, 2, 3, 4, 5, 6, 7, 8); + let r = _mm512_maskz_extracti64x4_epi64::<0x1>(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm512_maskz_extracti64x4_epi64::<0x1>(0b00000001, a); + let e = _mm256_setr_epi64x(5, 0, 0, 0); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn 
test_mm512_mask_compress_epi64() { + let src = _mm512_set1_epi64(200); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_compress_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_compress_epi64(src, 0b01010101, a); + let e = _mm512_set_epi64(200, 200, 200, 200, 1, 3, 5, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_compress_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_compress_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_compress_epi64(0b01010101, a); + let e = _mm512_set_epi64(0, 0, 0, 0, 1, 3, 5, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compress_epi64() { + let src = _mm256_set1_epi64x(200); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_compress_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_compress_epi64(src, 0b00000101, a); + let e = _mm256_set_epi64x(200, 200, 1, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_compress_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_compress_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_compress_epi64(0b00000101, a); + let e = _mm256_set_epi64x(0, 0, 1, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compress_epi64() { + let src = _mm_set1_epi64x(200); + let a = _mm_set_epi64x(0, 1); + let r = _mm_mask_compress_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_compress_epi64(src, 0b00000001, a); + let e = _mm_set_epi64x(200, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_compress_epi64() { + let a = _mm_set_epi64x(0, 1); + let r = _mm_maskz_compress_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_compress_epi64(0b00000001, a); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_compress_pd() { + let src = _mm512_set1_pd(200.); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_mask_compress_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_compress_pd(src, 0b01010101, a); + let e = _mm512_set_pd(200., 200., 200., 200., 1., 3., 5., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_compress_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_compress_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_compress_pd(0b01010101, a); + let e = _mm512_set_pd(0., 0., 0., 0., 1., 3., 5., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_compress_pd() { + let src = _mm256_set1_pd(200.); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_mask_compress_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_compress_pd(src, 0b00000101, a); + let e = _mm256_set_pd(200., 200., 1., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_compress_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_maskz_compress_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_compress_pd(0b00000101, a); + let e = _mm256_set_pd(0., 0., 1., 3.); + 
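// mask 0b0101 selects lanes 0 and 2 (values 3. and 1.), which compress packs + // into the two lowest lanes; the maskz_ variant zeroes the remaining lanes. +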
assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_compress_pd() { + let src = _mm_set1_pd(200.); + let a = _mm_set_pd(0., 1.); + let r = _mm_mask_compress_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_compress_pd(src, 0b00000001, a); + let e = _mm_set_pd(200., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_compress_pd() { + let a = _mm_set_pd(0., 1.); + let r = _mm_maskz_compress_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_compress_pd(0b00000001, a); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expand_epi64() { + let src = _mm512_set1_epi64(200); + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_mask_expand_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_expand_epi64(src, 0b01010101, a); + let e = _mm512_set_epi64(200, 4, 200, 5, 200, 6, 200, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expand_epi64() { + let a = _mm512_set_epi64(0, 1, 2, 3, 4, 5, 6, 7); + let r = _mm512_maskz_expand_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_expand_epi64(0b01010101, a); + let e = _mm512_set_epi64(0, 4, 0, 5, 0, 6, 0, 7); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_expand_epi64() { + let src = _mm256_set1_epi64x(200); + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_mask_expand_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_expand_epi64(src, 0b00000101, a); + let e = _mm256_set_epi64x(200, 2, 200, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expand_epi64() { + let a = _mm256_set_epi64x(0, 1, 2, 3); + let r = _mm256_maskz_expand_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_expand_epi64(0b00000101, a); + let e = _mm256_set_epi64x(0, 2, 0, 3); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expand_epi64() { + let src = _mm_set1_epi64x(200); + let a = _mm_set_epi64x(0, 1); + let r = _mm_mask_expand_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_expand_epi64(src, 0b00000001, a); + let e = _mm_set_epi64x(200, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expand_epi64() { + let a = _mm_set_epi64x(0, 1); + let r = _mm_maskz_expand_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_expand_epi64(0b00000001, a); + let e = _mm_set_epi64x(0, 1); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_expand_pd() { + let src = _mm512_set1_pd(200.); + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_mask_expand_pd(src, 0, a); + assert_eq_m512d(r, src); + let r = _mm512_mask_expand_pd(src, 0b01010101, a); + let e = _mm512_set_pd(200., 4., 200., 5., 200., 6., 200., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_expand_pd() { + let a = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.); + let r = _mm512_maskz_expand_pd(0, a); + assert_eq_m512d(r, _mm512_setzero_pd()); + let r = _mm512_maskz_expand_pd(0b01010101, a); + let e = _mm512_set_pd(0., 4., 0., 5., 0., 6., 0., 7.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = 
"avx512f,avx512vl")] + unsafe fn test_mm256_mask_expand_pd() { + let src = _mm256_set1_pd(200.); + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_mask_expand_pd(src, 0, a); + assert_eq_m256d(r, src); + let r = _mm256_mask_expand_pd(src, 0b00000101, a); + let e = _mm256_set_pd(200., 2., 200., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_expand_pd() { + let a = _mm256_set_pd(0., 1., 2., 3.); + let r = _mm256_maskz_expand_pd(0, a); + assert_eq_m256d(r, _mm256_setzero_pd()); + let r = _mm256_maskz_expand_pd(0b00000101, a); + let e = _mm256_set_pd(0., 2., 0., 3.); + assert_eq_m256d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_expand_pd() { + let src = _mm_set1_pd(200.); + let a = _mm_set_pd(0., 1.); + let r = _mm_mask_expand_pd(src, 0, a); + assert_eq_m128d(r, src); + let r = _mm_mask_expand_pd(src, 0b00000001, a); + let e = _mm_set_pd(200., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_expand_pd() { + let a = _mm_set_pd(0., 1.); + let r = _mm_maskz_expand_pd(0, a); + assert_eq_m128d(r, _mm_setzero_pd()); + let r = _mm_maskz_expand_pd(0b00000001, a); + let e = _mm_set_pd(0., 1.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_loadu_epi64() { + let a = &[4, 3, 2, 5, -8, -9, -64, -50]; + let p = a.as_ptr(); + let r = _mm512_loadu_epi64(black_box(p)); + let e = _mm512_setr_epi64(4, 3, 2, 5, -8, -9, -64, -50); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_loadu_epi64() { + let a = &[4, 3, 2, 5]; + let p = a.as_ptr(); + let r = _mm256_loadu_epi64(black_box(p)); + let e = _mm256_setr_epi64x(4, 3, 2, 5); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_loadu_epi64() { + let a = &[4, 3]; + let p = a.as_ptr(); + let r = _mm_loadu_epi64(black_box(p)); + let e = _mm_setr_epi64x(4, 3); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_storeu_epi16() { + let a = _mm512_set1_epi64(9); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set1_epi16(9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_storeu_epi16() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm_set1_epi16(0); + _mm256_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_storeu_epi16() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi16() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set1_epi16(i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi16() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm256_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let 
e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi16() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtsepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, i16::MAX, i16::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi16() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_undefined_si128(); + _mm512_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set1_epi16(u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi16() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm256_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16( + 0, + 0, + 0, + 0, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + u16::MAX as i16, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi16() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtusepi64_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi16(0, 0, 0, 0, 0, 0, u16::MAX as i16, u16::MAX as i16); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_storeu_epi8() { + let a = _mm512_set1_epi64(9); + let mut r = _mm_set1_epi8(0); + _mm512_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_storeu_epi8() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_storeu_epi8() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi8() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm512_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi8() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + i8::MAX, i8::MAX, i8::MAX, i8::MAX, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi8() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + 
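// the signed saturating narrowing store clamps each i64::MAX lane to i8::MAX +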
_mm_mask_cvtsepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, i8::MAX, i8::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi8() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm512_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi8() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm256_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi8() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi8(0); + _mm_mask_cvtusepi64_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a); + #[rustfmt::skip] + let e = _mm_set_epi8( + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, u8::MAX as i8, u8::MAX as i8, + ); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtepi64_storeu_epi32() { + let a = _mm512_set1_epi64(9); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm256_set1_epi32(9); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtepi64_storeu_epi32() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm_set1_epi32(0); + _mm256_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi32(9, 9, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtepi64_storeu_epi32() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm_set_epi32(0, 0, 9, 9); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtsepi64_storeu_epi32() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = _mm256_undefined_si256(); + _mm512_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm256_set1_epi32(i32::MAX); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtsepi64_storeu_epi32() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi32(0); + _mm256_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a); + let e = _mm_set1_epi32(i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtsepi64_storeu_epi32() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtsepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a); + let e = _mm_set_epi32(0, 0, i32::MAX, i32::MAX); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_cvtusepi64_storeu_epi32() { + let a = _mm512_set1_epi64(i64::MAX); + let mut r = 
_mm256_undefined_si256(); + _mm512_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b11111111, a); + let e = _mm256_set1_epi32(u32::MAX as i32); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_cvtusepi64_storeu_epi32() { + let a = _mm256_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi32(0); + _mm256_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00001111, a); + let e = _mm_set1_epi32(u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_cvtusepi64_storeu_epi32() { + let a = _mm_set1_epi64x(i64::MAX); + let mut r = _mm_set1_epi16(0); + _mm_mask_cvtusepi64_storeu_epi32(&mut r as *mut _ as *mut i8, 0b00000011, a); + let e = _mm_set_epi32(0, 0, u32::MAX as i32, u32::MAX as i32); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_storeu_epi64() { + let a = _mm512_set1_epi64(9); + let mut r = _mm512_set1_epi64(0); + _mm512_storeu_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_storeu_epi64() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm256_set1_epi64x(0); + _mm256_storeu_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_storeu_epi64() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi64x(0); + _mm_storeu_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_load_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 8], // 64 bytes + } + let a = Align { + data: [4, 3, 2, 5, -8, -9, -64, -50], + }; + let p = (a.data).as_ptr(); + let r = _mm512_load_epi64(black_box(p)); + let e = _mm512_setr_epi64(4, 3, 2, 5, -8, -9, -64, -50); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_load_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 4], + } + let a = Align { data: [4, 3, 2, 5] }; + let p = (a.data).as_ptr(); + let r = _mm256_load_epi64(black_box(p)); + let e = _mm256_set_epi64x(5, 2, 3, 4); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_load_epi64() { + #[repr(align(64))] + struct Align { + data: [i64; 2], + } + let a = Align { data: [4, 3] }; + let p = (a.data).as_ptr(); + let r = _mm_load_epi64(black_box(p)); + let e = _mm_set_epi64x(3, 4); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_store_epi64() { + let a = _mm512_set1_epi64(9); + let mut r = _mm512_set1_epi64(0); + _mm512_store_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m512i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_store_epi64() { + let a = _mm256_set1_epi64x(9); + let mut r = _mm256_set1_epi64x(0); + _mm256_store_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m256i(r, a); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_store_epi64() { + let a = _mm_set1_epi64x(9); + let mut r = _mm_set1_epi64x(0); + _mm_store_epi64(&mut r as *mut _ as *mut i64, a); + assert_eq_m128i(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_load_pd() { + #[repr(align(64))] + struct Align { + data: [f64; 8], // 64 bytes + } + let a = Align { + data: [4., 3., 2., 5., -8., -9., -64., -50.], + }; + let p = (a.data).as_ptr(); + let r = _mm512_load_pd(black_box(p)); + let e = 
_mm512_setr_pd(4., 3., 2., 5., -8., -9., -64., -50.); + assert_eq_m512d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_store_pd() { + let a = _mm512_set1_pd(9.); + let mut r = _mm512_undefined_pd(); + _mm512_store_pd(&mut r as *mut _ as *mut f64, a); + assert_eq_m512d(r, a); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_test_epi64_mask() { + let a = _mm512_set1_epi64(1 << 0); + let b = _mm512_set1_epi64(1 << 0 | 1 << 1); + let r = _mm512_test_epi64_mask(a, b); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_test_epi64_mask() { + let a = _mm512_set1_epi64(1 << 0); + let b = _mm512_set1_epi64(1 << 0 | 1 << 1); + let r = _mm512_mask_test_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm512_mask_test_epi64_mask(0b11111111, a, b); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_test_epi64_mask() { + let a = _mm256_set1_epi64x(1 << 0); + let b = _mm256_set1_epi64x(1 << 0 | 1 << 1); + let r = _mm256_test_epi64_mask(a, b); + let e: __mmask8 = 0b00001111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_test_epi64_mask() { + let a = _mm256_set1_epi64x(1 << 0); + let b = _mm256_set1_epi64x(1 << 0 | 1 << 1); + let r = _mm256_mask_test_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm256_mask_test_epi64_mask(0b00001111, a, b); + let e: __mmask8 = 0b00001111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_test_epi64_mask() { + let a = _mm_set1_epi64x(1 << 0); + let b = _mm_set1_epi64x(1 << 0 | 1 << 1); + let r = _mm_test_epi64_mask(a, b); + let e: __mmask8 = 0b00000011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_test_epi64_mask() { + let a = _mm_set1_epi64x(1 << 0); + let b = _mm_set1_epi64x(1 << 0 | 1 << 1); + let r = _mm_mask_test_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm_mask_test_epi64_mask(0b00000011, a, b); + let e: __mmask8 = 0b00000011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_testn_epi64_mask() { + let a = _mm512_set1_epi64(1 << 0); + let b = _mm512_set1_epi64(1 << 0 | 1 << 1); + let r = _mm512_testn_epi64_mask(a, b); + let e: __mmask8 = 0b00000000; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_testn_epi64_mask() { + let a = _mm512_set1_epi64(1 << 0); + let b = _mm512_set1_epi64(1 << 1); + let r = _mm512_mask_testn_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm512_mask_testn_epi64_mask(0b11111111, a, b); + let e: __mmask8 = 0b11111111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_testn_epi64_mask() { + let a = _mm256_set1_epi64x(1 << 0); + let b = _mm256_set1_epi64x(1 << 1); + let r = _mm256_testn_epi64_mask(a, b); + let e: __mmask8 = 0b00001111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_testn_epi64_mask() { + let a = _mm256_set1_epi64x(1 << 0); + let b = _mm256_set1_epi64x(1 << 1); + let r = _mm256_mask_testn_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm256_mask_testn_epi64_mask(0b11111111, a, b); + let e: __mmask8 = 0b00001111; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_testn_epi64_mask() { + let a = _mm_set1_epi64x(1 << 0); + let b = _mm_set1_epi64x(1 << 1); + let r = 
_mm_testn_epi64_mask(a, b); + let e: __mmask8 = 0b00000011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_testn_epi64_mask() { + let a = _mm_set1_epi64x(1 << 0); + let b = _mm_set1_epi64x(1 << 1); + let r = _mm_mask_testn_epi64_mask(0, a, b); + assert_eq!(r, 0); + let r = _mm_mask_testn_epi64_mask(0b11111111, a, b); + let e: __mmask8 = 0b00000011; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_stream_pd() { + #[repr(align(64))] + struct Memory { + pub data: [f64; 8], + } + let a = _mm512_set1_pd(7.0); + let mut mem = Memory { data: [-1.0; 8] }; + + _mm512_stream_pd(&mut mem.data[0] as *mut f64, a); + for i in 0..8 { + assert_eq!(mem.data[i], get_m512d(a, i)); + } + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_stream_si512() { + #[repr(align(64))] + struct Memory { + pub data: [i64; 8], + } + let a = _mm512_set1_epi64(7); + let mut mem = Memory { data: [-1; 8] }; + + _mm512_stream_si512(&mut mem.data[0] as *mut i64, a); + for i in 0..8 { + assert_eq!(mem.data[i], get_m512i(a, i)); + } + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_mask_set1_epi64() { + let src = _mm512_set1_epi64(2); + let a: i64 = 11; + let r = _mm512_mask_set1_epi64(src, 0, a); + assert_eq_m512i(r, src); + let r = _mm512_mask_set1_epi64(src, 0b11111111, a); + let e = _mm512_set1_epi64(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm512_maskz_set1_epi64() { + let a: i64 = 11; + let r = _mm512_maskz_set1_epi64(0, a); + assert_eq_m512i(r, _mm512_setzero_si512()); + let r = _mm512_maskz_set1_epi64(0b11111111, a); + let e = _mm512_set1_epi64(11); + assert_eq_m512i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_mask_set1_epi64() { + let src = _mm256_set1_epi64x(2); + let a: i64 = 11; + let r = _mm256_mask_set1_epi64(src, 0, a); + assert_eq_m256i(r, src); + let r = _mm256_mask_set1_epi64(src, 0b00001111, a); + let e = _mm256_set1_epi64x(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm256_maskz_set1_epi64() { + let a: i64 = 11; + let r = _mm256_maskz_set1_epi64(0, a); + assert_eq_m256i(r, _mm256_setzero_si256()); + let r = _mm256_maskz_set1_epi64(0b00001111, a); + let e = _mm256_set1_epi64x(11); + assert_eq_m256i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_mask_set1_epi64() { + let src = _mm_set1_epi64x(2); + let a: i64 = 11; + let r = _mm_mask_set1_epi64(src, 0, a); + assert_eq_m128i(r, src); + let r = _mm_mask_set1_epi64(src, 0b00000011, a); + let e = _mm_set1_epi64x(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f,avx512vl")] + unsafe fn test_mm_maskz_set1_epi64() { + let a: i64 = 11; + let r = _mm_maskz_set1_epi64(0, a); + assert_eq_m128i(r, _mm_setzero_si128()); + let r = _mm_maskz_set1_epi64(0b00000011, a); + let e = _mm_set1_epi64x(11); + assert_eq_m128i(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtsd_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtss_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundi64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, 
b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsi64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundsi64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvti64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: i64 = 9; + let r = _mm_cvti64_ss(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvti64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvti64_sd(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_si64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvt_roundsd_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsd_u64() { + let a = _mm_set_pd(1., f64::MAX); + let r = _mm_cvt_roundsd_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtsd_u64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtsd_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_i64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_si64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_si64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: i64 = -1; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvt_roundss_u64::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtss_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvttsd_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_i64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_i64::<_MM_FROUND_CUR_DIRECTION>(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_si64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_si64::<_MM_FROUND_CUR_DIRECTION>(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundsd_u64() { + let a = _mm_set_pd(1., -1.5); + let r = _mm_cvtt_roundsd_u64::<_MM_FROUND_CUR_DIRECTION>(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttsd_u64() { + let a = _mm_set_pd(1., -1.5); + 
let r = _mm_cvttsd_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvttss_i64(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_i64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_i64::<_MM_FROUND_CUR_DIRECTION>(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_si64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_si64::<_MM_FROUND_CUR_DIRECTION>(a); + let e: i64 = -2; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtt_roundss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvtt_roundss_u64::<_MM_FROUND_CUR_DIRECTION>(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvttss_u64() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let r = _mm_cvttss_u64(a); + let e: u64 = u64::MAX; + assert_eq!(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtu64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: u64 = 9; + let r = _mm_cvtu64_ss(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvtu64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: u64 = 9; + let r = _mm_cvtu64_sd(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundu64_ss() { + let a = _mm_set_ps(0., -0.5, 1., -1.5); + let b: u64 = 9; + let r = _mm_cvt_roundu64_ss::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_ps(0., -0.5, 1., 9.); + assert_eq_m128(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundu64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: u64 = 9; + let r = _mm_cvt_roundu64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundi64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } + + #[simd_test(enable = "avx512f")] + unsafe fn test_mm_cvt_roundsi64_sd() { + let a = _mm_set_pd(1., -1.5); + let b: i64 = 9; + let r = _mm_cvt_roundsi64_sd::<{ _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC }>(a, b); + let e = _mm_set_pd(1., 9.); + assert_eq_m128d(r, e); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs new file mode 100644 index 000000000..9f71a8d38 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi.rs @@ -0,0 +1,183 @@ +//! Bit Manipulation Instruction (BMI) Set 1.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! 
[wikipedia_bmi]: https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Extracts bits in range [`start`, `start` + `len`) from `a` into
+/// the least significant bits of the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(bextr))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _bextr_u64(a: u64, start: u32, len: u32) -> u64 {
+    _bextr2_u64(a, ((start & 0xff) | ((len & 0xff) << 8)) as u64)
+}
+
+/// Extracts bits of `a` specified by `control` into
+/// the least significant bits of the result.
+///
+/// Bits `[7,0]` of `control` specify the index to the first bit in the range
+/// to be extracted, and bits `[15,8]` specify the length of the range.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bextr2_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(bextr))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _bextr2_u64(a: u64, control: u64) -> u64 {
+    x86_bmi_bextr_64(a, control)
+}
+
+/// Bitwise logical `AND` of inverted `a` with `b`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_andn_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(andn))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _andn_u64(a: u64, b: u64) -> u64 {
+    !a & b
+}
+
+/// Extracts lowest set isolated bit.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsi_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(blsi))]
+#[cfg(not(target_arch = "x86"))] // generates lots of instructions
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _blsi_u64(x: u64) -> u64 {
+    x & x.wrapping_neg()
+}
+
+/// Gets mask up to lowest set bit.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsmsk_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(blsmsk))]
+#[cfg(not(target_arch = "x86"))] // generates lots of instructions
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _blsmsk_u64(x: u64) -> u64 {
+    x ^ (x.wrapping_sub(1_u64))
+}
+
+/// Resets the lowest set bit of `x`.
+///
+/// If `x` is `0`, CF is set.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_blsr_u64)
+#[inline]
+#[target_feature(enable = "bmi1")]
+#[cfg_attr(test, assert_instr(blsr))]
+#[cfg(not(target_arch = "x86"))] // generates lots of instructions
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _blsr_u64(x: u64) -> u64 {
+    x & (x.wrapping_sub(1))
+}
+
+/// Counts the number of trailing least significant zero bits.
+///
+/// When the source operand is `0`, it returns its size in bits.
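+///
+/// A quick illustration (sketch; `bmi1` must be enabled at runtime):
+///
+/// ```ignore
+/// // 0b0100 has two trailing zero bits; a zero operand exhausts all 64 bits.
+/// assert_eq!(unsafe { _tzcnt_u64(0b0100) }, 2);
+/// assert_eq!(unsafe { _tzcnt_u64(0) }, 64);
+/// ```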
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_tzcnt_u64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _tzcnt_u64(x: u64) -> u64 { + x.trailing_zeros() as u64 +} + +/// Counts the number of trailing least significant zero bits. +/// +/// When the source operand is `0`, it returns its size in bits. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_tzcnt_64) +#[inline] +#[target_feature(enable = "bmi1")] +#[cfg_attr(test, assert_instr(tzcnt))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_tzcnt_64(x: u64) -> i64 { + x.trailing_zeros() as i64 +} + +extern "C" { + #[link_name = "llvm.x86.bmi.bextr.64"] + fn x86_bmi_bextr_64(x: u64, y: u64) -> u64; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::{x86::*, x86_64::*}; + + #[simd_test(enable = "bmi1")] + unsafe fn test_bextr_u64() { + let r = _bextr_u64(0b0101_0000u64, 4, 4); + assert_eq!(r, 0b0000_0101u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_andn_u64() { + assert_eq!(_andn_u64(0, 0), 0); + assert_eq!(_andn_u64(0, 1), 1); + assert_eq!(_andn_u64(1, 0), 0); + assert_eq!(_andn_u64(1, 1), 0); + + let r = _andn_u64(0b0000_0000u64, 0b0000_0000u64); + assert_eq!(r, 0b0000_0000u64); + + let r = _andn_u64(0b0000_0000u64, 0b1111_1111u64); + assert_eq!(r, 0b1111_1111u64); + + let r = _andn_u64(0b1111_1111u64, 0b0000_0000u64); + assert_eq!(r, 0b0000_0000u64); + + let r = _andn_u64(0b1111_1111u64, 0b1111_1111u64); + assert_eq!(r, 0b0000_0000u64); + + let r = _andn_u64(0b0100_0000u64, 0b0101_1101u64); + assert_eq!(r, 0b0001_1101u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsi_u64() { + assert_eq!(_blsi_u64(0b1101_0000u64), 0b0001_0000u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsmsk_u64() { + let r = _blsmsk_u64(0b0011_0000u64); + assert_eq!(r, 0b0001_1111u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_blsr_u64() { + // TODO: test the behavior when the input is `0`. + let r = _blsr_u64(0b0011_0000u64); + assert_eq!(r, 0b0010_0000u64); + } + + #[simd_test(enable = "bmi1")] + unsafe fn test_tzcnt_u64() { + assert_eq!(_tzcnt_u64(0b0000_0001u64), 0u64); + assert_eq!(_tzcnt_u64(0b0000_0000u64), 64u64); + assert_eq!(_tzcnt_u64(0b1001_0000u64), 4u64); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs new file mode 100644 index 000000000..356d95a3d --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs @@ -0,0 +1,139 @@ +//! Bit Manipulation Instruction (BMI) Set 2.0. +//! +//! The reference is [Intel 64 and IA-32 Architectures Software Developer's +//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref]. +//! +//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions +//! available. +//! +//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +//! [wikipedia_bmi]: +//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29 + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Unsigned multiply without affecting flags. 
+/// +/// Unsigned multiplication of `a` with `b` returning a pair `(lo, hi)` with +/// the low half and the high half of the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64) +#[inline] +#[cfg_attr(test, assert_instr(mul))] +#[target_feature(enable = "bmi2")] +#[cfg(not(target_arch = "x86"))] // calls an intrinsic +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 { + let result: u128 = (a as u128) * (b as u128); + *hi = (result >> 64) as u64; + result as u64 +} + +/// Zeroes higher bits of `a` >= `index`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(bzhi))] +#[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 { + x86_bmi2_bzhi_64(a, index as u64) +} + +/// Scatter contiguous low order bits of `a` to the result at the positions +/// specified by the `mask`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(pdep))] +#[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 { + x86_bmi2_pdep_64(a, mask) +} + +/// Gathers the bits of `x` specified by the `mask` into the contiguous low +/// order bit positions of the result. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64) +#[inline] +#[target_feature(enable = "bmi2")] +#[cfg_attr(test, assert_instr(pext))] +#[cfg(not(target_arch = "x86"))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 { + x86_bmi2_pext_64(a, mask) +} + +extern "C" { + #[link_name = "llvm.x86.bmi.bzhi.64"] + fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64; + #[link_name = "llvm.x86.bmi.pdep.64"] + fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64; + #[link_name = "llvm.x86.bmi.pext.64"] + fn x86_bmi2_pext_64(x: u64, y: u64) -> u64; +} + +#[cfg(test)] +mod tests { + use stdarch_test::simd_test; + + use crate::core_arch::x86_64::*; + + #[simd_test(enable = "bmi2")] + unsafe fn test_pext_u64() { + let n = 0b1011_1110_1001_0011u64; + + let m0 = 0b0110_0011_1000_0101u64; + let s0 = 0b0000_0000_0011_0101u64; + + let m1 = 0b1110_1011_1110_1111u64; + let s1 = 0b0001_0111_0100_0011u64; + + assert_eq!(_pext_u64(n, m0), s0); + assert_eq!(_pext_u64(n, m1), s1); + } + + #[simd_test(enable = "bmi2")] + unsafe fn test_pdep_u64() { + let n = 0b1011_1110_1001_0011u64; + + let m0 = 0b0110_0011_1000_0101u64; + let s0 = 0b0000_0010_0000_0101u64; + + let m1 = 0b1110_1011_1110_1111u64; + let s1 = 0b1110_1001_0010_0011u64; + + assert_eq!(_pdep_u64(n, m0), s0); + assert_eq!(_pdep_u64(n, m1), s1); + } + + #[simd_test(enable = "bmi2")] + unsafe fn test_bzhi_u64() { + let n = 0b1111_0010u64; + let s = 0b0001_0010u64; + assert_eq!(_bzhi_u64(n, 5), s); + } + + #[simd_test(enable = "bmi2")] + #[rustfmt::skip] + unsafe fn test_mulx_u64() { + let a: u64 = 9_223_372_036_854_775_800; + let b: u64 = 100; + let mut hi = 0; + let lo = _mulx_u64(a, b, &mut hi); + /* +result = 922337203685477580000 = +0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000 + ^~hi~~~~ 
^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + */ + assert_eq!( + lo, + 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64 + ); + assert_eq!(hi, 0b00110001u64); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/bswap.rs b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs new file mode 100644 index 000000000..90a209ce3 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/bswap.rs @@ -0,0 +1,29 @@ +//! Byte swap intrinsics. + +#![allow(clippy::module_name_repetitions)] + +#[cfg(test)] +use stdarch_test::assert_instr; + +/// Returns an integer with the reversed byte order of x +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bswap64) +#[inline] +#[cfg_attr(test, assert_instr(bswap))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _bswap64(x: i64) -> i64 { + x.swap_bytes() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_bswap64() { + unsafe { + assert_eq!(_bswap64(0x0EADBEEFFADECA0E), 0x0ECADEFAEFBEAD0E); + assert_eq!(_bswap64(0x0000000000000000), 0x0000000000000000); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/bt.rs b/library/stdarch/crates/core_arch/src/x86_64/bt.rs new file mode 100644 index 000000000..53da9d02f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/bt.rs @@ -0,0 +1,135 @@ +use crate::arch::asm; +#[cfg(test)] +use stdarch_test::assert_instr; + +// x32 wants to use a 32-bit address size, but asm! defaults to using the full +// register name (e.g. rax). We have to explicitly override the placeholder to +// use the 32-bit register name in that case. +#[cfg(target_pointer_width = "32")] +macro_rules! bt { + ($inst:expr) => { + concat!($inst, " {b}, ({p:e})") + }; +} +#[cfg(target_pointer_width = "64")] +macro_rules! bt { + ($inst:expr) => { + concat!($inst, " {b}, ({p})") + }; +} + +/// Returns the bit in position `b` of the memory addressed by `p`. +#[inline] +#[cfg_attr(test, assert_instr(bt))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittest64(p: *const i64, b: i64) -> u8 { + let r: u8; + asm!( + bt!("btq"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(readonly, nostack, pure, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then sets the bit to `1`. +#[inline] +#[cfg_attr(test, assert_instr(bts))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittestandset64(p: *mut i64, b: i64) -> u8 { + let r: u8; + asm!( + bt!("btsq"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(nostack, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then resets that bit to `0`. +#[inline] +#[cfg_attr(test, assert_instr(btr))] +#[stable(feature = "simd_x86_bittest", since = "1.55.0")] +pub unsafe fn _bittestandreset64(p: *mut i64, b: i64) -> u8 { + let r: u8; + asm!( + bt!("btrq"), + "setc {r}", + p = in(reg) p, + b = in(reg) b, + r = out(reg_byte) r, + options(nostack, att_syntax) + ); + r +} + +/// Returns the bit in position `b` of the memory addressed by `p`, then inverts that bit. 
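+///
+/// A small sketch of the toggle behavior (mirrors the tests below):
+///
+/// ```ignore
+/// let mut a = 0b0101_0000i64;
+/// // Each call reports the old bit value, then flips the bit,
+/// // so two calls on the same position cancel out.
+/// assert_eq!(unsafe { _bittestandcomplement64(&mut a as _, 4) }, 1);
+/// assert_eq!(unsafe { _bittestandcomplement64(&mut a as _, 4) }, 0);
+/// ```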
+#[inline]
+#[cfg_attr(test, assert_instr(btc))]
+#[stable(feature = "simd_x86_bittest", since = "1.55.0")]
+pub unsafe fn _bittestandcomplement64(p: *mut i64, b: i64) -> u8 {
+    let r: u8;
+    asm!(
+        bt!("btcq"),
+        "setc {r}",
+        p = in(reg) p,
+        b = in(reg) b,
+        r = out(reg_byte) r,
+        options(nostack, att_syntax)
+    );
+    r
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::x86_64::*;
+
+    #[test]
+    fn test_bittest64() {
+        unsafe {
+            let a = 0b0101_0000i64;
+            assert_eq!(_bittest64(&a as _, 4), 1);
+            assert_eq!(_bittest64(&a as _, 5), 0);
+        }
+    }
+
+    #[test]
+    fn test_bittestandset64() {
+        unsafe {
+            let mut a = 0b0101_0000i64;
+            assert_eq!(_bittestandset64(&mut a as _, 4), 1);
+            assert_eq!(_bittestandset64(&mut a as _, 4), 1);
+            assert_eq!(_bittestandset64(&mut a as _, 5), 0);
+            assert_eq!(_bittestandset64(&mut a as _, 5), 1);
+        }
+    }
+
+    #[test]
+    fn test_bittestandreset64() {
+        unsafe {
+            let mut a = 0b0101_0000i64;
+            assert_eq!(_bittestandreset64(&mut a as _, 4), 1);
+            assert_eq!(_bittestandreset64(&mut a as _, 4), 0);
+            assert_eq!(_bittestandreset64(&mut a as _, 5), 0);
+            assert_eq!(_bittestandreset64(&mut a as _, 5), 0);
+        }
+    }
+
+    #[test]
+    fn test_bittestandcomplement64() {
+        unsafe {
+            let mut a = 0b0101_0000i64;
+            assert_eq!(_bittestandcomplement64(&mut a as _, 4), 1);
+            assert_eq!(_bittestandcomplement64(&mut a as _, 4), 0);
+            assert_eq!(_bittestandcomplement64(&mut a as _, 4), 1);
+            assert_eq!(_bittestandcomplement64(&mut a as _, 5), 0);
+            assert_eq!(_bittestandcomplement64(&mut a as _, 5), 1);
+        }
+    }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
new file mode 100644
index 000000000..391daed20
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/cmpxchg16b.rs
@@ -0,0 +1,73 @@
+use crate::sync::atomic::Ordering;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Compares and exchanges 16 bytes (128 bits) of data atomically.
+///
+/// This intrinsic corresponds to the `cmpxchg16b` instruction on `x86_64`
+/// processors. It performs an atomic compare-and-swap, updating the `dst`
+/// memory location to `new` if the current value in memory equals `old`.
+///
+/// # Return value
+///
+/// This function returns the previous value at the memory location. If it is
+/// equal to `old` then the memory was updated to `new`.
+///
+/// # Memory Orderings
+///
+/// This atomic operation has the same memory-ordering semantics as
+/// `AtomicUsize::compare_exchange`, only operating on 16 bytes of memory
+/// instead of just a pointer.
+///
+/// For more information on memory orderings here see the `compare_exchange`
+/// documentation for other `Atomic*` types in the standard library.
+///
+/// # Unsafety
+///
+/// This method is unsafe because it takes a raw pointer and will attempt to
+/// read and possibly write the memory at the pointer. The pointer must also be
+/// aligned on a 16-byte boundary.
+///
+/// This method also requires the `cmpxchg16b` CPU feature to be available at
+/// runtime to work correctly. If the CPU running the binary does not actually
+/// support `cmpxchg16b` and the program enters an execution path that
+/// eventually would reach this function the behavior is undefined.
+///
+/// The `success` ordering must also be stronger or equal to `failure`, or this
+/// function call is undefined. See the `Atomic*` documentation's
+/// `compare_exchange` function for more information. When `compare_exchange`
+/// panics, this is undefined behavior. 
Currently this function aborts the +/// process with an undefined instruction. +#[inline] +#[cfg_attr(test, assert_instr(cmpxchg16b, success = Ordering::SeqCst, failure = Ordering::SeqCst))] +#[target_feature(enable = "cmpxchg16b")] +pub unsafe fn cmpxchg16b( + dst: *mut u128, + old: u128, + new: u128, + success: Ordering, + failure: Ordering, +) -> u128 { + use crate::{intrinsics, sync::atomic::Ordering::*}; + + debug_assert!(dst as usize % 16 == 0); + + let (val, _ok) = match (success, failure) { + (Acquire, Acquire) => intrinsics::atomic_cxchg_acq(dst, old, new), + (Release, Relaxed) => intrinsics::atomic_cxchg_rel(dst, old, new), + (AcqRel, Acquire) => intrinsics::atomic_cxchg_acqrel(dst, old, new), + (Relaxed, Relaxed) => intrinsics::atomic_cxchg_relaxed(dst, old, new), + (SeqCst, SeqCst) => intrinsics::atomic_cxchg(dst, old, new), + (Acquire, Relaxed) => intrinsics::atomic_cxchg_acq_failrelaxed(dst, old, new), + (AcqRel, Relaxed) => intrinsics::atomic_cxchg_acqrel_failrelaxed(dst, old, new), + (SeqCst, Relaxed) => intrinsics::atomic_cxchg_failrelaxed(dst, old, new), + (SeqCst, Acquire) => intrinsics::atomic_cxchg_failacq(dst, old, new), + + // The above block is all copied from libcore, and this statement is + // also copied from libcore except that it's a panic in libcore and we + // have a little bit more of a lightweight panic here. + _ => crate::core_arch::x86::ud2(), + }; + val +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs new file mode 100644 index 000000000..d02702046 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/fxsr.rs @@ -0,0 +1,112 @@ +//! FXSR floating-point context fast save and restore. + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.fxsave64"] + fn fxsave64(p: *mut u8); + #[link_name = "llvm.x86.fxrstor64"] + fn fxrstor64(p: *const u8); +} + +/// Saves the `x87` FPU, `MMX` technology, `XMM`, and `MXCSR` registers to the +/// 512-byte-long 16-byte-aligned memory region `mem_addr`. +/// +/// A misaligned destination operand raises a general-protection (#GP) or an +/// alignment check exception (#AC). +/// +/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. +/// +/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html +/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxsave64) +#[inline] +#[target_feature(enable = "fxsr")] +#[cfg_attr(test, assert_instr(fxsave64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _fxsave64(mem_addr: *mut u8) { + fxsave64(mem_addr) +} + +/// Restores the `XMM`, `MMX`, `MXCSR`, and `x87` FPU registers from the +/// 512-byte-long 16-byte-aligned memory region `mem_addr`. +/// +/// The contents of this memory region should have been written to by a +/// previous +/// `_fxsave` or `_fxsave64` intrinsic. +/// +/// A misaligned destination operand raises a general-protection (#GP) or an +/// alignment check exception (#AC). +/// +/// See [`FXSAVE`][fxsave] and [`FXRSTOR`][fxrstor]. 
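+///
+/// A typical round trip (sketch; assumes a 16-byte-aligned, 512-byte
+/// buffer and the `fxsr` feature, as in the tests below):
+///
+/// ```ignore
+/// #[repr(align(16))]
+/// struct Area([u8; 512]);
+/// let mut area = Area([0; 512]);
+/// unsafe {
+///     _fxsave64(area.0.as_mut_ptr()); // save x87/MMX/XMM/MXCSR state
+///     _fxrstor64(area.0.as_ptr());    // restore that same state
+/// }
+/// ```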
+///
+/// [fxsave]: http://www.felixcloutier.com/x86/FXSAVE.html
+/// [fxrstor]: http://www.felixcloutier.com/x86/FXRSTOR.html
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_fxrstor64)
+#[inline]
+#[target_feature(enable = "fxsr")]
+#[cfg_attr(test, assert_instr(fxrstor64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _fxrstor64(mem_addr: *const u8) {
+    fxrstor64(mem_addr)
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::x86_64::*;
+    use std::{cmp::PartialEq, fmt};
+    use stdarch_test::simd_test;
+
+    #[repr(align(16))]
+    struct FxsaveArea {
+        data: [u8; 512], // 512 bytes
+    }
+
+    impl FxsaveArea {
+        fn new() -> FxsaveArea {
+            FxsaveArea { data: [0; 512] }
+        }
+        fn ptr(&mut self) -> *mut u8 {
+            &mut self.data[0] as *mut _ as *mut u8
+        }
+    }
+
+    impl PartialEq for FxsaveArea {
+        fn eq(&self, other: &FxsaveArea) -> bool {
+            for i in 0..self.data.len() {
+                if self.data[i] != other.data[i] {
+                    return false;
+                }
+            }
+            true
+        }
+    }
+
+    impl fmt::Debug for FxsaveArea {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            write!(f, "[")?;
+            for i in 0..self.data.len() {
+                write!(f, "{}", self.data[i])?;
+                if i != self.data.len() - 1 {
+                    write!(f, ", ")?;
+                }
+            }
+            write!(f, "]")
+        }
+    }
+
+    #[simd_test(enable = "fxsr")]
+    unsafe fn fxsave64() {
+        let mut a = FxsaveArea::new();
+        let mut b = FxsaveArea::new();
+
+        fxsr::_fxsave64(a.ptr());
+        fxsr::_fxrstor64(a.ptr());
+        fxsr::_fxsave64(b.ptr());
+        assert_eq!(a, b);
+    }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/macros.rs b/library/stdarch/crates/core_arch/src/x86_64/macros.rs
new file mode 100644
index 000000000..a3ea0e821
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/macros.rs
@@ -0,0 +1,36 @@
+//! Utility macros.
+
+// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is
+// not a valid rounding-mode value.
+pub(crate) struct ValidateConstRound<const IMM: i32>;
+impl<const IMM: i32> ValidateConstRound<IMM> {
+    pub(crate) const VALID: () = {
+        assert!(
+            IMM == 4 || IMM == 8 || IMM == 9 || IMM == 10 || IMM == 11,
+            "Invalid IMM value"
+        );
+    };
+}
+
+#[allow(unused)]
+macro_rules! static_assert_rounding {
+    ($imm:ident) => {
+        let _ = $crate::core_arch::x86_64::macros::ValidateConstRound::<$imm>::VALID;
+    };
+}
+
+// Helper struct used to trigger const eval errors when the const generic immediate value `imm` is
+// not a valid SAE value.
+pub(crate) struct ValidateConstSae<const IMM: i32>;
+impl<const IMM: i32> ValidateConstSae<IMM> {
+    pub(crate) const VALID: () = {
+        assert!(IMM == 4 || IMM == 8, "Invalid IMM value");
+    };
+}
+
+#[allow(unused)]
+macro_rules! static_assert_sae {
+    ($imm:ident) => {
+        let _ = $crate::core_arch::x86_64::macros::ValidateConstSae::<$imm>::VALID;
+    };
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/mod.rs b/library/stdarch/crates/core_arch/src/x86_64/mod.rs
new file mode 100644
index 000000000..461874ece
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/mod.rs
@@ -0,0 +1,55 @@
+//! `x86_64` intrinsics
+
+#[macro_use]
+mod macros;
+
+mod fxsr;
+pub use self::fxsr::*;
+
+mod sse;
+pub use self::sse::*;
+
+mod sse2;
+pub use self::sse2::*;
+
+mod sse41;
+pub use self::sse41::*;
+
+mod sse42;
+pub use self::sse42::*;
+
+mod xsave;
+pub use self::xsave::*;
+
+mod abm;
+pub use self::abm::*;
+
+mod avx;
+pub use self::avx::*;
+
+mod bmi;
+pub use self::bmi::*;
+
+mod bmi2;
+pub use self::bmi2::*;
+
+mod avx2;
+pub use self::avx2::*;
+
+mod avx512f;
+pub use self::avx512f::*;
+
+mod bswap;
+pub use self::bswap::*;
+
+mod rdrand;
+pub use self::rdrand::*;
+
+mod cmpxchg16b;
+pub use self::cmpxchg16b::*;
+
+mod adx;
+pub use self::adx::*;
+
+mod bt;
+pub use self::bt::*;
diff --git a/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs
new file mode 100644
index 000000000..e5ec933fb
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/rdrand.rs
@@ -0,0 +1,44 @@
+//! RDRAND and RDSEED instructions for returning random numbers from an Intel
+//! on-chip hardware random number generator which has been seeded by an
+//! on-chip entropy source.
+
+#![allow(clippy::module_name_repetitions)]
+
+#[allow(improper_ctypes)]
+extern "unadjusted" {
+    #[link_name = "llvm.x86.rdrand.64"]
+    fn x86_rdrand64_step() -> (u64, i32);
+    #[link_name = "llvm.x86.rdseed.64"]
+    fn x86_rdseed64_step() -> (u64, i32);
+}
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Reads a hardware-generated 64-bit random value and stores the result in
+/// `val`. Returns 1 if a random value was generated, and 0 otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdrand64_step)
+#[inline]
+#[target_feature(enable = "rdrand")]
+#[cfg_attr(test, assert_instr(rdrand))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _rdrand64_step(val: &mut u64) -> i32 {
+    let (v, flag) = x86_rdrand64_step();
+    *val = v;
+    flag
+}
+
+/// Reads a 64-bit NIST SP800-90B and SP800-90C compliant random value and
+/// stores it in `val`. Returns 1 if a random value was generated, and 0
+/// otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_rdseed64_step)
+#[inline]
+#[target_feature(enable = "rdseed")]
+#[cfg_attr(test, assert_instr(rdseed))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _rdseed64_step(val: &mut u64) -> i32 {
+    let (v, flag) = x86_rdseed64_step();
+    *val = v;
+    flag
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse.rs b/library/stdarch/crates/core_arch/src/x86_64/sse.rs
new file mode 100644
index 000000000..ca6799c90
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/sse.rs
@@ -0,0 +1,148 @@
+//! `x86_64` Streaming SIMD Extensions (SSE)
+
+use crate::core_arch::x86::*;
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.x86.sse.cvtss2si64"]
+    fn cvtss2si64(a: __m128) -> i64;
+    #[link_name = "llvm.x86.sse.cvttss2si64"]
+    fn cvttss2si64(a: __m128) -> i64;
+    #[link_name = "llvm.x86.sse.cvtsi642ss"]
+    fn cvtsi642ss(a: __m128, b: i64) -> __m128;
+}
+
+/// Converts the lowest 32 bit float in the input vector to a 64 bit integer.
+///
+/// The result is rounded according to the current rounding mode. 
If the result +/// cannot be represented as a 64 bit integer the result will be +/// `0x8000_0000_0000_0000` (`i64::MIN`) or trigger an invalid operation +/// floating point exception if unmasked (see +/// [`_mm_setcsr`](fn._mm_setcsr.html)). +/// +/// This corresponds to the `CVTSS2SI` instruction (with 64 bit output). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_si64) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvtss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_cvtss_si64(a: __m128) -> i64 { + cvtss2si64(a) +} + +/// Converts the lowest 32 bit float in the input vector to a 64 bit integer +/// with truncation. +/// +/// The result is rounded always using truncation (round towards zero). If the +/// result cannot be represented as a 64 bit integer the result will be +/// `0x8000_0000_0000_0000` (`i64::MIN`) or an invalid operation floating +/// point exception if unmasked (see [`_mm_setcsr`](fn._mm_setcsr.html)). +/// +/// This corresponds to the `CVTTSS2SI` instruction (with 64 bit output). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_si64) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvttss2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_cvttss_si64(a: __m128) -> i64 { + cvttss2si64(a) +} + +/// Converts a 64 bit integer to a 32 bit float. The result vector is the input +/// vector `a` with the lowest 32 bit float replaced by the converted integer. +/// +/// This intrinsic corresponds to the `CVTSI2SS` instruction (with 64 bit +/// input). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_ss) +#[inline] +#[target_feature(enable = "sse")] +#[cfg_attr(test, assert_instr(cvtsi2ss))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_cvtsi64_ss(a: __m128, b: i64) -> __m128 { + cvtsi642ss(a, b) +} + +#[cfg(test)] +mod tests { + use crate::core_arch::arch::x86_64::*; + use stdarch_test::simd_test; + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvtss_si64() { + let inputs = &[ + (42.0f32, 42i64), + (-31.4, -31), + (-33.5, -34), + (-34.5, -34), + (4.0e10, 40_000_000_000), + (4.0e-10, 0), + (f32::NAN, i64::MIN), + (2147483500.1, 2147483520), + (9.223371e18, 9223370937343148032), + ]; + for i in 0..inputs.len() { + let (xi, e) = inputs[i]; + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvtss_si64(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvtss_si64({:?}) = {}, expected: {}", + i, x, r, e + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvttss_si64() { + let inputs = &[ + (42.0f32, 42i64), + (-31.4, -31), + (-33.5, -33), + (-34.5, -34), + (10.999, 10), + (-5.99, -5), + (4.0e10, 40_000_000_000), + (4.0e-10, 0), + (f32::NAN, i64::MIN), + (2147483500.1, 2147483520), + (9.223371e18, 9223370937343148032), + (9.223372e18, i64::MIN), + ]; + for i in 0..inputs.len() { + let (xi, e) = inputs[i]; + let x = _mm_setr_ps(xi, 1.0, 3.0, 4.0); + let r = _mm_cvttss_si64(x); + assert_eq!( + e, r, + "TestCase #{} _mm_cvttss_si64({:?}) = {}, expected: {}", + i, x, r, e + ); + } + } + + #[simd_test(enable = "sse")] + unsafe fn test_mm_cvtsi64_ss() { + let inputs = &[ + (4555i64, 4555.0f32), + (322223333, 322223330.0), + (-432, -432.0), + (-322223333, -322223330.0), + (9223372036854775807, 9.223372e18), + (-9223372036854775808, -9.223372e18), + ]; + + 
for i in 0..inputs.len() { + let (x, f) = inputs[i]; + let a = _mm_setr_ps(5.0, 6.0, 7.0, 8.0); + let r = _mm_cvtsi64_ss(a, x); + let e = _mm_setr_ps(f, 6.0, 7.0, 8.0); + assert_eq_m128(e, r); + } + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse2.rs b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs new file mode 100644 index 000000000..f487a067f --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/sse2.rs @@ -0,0 +1,209 @@ +//! `x86_64`'s Streaming SIMD Extensions 2 (SSE2) + +use crate::{ + core_arch::{simd_llvm::*, x86::*}, + intrinsics, +}; + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.sse2.cvtsd2si64"] + fn cvtsd2si64(a: __m128d) -> i64; + #[link_name = "llvm.x86.sse2.cvttsd2si64"] + fn cvttsd2si64(a: __m128d) -> i64; +} + +/// Converts the lower double-precision (64-bit) floating-point element in a to +/// a 64-bit integer. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_cvtsd_si64(a: __m128d) -> i64 { + cvtsd2si64(a) +} + +/// Alias for `_mm_cvtsd_si64` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si64x) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvtsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_cvtsd_si64x(a: __m128d) -> i64 { + _mm_cvtsd_si64(a) +} + +/// Converts the lower double-precision (64-bit) floating-point element in `a` +/// to a 64-bit integer with truncation. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvttsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_cvttsd_si64(a: __m128d) -> i64 { + cvttsd2si64(a) +} + +/// Alias for `_mm_cvttsd_si64` +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si64x) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(cvttsd2si))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_cvttsd_si64x(a: __m128d) -> i64 { + _mm_cvttsd_si64(a) +} + +/// Stores a 64-bit integer value in the specified memory location. +/// To minimize caching, the data is flagged as non-temporal (unlikely to be +/// used again soon). +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si64) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(test, assert_instr(movnti))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_stream_si64(mem_addr: *mut i64, a: i64) { + intrinsics::nontemporal_store(mem_addr, a); +} + +/// Returns a vector whose lowest element is `a` and all higher elements are +/// `0`. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_si128) +#[inline] +#[target_feature(enable = "sse2")] +#[cfg_attr(all(test, not(windows)), assert_instr(movq))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_cvtsi64_si128(a: i64) -> __m128i { + _mm_set_epi64x(0, a) +} + +/// Returns a vector whose lowest element is `a` and all higher elements are +/// `0`. 
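+///
+/// For example (sketch; equivalent to `_mm_set_epi64x(0, 5)`):
+///
+/// ```ignore
+/// // Lane 0 holds 5, lane 1 is zeroed.
+/// let v = unsafe { _mm_cvtsi64x_si128(5) };
+/// ```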
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_si128)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi64x_si128(a: i64) -> __m128i {
+    _mm_cvtsi64_si128(a)
+}
+
+/// Returns the lowest element of `a`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi128_si64(a: __m128i) -> i64 {
+    simd_extract(a.as_i64x2(), 0)
+}
+
+/// Returns the lowest element of `a`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si64x)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(all(test, not(windows)), assert_instr(movq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi128_si64x(a: __m128i) -> i64 {
+    _mm_cvtsi128_si64(a)
+}
+
+/// Returns `a` with its lower element replaced by `b` after converting it to
+/// an `f64`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtsi2sd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi64_sd(a: __m128d, b: i64) -> __m128d {
+    simd_insert(a, 0, b as f64)
+}
+
+/// Returns `a` with its lower element replaced by `b` after converting it to
+/// an `f64`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi64x_sd)
+#[inline]
+#[target_feature(enable = "sse2")]
+#[cfg_attr(test, assert_instr(cvtsi2sd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cvtsi64x_sd(a: __m128d, b: i64) -> __m128d {
+    _mm_cvtsi64_sd(a, b)
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::arch::x86_64::*;
+    use std::boxed;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_cvtsd_si64() {
+        let r = _mm_cvtsd_si64(_mm_setr_pd(-2.0, 5.0));
+        assert_eq!(r, -2_i64);
+
+        let r = _mm_cvtsd_si64(_mm_setr_pd(f64::MAX, f64::MIN));
+        assert_eq!(r, i64::MIN);
+    }
+
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_cvtsd_si64x() {
+        let r = _mm_cvtsd_si64x(_mm_setr_pd(f64::NAN, f64::NAN));
+        assert_eq!(r, i64::MIN);
+    }
+
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_cvttsd_si64() {
+        let a = _mm_setr_pd(-1.1, 2.2);
+        let r = _mm_cvttsd_si64(a);
+        assert_eq!(r, -1_i64);
+    }
+
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_cvttsd_si64x() {
+        let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN);
+        let r = _mm_cvttsd_si64x(a);
+        assert_eq!(r, i64::MIN);
+    }
+
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_stream_si64() {
+        let a: i64 = 7;
+        let mut mem = boxed::Box::<i64>::new(-1);
+        _mm_stream_si64(&mut *mem as *mut i64, a);
+        assert_eq!(a, *mem);
+    }
+
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_cvtsi64_si128() {
+        let r = _mm_cvtsi64_si128(5);
+        assert_eq_m128i(r, _mm_setr_epi64x(5, 0));
+    }
+
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_cvtsi128_si64() {
+        let r = _mm_cvtsi128_si64(_mm_setr_epi64x(5, 0));
+        assert_eq!(r, 5);
+    }
+
+    #[simd_test(enable = "sse2")]
+    unsafe fn test_mm_cvtsi64_sd() {
+        let a = _mm_set1_pd(3.5);
+        let r = _mm_cvtsi64_sd(a, 5);
+        assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5));
+    }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse41.rs b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs
new file mode 100644
index 000000000..3d1ea0cf6
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/sse41.rs
@@ -0,0 +1,62 @@
+//! `x86_64`'s Streaming SIMD Extensions 4.1 (SSE4.1)
+
+use crate::{
+    core_arch::{simd_llvm::*, x86::*},
+    mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Extracts a 64-bit integer from `a` selected with `IMM1`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi64)
+#[inline]
+#[target_feature(enable = "sse4.1")]
+#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(pextrq, IMM1 = 1))]
+#[rustc_legacy_const_generics(1)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_extract_epi64<const IMM1: i32>(a: __m128i) -> i64 {
+    static_assert_imm1!(IMM1);
+    simd_extract(a.as_i64x2(), IMM1 as u32)
+}
+
+/// Returns a copy of `a` with the 64-bit integer from `i` inserted at a
+/// location specified by `IMM1`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi64)
+#[inline]
+#[target_feature(enable = "sse4.1")]
+#[cfg_attr(test, assert_instr(pinsrq, IMM1 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_insert_epi64<const IMM1: i32>(a: __m128i, i: i64) -> __m128i {
+    static_assert_imm1!(IMM1);
+    transmute(simd_insert(a.as_i64x2(), IMM1 as u32, i))
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::arch::x86_64::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "sse4.1")]
+    unsafe fn test_mm_extract_epi64() {
+        let a = _mm_setr_epi64x(0, 1);
+        let r = _mm_extract_epi64::<1>(a);
+        assert_eq!(r, 1);
+        let r = _mm_extract_epi64::<0>(a);
+        assert_eq!(r, 0);
+    }
+
+    #[simd_test(enable = "sse4.1")]
+    unsafe fn test_mm_insert_epi64() {
+        let a = _mm_set1_epi64x(0);
+        let e = _mm_setr_epi64x(0, 32);
+        let r = _mm_insert_epi64::<1>(a, 32);
+        assert_eq_m128i(r, e);
+        let e = _mm_setr_epi64x(32, 0);
+        let r = _mm_insert_epi64::<0>(a, 32);
+        assert_eq_m128i(r, e);
+    }
+}
diff --git a/library/stdarch/crates/core_arch/src/x86_64/sse42.rs b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs
new file mode 100644
index 000000000..6b5d087c1
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/sse42.rs
@@ -0,0 +1,37 @@
+//! `x86_64`'s Streaming SIMD Extensions 4.2 (SSE4.2)
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)]
+extern "C" {
+    #[link_name = "llvm.x86.sse42.crc32.64.64"]
+    fn crc32_64_64(crc: u64, v: u64) -> u64;
+}
+
+/// Starting with the initial value in `crc`, returns the accumulated
+/// CRC32-C value for unsigned 64-bit integer `v`.
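+///
+/// To checksum a whole buffer, feed each 64-bit word through in turn,
+/// passing the previous return value back in as `crc` (sketch; `words`
+/// is any `&[u64]` and `sse4.2` must be enabled at runtime):
+///
+/// ```ignore
+/// let mut crc = 0u64;
+/// for &w in words {
+///     crc = unsafe { _mm_crc32_u64(crc, w) };
+/// }
+/// ```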
+/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u64) +#[inline] +#[target_feature(enable = "sse4.2")] +#[cfg_attr(test, assert_instr(crc32))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _mm_crc32_u64(crc: u64, v: u64) -> u64 { + crc32_64_64(crc, v) +} + +#[cfg(test)] +mod tests { + use crate::core_arch::arch::x86_64::*; + + use stdarch_test::simd_test; + + #[simd_test(enable = "sse4.2")] + unsafe fn test_mm_crc32_u64() { + let crc = 0x7819dccd3e824; + let v = 0x2a22b845fed; + let i = _mm_crc32_u64(crc, v); + assert_eq!(i, 0xbb6cdc6c); + } +} diff --git a/library/stdarch/crates/core_arch/src/x86_64/xsave.rs b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs new file mode 100644 index 000000000..2afd3e433 --- /dev/null +++ b/library/stdarch/crates/core_arch/src/x86_64/xsave.rs @@ -0,0 +1,227 @@ +//! `x86_64`'s `xsave` and `xsaveopt` target feature intrinsics + +#![allow(clippy::module_name_repetitions)] + +#[cfg(test)] +use stdarch_test::assert_instr; + +#[allow(improper_ctypes)] +extern "C" { + #[link_name = "llvm.x86.xsave64"] + fn xsave64(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xrstor64"] + fn xrstor64(p: *const u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsaveopt64"] + fn xsaveopt64(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsavec64"] + fn xsavec64(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xsaves64"] + fn xsaves64(p: *mut u8, hi: u32, lo: u32); + #[link_name = "llvm.x86.xrstors64"] + fn xrstors64(p: *const u8, hi: u32, lo: u32); +} + +/// Performs a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits `[62:0]` in `save_mask` and XCR0. +/// `mem_addr` must be aligned on a 64-byte boundary. +/// +/// The format of the XSAVE area is detailed in Section 13.4, “XSAVE Area,” of +/// Intel® 64 and IA-32 Architectures Software Developer’s Manual, Volume 1. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsave64) +#[inline] +#[target_feature(enable = "xsave")] +#[cfg_attr(test, assert_instr(xsave64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xsave64(mem_addr: *mut u8, save_mask: u64) { + xsave64(mem_addr, (save_mask >> 32) as u32, save_mask as u32); +} + +/// Performs a full or partial restore of the enabled processor states using +/// the state information stored in memory at `mem_addr`. +/// +/// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and +/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte +/// boundary. +/// +/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstor64) +#[inline] +#[target_feature(enable = "xsave")] +#[cfg_attr(test, assert_instr(xrstor64))] +#[stable(feature = "simd_x86", since = "1.27.0")] +pub unsafe fn _xrstor64(mem_addr: *const u8, rs_mask: u64) { + xrstor64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32); +} + +/// Performs a full or partial save of the enabled processor states to memory at +/// `mem_addr`. +/// +/// State is saved based on bits `[62:0]` in `save_mask` and `XCR0`. +/// `mem_addr` must be aligned on a 64-byte boundary. The hardware may optimize +/// the manner in which data is saved. The performance of this instruction will +/// be equal to or better than using the `XSAVE64` instruction. 
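+///
+/// For example, an all-ones mask requests every state component that
+/// `XCR0` currently enables (sketch; `area` is a hypothetical 64-byte
+/// aligned buffer):
+///
+/// ```ignore
+/// // area: *mut u8, pointing at a 64-byte-aligned XSAVE area
+/// unsafe { _xsaveopt64(area, !0u64) };
+/// ```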
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaveopt64)
+#[inline]
+#[target_feature(enable = "xsave,xsaveopt")]
+#[cfg_attr(test, assert_instr(xsaveopt64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xsaveopt64(mem_addr: *mut u8, save_mask: u64) {
+    xsaveopt64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
+}
+
+/// Performs a full or partial save of the enabled processor states to memory
+/// at `mem_addr`.
+///
+/// `xsavec` differs from `xsave` in that it uses compaction and that it may
+/// use the init optimization. State is saved based on bits `[62:0]` in
+/// `save_mask` and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsavec64)
+#[inline]
+#[target_feature(enable = "xsave,xsavec")]
+#[cfg_attr(test, assert_instr(xsavec64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xsavec64(mem_addr: *mut u8, save_mask: u64) {
+    xsavec64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
+}
+
+/// Performs a full or partial save of the enabled processor states to memory at
+/// `mem_addr`.
+///
+/// `xsaves` differs from `xsave` in that it can save state components
+/// corresponding to bits set in the `IA32_XSS` MSR and that it may use the
+/// modified optimization. State is saved based on bits `[62:0]` in `save_mask`
+/// and `XCR0`. `mem_addr` must be aligned on a 64-byte boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xsaves64)
+#[inline]
+#[target_feature(enable = "xsave,xsaves")]
+#[cfg_attr(test, assert_instr(xsaves64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xsaves64(mem_addr: *mut u8, save_mask: u64) {
+    xsaves64(mem_addr, (save_mask >> 32) as u32, save_mask as u32);
+}
+
+/// Performs a full or partial restore of the enabled processor states using the
+/// state information stored in memory at `mem_addr`.
+///
+/// `xrstors` differs from `xrstor` in that it can restore state components
+/// corresponding to bits set in the `IA32_XSS` MSR; `xrstors` cannot restore
+/// from an `xsave` area in which the extended region is in the standard form.
+/// State is restored based on bits `[62:0]` in `rs_mask`, `XCR0`, and
+/// `mem_addr.HEADER.XSTATE_BV`. `mem_addr` must be aligned on a 64-byte
+/// boundary.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_xrstors64)
+#[inline]
+#[target_feature(enable = "xsave,xsaves")]
+#[cfg_attr(test, assert_instr(xrstors64))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _xrstors64(mem_addr: *const u8, rs_mask: u64) {
+    xrstors64(mem_addr, (rs_mask >> 32) as u32, rs_mask as u32);
+}
+
+// FIXME: https://github.com/rust-lang/stdarch/issues/209
+// All these tests fail with Intel SDE.
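+//
+// As an aside on the wrappers above: each one splits its 64-bit mask into the
+// EDX:EAX register pair that the underlying instruction expects. A sketch of
+// that split (hypothetical helper, shown for illustration only):
+//
+// fn split_mask(mask: u64) -> (u32, u32) {
+//     ((mask >> 32) as u32, mask as u32) // (hi -> EDX, lo -> EAX)
+// }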
+/*
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::x86_64::xsave;
+    use stdarch_test::simd_test;
+    use std::fmt;
+
+    // FIXME: https://github.com/rust-lang/stdarch/issues/209
+    #[repr(align(64))]
+    struct XsaveArea {
+        // max size for 256-bit registers is 800 bytes:
+        // see https://software.intel.com/en-us/node/682996
+        // max size for 512-bit registers is 2560 bytes:
+        // FIXME: add source
+        data: [u8; 2560],
+    }
+
+    impl XsaveArea {
+        fn new() -> XsaveArea {
+            XsaveArea { data: [0; 2560] }
+        }
+        fn ptr(&mut self) -> *mut u8 {
+            self.data.as_mut_ptr()
+        }
+    }
+
+    impl PartialEq for XsaveArea {
+        fn eq(&self, other: &XsaveArea) -> bool {
+            for i in 0..self.data.len() {
+                if self.data[i] != other.data[i] {
+                    return false;
+                }
+            }
+            true
+        }
+    }
+
+    impl fmt::Debug for XsaveArea {
+        fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+            write!(f, "[")?;
+            for i in 0..self.data.len() {
+                write!(f, "{}", self.data[i])?;
+                if i != self.data.len() - 1 {
+                    write!(f, ", ")?;
+                }
+            }
+            write!(f, "]")
+        }
+    }
+
+    #[simd_test(enable = "xsave")]
+    unsafe fn xsave64() {
+        let m = 0xFFFFFFFFFFFFFFFF_u64; // all registers
+        let mut a = XsaveArea::new();
+        let mut b = XsaveArea::new();
+
+        xsave::_xsave64(a.ptr(), m);
+        xsave::_xrstor64(a.ptr(), m);
+        xsave::_xsave64(b.ptr(), m);
+        assert_eq!(a, b);
+    }
+
+    #[simd_test(enable = "xsave,xsaveopt")]
+    unsafe fn xsaveopt64() {
+        let m = 0xFFFFFFFFFFFFFFFF_u64; // all registers
+        let mut a = XsaveArea::new();
+        let mut b = XsaveArea::new();
+
+        xsave::_xsaveopt64(a.ptr(), m);
+        xsave::_xrstor64(a.ptr(), m);
+        xsave::_xsaveopt64(b.ptr(), m);
+        assert_eq!(a, b);
+    }
+
+    #[simd_test(enable = "xsave,xsavec")]
+    unsafe fn xsavec64() {
+        let m = 0xFFFFFFFFFFFFFFFF_u64; // all registers
+        let mut a = XsaveArea::new();
+        let mut b = XsaveArea::new();
+
+        xsave::_xsavec64(a.ptr(), m);
+        xsave::_xrstor64(a.ptr(), m);
+        xsave::_xsavec64(b.ptr(), m);
+        assert_eq!(a, b);
+    }
+
+    #[simd_test(enable = "xsave,xsaves")]
+    unsafe fn xsaves64() {
+        let m = 0xFFFFFFFFFFFFFFFF_u64; // all registers
+        let mut a = XsaveArea::new();
+        let mut b = XsaveArea::new();
+
+        xsave::_xsaves64(a.ptr(), m);
+        xsave::_xrstors64(a.ptr(), m);
+        xsave::_xsaves64(b.ptr(), m);
+        assert_eq!(a, b);
+    }
+}
+*/
-- 
cgit v1.2.3