Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86/avx512cd.rs')
-rw-r--r--  library/stdarch/crates/core_arch/src/x86/avx512cd.rs  1170
1 file changed, 1170 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512cd.rs b/library/stdarch/crates/core_arch/src/x86/avx512cd.rs
new file mode 100644
index 000000000..ac9d3aed3
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86/avx512cd.rs
@@ -0,0 +1,1170 @@
+use crate::{
+ core_arch::{simd::*, simd_llvm::*, x86::*},
+ mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmw_epi32&expand=553)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
+pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
+ _mm512_set1_epi32(k as i32)
+}
+
+/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmw_epi32&expand=552)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
+pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
+ _mm256_set1_epi32(k as i32)
+}
+
+/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmw_epi32&expand=551)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
+pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
+ _mm_set1_epi32(k as i32)
+}
+
+/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmb_epi64&expand=550)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
+pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
+ _mm512_set1_epi64(k as i64)
+}
+
+/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmb_epi64&expand=549)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
+pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
+ _mm256_set1_epi64x(k as i64)
+}
+
+/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmb_epi64&expand=548)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
+pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
+ _mm_set1_epi64x(k as i64)
+}
+
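A minimal usage sketch, not part of the diff: _mm512_broadcastmw_epi32 splats a comparison mask into every 32-bit lane, which is handy for turning a __mmask16 back into vector form. The input values and helper name are illustrative assumptions.

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512cd")]
unsafe fn splat_mask_example() -> [i32; 16] {
    let a = _mm512_set1_epi32(7);
    let b = _mm512_setr_epi32(7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0, 7, 0);
    // Even lanes compare equal, so k == 0b0101_0101_0101_0101 (0x5555).
    let k: __mmask16 = _mm512_cmpeq_epi32_mask(a, b);
    // Every 32-bit lane of dst now holds the zero-extended mask value 0x5555.
    let dst = _mm512_broadcastmw_epi32(k);
    core::mem::transmute::<__m512i, [i32; 16]>(dst)
}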
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi32&expand=1248)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
+ transmute(vpconflictd(a.as_i32x16()))
+}
+
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi32&expand=1249)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+ let conflict = _mm512_conflict_epi32(a).as_i32x16();
+ transmute(simd_select_bitmask(k, conflict, src.as_i32x16()))
+}
+
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi32&expand=1250)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i {
+ let conflict = _mm512_conflict_epi32(a).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, conflict, zero))
+}
+
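A minimal sketch, not part of the diff, contrasting the writemask and zeromask forms described above: lanes whose mask bit is clear come from src in the mask_ variant and become zero in the maskz_ variant. The mask value is an illustrative assumption.

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512cd")]
unsafe fn masked_conflict_example() -> (__m512i, __m512i) {
    let a = _mm512_set1_epi32(1);
    let src = _mm512_set1_epi32(-1);
    let k: __mmask16 = 0b00000000_00000011; // only lanes 0 and 1 are written
    let merged = _mm512_mask_conflict_epi32(src, k, a); // lanes 2..15 keep -1 from src
    let zeroed = _mm512_maskz_conflict_epi32(k, a);     // lanes 2..15 become 0
    (merged, zeroed)
}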
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi32&expand=1245)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
+ transmute(vpconflictd256(a.as_i32x8()))
+}
+
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi32&expand=1246)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+ let conflict = _mm256_conflict_epi32(a).as_i32x8();
+ transmute(simd_select_bitmask(k, conflict, src.as_i32x8()))
+}
+
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi32&expand=1247)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i {
+ let conflict = _mm256_conflict_epi32(a).as_i32x8();
+ let zero = _mm256_setzero_si256().as_i32x8();
+ transmute(simd_select_bitmask(k, conflict, zero))
+}
+
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi32&expand=1242)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
+ transmute(vpconflictd128(a.as_i32x4()))
+}
+
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi32&expand=1243)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let conflict = _mm_conflict_epi32(a).as_i32x4();
+ transmute(simd_select_bitmask(k, conflict, src.as_i32x4()))
+}
+
+/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi32&expand=1244)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictd))]
+pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i {
+ let conflict = _mm_conflict_epi32(a).as_i32x4();
+ let zero = _mm_setzero_si128().as_i32x4();
+ transmute(simd_select_bitmask(k, conflict, zero))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi64&expand=1257)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
+ transmute(vpconflictq(a.as_i64x8()))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi64&expand=1258)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+ let conflict = _mm512_conflict_epi64(a).as_i64x8();
+ transmute(simd_select_bitmask(k, conflict, src.as_i64x8()))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi64&expand=1259)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i {
+ let conflict = _mm512_conflict_epi64(a).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, conflict, zero))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi64&expand=1254)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
+ transmute(vpconflictq256(a.as_i64x4()))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi64&expand=1255)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+ let conflict = _mm256_conflict_epi64(a).as_i64x4();
+ transmute(simd_select_bitmask(k, conflict, src.as_i64x4()))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi64&expand=1256)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i {
+ let conflict = _mm256_conflict_epi64(a).as_i64x4();
+ let zero = _mm256_setzero_si256().as_i64x4();
+ transmute(simd_select_bitmask(k, conflict, zero))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi64&expand=1251)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
+ transmute(vpconflictq128(a.as_i64x2()))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi64&expand=1252)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let conflict = _mm_conflict_epi64(a).as_i64x2();
+ transmute(simd_select_bitmask(k, conflict, src.as_i64x2()))
+}
+
+/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi64&expand=1253)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vpconflictq))]
+pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i {
+ let conflict = _mm_conflict_epi64(a).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, conflict, zero))
+}
+
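A minimal usage sketch, not part of the diff: the conflict intrinsics are typically used to detect duplicate indices before a scatter. Each lane of the result marks which lower lanes hold the same value, so the whole vector is zero exactly when every index is unique. The helper name below is an assumption for illustration.

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512cd")]
unsafe fn has_duplicate_indices(indices: __m512i) -> bool {
    // Non-zero conflict bits in any lane mean some earlier lane holds the same index.
    let conflicts = _mm512_conflict_epi32(indices);
    _mm512_test_epi32_mask(conflicts, conflicts) != 0
}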
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi32&expand=3491)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
+ transmute(vplzcntd(a.as_i32x16(), false))
+}
+
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi32&expand=3492)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
+ let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
+ transmute(simd_select_bitmask(k, zerocount, src.as_i32x16()))
+}
+
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi32&expand=3493)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i {
+ let zerocount = _mm512_lzcnt_epi32(a).as_i32x16();
+ let zero = _mm512_setzero_si512().as_i32x16();
+ transmute(simd_select_bitmask(k, zerocount, zero))
+}
+
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi32&expand=3488)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
+ transmute(vplzcntd256(a.as_i32x8(), false))
+}
+
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi32&expand=3489)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+ let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
+ transmute(simd_select_bitmask(k, zerocount, src.as_i32x8()))
+}
+
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi32&expand=3490)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i {
+ let zerocount = _mm256_lzcnt_epi32(a).as_i32x8();
+ let zero = _mm256_setzero_si256().as_i32x8();
+ transmute(simd_select_bitmask(k, zerocount, zero))
+}
+
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi32&expand=3485)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
+ transmute(vplzcntd128(a.as_i32x4(), false))
+}
+
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi32&expand=3486)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
+ transmute(simd_select_bitmask(k, zerocount, src.as_i32x4()))
+}
+
+/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi32&expand=3487)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntd))]
+pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i {
+ let zerocount = _mm_lzcnt_epi32(a).as_i32x4();
+ let zero = _mm_setzero_si128().as_i32x4();
+ transmute(simd_select_bitmask(k, zerocount, zero))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi64&expand=3500)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
+ transmute(vplzcntq(a.as_i64x8(), false))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi64&expand=3501)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
+ let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
+ transmute(simd_select_bitmask(k, zerocount, src.as_i64x8()))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi64&expand=3502)
+#[inline]
+#[target_feature(enable = "avx512cd")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i {
+ let zerocount = _mm512_lzcnt_epi64(a).as_i64x8();
+ let zero = _mm512_setzero_si512().as_i64x8();
+ transmute(simd_select_bitmask(k, zerocount, zero))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi64&expand=3497)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
+ transmute(vplzcntq256(a.as_i64x4(), false))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi64&expand=3498)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
+ let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
+ transmute(simd_select_bitmask(k, zerocount, src.as_i64x4()))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi64&expand=3499)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i {
+ let zerocount = _mm256_lzcnt_epi64(a).as_i64x4();
+ let zero = _mm256_setzero_si256().as_i64x4();
+ transmute(simd_select_bitmask(k, zerocount, zero))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi64&expand=3494)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
+ transmute(vplzcntq128(a.as_i64x2(), false))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi64&expand=3495)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
+ let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
+ transmute(simd_select_bitmask(k, zerocount, src.as_i64x2()))
+}
+
+/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi64&expand=3496)
+#[inline]
+#[target_feature(enable = "avx512cd,avx512vl")]
+#[cfg_attr(test, assert_instr(vplzcntq))]
+pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i {
+ let zerocount = _mm_lzcnt_epi64(a).as_i64x2();
+ let zero = _mm_setzero_si128().as_i64x2();
+ transmute(simd_select_bitmask(k, zerocount, zero))
+}
+
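A minimal sketch, not part of the diff: because the llvm.ctlz bindings below are called with `false` for the zero-is-undefined flag, a zero lane counts as 32 (or 64) leading zeros, matching Intel's definition. One common derived quantity is a per-lane floor(log2(x)) for non-zero x; the helper below is an illustrative assumption.

#[cfg(target_arch = "x86_64")]
use core::arch::x86_64::*;

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "avx512f,avx512cd")]
unsafe fn floor_log2_epi32(x: __m512i) -> __m512i {
    // 31 - lzcnt(x) is the index of the highest set bit; for x == 0 the lane becomes -1.
    _mm512_sub_epi32(_mm512_set1_epi32(31), _mm512_lzcnt_epi32(x))
}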
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.avx512.conflict.d.512"]
+ fn vpconflictd(a: i32x16) -> i32x16;
+ #[link_name = "llvm.x86.avx512.conflict.d.256"]
+ fn vpconflictd256(a: i32x8) -> i32x8;
+ #[link_name = "llvm.x86.avx512.conflict.d.128"]
+ fn vpconflictd128(a: i32x4) -> i32x4;
+
+ #[link_name = "llvm.x86.avx512.conflict.q.512"]
+ fn vpconflictq(a: i64x8) -> i64x8;
+ #[link_name = "llvm.x86.avx512.conflict.q.256"]
+ fn vpconflictq256(a: i64x4) -> i64x4;
+ #[link_name = "llvm.x86.avx512.conflict.q.128"]
+ fn vpconflictq128(a: i64x2) -> i64x2;
+
+ #[link_name = "llvm.ctlz.v16i32"]
+ fn vplzcntd(a: i32x16, nonzero: bool) -> i32x16;
+ #[link_name = "llvm.ctlz.v8i32"]
+ fn vplzcntd256(a: i32x8, nonzero: bool) -> i32x8;
+ #[link_name = "llvm.ctlz.v4i32"]
+ fn vplzcntd128(a: i32x4, nonzero: bool) -> i32x4;
+
+ #[link_name = "llvm.ctlz.v8i64"]
+ fn vplzcntq(a: i64x8, nonzero: bool) -> i64x8;
+ #[link_name = "llvm.ctlz.v4i64"]
+ fn vplzcntq256(a: i64x4, nonzero: bool) -> i64x4;
+ #[link_name = "llvm.ctlz.v2i64"]
+ fn vplzcntq128(a: i64x2, nonzero: bool) -> i64x2;
+}
+
+#[cfg(test)]
+mod tests {
+
+ use crate::core_arch::x86::*;
+ use stdarch_test::simd_test;
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_broadcastmw_epi32() {
+ let a: __mmask16 = 2;
+ let r = _mm512_broadcastmw_epi32(a);
+ let e = _mm512_set1_epi32(2);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_broadcastmw_epi32() {
+ let a: __mmask16 = 2;
+ let r = _mm256_broadcastmw_epi32(a);
+ let e = _mm256_set1_epi32(2);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_broadcastmw_epi32() {
+ let a: __mmask16 = 2;
+ let r = _mm_broadcastmw_epi32(a);
+ let e = _mm_set1_epi32(2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_broadcastmb_epi64() {
+ let a: __mmask8 = 2;
+ let r = _mm512_broadcastmb_epi64(a);
+ let e = _mm512_set1_epi64(2);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_broadcastmb_epi64() {
+ let a: __mmask8 = 2;
+ let r = _mm256_broadcastmb_epi64(a);
+ let e = _mm256_set1_epi64x(2);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_broadcastmb_epi64() {
+ let a: __mmask8 = 2;
+ let r = _mm_broadcastmb_epi64(a);
+ let e = _mm_set1_epi64x(2);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_conflict_epi32() {
+ let a = _mm512_set1_epi32(1);
+ let r = _mm512_conflict_epi32(a);
+ let e = _mm512_set_epi32(
+ 1 << 14
+ | 1 << 13
+ | 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 13
+ | 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_mask_conflict_epi32() {
+ let a = _mm512_set1_epi32(1);
+ let r = _mm512_mask_conflict_epi32(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_conflict_epi32(a, 0b11111111_11111111, a);
+ let e = _mm512_set_epi32(
+ 1 << 14
+ | 1 << 13
+ | 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 13
+ | 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_maskz_conflict_epi32() {
+ let a = _mm512_set1_epi32(1);
+ let r = _mm512_maskz_conflict_epi32(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_conflict_epi32(0b11111111_11111111, a);
+ let e = _mm512_set_epi32(
+ 1 << 14
+ | 1 << 13
+ | 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 13
+ | 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 12
+ | 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 11
+ | 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 10
+ | 1 << 9
+ | 1 << 8
+ | 1 << 7
+ | 1 << 6
+ | 1 << 5
+ | 1 << 4
+ | 1 << 3
+ | 1 << 2
+ | 1 << 1
+ | 1 << 0,
+ 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_conflict_epi32() {
+ let a = _mm256_set1_epi32(1);
+ let r = _mm256_conflict_epi32(a);
+ let e = _mm256_set_epi32(
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_mask_conflict_epi32() {
+ let a = _mm256_set1_epi32(1);
+ let r = _mm256_mask_conflict_epi32(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_conflict_epi32(a, 0b11111111, a);
+ let e = _mm256_set_epi32(
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_maskz_conflict_epi32() {
+ let a = _mm256_set1_epi32(1);
+ let r = _mm256_maskz_conflict_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_conflict_epi32(0b11111111, a);
+ let e = _mm256_set_epi32(
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_conflict_epi32() {
+ let a = _mm_set1_epi32(1);
+ let r = _mm_conflict_epi32(a);
+ let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_mask_conflict_epi32() {
+ let a = _mm_set1_epi32(1);
+ let r = _mm_mask_conflict_epi32(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_conflict_epi32(a, 0b00001111, a);
+ let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_maskz_conflict_epi32() {
+ let a = _mm_set1_epi32(1);
+ let r = _mm_maskz_conflict_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_conflict_epi32(0b00001111, a);
+ let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_conflict_epi64() {
+ let a = _mm512_set1_epi64(1);
+ let r = _mm512_conflict_epi64(a);
+ let e = _mm512_set_epi64(
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_mask_conflict_epi64() {
+ let a = _mm512_set1_epi64(1);
+ let r = _mm512_mask_conflict_epi64(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_conflict_epi64(a, 0b11111111, a);
+ let e = _mm512_set_epi64(
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_maskz_conflict_epi64() {
+ let a = _mm512_set1_epi64(1);
+ let r = _mm512_maskz_conflict_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_conflict_epi64(0b11111111, a);
+ let e = _mm512_set_epi64(
+ 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 2 | 1 << 1 | 1 << 0,
+ 1 << 1 | 1 << 0,
+ 1 << 0,
+ 0,
+ );
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_conflict_epi64() {
+ let a = _mm256_set1_epi64x(1);
+ let r = _mm256_conflict_epi64(a);
+ let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_mask_conflict_epi64() {
+ let a = _mm256_set1_epi64x(1);
+ let r = _mm256_mask_conflict_epi64(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_conflict_epi64(a, 0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_maskz_conflict_epi64() {
+ let a = _mm256_set1_epi64x(1);
+ let r = _mm256_maskz_conflict_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_conflict_epi64(0b00001111, a);
+ let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_conflict_epi64() {
+ let a = _mm_set1_epi64x(1);
+ let r = _mm_conflict_epi64(a);
+ let e = _mm_set_epi64x(1 << 0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_mask_conflict_epi64() {
+ let a = _mm_set1_epi64x(1);
+ let r = _mm_mask_conflict_epi64(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_conflict_epi64(a, 0b00000011, a);
+ let e = _mm_set_epi64x(1 << 0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_maskz_conflict_epi64() {
+ let a = _mm_set1_epi64x(1);
+ let r = _mm_maskz_conflict_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_conflict_epi64(0b00000011, a);
+ let e = _mm_set_epi64x(1 << 0, 0);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_lzcnt_epi32() {
+ let a = _mm512_set1_epi32(1);
+ let r = _mm512_lzcnt_epi32(a);
+ let e = _mm512_set1_epi32(31);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_mask_lzcnt_epi32() {
+ let a = _mm512_set1_epi32(1);
+ let r = _mm512_mask_lzcnt_epi32(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_lzcnt_epi32(a, 0b11111111_11111111, a);
+ let e = _mm512_set1_epi32(31);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_maskz_lzcnt_epi32() {
+ let a = _mm512_set1_epi32(2);
+ let r = _mm512_maskz_lzcnt_epi32(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_lzcnt_epi32(0b11111111_11111111, a);
+ let e = _mm512_set1_epi32(30);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_lzcnt_epi32() {
+ let a = _mm256_set1_epi32(1);
+ let r = _mm256_lzcnt_epi32(a);
+ let e = _mm256_set1_epi32(31);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_mask_lzcnt_epi32() {
+ let a = _mm256_set1_epi32(1);
+ let r = _mm256_mask_lzcnt_epi32(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_lzcnt_epi32(a, 0b11111111, a);
+ let e = _mm256_set1_epi32(31);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_maskz_lzcnt_epi32() {
+ let a = _mm256_set1_epi32(1);
+ let r = _mm256_maskz_lzcnt_epi32(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_lzcnt_epi32(0b11111111, a);
+ let e = _mm256_set1_epi32(31);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_lzcnt_epi32() {
+ let a = _mm_set1_epi32(1);
+ let r = _mm_lzcnt_epi32(a);
+ let e = _mm_set1_epi32(31);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_mask_lzcnt_epi32() {
+ let a = _mm_set1_epi32(1);
+ let r = _mm_mask_lzcnt_epi32(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_lzcnt_epi32(a, 0b00001111, a);
+ let e = _mm_set1_epi32(31);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_maskz_lzcnt_epi32() {
+ let a = _mm_set1_epi32(1);
+ let r = _mm_maskz_lzcnt_epi32(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_lzcnt_epi32(0b00001111, a);
+ let e = _mm_set1_epi32(31);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_lzcnt_epi64() {
+ let a = _mm512_set1_epi64(1);
+ let r = _mm512_lzcnt_epi64(a);
+ let e = _mm512_set1_epi64(63);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_mask_lzcnt_epi64() {
+ let a = _mm512_set1_epi64(1);
+ let r = _mm512_mask_lzcnt_epi64(a, 0, a);
+ assert_eq_m512i(r, a);
+ let r = _mm512_mask_lzcnt_epi64(a, 0b11111111, a);
+ let e = _mm512_set1_epi64(63);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd")]
+ unsafe fn test_mm512_maskz_lzcnt_epi64() {
+ let a = _mm512_set1_epi64(2);
+ let r = _mm512_maskz_lzcnt_epi64(0, a);
+ assert_eq_m512i(r, _mm512_setzero_si512());
+ let r = _mm512_maskz_lzcnt_epi64(0b11111111, a);
+ let e = _mm512_set1_epi64(62);
+ assert_eq_m512i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_lzcnt_epi64() {
+ let a = _mm256_set1_epi64x(1);
+ let r = _mm256_lzcnt_epi64(a);
+ let e = _mm256_set1_epi64x(63);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_mask_lzcnt_epi64() {
+ let a = _mm256_set1_epi64x(1);
+ let r = _mm256_mask_lzcnt_epi64(a, 0, a);
+ assert_eq_m256i(r, a);
+ let r = _mm256_mask_lzcnt_epi64(a, 0b00001111, a);
+ let e = _mm256_set1_epi64x(63);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm256_maskz_lzcnt_epi64() {
+ let a = _mm256_set1_epi64x(1);
+ let r = _mm256_maskz_lzcnt_epi64(0, a);
+ assert_eq_m256i(r, _mm256_setzero_si256());
+ let r = _mm256_maskz_lzcnt_epi64(0b00001111, a);
+ let e = _mm256_set1_epi64x(63);
+ assert_eq_m256i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_lzcnt_epi64() {
+ let a = _mm_set1_epi64x(1);
+ let r = _mm_lzcnt_epi64(a);
+ let e = _mm_set1_epi64x(63);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_mask_lzcnt_epi64() {
+ let a = _mm_set1_epi64x(1);
+ let r = _mm_mask_lzcnt_epi64(a, 0, a);
+ assert_eq_m128i(r, a);
+ let r = _mm_mask_lzcnt_epi64(a, 0b00001111, a);
+ let e = _mm_set1_epi64x(63);
+ assert_eq_m128i(r, e);
+ }
+
+ #[simd_test(enable = "avx512cd,avx512vl")]
+ unsafe fn test_mm_maskz_lzcnt_epi64() {
+ let a = _mm_set1_epi64x(1);
+ let r = _mm_maskz_lzcnt_epi64(0, a);
+ assert_eq_m128i(r, _mm_setzero_si128());
+ let r = _mm_maskz_lzcnt_epi64(0b00001111, a);
+ let e = _mm_set1_epi64x(63);
+ assert_eq_m128i(r, e);
+ }
+}