diff options
Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86')
14 files changed, 201 insertions, 204 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs index ad9e68db6..f8e83a35b 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx.rs @@ -2450,7 +2450,7 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i { ) } -/// Broadcasts 16-bit integer `a` to all all elements of returned vector. +/// Broadcasts 16-bit integer `a` to all elements of returned vector. /// This intrinsic may generate the `vpbroadcastw`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi16) diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs index 16add3dbb..8638b3136 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx2.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs @@ -1857,7 +1857,9 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25 #[cfg_attr(test, assert_instr(vpmaxsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(pmaxsw(a.as_i16x16(), b.as_i16x16())) + let a = a.as_i16x16(); + let b = b.as_i16x16(); + transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b)) } /// Compares packed 32-bit integers in `a` and `b`, and returns the packed @@ -1869,7 +1871,9 @@ pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpmaxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(pmaxsd(a.as_i32x8(), b.as_i32x8())) + let a = a.as_i32x8(); + let b = b.as_i32x8(); + transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b)) } /// Compares packed 8-bit integers in `a` and `b`, and returns the packed @@ -1881,7 +1885,9 @@ pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpmaxsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(pmaxsb(a.as_i8x32(), b.as_i8x32())) + let a = a.as_i8x32(); + let b = b.as_i8x32(); + transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b)) } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns @@ -1893,7 +1899,9 @@ pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpmaxuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { - transmute(pmaxuw(a.as_u16x16(), b.as_u16x16())) + let a = a.as_u16x16(); + let b = b.as_u16x16(); + transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b)) } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns @@ -1905,7 +1913,9 @@ pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpmaxud))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { - transmute(pmaxud(a.as_u32x8(), b.as_u32x8())) + let a = a.as_u32x8(); + let b = b.as_u32x8(); + transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b)) } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns @@ -1917,7 +1927,9 @@ pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpmaxub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { - transmute(pmaxub(a.as_u8x32(), b.as_u8x32())) + let a = a.as_u8x32(); + let b = b.as_u8x32(); + transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b)) } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed @@ -1929,7 +1941,9 @@ pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpminsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { - transmute(pminsw(a.as_i16x16(), b.as_i16x16())) + let a = a.as_i16x16(); + let b = b.as_i16x16(); + transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b)) } /// Compares packed 32-bit integers in `a` and `b`, and returns the packed @@ -1941,7 +1955,9 @@ pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpminsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { - transmute(pminsd(a.as_i32x8(), b.as_i32x8())) + let a = a.as_i32x8(); + let b = b.as_i32x8(); + transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b)) } /// Compares packed 8-bit integers in `a` and `b`, and returns the packed @@ -1953,7 +1969,9 @@ pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpminsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { - transmute(pminsb(a.as_i8x32(), b.as_i8x32())) + let a = a.as_i8x32(); + let b = b.as_i8x32(); + transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b)) } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns @@ -1965,7 +1983,9 @@ pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpminuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { - transmute(pminuw(a.as_u16x16(), b.as_u16x16())) + let a = a.as_u16x16(); + let b = b.as_u16x16(); + transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b)) } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns @@ -1977,7 +1997,9 @@ pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpminud))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { - transmute(pminud(a.as_u32x8(), b.as_u32x8())) + let a = a.as_u32x8(); + let b = b.as_u32x8(); + transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b)) } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns @@ -1989,7 +2011,9 @@ pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpminub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { - transmute(pminub(a.as_u8x32(), b.as_u8x32())) + let a = a.as_u8x32(); + let b = b.as_u8x32(); + transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b)) } /// Creates mask from the most significant bit of each 8-bit element in `a`, @@ -2001,7 +2025,9 @@ pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i { #[cfg_attr(test, assert_instr(vpmovmskb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 { - simd_bitmask::<_, u32>(a.as_i8x32()) as i32 + let z = i8x32::splat(0); + let m: i8x32 = simd_lt(a.as_i8x32(), z); + simd_bitmask::<_, u32>(m) as i32 } /// Computes the sum of absolute differences (SADs) of quadruplets of unsigned @@ -3618,30 +3644,6 @@ extern "C" { fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2); #[link_name = "llvm.x86.avx2.maskstore.q.256"] fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4); - #[link_name = "llvm.x86.avx2.pmaxs.w"] - fn pmaxsw(a: i16x16, b: i16x16) -> i16x16; - #[link_name = "llvm.x86.avx2.pmaxs.d"] - fn pmaxsd(a: i32x8, b: i32x8) -> i32x8; - #[link_name = "llvm.x86.avx2.pmaxs.b"] - fn pmaxsb(a: i8x32, b: i8x32) -> i8x32; - #[link_name = "llvm.x86.avx2.pmaxu.w"] - fn pmaxuw(a: u16x16, b: u16x16) -> u16x16; - #[link_name = "llvm.x86.avx2.pmaxu.d"] - fn pmaxud(a: u32x8, b: u32x8) -> u32x8; - #[link_name = "llvm.x86.avx2.pmaxu.b"] - fn pmaxub(a: u8x32, b: u8x32) -> u8x32; - #[link_name = "llvm.x86.avx2.pmins.w"] - fn pminsw(a: i16x16, b: i16x16) -> i16x16; - #[link_name = "llvm.x86.avx2.pmins.d"] - fn pminsd(a: i32x8, b: i32x8) -> i32x8; - #[link_name = "llvm.x86.avx2.pmins.b"] - fn pminsb(a: i8x32, b: i8x32) -> i8x32; - #[link_name = "llvm.x86.avx2.pminu.w"] - fn pminuw(a: u16x16, b: u16x16) -> u16x16; - #[link_name = "llvm.x86.avx2.pminu.d"] - fn pminud(a: u32x8, b: u32x8) -> u32x8; - #[link_name = "llvm.x86.avx2.pminu.b"] - fn pminub(a: u8x32, b: u8x32) -> u8x32; #[link_name = "llvm.x86.avx2.mpsadbw"] fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16; #[link_name = "llvm.x86.avx2.pmulhu.w"] diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs b/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs index e9977e018..b21ededab 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs @@ -80,7 +80,7 @@ pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh { } /// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b -/// to packed BF16 (16-bit) floating-point elements and and store the results in single vector +/// to packed BF16 (16-bit) floating-point elements and store the results in single vector /// dst using writemask k (elements are copied from src when the corresponding mask bit is not set). /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654&avx512techs=AVX512_BF16&text=_mm256_mask_cvtne2ps_pbh) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs b/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs index 3c9df3912..1099ee2cb 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs @@ -303,7 +303,7 @@ pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __ } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. -/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. /// It then selects these bits and packs them into the output. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bitshuffle_epi64_mask) @@ -315,7 +315,7 @@ pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. -/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. /// It then selects these bits and packs them into the output. /// /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. @@ -330,7 +330,7 @@ pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. -/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. /// It then selects these bits and packs them into the output. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bitshuffle_epi64_mask) @@ -342,7 +342,7 @@ pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. -/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. /// It then selects these bits and packs them into the output. /// /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. @@ -357,7 +357,7 @@ pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. -/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. /// It then selects these bits and packs them into the output. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bitshuffle_epi64_mask) @@ -369,7 +369,7 @@ pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 { } /// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers. -/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer. +/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer. /// It then selects these bits and packs them into the output. /// /// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set. diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs index 49d78ed60..fbf71dfc4 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs @@ -7450,7 +7450,7 @@ pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i { transmute(simd_select_bitmask(k, r, zero)) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst. +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflelo_epi16&expand=5221) #[inline] @@ -7501,7 +7501,7 @@ pub unsafe fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i { transmute(r) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflelo_epi16&expand=5219) #[inline] @@ -7518,7 +7518,7 @@ pub unsafe fn _mm512_mask_shufflelo_epi16<const IMM8: i32>( transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflelo_epi16&expand=5220) #[inline] @@ -7532,7 +7532,7 @@ pub unsafe fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflelo_epi16&expand=5216) #[inline] @@ -7549,7 +7549,7 @@ pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>( transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflelo_epi16&expand=5217) #[inline] @@ -7563,7 +7563,7 @@ pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero)) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflelo_epi16&expand=5213) #[inline] @@ -7580,7 +7580,7 @@ pub unsafe fn _mm_mask_shufflelo_epi16<const IMM8: i32>( transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) } -/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflelo_epi16&expand=5214) #[inline] @@ -7594,7 +7594,7 @@ pub unsafe fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero)) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst. +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflehi_epi16&expand=5212) #[inline] @@ -7645,7 +7645,7 @@ pub unsafe fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i { transmute(r) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflehi_epi16&expand=5210) #[inline] @@ -7662,7 +7662,7 @@ pub unsafe fn _mm512_mask_shufflehi_epi16<const IMM8: i32>( transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32())) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflehi_epi16&expand=5211) #[inline] @@ -7676,7 +7676,7 @@ pub unsafe fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m transmute(simd_select_bitmask(k, r.as_i16x32(), zero)) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflehi_epi16&expand=5207) #[inline] @@ -7693,7 +7693,7 @@ pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>( transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16())) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflehi_epi16&expand=5208) #[inline] @@ -7707,7 +7707,7 @@ pub unsafe fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero)) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflehi_epi16&expand=5204) #[inline] @@ -7724,7 +7724,7 @@ pub unsafe fn _mm_mask_shufflehi_epi16<const IMM8: i32>( transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8())) } -/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). +/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set). /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflehi_epi16&expand=5205) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs index f70a28466..0ddb51283 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs +++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs @@ -26268,7 +26268,7 @@ pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i { transmute(i8x64::splat(a)) } -/// Broadcast the low packed 16-bit integer from a to all all elements of dst. +/// Broadcast the low packed 16-bit integer from a to all elements of dst. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi16&expand=4944) #[inline] diff --git a/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs b/library/stdarch/crates/core_arch/src/x86/gfni.rs index 66fd1c2e1..679b2548a 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs +++ b/library/stdarch/crates/core_arch/src/x86/gfni.rs @@ -65,7 +65,7 @@ extern "C" { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i { transmute(vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64())) @@ -80,7 +80,7 @@ pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm512_mask_gf2p8mul_epi8( src: __m512i, @@ -104,7 +104,7 @@ pub unsafe fn _mm512_mask_gf2p8mul_epi8( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i { let zero = _mm512_setzero_si512().as_i8x64(); @@ -121,7 +121,7 @@ pub unsafe fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) - /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i { transmute(vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32())) @@ -136,7 +136,7 @@ pub unsafe fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm256_mask_gf2p8mul_epi8( src: __m256i, @@ -160,7 +160,7 @@ pub unsafe fn _mm256_mask_gf2p8mul_epi8( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i { let zero = _mm256_setzero_si256().as_i8x32(); @@ -177,8 +177,8 @@ pub unsafe fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) - /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vgf2p8mulb))] +#[target_feature(enable = "gfni")] +#[cfg_attr(test, assert_instr(gf2p8mulb))] pub unsafe fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i { transmute(vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16())) } @@ -192,7 +192,7 @@ pub unsafe fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm_mask_gf2p8mul_epi8( src: __m128i, @@ -216,7 +216,7 @@ pub unsafe fn _mm_mask_gf2p8mul_epi8( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8mul_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8mulb))] pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i { let zero = _mm_setzero_si128().as_i8x16(); @@ -234,7 +234,7 @@ pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> _ /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i { @@ -256,7 +256,7 @@ pub unsafe fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>( @@ -283,7 +283,7 @@ pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>( @@ -307,7 +307,7 @@ pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i { @@ -329,7 +329,7 @@ pub unsafe fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>( @@ -356,7 +356,7 @@ pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>( @@ -380,8 +380,8 @@ pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] +#[target_feature(enable = "gfni")] +#[cfg_attr(test, assert_instr(gf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i { static_assert_imm8!(B); @@ -402,7 +402,7 @@ pub unsafe fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) - /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>( @@ -429,7 +429,7 @@ pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8affine_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>( @@ -455,7 +455,7 @@ pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i { @@ -479,7 +479,7 @@ pub unsafe fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m5 /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>( @@ -508,7 +508,7 @@ pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512f")] +#[target_feature(enable = "gfni,avx512bw,avx512f")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>( @@ -534,7 +534,7 @@ pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i { @@ -558,7 +558,7 @@ pub unsafe fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m2 /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>( @@ -587,7 +587,7 @@ pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>( @@ -613,8 +613,8 @@ pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] -#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] +#[target_feature(enable = "gfni")] +#[cfg_attr(test, assert_instr(gf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i { static_assert_imm8!(B); @@ -637,7 +637,7 @@ pub unsafe fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(3)] pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>( @@ -666,7 +666,7 @@ pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>( /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8affineinv_epi64_epi8) #[inline] -#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")] +#[target_feature(enable = "gfni,avx512bw,avx512vl")] #[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))] #[rustc_legacy_const_generics(4)] pub unsafe fn _mm_mask_gf2p8affineinv_epi64_epi8<const B: i32>( @@ -847,7 +847,7 @@ mod tests { _mm512_loadu_si512(black_box(pointer)) } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -860,7 +860,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_maskz_gf2p8mul_epi8() { let (left, right, _expected) = generate_byte_mul_test_data(); @@ -879,7 +879,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_mask_gf2p8mul_epi8() { let (left, right, _expected) = generate_byte_mul_test_data(); @@ -897,7 +897,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -910,7 +910,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_maskz_gf2p8mul_epi8() { let (left, right, _expected) = generate_byte_mul_test_data(); @@ -929,7 +929,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_mask_gf2p8mul_epi8() { let (left, right, _expected) = generate_byte_mul_test_data(); @@ -947,7 +947,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_gf2p8mul_epi8() { let (left, right, expected) = generate_byte_mul_test_data(); @@ -960,7 +960,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_maskz_gf2p8mul_epi8() { let (left, right, _expected) = generate_byte_mul_test_data(); @@ -979,7 +979,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_mask_gf2p8mul_epi8() { let (left, right, _expected) = generate_byte_mul_test_data(); @@ -997,7 +997,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1031,7 +1031,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1053,7 +1053,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1074,7 +1074,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1108,7 +1108,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1130,7 +1130,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1151,7 +1151,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_gf2p8affine_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1185,7 +1185,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1206,7 +1206,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1227,7 +1227,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1271,7 +1271,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1293,7 +1293,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw")] + #[simd_test(enable = "gfni,avx512bw")] unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1315,7 +1315,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1359,7 +1359,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1381,7 +1381,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1403,7 +1403,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_gf2p8affineinv_epi64_epi8() { let identity: i64 = 0x01_02_04_08_10_20_40_80; const IDENTITY_BYTE: i32 = 0; @@ -1447,7 +1447,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); @@ -1469,7 +1469,7 @@ mod tests { } } - #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")] + #[simd_test(enable = "gfni,avx512bw,avx512vl")] unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() { const CONSTANT_BYTE: i32 = 0x63; let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8); diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs index 6b50e95b2..37045e40e 100644 --- a/library/stdarch/crates/core_arch/src/x86/mod.rs +++ b/library/stdarch/crates/core_arch/src/x86/mod.rs @@ -835,17 +835,17 @@ pub use self::avx512vnni::*; mod avx512bitalg; pub use self::avx512bitalg::*; -mod avx512gfni; -pub use self::avx512gfni::*; +mod gfni; +pub use self::gfni::*; mod avx512vpopcntdq; pub use self::avx512vpopcntdq::*; -mod avx512vaes; -pub use self::avx512vaes::*; +mod vaes; +pub use self::vaes::*; -mod avx512vpclmulqdq; -pub use self::avx512vpclmulqdq::*; +mod vpclmulqdq; +pub use self::vpclmulqdq::*; mod bt; pub use self::bt::*; diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs index 03c3a14a5..f21288970 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse.rs @@ -1080,10 +1080,7 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 { /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ps) #[inline] #[target_feature(enable = "sse")] -// FIXME: LLVM9 trunk has the following bug: -// https://github.com/rust-lang/stdarch/issues/794 -// so we only temporarily test this on i686 and x86_64 but not on i586: -#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(movmskps))] +#[cfg_attr(test, assert_instr(movmskps))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 { movmskps(a) diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs index 3e79b3539..cde4bc316 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse2.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs @@ -203,7 +203,9 @@ pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pmaxsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(pmaxsw(a.as_i16x8(), b.as_i16x8())) + let a = a.as_i16x8(); + let b = b.as_i16x8(); + transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b)) } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the @@ -215,7 +217,9 @@ pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pmaxub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { - transmute(pmaxub(a.as_u8x16(), b.as_u8x16())) + let a = a.as_u8x16(); + let b = b.as_u8x16(); + transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b)) } /// Compares packed 16-bit integers in `a` and `b`, and returns the packed @@ -227,7 +231,9 @@ pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pminsw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { - transmute(pminsw(a.as_i16x8(), b.as_i16x8())) + let a = a.as_i16x8(); + let b = b.as_i16x8(); + transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b)) } /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the @@ -239,7 +245,9 @@ pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pminub))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { - transmute(pminub(a.as_u8x16(), b.as_u8x16())) + let a = a.as_u8x16(); + let b = b.as_u8x16(); + transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b)) } /// Multiplies the packed 16-bit integers in `a` and `b`. @@ -1378,7 +1386,9 @@ pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { #[cfg_attr(test, assert_instr(pmovmskb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { - simd_bitmask::<_, u16>(a.as_i8x16()) as u32 as i32 + let z = i8x16::splat(0); + let m: i8x16 = simd_lt(a.as_i8x16(), z); + simd_bitmask::<_, u16>(m) as u32 as i32 } /// Shuffles 32-bit integers in `a` using the control in `IMM8`. @@ -1409,7 +1419,7 @@ pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i { /// `IMM8`. /// /// Put the results in the high 64 bits of the returned vector, with the low 64 -/// bits being copied from from `a`. +/// bits being copied from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16) #[inline] @@ -1441,7 +1451,7 @@ pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i { /// `IMM8`. /// /// Put the results in the low 64 bits of the returned vector, with the high 64 -/// bits being copied from from `a`. +/// bits being copied from `a`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16) #[inline] @@ -2796,14 +2806,6 @@ extern "C" { fn pavgw(a: u16x8, b: u16x8) -> u16x8; #[link_name = "llvm.x86.sse2.pmadd.wd"] fn pmaddwd(a: i16x8, b: i16x8) -> i32x4; - #[link_name = "llvm.x86.sse2.pmaxs.w"] - fn pmaxsw(a: i16x8, b: i16x8) -> i16x8; - #[link_name = "llvm.x86.sse2.pmaxu.b"] - fn pmaxub(a: u8x16, b: u8x16) -> u8x16; - #[link_name = "llvm.x86.sse2.pmins.w"] - fn pminsw(a: i16x8, b: i16x8) -> i16x8; - #[link_name = "llvm.x86.sse2.pminu.b"] - fn pminub(a: u8x16, b: u8x16) -> u8x16; #[link_name = "llvm.x86.sse2.pmulh.w"] fn pmulhw(a: i16x8, b: i16x8) -> i16x8; #[link_name = "llvm.x86.sse2.pmulhu.w"] diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs index 7c59f2702..3162ad7d9 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse41.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs @@ -281,7 +281,9 @@ pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { #[cfg_attr(test, assert_instr(pmaxsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute(pmaxsb(a.as_i8x16(), b.as_i8x16())) + let a = a.as_i8x16(); + let b = b.as_i8x16(); + transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b)) } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed @@ -293,7 +295,9 @@ pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pmaxuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { - transmute(pmaxuw(a.as_u16x8(), b.as_u16x8())) + let a = a.as_u16x8(); + let b = b.as_u16x8(); + transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b)) } /// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum @@ -305,7 +309,9 @@ pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pmaxsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(pmaxsd(a.as_i32x4(), b.as_i32x4())) + let a = a.as_i32x4(); + let b = b.as_i32x4(); + transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b)) } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed @@ -317,7 +323,9 @@ pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pmaxud))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { - transmute(pmaxud(a.as_u32x4(), b.as_u32x4())) + let a = a.as_u32x4(); + let b = b.as_u32x4(); + transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b)) } /// Compares packed 8-bit integers in `a` and `b` and returns packed minimum @@ -329,7 +337,9 @@ pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pminsb))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { - transmute(pminsb(a.as_i8x16(), b.as_i8x16())) + let a = a.as_i8x16(); + let b = b.as_i8x16(); + transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b)) } /// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed @@ -341,7 +351,9 @@ pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pminuw))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { - transmute(pminuw(a.as_u16x8(), b.as_u16x8())) + let a = a.as_u16x8(); + let b = b.as_u16x8(); + transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b)) } /// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum @@ -353,7 +365,9 @@ pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pminsd))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { - transmute(pminsd(a.as_i32x4(), b.as_i32x4())) + let a = a.as_i32x4(); + let b = b.as_i32x4(); + transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b)) } /// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed @@ -365,7 +379,9 @@ pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i { #[cfg_attr(test, assert_instr(pminud))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i { - transmute(pminud(a.as_u32x4(), b.as_u32x4())) + let a = a.as_u32x4(); + let b = b.as_u32x4(); + transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b)) } /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers @@ -1122,22 +1138,6 @@ extern "C" { fn pblendw(a: i16x8, b: i16x8, imm8: u8) -> i16x8; #[link_name = "llvm.x86.sse41.insertps"] fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128; - #[link_name = "llvm.x86.sse41.pmaxsb"] - fn pmaxsb(a: i8x16, b: i8x16) -> i8x16; - #[link_name = "llvm.x86.sse41.pmaxuw"] - fn pmaxuw(a: u16x8, b: u16x8) -> u16x8; - #[link_name = "llvm.x86.sse41.pmaxsd"] - fn pmaxsd(a: i32x4, b: i32x4) -> i32x4; - #[link_name = "llvm.x86.sse41.pmaxud"] - fn pmaxud(a: u32x4, b: u32x4) -> u32x4; - #[link_name = "llvm.x86.sse41.pminsb"] - fn pminsb(a: i8x16, b: i8x16) -> i8x16; - #[link_name = "llvm.x86.sse41.pminuw"] - fn pminuw(a: u16x8, b: u16x8) -> u16x8; - #[link_name = "llvm.x86.sse41.pminsd"] - fn pminsd(a: i32x4, b: i32x4) -> i32x4; - #[link_name = "llvm.x86.sse41.pminud"] - fn pminud(a: u32x4, b: u32x4) -> u32x4; #[link_name = "llvm.x86.sse41.packusdw"] fn packusdw(a: i32x4, b: i32x4) -> u16x8; #[link_name = "llvm.x86.sse41.dppd"] diff --git a/library/stdarch/crates/core_arch/src/x86/sse42.rs b/library/stdarch/crates/core_arch/src/x86/sse42.rs index f474b0671..4eb12480b 100644 --- a/library/stdarch/crates/core_arch/src/x86/sse42.rs +++ b/library/stdarch/crates/core_arch/src/x86/sse42.rs @@ -614,7 +614,7 @@ mod tests { use crate::core_arch::x86::*; use std::ptr; - // Currently one cannot `load` a &[u8] that is is less than 16 + // Currently one cannot `load` a &[u8] that is less than 16 // in length. This makes loading strings less than 16 in length // a bit difficult. Rather than `load` and mutate the __m128i, // it is easier to memcpy the given string to a local slice with @@ -623,11 +623,7 @@ mod tests { unsafe fn str_to_m128i(s: &[u8]) -> __m128i { assert!(s.len() <= 16); let slice = &mut [0u8; 16]; - ptr::copy_nonoverlapping( - s.get_unchecked(0) as *const u8 as *const u8, - slice.get_unchecked_mut(0) as *mut u8 as *mut u8, - s.len(), - ); + ptr::copy_nonoverlapping(s.as_ptr(), slice.as_mut_ptr(), s.len()); _mm_loadu_si128(slice.as_ptr() as *const _) } diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vaes.rs b/library/stdarch/crates/core_arch/src/x86/vaes.rs index 676de312b..e09f8a113 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512vaes.rs +++ b/library/stdarch/crates/core_arch/src/x86/vaes.rs @@ -38,7 +38,7 @@ extern "C" { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128) #[inline] -#[target_feature(enable = "avx512vaes,avx512vl")] +#[target_feature(enable = "vaes")] #[cfg_attr(test, assert_instr(vaesenc))] pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i { aesenc_256(a, round_key) @@ -49,7 +49,7 @@ pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128) #[inline] -#[target_feature(enable = "avx512vaes,avx512vl")] +#[target_feature(enable = "vaes")] #[cfg_attr(test, assert_instr(vaesenclast))] pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i { aesenclast_256(a, round_key) @@ -60,7 +60,7 @@ pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256 /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128) #[inline] -#[target_feature(enable = "avx512vaes,avx512vl")] +#[target_feature(enable = "vaes")] #[cfg_attr(test, assert_instr(vaesdec))] pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i { aesdec_256(a, round_key) @@ -71,7 +71,7 @@ pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128) #[inline] -#[target_feature(enable = "avx512vaes,avx512vl")] +#[target_feature(enable = "vaes")] #[cfg_attr(test, assert_instr(vaesdeclast))] pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i { aesdeclast_256(a, round_key) @@ -82,7 +82,7 @@ pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256 /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128) #[inline] -#[target_feature(enable = "avx512vaes,avx512f")] +#[target_feature(enable = "vaes,avx512f")] #[cfg_attr(test, assert_instr(vaesenc))] pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i { aesenc_512(a, round_key) @@ -93,7 +93,7 @@ pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128) #[inline] -#[target_feature(enable = "avx512vaes,avx512f")] +#[target_feature(enable = "vaes,avx512f")] #[cfg_attr(test, assert_instr(vaesenclast))] pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i { aesenclast_512(a, round_key) @@ -104,7 +104,7 @@ pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512 /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128) #[inline] -#[target_feature(enable = "avx512vaes,avx512f")] +#[target_feature(enable = "vaes,avx512f")] #[cfg_attr(test, assert_instr(vaesdec))] pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i { aesdec_512(a, round_key) @@ -115,7 +115,7 @@ pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128) #[inline] -#[target_feature(enable = "avx512vaes,avx512f")] +#[target_feature(enable = "vaes,avx512f")] #[cfg_attr(test, assert_instr(vaesdeclast))] pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i { aesdeclast_512(a, round_key) @@ -138,7 +138,7 @@ mod tests { // ideally we'd be using quickcheck here instead #[target_feature(enable = "avx2")] - unsafe fn helper_for_256_avx512vaes( + unsafe fn helper_for_256_vaes( linear: unsafe fn(__m128i, __m128i) -> __m128i, vectorized: unsafe fn(__m256i, __m256i) -> __m256i, ) { @@ -187,7 +187,7 @@ mod tests { setup_state_key(_mm512_broadcast_i32x4) } - #[simd_test(enable = "avx512vaes,avx512vl")] + #[simd_test(enable = "vaes,avx512vl")] unsafe fn test_mm256_aesdec_epi128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. let (a, k) = setup_state_key_256(); @@ -196,10 +196,10 @@ mod tests { let r = _mm256_aesdec_epi128(a, k); assert_eq_m256i(r, e); - helper_for_256_avx512vaes(_mm_aesdec_si128, _mm256_aesdec_epi128); + helper_for_256_vaes(_mm_aesdec_si128, _mm256_aesdec_epi128); } - #[simd_test(enable = "avx512vaes,avx512vl")] + #[simd_test(enable = "vaes,avx512vl")] unsafe fn test_mm256_aesdeclast_epi128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. let (a, k) = setup_state_key_256(); @@ -208,10 +208,10 @@ mod tests { let r = _mm256_aesdeclast_epi128(a, k); assert_eq_m256i(r, e); - helper_for_256_avx512vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128); + helper_for_256_vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128); } - #[simd_test(enable = "avx512vaes,avx512vl")] + #[simd_test(enable = "vaes,avx512vl")] unsafe fn test_mm256_aesenc_epi128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. // they are repeated appropriately @@ -221,10 +221,10 @@ mod tests { let r = _mm256_aesenc_epi128(a, k); assert_eq_m256i(r, e); - helper_for_256_avx512vaes(_mm_aesenc_si128, _mm256_aesenc_epi128); + helper_for_256_vaes(_mm_aesenc_si128, _mm256_aesenc_epi128); } - #[simd_test(enable = "avx512vaes,avx512vl")] + #[simd_test(enable = "vaes,avx512vl")] unsafe fn test_mm256_aesenclast_epi128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. let (a, k) = setup_state_key_256(); @@ -233,11 +233,11 @@ mod tests { let r = _mm256_aesenclast_epi128(a, k); assert_eq_m256i(r, e); - helper_for_256_avx512vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128); + helper_for_256_vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128); } #[target_feature(enable = "avx512f")] - unsafe fn helper_for_512_avx512vaes( + unsafe fn helper_for_512_vaes( linear: unsafe fn(__m128i, __m128i) -> __m128i, vectorized: unsafe fn(__m512i, __m512i) -> __m512i, ) { @@ -282,7 +282,7 @@ mod tests { assert_eq_m128i(_mm512_extracti32x4_epi32::<3>(r), e_decomp[3]); } - #[simd_test(enable = "avx512vaes,avx512f")] + #[simd_test(enable = "vaes,avx512f")] unsafe fn test_mm512_aesdec_epi128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx. let (a, k) = setup_state_key_512(); @@ -291,10 +291,10 @@ mod tests { let r = _mm512_aesdec_epi128(a, k); assert_eq_m512i(r, e); - helper_for_512_avx512vaes(_mm_aesdec_si128, _mm512_aesdec_epi128); + helper_for_512_vaes(_mm_aesdec_si128, _mm512_aesdec_epi128); } - #[simd_test(enable = "avx512vaes,avx512f")] + #[simd_test(enable = "vaes,avx512f")] unsafe fn test_mm512_aesdeclast_epi128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx. let (a, k) = setup_state_key_512(); @@ -303,10 +303,10 @@ mod tests { let r = _mm512_aesdeclast_epi128(a, k); assert_eq_m512i(r, e); - helper_for_512_avx512vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128); + helper_for_512_vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128); } - #[simd_test(enable = "avx512vaes,avx512f")] + #[simd_test(enable = "vaes,avx512f")] unsafe fn test_mm512_aesenc_epi128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx. let (a, k) = setup_state_key_512(); @@ -315,10 +315,10 @@ mod tests { let r = _mm512_aesenc_epi128(a, k); assert_eq_m512i(r, e); - helper_for_512_avx512vaes(_mm_aesenc_si128, _mm512_aesenc_epi128); + helper_for_512_vaes(_mm_aesenc_si128, _mm512_aesenc_epi128); } - #[simd_test(enable = "avx512vaes,avx512f")] + #[simd_test(enable = "vaes,avx512f")] unsafe fn test_mm512_aesenclast_epi128() { // Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx. let (a, k) = setup_state_key_512(); @@ -327,6 +327,6 @@ mod tests { let r = _mm512_aesenclast_epi128(a, k); assert_eq_m512i(r, e); - helper_for_512_avx512vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128); + helper_for_512_vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128); } } diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs index 9bfeb903a..ea76708b8 100644 --- a/library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs +++ b/library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs @@ -32,7 +32,7 @@ extern "C" { /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_clmulepi64_epi128) #[inline] -#[target_feature(enable = "avx512vpclmulqdq,avx512f")] +#[target_feature(enable = "vpclmulqdq,avx512f")] // technically according to Intel's documentation we don't need avx512f here, however LLVM gets confused otherwise #[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))] #[rustc_legacy_const_generics(2)] @@ -50,7 +50,7 @@ pub unsafe fn _mm512_clmulepi64_epi128<const IMM8: i32>(a: __m512i, b: __m512i) /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_clmulepi64_epi128) #[inline] -#[target_feature(enable = "avx512vpclmulqdq,avx512vl")] +#[target_feature(enable = "vpclmulqdq")] #[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))] #[rustc_legacy_const_generics(2)] pub unsafe fn _mm256_clmulepi64_epi128<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i { @@ -121,7 +121,7 @@ mod tests { // this function tests one of the possible 4 instances // with different inputs across lanes - #[target_feature(enable = "avx512vpclmulqdq,avx512f")] + #[target_feature(enable = "vpclmulqdq,avx512f")] unsafe fn verify_512_helper( linear: unsafe fn(__m128i, __m128i) -> __m128i, vectorized: unsafe fn(__m512i, __m512i) -> __m512i, @@ -162,7 +162,7 @@ mod tests { // this function tests one of the possible 4 instances // with different inputs across lanes for the VL version - #[target_feature(enable = "avx512vpclmulqdq,avx512vl")] + #[target_feature(enable = "vpclmulqdq,avx512vl")] unsafe fn verify_256_helper( linear: unsafe fn(__m128i, __m128i) -> __m128i, vectorized: unsafe fn(__m256i, __m256i) -> __m256i, @@ -204,7 +204,7 @@ mod tests { unroll! {assert_eq_m128i(_mm256_extracti128_si256::<2>(r),e_decomp[2]);} } - #[simd_test(enable = "avx512vpclmulqdq,avx512f")] + #[simd_test(enable = "vpclmulqdq,avx512f")] unsafe fn test_mm512_clmulepi64_epi128() { verify_kat_pclmul!( _mm512_broadcast_i32x4, @@ -230,7 +230,7 @@ mod tests { ); } - #[simd_test(enable = "avx512vpclmulqdq,avx512vl")] + #[simd_test(enable = "vpclmulqdq,avx512vl")] unsafe fn test_mm256_clmulepi64_epi128() { verify_kat_pclmul!( _mm256_broadcastsi128_si256, |