summaryrefslogtreecommitdiffstats
path: root/library/stdarch/crates/core_arch/src/x86
diff options
context:
space:
mode:
Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86')
-rw-r--r--library/stdarch/crates/core_arch/src/x86/avx.rs2
-rw-r--r--library/stdarch/crates/core_arch/src/x86/avx2.rs76
-rw-r--r--library/stdarch/crates/core_arch/src/x86/avx512bf16.rs2
-rw-r--r--library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs12
-rw-r--r--library/stdarch/crates/core_arch/src/x86/avx512bw.rs28
-rw-r--r--library/stdarch/crates/core_arch/src/x86/avx512f.rs2
-rw-r--r--library/stdarch/crates/core_arch/src/x86/gfni.rs (renamed from library/stdarch/crates/core_arch/src/x86/avx512gfni.rs)114
-rw-r--r--library/stdarch/crates/core_arch/src/x86/mod.rs12
-rw-r--r--library/stdarch/crates/core_arch/src/x86/sse.rs5
-rw-r--r--library/stdarch/crates/core_arch/src/x86/sse2.rs32
-rw-r--r--library/stdarch/crates/core_arch/src/x86/sse41.rs48
-rw-r--r--library/stdarch/crates/core_arch/src/x86/sse42.rs8
-rw-r--r--library/stdarch/crates/core_arch/src/x86/vaes.rs (renamed from library/stdarch/crates/core_arch/src/x86/avx512vaes.rs)52
-rw-r--r--library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs (renamed from library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs)12
14 files changed, 201 insertions, 204 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/avx.rs b/library/stdarch/crates/core_arch/src/x86/avx.rs
index ad9e68db6..f8e83a35b 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx.rs
@@ -2450,7 +2450,7 @@ pub unsafe fn _mm256_set1_epi8(a: i8) -> __m256i {
)
}
-/// Broadcasts 16-bit integer `a` to all all elements of returned vector.
+/// Broadcasts 16-bit integer `a` to all elements of returned vector.
/// This intrinsic may generate the `vpbroadcastw`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_set1_epi16)
diff --git a/library/stdarch/crates/core_arch/src/x86/avx2.rs b/library/stdarch/crates/core_arch/src/x86/avx2.rs
index 16add3dbb..8638b3136 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx2.rs
@@ -1857,7 +1857,9 @@ pub unsafe fn _mm256_maskstore_epi64(mem_addr: *mut i64, mask: __m256i, a: __m25
#[cfg_attr(test, assert_instr(vpmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
- transmute(pmaxsw(a.as_i16x16(), b.as_i16x16()))
+ let a = a.as_i16x16();
+ let b = b.as_i16x16();
+ transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
}
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -1869,7 +1871,9 @@ pub unsafe fn _mm256_max_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
- transmute(pmaxsd(a.as_i32x8(), b.as_i32x8()))
+ let a = a.as_i32x8();
+ let b = b.as_i32x8();
+ transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
}
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -1881,7 +1885,9 @@ pub unsafe fn _mm256_max_epi32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
- transmute(pmaxsb(a.as_i8x32(), b.as_i8x32()))
+ let a = a.as_i8x32();
+ let b = b.as_i8x32();
+ transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
}
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -1893,7 +1899,9 @@ pub unsafe fn _mm256_max_epi8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
- transmute(pmaxuw(a.as_u16x16(), b.as_u16x16()))
+ let a = a.as_u16x16();
+ let b = b.as_u16x16();
+ transmute(simd_select::<i16x16, _>(simd_gt(a, b), a, b))
}
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -1905,7 +1913,9 @@ pub unsafe fn _mm256_max_epu16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
- transmute(pmaxud(a.as_u32x8(), b.as_u32x8()))
+ let a = a.as_u32x8();
+ let b = b.as_u32x8();
+ transmute(simd_select::<i32x8, _>(simd_gt(a, b), a, b))
}
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -1917,7 +1927,9 @@ pub unsafe fn _mm256_max_epu32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
- transmute(pmaxub(a.as_u8x32(), b.as_u8x32()))
+ let a = a.as_u8x32();
+ let b = b.as_u8x32();
+ transmute(simd_select::<i8x32, _>(simd_gt(a, b), a, b))
}
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -1929,7 +1941,9 @@ pub unsafe fn _mm256_max_epu8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
- transmute(pminsw(a.as_i16x16(), b.as_i16x16()))
+ let a = a.as_i16x16();
+ let b = b.as_i16x16();
+ transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
}
/// Compares packed 32-bit integers in `a` and `b`, and returns the packed
@@ -1941,7 +1955,9 @@ pub unsafe fn _mm256_min_epi16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
- transmute(pminsd(a.as_i32x8(), b.as_i32x8()))
+ let a = a.as_i32x8();
+ let b = b.as_i32x8();
+ transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
}
/// Compares packed 8-bit integers in `a` and `b`, and returns the packed
@@ -1953,7 +1969,9 @@ pub unsafe fn _mm256_min_epi32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
- transmute(pminsb(a.as_i8x32(), b.as_i8x32()))
+ let a = a.as_i8x32();
+ let b = b.as_i8x32();
+ transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
}
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns
@@ -1965,7 +1983,9 @@ pub unsafe fn _mm256_min_epi8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
- transmute(pminuw(a.as_u16x16(), b.as_u16x16()))
+ let a = a.as_u16x16();
+ let b = b.as_u16x16();
+ transmute(simd_select::<i16x16, _>(simd_lt(a, b), a, b))
}
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns
@@ -1977,7 +1997,9 @@ pub unsafe fn _mm256_min_epu16(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
- transmute(pminud(a.as_u32x8(), b.as_u32x8()))
+ let a = a.as_u32x8();
+ let b = b.as_u32x8();
+ transmute(simd_select::<i32x8, _>(simd_lt(a, b), a, b))
}
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns
@@ -1989,7 +2011,9 @@ pub unsafe fn _mm256_min_epu32(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
- transmute(pminub(a.as_u8x32(), b.as_u8x32()))
+ let a = a.as_u8x32();
+ let b = b.as_u8x32();
+ transmute(simd_select::<i8x32, _>(simd_lt(a, b), a, b))
}
/// Creates mask from the most significant bit of each 8-bit element in `a`,
@@ -2001,7 +2025,9 @@ pub unsafe fn _mm256_min_epu8(a: __m256i, b: __m256i) -> __m256i {
#[cfg_attr(test, assert_instr(vpmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm256_movemask_epi8(a: __m256i) -> i32 {
- simd_bitmask::<_, u32>(a.as_i8x32()) as i32
+ let z = i8x32::splat(0);
+ let m: i8x32 = simd_lt(a.as_i8x32(), z);
+ simd_bitmask::<_, u32>(m) as i32
}
/// Computes the sum of absolute differences (SADs) of quadruplets of unsigned
@@ -3618,30 +3644,6 @@ extern "C" {
fn maskstoreq(mem_addr: *mut i8, mask: i64x2, a: i64x2);
#[link_name = "llvm.x86.avx2.maskstore.q.256"]
fn maskstoreq256(mem_addr: *mut i8, mask: i64x4, a: i64x4);
- #[link_name = "llvm.x86.avx2.pmaxs.w"]
- fn pmaxsw(a: i16x16, b: i16x16) -> i16x16;
- #[link_name = "llvm.x86.avx2.pmaxs.d"]
- fn pmaxsd(a: i32x8, b: i32x8) -> i32x8;
- #[link_name = "llvm.x86.avx2.pmaxs.b"]
- fn pmaxsb(a: i8x32, b: i8x32) -> i8x32;
- #[link_name = "llvm.x86.avx2.pmaxu.w"]
- fn pmaxuw(a: u16x16, b: u16x16) -> u16x16;
- #[link_name = "llvm.x86.avx2.pmaxu.d"]
- fn pmaxud(a: u32x8, b: u32x8) -> u32x8;
- #[link_name = "llvm.x86.avx2.pmaxu.b"]
- fn pmaxub(a: u8x32, b: u8x32) -> u8x32;
- #[link_name = "llvm.x86.avx2.pmins.w"]
- fn pminsw(a: i16x16, b: i16x16) -> i16x16;
- #[link_name = "llvm.x86.avx2.pmins.d"]
- fn pminsd(a: i32x8, b: i32x8) -> i32x8;
- #[link_name = "llvm.x86.avx2.pmins.b"]
- fn pminsb(a: i8x32, b: i8x32) -> i8x32;
- #[link_name = "llvm.x86.avx2.pminu.w"]
- fn pminuw(a: u16x16, b: u16x16) -> u16x16;
- #[link_name = "llvm.x86.avx2.pminu.d"]
- fn pminud(a: u32x8, b: u32x8) -> u32x8;
- #[link_name = "llvm.x86.avx2.pminu.b"]
- fn pminub(a: u8x32, b: u8x32) -> u8x32;
#[link_name = "llvm.x86.avx2.mpsadbw"]
fn mpsadbw(a: u8x32, b: u8x32, imm8: i32) -> u16x16;
#[link_name = "llvm.x86.avx2.pmulhu.w"]
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs b/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs
index e9977e018..b21ededab 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bf16.rs
@@ -80,7 +80,7 @@ pub unsafe fn _mm256_cvtne2ps_pbh(a: __m256, b: __m256) -> __m256bh {
}
/// Convert packed single-precision (32-bit) floating-point elements in two vectors a and b
-/// to packed BF16 (16-bit) floating-point elements and and store the results in single vector
+/// to packed BF16 (16-bit) floating-point elements and store the results in single vector
/// dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769,1651,1654&avx512techs=AVX512_BF16&text=_mm256_mask_cvtne2ps_pbh)
#[inline]
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs b/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs
index 3c9df3912..1099ee2cb 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bitalg.rs
@@ -303,7 +303,7 @@ pub unsafe fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
-/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer.
+/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
/// It then selects these bits and packs them into the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_bitshuffle_epi64_mask)
@@ -315,7 +315,7 @@ pub unsafe fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
-/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer.
+/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
/// It then selects these bits and packs them into the output.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
@@ -330,7 +330,7 @@ pub unsafe fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
-/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer.
+/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
/// It then selects these bits and packs them into the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_bitshuffle_epi64_mask)
@@ -342,7 +342,7 @@ pub unsafe fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
-/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer.
+/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
/// It then selects these bits and packs them into the output.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
@@ -357,7 +357,7 @@ pub unsafe fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
-/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer.
+/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
/// It then selects these bits and packs them into the output.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bitshuffle_epi64_mask)
@@ -369,7 +369,7 @@ pub unsafe fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
}
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
-/// Then groups 8 8-bit values from `c`as indices into the the bits of the corresponding 64-bit integer.
+/// Then groups 8 8-bit values from `c`as indices into the bits of the corresponding 64-bit integer.
/// It then selects these bits and packs them into the output.
///
/// Uses the writemask in k - elements are zeroed in the result if the corresponding mask bit is not set.
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
index 49d78ed60..fbf71dfc4 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512bw.rs
@@ -7450,7 +7450,7 @@ pub unsafe fn _mm_maskz_set1_epi8(k: __mmask16, a: i8) -> __m128i {
transmute(simd_select_bitmask(k, r, zero))
}
-/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst.
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflelo_epi16&expand=5221)
#[inline]
@@ -7501,7 +7501,7 @@ pub unsafe fn _mm512_shufflelo_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
transmute(r)
}
-/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflelo_epi16&expand=5219)
#[inline]
@@ -7518,7 +7518,7 @@ pub unsafe fn _mm512_mask_shufflelo_epi16<const IMM8: i32>(
transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
}
-/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflelo_epi16&expand=5220)
#[inline]
@@ -7532,7 +7532,7 @@ pub unsafe fn _mm512_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask32, a: __m
transmute(simd_select_bitmask(k, r.as_i16x32(), zero))
}
-/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflelo_epi16&expand=5216)
#[inline]
@@ -7549,7 +7549,7 @@ pub unsafe fn _mm256_mask_shufflelo_epi16<const IMM8: i32>(
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
}
-/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflelo_epi16&expand=5217)
#[inline]
@@ -7563,7 +7563,7 @@ pub unsafe fn _mm256_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask16, a: __m
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
}
-/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflelo_epi16&expand=5213)
#[inline]
@@ -7580,7 +7580,7 @@ pub unsafe fn _mm_mask_shufflelo_epi16<const IMM8: i32>(
transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
}
-/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the low 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the low 64 bits of 128-bit lanes of dst, with the high 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflelo_epi16&expand=5214)
#[inline]
@@ -7594,7 +7594,7 @@ pub unsafe fn _mm_maskz_shufflelo_epi16<const IMM8: i32>(k: __mmask8, a: __m128i
transmute(simd_select_bitmask(k, shuffle.as_i16x8(), zero))
}
-/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst.
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_shufflehi_epi16&expand=5212)
#[inline]
@@ -7645,7 +7645,7 @@ pub unsafe fn _mm512_shufflehi_epi16<const IMM8: i32>(a: __m512i) -> __m512i {
transmute(r)
}
-/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_shufflehi_epi16&expand=5210)
#[inline]
@@ -7662,7 +7662,7 @@ pub unsafe fn _mm512_mask_shufflehi_epi16<const IMM8: i32>(
transmute(simd_select_bitmask(k, r.as_i16x32(), src.as_i16x32()))
}
-/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_shufflehi_epi16&expand=5211)
#[inline]
@@ -7676,7 +7676,7 @@ pub unsafe fn _mm512_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask32, a: __m
transmute(simd_select_bitmask(k, r.as_i16x32(), zero))
}
-/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_shufflehi_epi16&expand=5207)
#[inline]
@@ -7693,7 +7693,7 @@ pub unsafe fn _mm256_mask_shufflehi_epi16<const IMM8: i32>(
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), src.as_i16x16()))
}
-/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_shufflehi_epi16&expand=5208)
#[inline]
@@ -7707,7 +7707,7 @@ pub unsafe fn _mm256_maskz_shufflehi_epi16<const IMM8: i32>(k: __mmask16, a: __m
transmute(simd_select_bitmask(k, shuffle.as_i16x16(), zero))
}
-/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using writemask k (elements are copied from src when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_shufflehi_epi16&expand=5204)
#[inline]
@@ -7724,7 +7724,7 @@ pub unsafe fn _mm_mask_shufflehi_epi16<const IMM8: i32>(
transmute(simd_select_bitmask(k, shuffle.as_i16x8(), src.as_i16x8()))
}
-/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
+/// Shuffle 16-bit integers in the high 64 bits of 128-bit lanes of a using the control in imm8. Store the results in the high 64 bits of 128-bit lanes of dst, with the low 64 bits of 128-bit lanes being copied from a to dst, using zeromask k (elements are zeroed out when the corresponding mask bit is not set).
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_shufflehi_epi16&expand=5205)
#[inline]
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512f.rs b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
index f70a28466..0ddb51283 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512f.rs
+++ b/library/stdarch/crates/core_arch/src/x86/avx512f.rs
@@ -26268,7 +26268,7 @@ pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i {
transmute(i8x64::splat(a))
}
-/// Broadcast the low packed 16-bit integer from a to all all elements of dst.
+/// Broadcast the low packed 16-bit integer from a to all elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_set1_epi16&expand=4944)
#[inline]
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs b/library/stdarch/crates/core_arch/src/x86/gfni.rs
index 66fd1c2e1..679b2548a 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512gfni.rs
+++ b/library/stdarch/crates/core_arch/src/x86/gfni.rs
@@ -65,7 +65,7 @@ extern "C" {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i {
transmute(vgf2p8mulb_512(a.as_i8x64(), b.as_i8x64()))
@@ -80,7 +80,7 @@ pub unsafe fn _mm512_gf2p8mul_epi8(a: __m512i, b: __m512i) -> __m512i {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_mask_gf2p8mul_epi8(
src: __m512i,
@@ -104,7 +104,7 @@ pub unsafe fn _mm512_mask_gf2p8mul_epi8(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -> __m512i {
let zero = _mm512_setzero_si512().as_i8x64();
@@ -121,7 +121,7 @@ pub unsafe fn _mm512_maskz_gf2p8mul_epi8(k: __mmask64, a: __m512i, b: __m512i) -
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i {
transmute(vgf2p8mulb_256(a.as_i8x32(), b.as_i8x32()))
@@ -136,7 +136,7 @@ pub unsafe fn _mm256_gf2p8mul_epi8(a: __m256i, b: __m256i) -> __m256i {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_mask_gf2p8mul_epi8(
src: __m256i,
@@ -160,7 +160,7 @@ pub unsafe fn _mm256_mask_gf2p8mul_epi8(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -> __m256i {
let zero = _mm256_setzero_si256().as_i8x32();
@@ -177,8 +177,8 @@ pub unsafe fn _mm256_maskz_gf2p8mul_epi8(k: __mmask32, a: __m256i, b: __m256i) -
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
-#[cfg_attr(test, assert_instr(vgf2p8mulb))]
+#[target_feature(enable = "gfni")]
+#[cfg_attr(test, assert_instr(gf2p8mulb))]
pub unsafe fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i {
transmute(vgf2p8mulb_128(a.as_i8x16(), b.as_i8x16()))
}
@@ -192,7 +192,7 @@ pub unsafe fn _mm_gf2p8mul_epi8(a: __m128i, b: __m128i) -> __m128i {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm_mask_gf2p8mul_epi8(
src: __m128i,
@@ -216,7 +216,7 @@ pub unsafe fn _mm_mask_gf2p8mul_epi8(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8mul_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8mulb))]
pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> __m128i {
let zero = _mm_setzero_si128().as_i8x16();
@@ -234,7 +234,7 @@ pub unsafe fn _mm_maskz_gf2p8mul_epi8(k: __mmask16, a: __m128i, b: __m128i) -> _
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
@@ -256,7 +256,7 @@ pub unsafe fn _mm512_gf2p8affine_epi64_epi8<const B: i32>(x: __m512i, a: __m512i
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>(
@@ -283,7 +283,7 @@ pub unsafe fn _mm512_maskz_gf2p8affine_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>(
@@ -307,7 +307,7 @@ pub unsafe fn _mm512_mask_gf2p8affine_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
@@ -329,7 +329,7 @@ pub unsafe fn _mm256_gf2p8affine_epi64_epi8<const B: i32>(x: __m256i, a: __m256i
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>(
@@ -356,7 +356,7 @@ pub unsafe fn _mm256_maskz_gf2p8affine_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>(
@@ -380,8 +380,8 @@ pub unsafe fn _mm256_mask_gf2p8affine_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
-#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
+#[target_feature(enable = "gfni")]
+#[cfg_attr(test, assert_instr(gf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
static_assert_imm8!(B);
@@ -402,7 +402,7 @@ pub unsafe fn _mm_gf2p8affine_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>(
@@ -429,7 +429,7 @@ pub unsafe fn _mm_maskz_gf2p8affine_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8affine_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8affineqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>(
@@ -455,7 +455,7 @@ pub unsafe fn _mm_mask_gf2p8affine_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m512i) -> __m512i {
@@ -479,7 +479,7 @@ pub unsafe fn _mm512_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m512i, a: __m5
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
@@ -508,7 +508,7 @@ pub unsafe fn _mm512_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512f")]
+#[target_feature(enable = "gfni,avx512bw,avx512f")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
@@ -534,7 +534,7 @@ pub unsafe fn _mm512_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m256i) -> __m256i {
@@ -558,7 +558,7 @@ pub unsafe fn _mm256_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m256i, a: __m2
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
@@ -587,7 +587,7 @@ pub unsafe fn _mm256_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
@@ -613,8 +613,8 @@ pub unsafe fn _mm256_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
-#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
+#[target_feature(enable = "gfni")]
+#[cfg_attr(test, assert_instr(gf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i) -> __m128i {
static_assert_imm8!(B);
@@ -637,7 +637,7 @@ pub unsafe fn _mm_gf2p8affineinv_epi64_epi8<const B: i32>(x: __m128i, a: __m128i
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(3)]
pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
@@ -666,7 +666,7 @@ pub unsafe fn _mm_maskz_gf2p8affineinv_epi64_epi8<const B: i32>(
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_gf2p8affineinv_epi64_epi8)
#[inline]
-#[target_feature(enable = "avx512gfni,avx512bw,avx512vl")]
+#[target_feature(enable = "gfni,avx512bw,avx512vl")]
#[cfg_attr(test, assert_instr(vgf2p8affineinvqb, B = 0))]
#[rustc_legacy_const_generics(4)]
pub unsafe fn _mm_mask_gf2p8affineinv_epi64_epi8<const B: i32>(
@@ -847,7 +847,7 @@ mod tests {
_mm512_loadu_si512(black_box(pointer))
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_gf2p8mul_epi8() {
let (left, right, expected) = generate_byte_mul_test_data();
@@ -860,7 +860,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_maskz_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
@@ -879,7 +879,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_mask_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
@@ -897,7 +897,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_gf2p8mul_epi8() {
let (left, right, expected) = generate_byte_mul_test_data();
@@ -910,7 +910,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
@@ -929,7 +929,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_mask_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
@@ -947,7 +947,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_gf2p8mul_epi8() {
let (left, right, expected) = generate_byte_mul_test_data();
@@ -960,7 +960,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_maskz_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
@@ -979,7 +979,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_mask_gf2p8mul_epi8() {
let (left, right, _expected) = generate_byte_mul_test_data();
@@ -997,7 +997,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_gf2p8affine_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
@@ -1031,7 +1031,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_maskz_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1053,7 +1053,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_mask_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1074,7 +1074,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_gf2p8affine_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
@@ -1108,7 +1108,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1130,7 +1130,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_mask_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1151,7 +1151,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_gf2p8affine_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
@@ -1185,7 +1185,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_maskz_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1206,7 +1206,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_mask_gf2p8affine_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1227,7 +1227,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_gf2p8affineinv_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
@@ -1271,7 +1271,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_maskz_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1293,7 +1293,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw")]
+ #[simd_test(enable = "gfni,avx512bw")]
unsafe fn test_mm512_mask_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1315,7 +1315,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_gf2p8affineinv_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
@@ -1359,7 +1359,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_maskz_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1381,7 +1381,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm256_mask_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1403,7 +1403,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_gf2p8affineinv_epi64_epi8() {
let identity: i64 = 0x01_02_04_08_10_20_40_80;
const IDENTITY_BYTE: i32 = 0;
@@ -1447,7 +1447,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_maskz_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
@@ -1469,7 +1469,7 @@ mod tests {
}
}
- #[simd_test(enable = "avx512gfni,avx512bw,avx512vl")]
+ #[simd_test(enable = "gfni,avx512bw,avx512vl")]
unsafe fn test_mm_mask_gf2p8affineinv_epi64_epi8() {
const CONSTANT_BYTE: i32 = 0x63;
let (matrices, vectors, _expected) = generate_affine_mul_test_data(CONSTANT_BYTE as u8);
diff --git a/library/stdarch/crates/core_arch/src/x86/mod.rs b/library/stdarch/crates/core_arch/src/x86/mod.rs
index 6b50e95b2..37045e40e 100644
--- a/library/stdarch/crates/core_arch/src/x86/mod.rs
+++ b/library/stdarch/crates/core_arch/src/x86/mod.rs
@@ -835,17 +835,17 @@ pub use self::avx512vnni::*;
mod avx512bitalg;
pub use self::avx512bitalg::*;
-mod avx512gfni;
-pub use self::avx512gfni::*;
+mod gfni;
+pub use self::gfni::*;
mod avx512vpopcntdq;
pub use self::avx512vpopcntdq::*;
-mod avx512vaes;
-pub use self::avx512vaes::*;
+mod vaes;
+pub use self::vaes::*;
-mod avx512vpclmulqdq;
-pub use self::avx512vpclmulqdq::*;
+mod vpclmulqdq;
+pub use self::vpclmulqdq::*;
mod bt;
pub use self::bt::*;
diff --git a/library/stdarch/crates/core_arch/src/x86/sse.rs b/library/stdarch/crates/core_arch/src/x86/sse.rs
index 03c3a14a5..f21288970 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse.rs
@@ -1080,10 +1080,7 @@ pub unsafe fn _mm_movelh_ps(a: __m128, b: __m128) -> __m128 {
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_ps)
#[inline]
#[target_feature(enable = "sse")]
-// FIXME: LLVM9 trunk has the following bug:
-// https://github.com/rust-lang/stdarch/issues/794
-// so we only temporarily test this on i686 and x86_64 but not on i586:
-#[cfg_attr(all(test, target_feature = "sse2"), assert_instr(movmskps))]
+#[cfg_attr(test, assert_instr(movmskps))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_ps(a: __m128) -> i32 {
movmskps(a)
diff --git a/library/stdarch/crates/core_arch/src/x86/sse2.rs b/library/stdarch/crates/core_arch/src/x86/sse2.rs
index 3e79b3539..cde4bc316 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse2.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse2.rs
@@ -203,7 +203,9 @@ pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
- transmute(pmaxsw(a.as_i16x8(), b.as_i16x8()))
+ let a = a.as_i16x8();
+ let b = b.as_i16x8();
+ transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
@@ -215,7 +217,9 @@ pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
- transmute(pmaxub(a.as_u8x16(), b.as_u8x16()))
+ let a = a.as_u8x16();
+ let b = b.as_u8x16();
+ transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}
/// Compares packed 16-bit integers in `a` and `b`, and returns the packed
@@ -227,7 +231,9 @@ pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminsw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
- transmute(pminsw(a.as_i16x8(), b.as_i16x8()))
+ let a = a.as_i16x8();
+ let b = b.as_i16x8();
+ transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}
/// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the
@@ -239,7 +245,9 @@ pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminub))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i {
- transmute(pminub(a.as_u8x16(), b.as_u8x16()))
+ let a = a.as_u8x16();
+ let b = b.as_u8x16();
+ transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}
/// Multiplies the packed 16-bit integers in `a` and `b`.
@@ -1378,7 +1386,9 @@ pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
#[cfg_attr(test, assert_instr(pmovmskb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 {
- simd_bitmask::<_, u16>(a.as_i8x16()) as u32 as i32
+ let z = i8x16::splat(0);
+ let m: i8x16 = simd_lt(a.as_i8x16(), z);
+ simd_bitmask::<_, u16>(m) as u32 as i32
}
/// Shuffles 32-bit integers in `a` using the control in `IMM8`.
@@ -1409,7 +1419,7 @@ pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i {
/// `IMM8`.
///
/// Put the results in the high 64 bits of the returned vector, with the low 64
-/// bits being copied from from `a`.
+/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16)
#[inline]
@@ -1441,7 +1451,7 @@ pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i {
/// `IMM8`.
///
/// Put the results in the low 64 bits of the returned vector, with the high 64
-/// bits being copied from from `a`.
+/// bits being copied from `a`.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16)
#[inline]
@@ -2796,14 +2806,6 @@ extern "C" {
fn pavgw(a: u16x8, b: u16x8) -> u16x8;
#[link_name = "llvm.x86.sse2.pmadd.wd"]
fn pmaddwd(a: i16x8, b: i16x8) -> i32x4;
- #[link_name = "llvm.x86.sse2.pmaxs.w"]
- fn pmaxsw(a: i16x8, b: i16x8) -> i16x8;
- #[link_name = "llvm.x86.sse2.pmaxu.b"]
- fn pmaxub(a: u8x16, b: u8x16) -> u8x16;
- #[link_name = "llvm.x86.sse2.pmins.w"]
- fn pminsw(a: i16x8, b: i16x8) -> i16x8;
- #[link_name = "llvm.x86.sse2.pminu.b"]
- fn pminub(a: u8x16, b: u8x16) -> u8x16;
#[link_name = "llvm.x86.sse2.pmulh.w"]
fn pmulhw(a: i16x8, b: i16x8) -> i16x8;
#[link_name = "llvm.x86.sse2.pmulhu.w"]
diff --git a/library/stdarch/crates/core_arch/src/x86/sse41.rs b/library/stdarch/crates/core_arch/src/x86/sse41.rs
index 7c59f2702..3162ad7d9 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse41.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse41.rs
@@ -281,7 +281,9 @@ pub unsafe fn _mm_insert_epi32<const IMM8: i32>(a: __m128i, i: i32) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
- transmute(pmaxsb(a.as_i8x16(), b.as_i8x16()))
+ let a = a.as_i8x16();
+ let b = b.as_i8x16();
+ transmute(simd_select::<i8x16, _>(simd_gt(a, b), a, b))
}
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@@ -293,7 +295,9 @@ pub unsafe fn _mm_max_epi8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
- transmute(pmaxuw(a.as_u16x8(), b.as_u16x8()))
+ let a = a.as_u16x8();
+ let b = b.as_u16x8();
+ transmute(simd_select::<i16x8, _>(simd_gt(a, b), a, b))
}
/// Compares packed 32-bit integers in `a` and `b`, and returns packed maximum
@@ -305,7 +309,9 @@ pub unsafe fn _mm_max_epu16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
- transmute(pmaxsd(a.as_i32x4(), b.as_i32x4()))
+ let a = a.as_i32x4();
+ let b = b.as_i32x4();
+ transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
@@ -317,7 +323,9 @@ pub unsafe fn _mm_max_epi32(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pmaxud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
- transmute(pmaxud(a.as_u32x4(), b.as_u32x4()))
+ let a = a.as_u32x4();
+ let b = b.as_u32x4();
+ transmute(simd_select::<i32x4, _>(simd_gt(a, b), a, b))
}
/// Compares packed 8-bit integers in `a` and `b` and returns packed minimum
@@ -329,7 +337,9 @@ pub unsafe fn _mm_max_epu32(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminsb))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
- transmute(pminsb(a.as_i8x16(), b.as_i8x16()))
+ let a = a.as_i8x16();
+ let b = b.as_i8x16();
+ transmute(simd_select::<i8x16, _>(simd_lt(a, b), a, b))
}
/// Compares packed unsigned 16-bit integers in `a` and `b`, and returns packed
@@ -341,7 +351,9 @@ pub unsafe fn _mm_min_epi8(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminuw))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
- transmute(pminuw(a.as_u16x8(), b.as_u16x8()))
+ let a = a.as_u16x8();
+ let b = b.as_u16x8();
+ transmute(simd_select::<i16x8, _>(simd_lt(a, b), a, b))
}
/// Compares packed 32-bit integers in `a` and `b`, and returns packed minimum
@@ -353,7 +365,9 @@ pub unsafe fn _mm_min_epu16(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminsd))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
- transmute(pminsd(a.as_i32x4(), b.as_i32x4()))
+ let a = a.as_i32x4();
+ let b = b.as_i32x4();
+ transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}
/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns packed
@@ -365,7 +379,9 @@ pub unsafe fn _mm_min_epi32(a: __m128i, b: __m128i) -> __m128i {
#[cfg_attr(test, assert_instr(pminud))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm_min_epu32(a: __m128i, b: __m128i) -> __m128i {
- transmute(pminud(a.as_u32x4(), b.as_u32x4()))
+ let a = a.as_u32x4();
+ let b = b.as_u32x4();
+ transmute(simd_select::<i32x4, _>(simd_lt(a, b), a, b))
}
/// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers
@@ -1122,22 +1138,6 @@ extern "C" {
fn pblendw(a: i16x8, b: i16x8, imm8: u8) -> i16x8;
#[link_name = "llvm.x86.sse41.insertps"]
fn insertps(a: __m128, b: __m128, imm8: u8) -> __m128;
- #[link_name = "llvm.x86.sse41.pmaxsb"]
- fn pmaxsb(a: i8x16, b: i8x16) -> i8x16;
- #[link_name = "llvm.x86.sse41.pmaxuw"]
- fn pmaxuw(a: u16x8, b: u16x8) -> u16x8;
- #[link_name = "llvm.x86.sse41.pmaxsd"]
- fn pmaxsd(a: i32x4, b: i32x4) -> i32x4;
- #[link_name = "llvm.x86.sse41.pmaxud"]
- fn pmaxud(a: u32x4, b: u32x4) -> u32x4;
- #[link_name = "llvm.x86.sse41.pminsb"]
- fn pminsb(a: i8x16, b: i8x16) -> i8x16;
- #[link_name = "llvm.x86.sse41.pminuw"]
- fn pminuw(a: u16x8, b: u16x8) -> u16x8;
- #[link_name = "llvm.x86.sse41.pminsd"]
- fn pminsd(a: i32x4, b: i32x4) -> i32x4;
- #[link_name = "llvm.x86.sse41.pminud"]
- fn pminud(a: u32x4, b: u32x4) -> u32x4;
#[link_name = "llvm.x86.sse41.packusdw"]
fn packusdw(a: i32x4, b: i32x4) -> u16x8;
#[link_name = "llvm.x86.sse41.dppd"]
diff --git a/library/stdarch/crates/core_arch/src/x86/sse42.rs b/library/stdarch/crates/core_arch/src/x86/sse42.rs
index f474b0671..4eb12480b 100644
--- a/library/stdarch/crates/core_arch/src/x86/sse42.rs
+++ b/library/stdarch/crates/core_arch/src/x86/sse42.rs
@@ -614,7 +614,7 @@ mod tests {
use crate::core_arch::x86::*;
use std::ptr;
- // Currently one cannot `load` a &[u8] that is is less than 16
+ // Currently one cannot `load` a &[u8] that is less than 16
// in length. This makes loading strings less than 16 in length
// a bit difficult. Rather than `load` and mutate the __m128i,
// it is easier to memcpy the given string to a local slice with
@@ -623,11 +623,7 @@ mod tests {
unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
assert!(s.len() <= 16);
let slice = &mut [0u8; 16];
- ptr::copy_nonoverlapping(
- s.get_unchecked(0) as *const u8 as *const u8,
- slice.get_unchecked_mut(0) as *mut u8 as *mut u8,
- s.len(),
- );
+ ptr::copy_nonoverlapping(s.as_ptr(), slice.as_mut_ptr(), s.len());
_mm_loadu_si128(slice.as_ptr() as *const _)
}
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vaes.rs b/library/stdarch/crates/core_arch/src/x86/vaes.rs
index 676de312b..e09f8a113 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512vaes.rs
+++ b/library/stdarch/crates/core_arch/src/x86/vaes.rs
@@ -38,7 +38,7 @@ extern "C" {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenc_epi128)
#[inline]
-#[target_feature(enable = "avx512vaes,avx512vl")]
+#[target_feature(enable = "vaes")]
#[cfg_attr(test, assert_instr(vaesenc))]
pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesenc_256(a, round_key)
@@ -49,7 +49,7 @@ pub unsafe fn _mm256_aesenc_epi128(a: __m256i, round_key: __m256i) -> __m256i {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesenclast_epi128)
#[inline]
-#[target_feature(enable = "avx512vaes,avx512vl")]
+#[target_feature(enable = "vaes")]
#[cfg_attr(test, assert_instr(vaesenclast))]
pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesenclast_256(a, round_key)
@@ -60,7 +60,7 @@ pub unsafe fn _mm256_aesenclast_epi128(a: __m256i, round_key: __m256i) -> __m256
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdec_epi128)
#[inline]
-#[target_feature(enable = "avx512vaes,avx512vl")]
+#[target_feature(enable = "vaes")]
#[cfg_attr(test, assert_instr(vaesdec))]
pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesdec_256(a, round_key)
@@ -71,7 +71,7 @@ pub unsafe fn _mm256_aesdec_epi128(a: __m256i, round_key: __m256i) -> __m256i {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_aesdeclast_epi128)
#[inline]
-#[target_feature(enable = "avx512vaes,avx512vl")]
+#[target_feature(enable = "vaes")]
#[cfg_attr(test, assert_instr(vaesdeclast))]
pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256i {
aesdeclast_256(a, round_key)
@@ -82,7 +82,7 @@ pub unsafe fn _mm256_aesdeclast_epi128(a: __m256i, round_key: __m256i) -> __m256
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenc_epi128)
#[inline]
-#[target_feature(enable = "avx512vaes,avx512f")]
+#[target_feature(enable = "vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesenc))]
pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesenc_512(a, round_key)
@@ -93,7 +93,7 @@ pub unsafe fn _mm512_aesenc_epi128(a: __m512i, round_key: __m512i) -> __m512i {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesenclast_epi128)
#[inline]
-#[target_feature(enable = "avx512vaes,avx512f")]
+#[target_feature(enable = "vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesenclast))]
pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesenclast_512(a, round_key)
@@ -104,7 +104,7 @@ pub unsafe fn _mm512_aesenclast_epi128(a: __m512i, round_key: __m512i) -> __m512
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdec_epi128)
#[inline]
-#[target_feature(enable = "avx512vaes,avx512f")]
+#[target_feature(enable = "vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesdec))]
pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesdec_512(a, round_key)
@@ -115,7 +115,7 @@ pub unsafe fn _mm512_aesdec_epi128(a: __m512i, round_key: __m512i) -> __m512i {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_aesdeclast_epi128)
#[inline]
-#[target_feature(enable = "avx512vaes,avx512f")]
+#[target_feature(enable = "vaes,avx512f")]
#[cfg_attr(test, assert_instr(vaesdeclast))]
pub unsafe fn _mm512_aesdeclast_epi128(a: __m512i, round_key: __m512i) -> __m512i {
aesdeclast_512(a, round_key)
@@ -138,7 +138,7 @@ mod tests {
// ideally we'd be using quickcheck here instead
#[target_feature(enable = "avx2")]
- unsafe fn helper_for_256_avx512vaes(
+ unsafe fn helper_for_256_vaes(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
) {
@@ -187,7 +187,7 @@ mod tests {
setup_state_key(_mm512_broadcast_i32x4)
}
- #[simd_test(enable = "avx512vaes,avx512vl")]
+ #[simd_test(enable = "vaes,avx512vl")]
unsafe fn test_mm256_aesdec_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
let (a, k) = setup_state_key_256();
@@ -196,10 +196,10 @@ mod tests {
let r = _mm256_aesdec_epi128(a, k);
assert_eq_m256i(r, e);
- helper_for_256_avx512vaes(_mm_aesdec_si128, _mm256_aesdec_epi128);
+ helper_for_256_vaes(_mm_aesdec_si128, _mm256_aesdec_epi128);
}
- #[simd_test(enable = "avx512vaes,avx512vl")]
+ #[simd_test(enable = "vaes,avx512vl")]
unsafe fn test_mm256_aesdeclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
let (a, k) = setup_state_key_256();
@@ -208,10 +208,10 @@ mod tests {
let r = _mm256_aesdeclast_epi128(a, k);
assert_eq_m256i(r, e);
- helper_for_256_avx512vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128);
+ helper_for_256_vaes(_mm_aesdeclast_si128, _mm256_aesdeclast_epi128);
}
- #[simd_test(enable = "avx512vaes,avx512vl")]
+ #[simd_test(enable = "vaes,avx512vl")]
unsafe fn test_mm256_aesenc_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
// they are repeated appropriately
@@ -221,10 +221,10 @@ mod tests {
let r = _mm256_aesenc_epi128(a, k);
assert_eq_m256i(r, e);
- helper_for_256_avx512vaes(_mm_aesenc_si128, _mm256_aesenc_epi128);
+ helper_for_256_vaes(_mm_aesenc_si128, _mm256_aesenc_epi128);
}
- #[simd_test(enable = "avx512vaes,avx512vl")]
+ #[simd_test(enable = "vaes,avx512vl")]
unsafe fn test_mm256_aesenclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
let (a, k) = setup_state_key_256();
@@ -233,11 +233,11 @@ mod tests {
let r = _mm256_aesenclast_epi128(a, k);
assert_eq_m256i(r, e);
- helper_for_256_avx512vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128);
+ helper_for_256_vaes(_mm_aesenclast_si128, _mm256_aesenclast_epi128);
}
#[target_feature(enable = "avx512f")]
- unsafe fn helper_for_512_avx512vaes(
+ unsafe fn helper_for_512_vaes(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
) {
@@ -282,7 +282,7 @@ mod tests {
assert_eq_m128i(_mm512_extracti32x4_epi32::<3>(r), e_decomp[3]);
}
- #[simd_test(enable = "avx512vaes,avx512f")]
+ #[simd_test(enable = "vaes,avx512f")]
unsafe fn test_mm512_aesdec_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664949.aspx.
let (a, k) = setup_state_key_512();
@@ -291,10 +291,10 @@ mod tests {
let r = _mm512_aesdec_epi128(a, k);
assert_eq_m512i(r, e);
- helper_for_512_avx512vaes(_mm_aesdec_si128, _mm512_aesdec_epi128);
+ helper_for_512_vaes(_mm_aesdec_si128, _mm512_aesdec_epi128);
}
- #[simd_test(enable = "avx512vaes,avx512f")]
+ #[simd_test(enable = "vaes,avx512f")]
unsafe fn test_mm512_aesdeclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714178.aspx.
let (a, k) = setup_state_key_512();
@@ -303,10 +303,10 @@ mod tests {
let r = _mm512_aesdeclast_epi128(a, k);
assert_eq_m512i(r, e);
- helper_for_512_avx512vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128);
+ helper_for_512_vaes(_mm_aesdeclast_si128, _mm512_aesdeclast_epi128);
}
- #[simd_test(enable = "avx512vaes,avx512f")]
+ #[simd_test(enable = "vaes,avx512f")]
unsafe fn test_mm512_aesenc_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc664810.aspx.
let (a, k) = setup_state_key_512();
@@ -315,10 +315,10 @@ mod tests {
let r = _mm512_aesenc_epi128(a, k);
assert_eq_m512i(r, e);
- helper_for_512_avx512vaes(_mm_aesenc_si128, _mm512_aesenc_epi128);
+ helper_for_512_vaes(_mm_aesenc_si128, _mm512_aesenc_epi128);
}
- #[simd_test(enable = "avx512vaes,avx512f")]
+ #[simd_test(enable = "vaes,avx512f")]
unsafe fn test_mm512_aesenclast_epi128() {
// Constants taken from https://msdn.microsoft.com/en-us/library/cc714136.aspx.
let (a, k) = setup_state_key_512();
@@ -327,6 +327,6 @@ mod tests {
let r = _mm512_aesenclast_epi128(a, k);
assert_eq_m512i(r, e);
- helper_for_512_avx512vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128);
+ helper_for_512_vaes(_mm_aesenclast_si128, _mm512_aesenclast_epi128);
}
}
diff --git a/library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs b/library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs
index 9bfeb903a..ea76708b8 100644
--- a/library/stdarch/crates/core_arch/src/x86/avx512vpclmulqdq.rs
+++ b/library/stdarch/crates/core_arch/src/x86/vpclmulqdq.rs
@@ -32,7 +32,7 @@ extern "C" {
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_clmulepi64_epi128)
#[inline]
-#[target_feature(enable = "avx512vpclmulqdq,avx512f")]
+#[target_feature(enable = "vpclmulqdq,avx512f")]
// technically according to Intel's documentation we don't need avx512f here, however LLVM gets confused otherwise
#[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
@@ -50,7 +50,7 @@ pub unsafe fn _mm512_clmulepi64_epi128<const IMM8: i32>(a: __m512i, b: __m512i)
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_clmulepi64_epi128)
#[inline]
-#[target_feature(enable = "avx512vpclmulqdq,avx512vl")]
+#[target_feature(enable = "vpclmulqdq")]
#[cfg_attr(test, assert_instr(vpclmul, IMM8 = 0))]
#[rustc_legacy_const_generics(2)]
pub unsafe fn _mm256_clmulepi64_epi128<const IMM8: i32>(a: __m256i, b: __m256i) -> __m256i {
@@ -121,7 +121,7 @@ mod tests {
// this function tests one of the possible 4 instances
// with different inputs across lanes
- #[target_feature(enable = "avx512vpclmulqdq,avx512f")]
+ #[target_feature(enable = "vpclmulqdq,avx512f")]
unsafe fn verify_512_helper(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m512i, __m512i) -> __m512i,
@@ -162,7 +162,7 @@ mod tests {
// this function tests one of the possible 4 instances
// with different inputs across lanes for the VL version
- #[target_feature(enable = "avx512vpclmulqdq,avx512vl")]
+ #[target_feature(enable = "vpclmulqdq,avx512vl")]
unsafe fn verify_256_helper(
linear: unsafe fn(__m128i, __m128i) -> __m128i,
vectorized: unsafe fn(__m256i, __m256i) -> __m256i,
@@ -204,7 +204,7 @@ mod tests {
unroll! {assert_eq_m128i(_mm256_extracti128_si256::<2>(r),e_decomp[2]);}
}
- #[simd_test(enable = "avx512vpclmulqdq,avx512f")]
+ #[simd_test(enable = "vpclmulqdq,avx512f")]
unsafe fn test_mm512_clmulepi64_epi128() {
verify_kat_pclmul!(
_mm512_broadcast_i32x4,
@@ -230,7 +230,7 @@ mod tests {
);
}
- #[simd_test(enable = "avx512vpclmulqdq,avx512vl")]
+ #[simd_test(enable = "vpclmulqdq,avx512vl")]
unsafe fn test_mm256_clmulepi64_epi128() {
verify_kat_pclmul!(
_mm256_broadcastsi128_si256,