summaryrefslogtreecommitdiffstats
path: root/library/stdarch/crates/core_arch/src/x86/sse42.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86/sse42.rs')
-rw-r--r--library/stdarch/crates/core_arch/src/x86/sse42.rs802
1 files changed, 802 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/sse42.rs b/library/stdarch/crates/core_arch/src/x86/sse42.rs
new file mode 100644
index 000000000..f474b0671
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86/sse42.rs
@@ -0,0 +1,802 @@
+//! Streaming SIMD Extensions 4.2 (SSE4.2)
+//!
+//! Extends SSE4.1 with STTNI (String and Text New Instructions).
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+use crate::{
+ core_arch::{simd::*, simd_llvm::*, x86::*},
+ mem::transmute,
+};
+
+/// String contains unsigned 8-bit characters *(Default)*
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000;
+/// String contains unsigned 16-bit characters
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001;
+/// String contains signed 8-bit characters
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010;
+/// String contains unsigned 16-bit characters
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011;
+
+/// For each character in `a`, find if it is in `b` *(Default)*
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000;
+/// For each character in `a`, determine if
+/// `b[0] <= c <= b[1] or b[1] <= c <= b[2]...`
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100;
+/// The strings defined by `a` and `b` are equal
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000;
+/// Search for the defined substring in the target
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100;
+
+/// Do not negate results *(Default)*
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000;
+/// Negates results
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000;
+/// Do not negate results before the end of the string
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000;
+/// Negates results only before the end of the string
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000;
+
+/// **Index only**: return the least significant bit *(Default)*
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000;
+/// **Index only**: return the most significant bit
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000;
+
+/// **Mask only**: return the bit mask
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_BIT_MASK: i32 = 0b0000_0000;
+/// **Mask only**: return the byte mask
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000;
+
+/// Compares packed strings with implicit lengths in `a` and `b` using the
+/// control in `IMM8`, and return the generated mask.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpistrm, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpistrm<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+ static_assert_imm8!(IMM8);
+ transmute(pcmpistrm128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8))
+}
+
+/// Compares packed strings with implicit lengths in `a` and `b` using the
+/// control in `IMM8` and return the generated index. Similar to
+/// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the
+/// lengths of `a` and `b` to be explicitly specified.
+///
+/// # Control modes
+///
+/// The control specified by `IMM8` may be one or more of the following.
+///
+/// ## Data size and signedness
+///
+/// - [`_SIDD_UBYTE_OPS`] - Default
+/// - [`_SIDD_UWORD_OPS`]
+/// - [`_SIDD_SBYTE_OPS`]
+/// - [`_SIDD_SWORD_OPS`]
+///
+/// ## Comparison options
+/// - [`_SIDD_CMP_EQUAL_ANY`] - Default
+/// - [`_SIDD_CMP_RANGES`]
+/// - [`_SIDD_CMP_EQUAL_EACH`]
+/// - [`_SIDD_CMP_EQUAL_ORDERED`]
+///
+/// ## Result polarity
+/// - [`_SIDD_POSITIVE_POLARITY`] - Default
+/// - [`_SIDD_NEGATIVE_POLARITY`]
+///
+/// ## Bit returned
+/// - [`_SIDD_LEAST_SIGNIFICANT`] - Default
+/// - [`_SIDD_MOST_SIGNIFICANT`]
+///
+/// # Examples
+///
+/// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`]
+///
+/// ```
+/// #[cfg(target_arch = "x86")]
+/// use std::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use std::arch::x86_64::*;
+///
+/// # fn main() {
+/// # if is_x86_feature_detected!("sse4.2") {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
+/// let haystack = b"This is a long string of text data\r\n\tthat extends
+/// multiple lines";
+/// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0";
+///
+/// let a = _mm_loadu_si128(needle.as_ptr() as *const _);
+/// let hop = 16;
+/// let mut indexes = Vec::new();
+///
+/// // Chunk the haystack into 16 byte chunks and find
+/// // the first "\r\n\t" in the chunk.
+/// for (i, chunk) in haystack.chunks(hop).enumerate() {
+/// let b = _mm_loadu_si128(chunk.as_ptr() as *const _);
+/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED);
+/// if idx != 16 {
+/// indexes.push((idx as usize) + (i * hop));
+/// }
+/// }
+/// assert_eq!(indexes, vec![34]);
+/// # }
+/// # unsafe { worker(); }
+/// # }
+/// # }
+/// ```
+///
+/// The `_mm_cmpistri` intrinsic may also be used to find the existence of
+/// one or more of a given set of characters in the haystack.
+///
+/// ```
+/// #[cfg(target_arch = "x86")]
+/// use std::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use std::arch::x86_64::*;
+///
+/// # fn main() {
+/// # if is_x86_feature_detected!("sse4.2") {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
+/// // Ensure your input is 16 byte aligned
+/// let password = b"hunter2\0\0\0\0\0\0\0\0\0";
+/// let special_chars = b"!@#$%^&*()[]:;<>";
+///
+/// // Load the input
+/// let a = _mm_loadu_si128(special_chars.as_ptr() as *const _);
+/// let b = _mm_loadu_si128(password.as_ptr() as *const _);
+///
+/// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b
+/// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY);
+///
+/// if idx < 16 {
+/// println!("Congrats! Your password contains a special character");
+/// # panic!("{:?} does not contain a special character", password);
+/// } else {
+/// println!("Your password should contain a special character");
+/// }
+/// # }
+/// # unsafe { worker(); }
+/// # }
+/// # }
+/// ```
+///
+/// Finds the index of the first character in the haystack that is within a
+/// range of characters.
+///
+/// ```
+/// #[cfg(target_arch = "x86")]
+/// use std::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use std::arch::x86_64::*;
+///
+/// # fn main() {
+/// # if is_x86_feature_detected!("sse4.2") {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
+/// # let b = b":;<=>?@[\\]^_`abc";
+/// # let b = _mm_loadu_si128(b.as_ptr() as *const _);
+///
+/// // Specify the ranges of values to be searched for [A-Za-z0-9].
+/// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0";
+/// let a = _mm_loadu_si128(a.as_ptr() as *const _);
+///
+/// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges.
+/// // Which in this case will be the first alpha numeric byte found
+/// // in the string.
+/// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES);
+///
+/// if idx < 16 {
+/// println!("Found an alpha numeric character");
+/// # assert_eq!(idx, 13);
+/// } else {
+/// println!("Did not find an alpha numeric character");
+/// }
+/// # }
+/// # unsafe { worker(); }
+/// # }
+/// # }
+/// ```
+///
+/// Working with 16-bit characters.
+///
+/// ```
+/// #[cfg(target_arch = "x86")]
+/// use std::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use std::arch::x86_64::*;
+///
+/// # fn main() {
+/// # if is_x86_feature_detected!("sse4.2") {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
+/// # let mut some_utf16_words = [0u16; 8];
+/// # let mut more_utf16_words = [0u16; 8];
+/// # '❤'.encode_utf16(&mut some_utf16_words);
+/// # '𝕊'.encode_utf16(&mut more_utf16_words);
+/// // Load the input
+/// let a = _mm_loadu_si128(some_utf16_words.as_ptr() as *const _);
+/// let b = _mm_loadu_si128(more_utf16_words.as_ptr() as *const _);
+///
+/// // Specify _SIDD_UWORD_OPS to compare words instead of bytes, and
+/// // use _SIDD_CMP_EQUAL_EACH to compare the two strings.
+/// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH);
+///
+/// if idx == 0 {
+/// println!("16-bit unicode strings were equal!");
+/// # panic!("Strings should not be equal!")
+/// } else {
+/// println!("16-bit unicode strings were not equal!");
+/// }
+/// # }
+/// # unsafe { worker(); }
+/// # }
+/// # }
+/// ```
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistri)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpistri<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpistri128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)
+}
+
+/// Compares packed strings with implicit lengths in `a` and `b` using the
+/// control in `IMM8`, and return `1` if any character in `b` was null.
+/// and `0` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrz)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpistrz<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpistriz128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)
+}
+
+/// Compares packed strings with implicit lengths in `a` and `b` using the
+/// control in `IMM8`, and return `1` if the resulting mask was non-zero,
+/// and `0` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrc)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpistrc<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpistric128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)
+}
+
+/// Compares packed strings with implicit lengths in `a` and `b` using the
+/// control in `IMM8`, and returns `1` if any character in `a` was null,
+/// and `0` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrs)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpistrs<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpistris128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)
+}
+
+/// Compares packed strings with implicit lengths in `a` and `b` using the
+/// control in `IMM8`, and return bit `0` of the resulting bit mask.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpistro<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpistrio128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)
+}
+
+/// Compares packed strings with implicit lengths in `a` and `b` using the
+/// control in `IMM8`, and return `1` if `b` did not contain a null
+/// character and the resulting mask was zero, and `0` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistra)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpistra<const IMM8: i32>(a: __m128i, b: __m128i) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpistria128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)
+}
+
+/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
+/// using the control in `IMM8`, and return the generated mask.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpestrm, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpestrm<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i {
+ static_assert_imm8!(IMM8);
+ transmute(pcmpestrm128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8))
+}
+
+/// Compares packed strings `a` and `b` with lengths `la` and `lb` using the
+/// control in `IMM8` and return the generated index. Similar to
+/// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly
+/// determines the length of `a` and `b`.
+///
+/// # Control modes
+///
+/// The control specified by `IMM8` may be one or more of the following.
+///
+/// ## Data size and signedness
+///
+/// - [`_SIDD_UBYTE_OPS`] - Default
+/// - [`_SIDD_UWORD_OPS`]
+/// - [`_SIDD_SBYTE_OPS`]
+/// - [`_SIDD_SWORD_OPS`]
+///
+/// ## Comparison options
+/// - [`_SIDD_CMP_EQUAL_ANY`] - Default
+/// - [`_SIDD_CMP_RANGES`]
+/// - [`_SIDD_CMP_EQUAL_EACH`]
+/// - [`_SIDD_CMP_EQUAL_ORDERED`]
+///
+/// ## Result polarity
+/// - [`_SIDD_POSITIVE_POLARITY`] - Default
+/// - [`_SIDD_NEGATIVE_POLARITY`]
+///
+/// ## Bit returned
+/// - [`_SIDD_LEAST_SIGNIFICANT`] - Default
+/// - [`_SIDD_MOST_SIGNIFICANT`]
+///
+/// # Examples
+///
+/// ```
+/// #[cfg(target_arch = "x86")]
+/// use std::arch::x86::*;
+/// #[cfg(target_arch = "x86_64")]
+/// use std::arch::x86_64::*;
+///
+/// # fn main() {
+/// # if is_x86_feature_detected!("sse4.2") {
+/// # #[target_feature(enable = "sse4.2")]
+/// # unsafe fn worker() {
+///
+/// // The string we want to find a substring in
+/// let haystack = b"Split \r\n\t line ";
+///
+/// // The string we want to search for with some
+/// // extra bytes we do not want to search for.
+/// let needle = b"\r\n\t ignore this ";
+///
+/// let a = _mm_loadu_si128(needle.as_ptr() as *const _);
+/// let b = _mm_loadu_si128(haystack.as_ptr() as *const _);
+///
+/// // Note: We explicitly specify we only want to search `b` for the
+/// // first 3 characters of a.
+/// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED);
+///
+/// assert_eq!(idx, 6);
+/// # }
+/// # unsafe { worker(); }
+/// # }
+/// # }
+/// ```
+///
+/// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html
+/// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html
+/// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html
+/// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html
+/// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html
+/// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html
+/// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html
+/// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html
+/// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html
+/// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html
+/// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html
+/// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html
+/// [`_mm_cmpistri`]: fn._mm_cmpistri.html
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpestri<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpestri128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)
+}
+
+/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
+/// using the control in `IMM8`, and return `1` if any character in
+/// `b` was null, and `0` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrz)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpestrz<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpestriz128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)
+}
+
+/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
+/// using the control in `IMM8`, and return `1` if the resulting mask
+/// was non-zero, and `0` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrc)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpestrc<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpestric128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)
+}
+
+/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
+/// using the control in `IMM8`, and return `1` if any character in
+/// a was null, and `0` otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrs)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpestrs<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpestris128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)
+}
+
+/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
+/// using the control in `IMM8`, and return bit `0` of the resulting
+/// bit mask.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestro)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpestro<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpestrio128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)
+}
+
+/// Compares packed strings in `a` and `b` with lengths `la` and `lb`
+/// using the control in `IMM8`, and return `1` if `b` did not
+/// contain a null character and the resulting mask was zero, and `0`
+/// otherwise.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestra)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))]
+#[rustc_legacy_const_generics(4)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpestra<const IMM8: i32>(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 {
+ static_assert_imm8!(IMM8);
+ pcmpestria128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)
+}
+
+/// Starting with the initial value in `crc`, return the accumulated
+/// CRC32-C value for unsigned 8-bit integer `v`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u8)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(crc32))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 {
+ crc32_32_8(crc, v)
+}
+
+/// Starting with the initial value in `crc`, return the accumulated
+/// CRC32-C value for unsigned 16-bit integer `v`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u16)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(crc32))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 {
+ crc32_32_16(crc, v)
+}
+
+/// Starting with the initial value in `crc`, return the accumulated
+/// CRC32-C value for unsigned 32-bit integer `v`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u32)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(crc32))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 {
+ crc32_32_32(crc, v)
+}
+
+/// Compares packed 64-bit integers in `a` and `b` for greater-than,
+/// return the results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64)
+#[inline]
+#[target_feature(enable = "sse4.2")]
+#[cfg_attr(test, assert_instr(pcmpgtq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i {
+ transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2()))
+}
+
+#[allow(improper_ctypes)]
+extern "C" {
+ // SSE 4.2 string and text comparison ops
+ #[link_name = "llvm.x86.sse42.pcmpestrm128"]
+ fn pcmpestrm128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> u8x16;
+ #[link_name = "llvm.x86.sse42.pcmpestri128"]
+ fn pcmpestri128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpestriz128"]
+ fn pcmpestriz128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpestric128"]
+ fn pcmpestric128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpestris128"]
+ fn pcmpestris128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpestrio128"]
+ fn pcmpestrio128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpestria128"]
+ fn pcmpestria128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpistrm128"]
+ fn pcmpistrm128(a: i8x16, b: i8x16, imm8: i8) -> i8x16;
+ #[link_name = "llvm.x86.sse42.pcmpistri128"]
+ fn pcmpistri128(a: i8x16, b: i8x16, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpistriz128"]
+ fn pcmpistriz128(a: i8x16, b: i8x16, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpistric128"]
+ fn pcmpistric128(a: i8x16, b: i8x16, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpistris128"]
+ fn pcmpistris128(a: i8x16, b: i8x16, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpistrio128"]
+ fn pcmpistrio128(a: i8x16, b: i8x16, imm8: i8) -> i32;
+ #[link_name = "llvm.x86.sse42.pcmpistria128"]
+ fn pcmpistria128(a: i8x16, b: i8x16, imm8: i8) -> i32;
+ // SSE 4.2 CRC instructions
+ #[link_name = "llvm.x86.sse42.crc32.32.8"]
+ fn crc32_32_8(crc: u32, v: u8) -> u32;
+ #[link_name = "llvm.x86.sse42.crc32.32.16"]
+ fn crc32_32_16(crc: u32, v: u16) -> u32;
+ #[link_name = "llvm.x86.sse42.crc32.32.32"]
+ fn crc32_32_32(crc: u32, v: u32) -> u32;
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86::*;
+ use std::ptr;
+
+ // Currently one cannot `load` a &[u8] that is is less than 16
+ // in length. This makes loading strings less than 16 in length
+ // a bit difficult. Rather than `load` and mutate the __m128i,
+ // it is easier to memcpy the given string to a local slice with
+ // length 16 and `load` the local slice.
+ #[target_feature(enable = "sse4.2")]
+ unsafe fn str_to_m128i(s: &[u8]) -> __m128i {
+ assert!(s.len() <= 16);
+ let slice = &mut [0u8; 16];
+ ptr::copy_nonoverlapping(
+ s.get_unchecked(0) as *const u8 as *const u8,
+ slice.get_unchecked_mut(0) as *mut u8 as *mut u8,
+ s.len(),
+ );
+ _mm_loadu_si128(slice.as_ptr() as *const _)
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpistrm() {
+ let a = str_to_m128i(b"Hello! Good-Bye!");
+ let b = str_to_m128i(b"hello! good-bye!");
+ let i = _mm_cmpistrm::<_SIDD_UNIT_MASK>(a, b);
+ #[rustfmt::skip]
+ let res = _mm_setr_epi8(
+ 0x00, !0, !0, !0, !0, !0, !0, 0x00,
+ !0, !0, !0, !0, 0x00, !0, !0, !0,
+ );
+ assert_eq_m128i(i, res);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpistri() {
+ let a = str_to_m128i(b"Hello");
+ let b = str_to_m128i(b" Hello ");
+ let i = _mm_cmpistri::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
+ assert_eq!(3, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpistrz() {
+ let a = str_to_m128i(b"");
+ let b = str_to_m128i(b"Hello");
+ let i = _mm_cmpistrz::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
+ assert_eq!(1, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpistrc() {
+ let a = str_to_m128i(b" ");
+ let b = str_to_m128i(b" ! ");
+ let i = _mm_cmpistrc::<_SIDD_UNIT_MASK>(a, b);
+ assert_eq!(1, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpistrs() {
+ let a = str_to_m128i(b"Hello");
+ let b = str_to_m128i(b"");
+ let i = _mm_cmpistrs::<_SIDD_CMP_EQUAL_ORDERED>(a, b);
+ assert_eq!(1, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpistro() {
+ #[rustfmt::skip]
+ let a_bytes = _mm_setr_epi8(
+ 0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
+ 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ );
+ #[rustfmt::skip]
+ let b_bytes = _mm_setr_epi8(
+ 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
+ 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ );
+ let a = a_bytes;
+ let b = b_bytes;
+ let i = _mm_cmpistro::<{ _SIDD_UWORD_OPS | _SIDD_UNIT_MASK }>(a, b);
+ assert_eq!(0, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpistra() {
+ let a = str_to_m128i(b"");
+ let b = str_to_m128i(b"Hello!!!!!!!!!!!");
+ let i = _mm_cmpistra::<_SIDD_UNIT_MASK>(a, b);
+ assert_eq!(1, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpestrm() {
+ let a = str_to_m128i(b"Hello!");
+ let b = str_to_m128i(b"Hello.");
+ let i = _mm_cmpestrm::<_SIDD_UNIT_MASK>(a, 5, b, 5);
+ #[rustfmt::skip]
+ let r = _mm_setr_epi8(
+ !0, !0, !0, !0, !0, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
+ );
+ assert_eq_m128i(i, r);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpestri() {
+ let a = str_to_m128i(b"bar - garbage");
+ let b = str_to_m128i(b"foobar");
+ let i = _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(a, 3, b, 6);
+ assert_eq!(3, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpestrz() {
+ let a = str_to_m128i(b"");
+ let b = str_to_m128i(b"Hello");
+ let i = _mm_cmpestrz::<_SIDD_CMP_EQUAL_ORDERED>(a, 16, b, 6);
+ assert_eq!(1, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpestrc() {
+ let va = str_to_m128i(b"!!!!!!!!");
+ let vb = str_to_m128i(b" ");
+ let i = _mm_cmpestrc::<_SIDD_UNIT_MASK>(va, 7, vb, 7);
+ assert_eq!(0, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpestrs() {
+ #[rustfmt::skip]
+ let a_bytes = _mm_setr_epi8(
+ 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c,
+ 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ );
+ let a = a_bytes;
+ let b = _mm_set1_epi8(0x00);
+ let i = _mm_cmpestrs::<_SIDD_UWORD_OPS>(a, 8, b, 0);
+ assert_eq!(0, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpestro() {
+ let a = str_to_m128i(b"Hello");
+ let b = str_to_m128i(b"World");
+ let i = _mm_cmpestro::<_SIDD_UBYTE_OPS>(a, 5, b, 5);
+ assert_eq!(0, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpestra() {
+ let a = str_to_m128i(b"Cannot match a");
+ let b = str_to_m128i(b"Null after 14");
+ let i = _mm_cmpestra::<{ _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK }>(a, 14, b, 16);
+ assert_eq!(1, i);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_crc32_u8() {
+ let crc = 0x2aa1e72b;
+ let v = 0x2a;
+ let i = _mm_crc32_u8(crc, v);
+ assert_eq!(i, 0xf24122e4);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_crc32_u16() {
+ let crc = 0x8ecec3b5;
+ let v = 0x22b;
+ let i = _mm_crc32_u16(crc, v);
+ assert_eq!(i, 0x13bb2fb);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_crc32_u32() {
+ let crc = 0xae2912c8;
+ let v = 0x845fed;
+ let i = _mm_crc32_u32(crc, v);
+ assert_eq!(i, 0xffae2ed1);
+ }
+
+ #[simd_test(enable = "sse4.2")]
+ unsafe fn test_mm_cmpgt_epi64() {
+ let a = _mm_setr_epi64x(0, 0x2a);
+ let b = _mm_set1_epi64x(0x00);
+ let i = _mm_cmpgt_epi64(a, b);
+ assert_eq_m128i(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64));
+ }
+}