//! Streaming SIMD Extensions 4.2 (SSE4.2) //! //! Extends SSE4.1 with STTNI (String and Text New Instructions). #[cfg(test)] use stdarch_test::assert_instr; use crate::{ core_arch::{simd::*, simd_llvm::*, x86::*}, mem::transmute, }; /// String contains unsigned 8-bit characters *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UBYTE_OPS: i32 = 0b0000_0000; /// String contains unsigned 16-bit characters #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UWORD_OPS: i32 = 0b0000_0001; /// String contains signed 8-bit characters #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_SBYTE_OPS: i32 = 0b0000_0010; /// String contains unsigned 16-bit characters #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_SWORD_OPS: i32 = 0b0000_0011; /// For each character in `a`, find if it is in `b` *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_ANY: i32 = 0b0000_0000; /// For each character in `a`, determine if /// `b[0] <= c <= b[1] or b[1] <= c <= b[2]...` #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_RANGES: i32 = 0b0000_0100; /// The strings defined by `a` and `b` are equal #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_EACH: i32 = 0b0000_1000; /// Search for the defined substring in the target #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_CMP_EQUAL_ORDERED: i32 = 0b0000_1100; /// Do not negate results *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_POSITIVE_POLARITY: i32 = 0b0000_0000; /// Negates results #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_NEGATIVE_POLARITY: i32 = 0b0001_0000; /// Do not negate results before the end of the string #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_POSITIVE_POLARITY: i32 = 0b0010_0000; /// Negates results only before the end of the string #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MASKED_NEGATIVE_POLARITY: i32 = 0b0011_0000; /// **Index only**: return the least significant bit *(Default)* #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_LEAST_SIGNIFICANT: i32 = 0b0000_0000; /// **Index only**: return the most significant bit #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_MOST_SIGNIFICANT: i32 = 0b0100_0000; /// **Mask only**: return the bit mask #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_BIT_MASK: i32 = 0b0000_0000; /// **Mask only**: return the byte mask #[stable(feature = "simd_x86", since = "1.27.0")] pub const _SIDD_UNIT_MASK: i32 = 0b0100_0000; /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `IMM8`, and return the generated mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrm) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistrm, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrm(a: __m128i, b: __m128i) -> __m128i { static_assert_imm8!(IMM8); transmute(pcmpistrm128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8)) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `IMM8` and return the generated index. Similar to /// [`_mm_cmpestri`] with the exception that [`_mm_cmpestri`] requires the /// lengths of `a` and `b` to be explicitly specified. /// /// # Control modes /// /// The control specified by `IMM8` may be one or more of the following. /// /// ## Data size and signedness /// /// - [`_SIDD_UBYTE_OPS`] - Default /// - [`_SIDD_UWORD_OPS`] /// - [`_SIDD_SBYTE_OPS`] /// - [`_SIDD_SWORD_OPS`] /// /// ## Comparison options /// - [`_SIDD_CMP_EQUAL_ANY`] - Default /// - [`_SIDD_CMP_RANGES`] /// - [`_SIDD_CMP_EQUAL_EACH`] /// - [`_SIDD_CMP_EQUAL_ORDERED`] /// /// ## Result polarity /// - [`_SIDD_POSITIVE_POLARITY`] - Default /// - [`_SIDD_NEGATIVE_POLARITY`] /// /// ## Bit returned /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default /// - [`_SIDD_MOST_SIGNIFICANT`] /// /// # Examples /// /// Finds a substring using [`_SIDD_CMP_EQUAL_ORDERED`] /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// let haystack = b"This is a long string of text data\r\n\tthat extends /// multiple lines"; /// let needle = b"\r\n\t\0\0\0\0\0\0\0\0\0\0\0\0\0"; /// /// let a = _mm_loadu_si128(needle.as_ptr() as *const _); /// let hop = 16; /// let mut indexes = Vec::new(); /// /// // Chunk the haystack into 16 byte chunks and find /// // the first "\r\n\t" in the chunk. /// for (i, chunk) in haystack.chunks(hop).enumerate() { /// let b = _mm_loadu_si128(chunk.as_ptr() as *const _); /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_EQUAL_ORDERED); /// if idx != 16 { /// indexes.push((idx as usize) + (i * hop)); /// } /// } /// assert_eq!(indexes, vec![34]); /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// The `_mm_cmpistri` intrinsic may also be used to find the existence of /// one or more of a given set of characters in the haystack. /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// // Ensure your input is 16 byte aligned /// let password = b"hunter2\0\0\0\0\0\0\0\0\0"; /// let special_chars = b"!@#$%^&*()[]:;<>"; /// /// // Load the input /// let a = _mm_loadu_si128(special_chars.as_ptr() as *const _); /// let b = _mm_loadu_si128(password.as_ptr() as *const _); /// /// // Use _SIDD_CMP_EQUAL_ANY to find the index of any bytes in b /// let idx = _mm_cmpistri(a.into(), b.into(), _SIDD_CMP_EQUAL_ANY); /// /// if idx < 16 { /// println!("Congrats! Your password contains a special character"); /// # panic!("{:?} does not contain a special character", password); /// } else { /// println!("Your password should contain a special character"); /// } /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// Finds the index of the first character in the haystack that is within a /// range of characters. /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// # let b = b":;<=>?@[\\]^_`abc"; /// # let b = _mm_loadu_si128(b.as_ptr() as *const _); /// /// // Specify the ranges of values to be searched for [A-Za-z0-9]. /// let a = b"AZaz09\0\0\0\0\0\0\0\0\0\0"; /// let a = _mm_loadu_si128(a.as_ptr() as *const _); /// /// // Use _SIDD_CMP_RANGES to find the index of first byte in ranges. /// // Which in this case will be the first alpha numeric byte found /// // in the string. /// let idx = _mm_cmpistri(a, b, _SIDD_CMP_RANGES); /// /// if idx < 16 { /// println!("Found an alpha numeric character"); /// # assert_eq!(idx, 13); /// } else { /// println!("Did not find an alpha numeric character"); /// } /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// Working with 16-bit characters. /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// # let mut some_utf16_words = [0u16; 8]; /// # let mut more_utf16_words = [0u16; 8]; /// # '❤'.encode_utf16(&mut some_utf16_words); /// # '𝕊'.encode_utf16(&mut more_utf16_words); /// // Load the input /// let a = _mm_loadu_si128(some_utf16_words.as_ptr() as *const _); /// let b = _mm_loadu_si128(more_utf16_words.as_ptr() as *const _); /// /// // Specify _SIDD_UWORD_OPS to compare words instead of bytes, and /// // use _SIDD_CMP_EQUAL_EACH to compare the two strings. /// let idx = _mm_cmpistri(a, b, _SIDD_UWORD_OPS | _SIDD_CMP_EQUAL_EACH); /// /// if idx == 0 { /// println!("16-bit unicode strings were equal!"); /// # panic!("Strings should not be equal!") /// } else { /// println!("16-bit unicode strings were not equal!"); /// } /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistri) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistri(a: __m128i, b: __m128i) -> i32 { static_assert_imm8!(IMM8); pcmpistri128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `IMM8`, and return `1` if any character in `b` was null. /// and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrz) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrz(a: __m128i, b: __m128i) -> i32 { static_assert_imm8!(IMM8); pcmpistriz128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `IMM8`, and return `1` if the resulting mask was non-zero, /// and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrc) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrc(a: __m128i, b: __m128i) -> i32 { static_assert_imm8!(IMM8); pcmpistric128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `IMM8`, and returns `1` if any character in `a` was null, /// and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistrs) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistrs(a: __m128i, b: __m128i) -> i32 { static_assert_imm8!(IMM8); pcmpistris128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `IMM8`, and return bit `0` of the resulting bit mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistro) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistro(a: __m128i, b: __m128i) -> i32 { static_assert_imm8!(IMM8); pcmpistrio128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } /// Compares packed strings with implicit lengths in `a` and `b` using the /// control in `IMM8`, and return `1` if `b` did not contain a null /// character and the resulting mask was zero, and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpistra) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpistri, IMM8 = 0))] #[rustc_legacy_const_generics(2)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpistra(a: __m128i, b: __m128i) -> i32 { static_assert_imm8!(IMM8); pcmpistria128(a.as_i8x16(), b.as_i8x16(), IMM8 as i8) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `IMM8`, and return the generated mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrm) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestrm, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrm(a: __m128i, la: i32, b: __m128i, lb: i32) -> __m128i { static_assert_imm8!(IMM8); transmute(pcmpestrm128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8)) } /// Compares packed strings `a` and `b` with lengths `la` and `lb` using the /// control in `IMM8` and return the generated index. Similar to /// [`_mm_cmpistri`] with the exception that [`_mm_cmpistri`] implicitly /// determines the length of `a` and `b`. /// /// # Control modes /// /// The control specified by `IMM8` may be one or more of the following. /// /// ## Data size and signedness /// /// - [`_SIDD_UBYTE_OPS`] - Default /// - [`_SIDD_UWORD_OPS`] /// - [`_SIDD_SBYTE_OPS`] /// - [`_SIDD_SWORD_OPS`] /// /// ## Comparison options /// - [`_SIDD_CMP_EQUAL_ANY`] - Default /// - [`_SIDD_CMP_RANGES`] /// - [`_SIDD_CMP_EQUAL_EACH`] /// - [`_SIDD_CMP_EQUAL_ORDERED`] /// /// ## Result polarity /// - [`_SIDD_POSITIVE_POLARITY`] - Default /// - [`_SIDD_NEGATIVE_POLARITY`] /// /// ## Bit returned /// - [`_SIDD_LEAST_SIGNIFICANT`] - Default /// - [`_SIDD_MOST_SIGNIFICANT`] /// /// # Examples /// /// ``` /// #[cfg(target_arch = "x86")] /// use std::arch::x86::*; /// #[cfg(target_arch = "x86_64")] /// use std::arch::x86_64::*; /// /// # fn main() { /// # if is_x86_feature_detected!("sse4.2") { /// # #[target_feature(enable = "sse4.2")] /// # unsafe fn worker() { /// /// // The string we want to find a substring in /// let haystack = b"Split \r\n\t line "; /// /// // The string we want to search for with some /// // extra bytes we do not want to search for. /// let needle = b"\r\n\t ignore this "; /// /// let a = _mm_loadu_si128(needle.as_ptr() as *const _); /// let b = _mm_loadu_si128(haystack.as_ptr() as *const _); /// /// // Note: We explicitly specify we only want to search `b` for the /// // first 3 characters of a. /// let idx = _mm_cmpestri(a, 3, b, 15, _SIDD_CMP_EQUAL_ORDERED); /// /// assert_eq!(idx, 6); /// # } /// # unsafe { worker(); } /// # } /// # } /// ``` /// /// [`_SIDD_UBYTE_OPS`]: constant._SIDD_UBYTE_OPS.html /// [`_SIDD_UWORD_OPS`]: constant._SIDD_UWORD_OPS.html /// [`_SIDD_SBYTE_OPS`]: constant._SIDD_SBYTE_OPS.html /// [`_SIDD_SWORD_OPS`]: constant._SIDD_SWORD_OPS.html /// [`_SIDD_CMP_EQUAL_ANY`]: constant._SIDD_CMP_EQUAL_ANY.html /// [`_SIDD_CMP_RANGES`]: constant._SIDD_CMP_RANGES.html /// [`_SIDD_CMP_EQUAL_EACH`]: constant._SIDD_CMP_EQUAL_EACH.html /// [`_SIDD_CMP_EQUAL_ORDERED`]: constant._SIDD_CMP_EQUAL_ORDERED.html /// [`_SIDD_POSITIVE_POLARITY`]: constant._SIDD_POSITIVE_POLARITY.html /// [`_SIDD_NEGATIVE_POLARITY`]: constant._SIDD_NEGATIVE_POLARITY.html /// [`_SIDD_LEAST_SIGNIFICANT`]: constant._SIDD_LEAST_SIGNIFICANT.html /// [`_SIDD_MOST_SIGNIFICANT`]: constant._SIDD_MOST_SIGNIFICANT.html /// [`_mm_cmpistri`]: fn._mm_cmpistri.html /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestri) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestri(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_imm8!(IMM8); pcmpestri128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `IMM8`, and return `1` if any character in /// `b` was null, and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrz) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrz(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_imm8!(IMM8); pcmpestriz128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `IMM8`, and return `1` if the resulting mask /// was non-zero, and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrc) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrc(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_imm8!(IMM8); pcmpestric128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `IMM8`, and return `1` if any character in /// a was null, and `0` otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestrs) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestrs(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_imm8!(IMM8); pcmpestris128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `IMM8`, and return bit `0` of the resulting /// bit mask. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestro) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestro(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_imm8!(IMM8); pcmpestrio128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } /// Compares packed strings in `a` and `b` with lengths `la` and `lb` /// using the control in `IMM8`, and return `1` if `b` did not /// contain a null character and the resulting mask was zero, and `0` /// otherwise. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpestra) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpestri, IMM8 = 0))] #[rustc_legacy_const_generics(4)] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpestra(a: __m128i, la: i32, b: __m128i, lb: i32) -> i32 { static_assert_imm8!(IMM8); pcmpestria128(a.as_i8x16(), la, b.as_i8x16(), lb, IMM8 as i8) } /// Starting with the initial value in `crc`, return the accumulated /// CRC32-C value for unsigned 8-bit integer `v`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u8) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u8(crc: u32, v: u8) -> u32 { crc32_32_8(crc, v) } /// Starting with the initial value in `crc`, return the accumulated /// CRC32-C value for unsigned 16-bit integer `v`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u16) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u16(crc: u32, v: u16) -> u32 { crc32_32_16(crc, v) } /// Starting with the initial value in `crc`, return the accumulated /// CRC32-C value for unsigned 32-bit integer `v`. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_crc32_u32) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(crc32))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_crc32_u32(crc: u32, v: u32) -> u32 { crc32_32_32(crc, v) } /// Compares packed 64-bit integers in `a` and `b` for greater-than, /// return the results. /// /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi64) #[inline] #[target_feature(enable = "sse4.2")] #[cfg_attr(test, assert_instr(pcmpgtq))] #[stable(feature = "simd_x86", since = "1.27.0")] pub unsafe fn _mm_cmpgt_epi64(a: __m128i, b: __m128i) -> __m128i { transmute(simd_gt::<_, i64x2>(a.as_i64x2(), b.as_i64x2())) } #[allow(improper_ctypes)] extern "C" { // SSE 4.2 string and text comparison ops #[link_name = "llvm.x86.sse42.pcmpestrm128"] fn pcmpestrm128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> u8x16; #[link_name = "llvm.x86.sse42.pcmpestri128"] fn pcmpestri128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestriz128"] fn pcmpestriz128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestric128"] fn pcmpestric128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestris128"] fn pcmpestris128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestrio128"] fn pcmpestrio128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpestria128"] fn pcmpestria128(a: i8x16, la: i32, b: i8x16, lb: i32, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistrm128"] fn pcmpistrm128(a: i8x16, b: i8x16, imm8: i8) -> i8x16; #[link_name = "llvm.x86.sse42.pcmpistri128"] fn pcmpistri128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistriz128"] fn pcmpistriz128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistric128"] fn pcmpistric128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistris128"] fn pcmpistris128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistrio128"] fn pcmpistrio128(a: i8x16, b: i8x16, imm8: i8) -> i32; #[link_name = "llvm.x86.sse42.pcmpistria128"] fn pcmpistria128(a: i8x16, b: i8x16, imm8: i8) -> i32; // SSE 4.2 CRC instructions #[link_name = "llvm.x86.sse42.crc32.32.8"] fn crc32_32_8(crc: u32, v: u8) -> u32; #[link_name = "llvm.x86.sse42.crc32.32.16"] fn crc32_32_16(crc: u32, v: u16) -> u32; #[link_name = "llvm.x86.sse42.crc32.32.32"] fn crc32_32_32(crc: u32, v: u32) -> u32; } #[cfg(test)] mod tests { use stdarch_test::simd_test; use crate::core_arch::x86::*; use std::ptr; // Currently one cannot `load` a &[u8] that is less than 16 // in length. This makes loading strings less than 16 in length // a bit difficult. Rather than `load` and mutate the __m128i, // it is easier to memcpy the given string to a local slice with // length 16 and `load` the local slice. #[target_feature(enable = "sse4.2")] unsafe fn str_to_m128i(s: &[u8]) -> __m128i { assert!(s.len() <= 16); let slice = &mut [0u8; 16]; ptr::copy_nonoverlapping(s.as_ptr(), slice.as_mut_ptr(), s.len()); _mm_loadu_si128(slice.as_ptr() as *const _) } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistrm() { let a = str_to_m128i(b"Hello! Good-Bye!"); let b = str_to_m128i(b"hello! good-bye!"); let i = _mm_cmpistrm::<_SIDD_UNIT_MASK>(a, b); #[rustfmt::skip] let res = _mm_setr_epi8( 0x00, !0, !0, !0, !0, !0, !0, 0x00, !0, !0, !0, !0, 0x00, !0, !0, !0, ); assert_eq_m128i(i, res); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistri() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b" Hello "); let i = _mm_cmpistri::<_SIDD_CMP_EQUAL_ORDERED>(a, b); assert_eq!(3, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); let i = _mm_cmpistrz::<_SIDD_CMP_EQUAL_ORDERED>(a, b); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistrc() { let a = str_to_m128i(b" "); let b = str_to_m128i(b" ! "); let i = _mm_cmpistrc::<_SIDD_UNIT_MASK>(a, b); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistrs() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b""); let i = _mm_cmpistrs::<_SIDD_CMP_EQUAL_ORDERED>(a, b); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistro() { #[rustfmt::skip] let a_bytes = _mm_setr_epi8( 0x00, 0x47, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); #[rustfmt::skip] let b_bytes = _mm_setr_epi8( 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); let a = a_bytes; let b = b_bytes; let i = _mm_cmpistro::<{ _SIDD_UWORD_OPS | _SIDD_UNIT_MASK }>(a, b); assert_eq!(0, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpistra() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello!!!!!!!!!!!"); let i = _mm_cmpistra::<_SIDD_UNIT_MASK>(a, b); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestrm() { let a = str_to_m128i(b"Hello!"); let b = str_to_m128i(b"Hello."); let i = _mm_cmpestrm::<_SIDD_UNIT_MASK>(a, 5, b, 5); #[rustfmt::skip] let r = _mm_setr_epi8( !0, !0, !0, !0, !0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 ); assert_eq_m128i(i, r); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestri() { let a = str_to_m128i(b"bar - garbage"); let b = str_to_m128i(b"foobar"); let i = _mm_cmpestri::<_SIDD_CMP_EQUAL_ORDERED>(a, 3, b, 6); assert_eq!(3, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestrz() { let a = str_to_m128i(b""); let b = str_to_m128i(b"Hello"); let i = _mm_cmpestrz::<_SIDD_CMP_EQUAL_ORDERED>(a, 16, b, 6); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestrc() { let va = str_to_m128i(b"!!!!!!!!"); let vb = str_to_m128i(b" "); let i = _mm_cmpestrc::<_SIDD_UNIT_MASK>(va, 7, vb, 7); assert_eq!(0, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestrs() { #[rustfmt::skip] let a_bytes = _mm_setr_epi8( 0x00, 0x48, 0x00, 0x65, 0x00, 0x6c, 0x00, 0x6c, 0x00, 0x6f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, ); let a = a_bytes; let b = _mm_set1_epi8(0x00); let i = _mm_cmpestrs::<_SIDD_UWORD_OPS>(a, 8, b, 0); assert_eq!(0, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestro() { let a = str_to_m128i(b"Hello"); let b = str_to_m128i(b"World"); let i = _mm_cmpestro::<_SIDD_UBYTE_OPS>(a, 5, b, 5); assert_eq!(0, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpestra() { let a = str_to_m128i(b"Cannot match a"); let b = str_to_m128i(b"Null after 14"); let i = _mm_cmpestra::<{ _SIDD_CMP_EQUAL_EACH | _SIDD_UNIT_MASK }>(a, 14, b, 16); assert_eq!(1, i); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u8() { let crc = 0x2aa1e72b; let v = 0x2a; let i = _mm_crc32_u8(crc, v); assert_eq!(i, 0xf24122e4); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u16() { let crc = 0x8ecec3b5; let v = 0x22b; let i = _mm_crc32_u16(crc, v); assert_eq!(i, 0x13bb2fb); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_crc32_u32() { let crc = 0xae2912c8; let v = 0x845fed; let i = _mm_crc32_u32(crc, v); assert_eq!(i, 0xffae2ed1); } #[simd_test(enable = "sse4.2")] unsafe fn test_mm_cmpgt_epi64() { let a = _mm_setr_epi64x(0, 0x2a); let b = _mm_set1_epi64x(0x00); let i = _mm_cmpgt_epi64(a, b); assert_eq_m128i(i, _mm_setr_epi64x(0x00, 0xffffffffffffffffu64 as i64)); } }