Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86/ssse3.rs')
-rw-r--r--  library/stdarch/crates/core_arch/src/x86/ssse3.rs  537
1 file changed, 537 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/ssse3.rs b/library/stdarch/crates/core_arch/src/x86/ssse3.rs
new file mode 100644
index 000000000..4beb496b6
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86/ssse3.rs
@@ -0,0 +1,537 @@
+//! Supplemental Streaming SIMD Extensions 3 (SSSE3)
+
+use crate::{
+ core_arch::{simd::*, simd_llvm::*, x86::*},
+ mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Computes the absolute value of packed 8-bit signed integers in `a` and
+/// returns the unsigned results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi8)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(pabsb))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_abs_epi8(a: __m128i) -> __m128i {
+ transmute(pabsb128(a.as_i8x16()))
+}
+
+/// Computes the absolute value of each of the packed 16-bit signed integers
+/// in `a` and returns the 16-bit unsigned integer results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi16)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(pabsw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_abs_epi16(a: __m128i) -> __m128i {
+ transmute(pabsw128(a.as_i16x8()))
+}
+
+/// Computes the absolute value of each of the packed 32-bit signed integers
+/// in `a` and returns the 32-bit unsigned integer results.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_abs_epi32)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(pabsd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_abs_epi32(a: __m128i) -> __m128i {
+ transmute(pabsd128(a.as_i32x4()))
+}
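+
+// A minimal scalar model of the `pabs*` family above, for illustration only
+// (the helper is ours, not part of this module): the absolute value is
+// two's-complement, so e.g. i8::MIN maps to 128u8 rather than overflowing.
+#[cfg(test)]
+#[allow(dead_code)]
+fn abs_scalar_i8(x: i8) -> u8 {
+    x.wrapping_abs() as u8
+}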
+
+/// Shuffles bytes from `a` according to the content of `b`.
+///
+/// The low 4 bits of each byte of `b` are used as indices
+/// into the 16 bytes of `a`.
+///
+/// In addition, if the most significant bit of a byte of `b`
+/// is set, the respective destination byte is set to 0.
+///
+/// Picturing `a` and `b` as `[u8; 16]`, `_mm_shuffle_epi8` is
+/// logically equivalent to:
+///
+/// ```
+/// fn mm_shuffle_epi8(a: [u8; 16], b: [u8; 16]) -> [u8; 16] {
+/// let mut r = [0u8; 16];
+/// for i in 0..16 {
+/// // if the most significant bit of b is set,
+/// // then the destination byte is set to 0.
+/// if b[i] & 0x80 == 0u8 {
+/// r[i] = a[(b[i] % 16) as usize];
+/// }
+/// }
+/// r
+/// }
+/// ```
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi8)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(pshufb))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_shuffle_epi8(a: __m128i, b: __m128i) -> __m128i {
+ transmute(pshufb128(a.as_u8x16(), b.as_u8x16()))
+}
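+
+// A usage sketch (the helper name is ours, not part of this module):
+// reversing the byte order of a vector with a constant index pattern.
+#[cfg(test)]
+#[allow(dead_code)]
+#[target_feature(enable = "ssse3")]
+unsafe fn reverse_bytes_sketch(v: __m128i) -> __m128i {
+    // index 15 selects the last byte of `v` for lane 0, and so on down to 0
+    let idx = _mm_setr_epi8(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
+    _mm_shuffle_epi8(v, idx)
+}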
+
+/// Concatenates the 16-byte blocks of `a` and `b` into a 32-byte temporary
+/// result, shifts the result right by `IMM8` bytes, and returns the low
+/// 16 bytes.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_alignr_epi8)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(palignr, IMM8 = 15))]
+#[rustc_legacy_const_generics(2)]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_alignr_epi8<const IMM8: i32>(a: __m128i, b: __m128i) -> __m128i {
+ static_assert_imm8!(IMM8);
+ // If palignr is shifting the pair of vectors more than the size of two
+ // lanes, emit zero.
+ if IMM8 > 32 {
+ return _mm_set1_epi8(0);
+ }
+ // If palignr is shifting the pair of input vectors more than one lane,
+ // but less than two lanes, convert to shifting in zeroes.
+ let (a, b) = if IMM8 > 16 {
+ (_mm_set1_epi8(0), a)
+ } else {
+ (a, b)
+ };
+ const fn mask(shift: u32, i: u32) -> u32 {
+ if shift > 32 {
+ // Unused, but needs to be a valid index.
+ i
+ } else if shift > 16 {
+ shift - 16 + i
+ } else {
+ shift + i
+ }
+ }
+ let r: i8x16 = simd_shuffle16!(
+ b.as_i8x16(),
+ a.as_i8x16(),
+ <const IMM8: i32> [
+ mask(IMM8 as u32, 0),
+ mask(IMM8 as u32, 1),
+ mask(IMM8 as u32, 2),
+ mask(IMM8 as u32, 3),
+ mask(IMM8 as u32, 4),
+ mask(IMM8 as u32, 5),
+ mask(IMM8 as u32, 6),
+ mask(IMM8 as u32, 7),
+ mask(IMM8 as u32, 8),
+ mask(IMM8 as u32, 9),
+ mask(IMM8 as u32, 10),
+ mask(IMM8 as u32, 11),
+ mask(IMM8 as u32, 12),
+ mask(IMM8 as u32, 13),
+ mask(IMM8 as u32, 14),
+ mask(IMM8 as u32, 15),
+ ],
+ );
+ transmute(r)
+}
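+
+// A scalar sketch of the `palignr` semantics above, viewing the operands as
+// plain `[u8; 16]` (the helper is illustrative, not part of this module):
+// `b` occupies the low 16 bytes of the 32-byte concatenation, `a` the high
+// 16 bytes, and bytes shifted in from beyond the concatenation are zero.
+#[cfg(test)]
+#[allow(dead_code)]
+fn alignr_scalar(a: [u8; 16], b: [u8; 16], n: usize) -> [u8; 16] {
+    let mut concat = [0u8; 32];
+    concat[..16].copy_from_slice(&b);
+    concat[16..].copy_from_slice(&a);
+    let mut r = [0u8; 16];
+    for i in 0..16 {
+        r[i] = if n + i < 32 { concat[n + i] } else { 0 };
+    }
+    r
+}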
+
+/// Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of `[8 x i16]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi16)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(phaddw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hadd_epi16(a: __m128i, b: __m128i) -> __m128i {
+ transmute(phaddw128(a.as_i16x8(), b.as_i16x8()))
+}
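+
+// A scalar sketch of the horizontal add above (the helper is illustrative,
+// not part of this module): lanes 0..4 hold the pairwise sums from `a`,
+// lanes 4..8 those from `b`; the `phsub*` intrinsics below are analogous
+// with subtraction. Addition wraps on overflow, as the instruction does.
+#[cfg(test)]
+#[allow(dead_code)]
+fn hadd_scalar_i16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
+    let mut r = [0i16; 8];
+    for i in 0..4 {
+        r[i] = a[2 * i].wrapping_add(a[2 * i + 1]);
+        r[i + 4] = b[2 * i].wrapping_add(b[2 * i + 1]);
+    }
+    r
+}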
+
+/// Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of `[8 x i16]`. Positive sums greater than 7FFFh are
+/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadds_epi16)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(phaddsw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hadds_epi16(a: __m128i, b: __m128i) -> __m128i {
+ transmute(phaddsw128(a.as_i16x8(), b.as_i16x8()))
+}
+
+/// Horizontally adds the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of `[4 x i32]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_epi32)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(phaddd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hadd_epi32(a: __m128i, b: __m128i) -> __m128i {
+ transmute(phaddd128(a.as_i32x4(), b.as_i32x4()))
+}
+
+/// Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of `[8 x i16]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi16)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(phsubw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hsub_epi16(a: __m128i, b: __m128i) -> __m128i {
+ transmute(phsubw128(a.as_i16x8(), b.as_i16x8()))
+}
+
+/// Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of `[8 x i16]`. Positive differences greater than
+/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
+/// saturated to 8000h.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsubs_epi16)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(phsubsw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hsubs_epi16(a: __m128i, b: __m128i) -> __m128i {
+ transmute(phsubsw128(a.as_i16x8(), b.as_i16x8()))
+}
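+
+// A scalar sketch of the saturating horizontal subtract above (the helper is
+// illustrative, not part of this module): within each pair the second
+// element is subtracted from the first, clamping at the i16 bounds.
+#[cfg(test)]
+#[allow(dead_code)]
+fn hsubs_scalar_i16(a: [i16; 8], b: [i16; 8]) -> [i16; 8] {
+    let mut r = [0i16; 8];
+    for i in 0..4 {
+        r[i] = a[2 * i].saturating_sub(a[2 * i + 1]);
+        r[i + 4] = b[2 * i].saturating_sub(b[2 * i + 1]);
+    }
+    r
+}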
+
+/// Horizontally subtracts the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of `[4 x i32]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_epi32)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(phsubd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hsub_epi32(a: __m128i, b: __m128i) -> __m128i {
+ transmute(phsubd128(a.as_i32x4(), b.as_i32x4()))
+}
+
+/// Multiplies corresponding pairs of packed 8-bit unsigned integer
+/// values contained in the first source operand and packed 8-bit signed
+/// integer values contained in the second source operand, adds pairs of
+/// contiguous products with signed saturation, and writes the 16-bit sums to
+/// the corresponding bits in the destination.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maddubs_epi16)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(pmaddubsw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_maddubs_epi16(a: __m128i, b: __m128i) -> __m128i {
+ transmute(pmaddubsw128(a.as_u8x16(), b.as_i8x16()))
+}
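+
+// A scalar sketch of `pmaddubsw` above (the helper is illustrative, not part
+// of this module): each output lane is u8*i8 + u8*i8 over an adjacent byte
+// pair, summed with signed saturation; i32 intermediates avoid overflow
+// before the clamp.
+#[cfg(test)]
+#[allow(dead_code)]
+fn maddubs_scalar(a: [u8; 16], b: [i8; 16]) -> [i16; 8] {
+    let mut r = [0i16; 8];
+    for i in 0..8 {
+        let lo = a[2 * i] as i32 * b[2 * i] as i32;
+        let hi = a[2 * i + 1] as i32 * b[2 * i + 1] as i32;
+        r[i] = (lo + hi).clamp(i16::MIN as i32, i16::MAX as i32) as i16;
+    }
+    r
+}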
+
+/// Multiplies packed 16-bit signed integer values, truncates the 32-bit
+/// product to the 18 most significant bits by right-shifting, rounds the
+/// truncated value by adding 1, and writes bits `[16:1]` to the destination.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhrs_epi16)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(pmulhrsw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_mulhrs_epi16(a: __m128i, b: __m128i) -> __m128i {
+ transmute(pmulhrsw128(a.as_i16x8(), b.as_i16x8()))
+}
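+
+// A scalar sketch of the `pmulhrsw` rounding described above (the helper is
+// illustrative, not part of this module): widen to i32, drop the low 14 bits
+// of the product, add the rounding bit, and keep bits [16:1]. The final
+// truncation to i16 matches the instruction's 16-bit destination.
+#[cfg(test)]
+#[allow(dead_code)]
+fn mulhrs_scalar(a: i16, b: i16) -> i16 {
+    let wide = a as i32 * b as i32;
+    (((wide >> 14) + 1) >> 1) as i16
+}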
+
+/// Negates packed 8-bit integers in `a` when the corresponding signed 8-bit
+/// integer in `b` is negative, and returns the results.
+/// Elements in the result are zeroed out when the corresponding element in `b`
+/// is zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi8)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(psignb))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_sign_epi8(a: __m128i, b: __m128i) -> __m128i {
+ transmute(psignb128(a.as_i8x16(), b.as_i8x16()))
+}
+
+/// Negates packed 16-bit integers in `a` when the corresponding signed 16-bit
+/// integer in `b` is negative, and returns the results.
+/// Elements in the result are zeroed out when the corresponding element in `b`
+/// is zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi16)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(psignw))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_sign_epi16(a: __m128i, b: __m128i) -> __m128i {
+ transmute(psignw128(a.as_i16x8(), b.as_i16x8()))
+}
+
+/// Negates packed 32-bit integers in `a` when the corresponding signed 32-bit
+/// integer in `b` is negative, and returns the results.
+/// Elements in the result are zeroed out when the corresponding element in `b`
+/// is zero.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sign_epi32)
+#[inline]
+#[target_feature(enable = "ssse3")]
+#[cfg_attr(test, assert_instr(psignd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_sign_epi32(a: __m128i, b: __m128i) -> __m128i {
+ transmute(psignd128(a.as_i32x4(), b.as_i32x4()))
+}
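+
+// A scalar sketch of the `psign*` family above (the helper is illustrative,
+// not part of this module): copy, negate, or zero each lane of `a` according
+// to the sign of the matching lane in `b`. Negation wraps, so i8::MIN stays
+// i8::MIN, as with the instruction.
+#[cfg(test)]
+#[allow(dead_code)]
+fn sign_scalar_i8(a: i8, b: i8) -> i8 {
+    if b < 0 {
+        a.wrapping_neg()
+    } else if b == 0 {
+        0
+    } else {
+        a
+    }
+}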
+
+#[allow(improper_ctypes)]
+extern "C" {
+ #[link_name = "llvm.x86.ssse3.pabs.b.128"]
+ fn pabsb128(a: i8x16) -> u8x16;
+
+ #[link_name = "llvm.x86.ssse3.pabs.w.128"]
+ fn pabsw128(a: i16x8) -> u16x8;
+
+ #[link_name = "llvm.x86.ssse3.pabs.d.128"]
+ fn pabsd128(a: i32x4) -> u32x4;
+
+ #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
+ fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
+
+ #[link_name = "llvm.x86.ssse3.phadd.w.128"]
+ fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
+
+ #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
+ fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
+
+ #[link_name = "llvm.x86.ssse3.phadd.d.128"]
+ fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
+
+ #[link_name = "llvm.x86.ssse3.phsub.w.128"]
+ fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
+
+ #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
+ fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
+
+ #[link_name = "llvm.x86.ssse3.phsub.d.128"]
+ fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
+
+ #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
+ fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
+
+ #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
+ fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
+
+ #[link_name = "llvm.x86.ssse3.psign.b.128"]
+ fn psignb128(a: i8x16, b: i8x16) -> i8x16;
+
+ #[link_name = "llvm.x86.ssse3.psign.w.128"]
+ fn psignw128(a: i16x8, b: i16x8) -> i16x8;
+
+ #[link_name = "llvm.x86.ssse3.psign.d.128"]
+ fn psignd128(a: i32x4, b: i32x4) -> i32x4;
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86::*;
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_abs_epi8() {
+ let r = _mm_abs_epi8(_mm_set1_epi8(-5));
+ assert_eq_m128i(r, _mm_set1_epi8(5));
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_abs_epi16() {
+ let r = _mm_abs_epi16(_mm_set1_epi16(-5));
+ assert_eq_m128i(r, _mm_set1_epi16(5));
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_abs_epi32() {
+ let r = _mm_abs_epi32(_mm_set1_epi32(-5));
+ assert_eq_m128i(r, _mm_set1_epi32(5));
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_shuffle_epi8() {
+ #[rustfmt::skip]
+ let a = _mm_setr_epi8(
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ );
+ #[rustfmt::skip]
+ let b = _mm_setr_epi8(
+ 4, 128_u8 as i8, 4, 3,
+ 24, 12, 6, 19,
+ 12, 5, 5, 10,
+ 4, 1, 8, 0,
+ );
+ let expected = _mm_setr_epi8(5, 0, 5, 4, 9, 13, 7, 4, 13, 6, 6, 11, 5, 2, 9, 1);
+ let r = _mm_shuffle_epi8(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_alignr_epi8() {
+ #[rustfmt::skip]
+ let a = _mm_setr_epi8(
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ );
+ #[rustfmt::skip]
+ let b = _mm_setr_epi8(
+ 4, 63, 4, 3,
+ 24, 12, 6, 19,
+ 12, 5, 5, 10,
+ 4, 1, 8, 0,
+ );
+ let r = _mm_alignr_epi8::<33>(a, b);
+ assert_eq_m128i(r, _mm_set1_epi8(0));
+
+ let r = _mm_alignr_epi8::<17>(a, b);
+ #[rustfmt::skip]
+ let expected = _mm_setr_epi8(
+ 2, 3, 4, 5, 6, 7, 8, 9,
+ 10, 11, 12, 13, 14, 15, 16, 0,
+ );
+ assert_eq_m128i(r, expected);
+
+ let r = _mm_alignr_epi8::<16>(a, b);
+ assert_eq_m128i(r, a);
+
+ let r = _mm_alignr_epi8::<15>(a, b);
+ #[rustfmt::skip]
+ let expected = _mm_setr_epi8(
+ 0, 1, 2, 3, 4, 5, 6, 7,
+ 8, 9, 10, 11, 12, 13, 14, 15,
+ );
+ assert_eq_m128i(r, expected);
+
+ let r = _mm_alignr_epi8::<0>(a, b);
+ assert_eq_m128i(r, b);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_hadd_epi16() {
+ let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
+ let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 36, 25);
+ let r = _mm_hadd_epi16(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_hadds_epi16() {
+ let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_setr_epi16(4, 128, 4, 3, 32767, 1, -32768, -1);
+ let expected = _mm_setr_epi16(3, 7, 11, 15, 132, 7, 32767, -32768);
+ let r = _mm_hadds_epi16(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_hadd_epi32() {
+ let a = _mm_setr_epi32(1, 2, 3, 4);
+ let b = _mm_setr_epi32(4, 128, 4, 3);
+ let expected = _mm_setr_epi32(3, 7, 132, 7);
+ let r = _mm_hadd_epi32(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_hsub_epi16() {
+ let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_setr_epi16(4, 128, 4, 3, 24, 12, 6, 19);
+ let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 12, -13);
+ let r = _mm_hsub_epi16(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_hsubs_epi16() {
+ let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
+ let expected = _mm_setr_epi16(-1, -1, -1, -1, -124, 1, 32767, -32768);
+ let r = _mm_hsubs_epi16(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_hsub_epi32() {
+ let a = _mm_setr_epi32(1, 2, 3, 4);
+ let b = _mm_setr_epi32(4, 128, 4, 3);
+ let expected = _mm_setr_epi32(-1, -1, -124, 1);
+ let r = _mm_hsub_epi32(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_maddubs_epi16() {
+ #[rustfmt::skip]
+ let a = _mm_setr_epi8(
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, 14, 15, 16,
+ );
+ #[rustfmt::skip]
+ let b = _mm_setr_epi8(
+ 4, 63, 4, 3,
+ 24, 12, 6, 19,
+ 12, 5, 5, 10,
+ 4, 1, 8, 0,
+ );
+ let expected = _mm_setr_epi16(130, 24, 192, 194, 158, 175, 66, 120);
+ let r = _mm_maddubs_epi16(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_mulhrs_epi16() {
+ let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
+ let b = _mm_setr_epi16(4, 128, 4, 3, 32767, -1, -32768, 1);
+ let expected = _mm_setr_epi16(0, 0, 0, 0, 5, 0, -7, 0);
+ let r = _mm_mulhrs_epi16(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_sign_epi8() {
+ #[rustfmt::skip]
+ let a = _mm_setr_epi8(
+ 1, 2, 3, 4, 5, 6, 7, 8,
+ 9, 10, 11, 12, 13, -14, -15, 16,
+ );
+ #[rustfmt::skip]
+ let b = _mm_setr_epi8(
+ 4, 63, -4, 3, 24, 12, -6, -19,
+ 12, 5, -5, 10, 4, 1, -8, 0,
+ );
+ #[rustfmt::skip]
+ let expected = _mm_setr_epi8(
+ 1, 2, -3, 4, 5, 6, -7, -8,
+ 9, 10, -11, 12, 13, -14, 15, 0,
+ );
+ let r = _mm_sign_epi8(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_sign_epi16() {
+ let a = _mm_setr_epi16(1, 2, 3, 4, -5, -6, 7, 8);
+ let b = _mm_setr_epi16(4, 128, 0, 3, 1, -1, -2, 1);
+ let expected = _mm_setr_epi16(1, 2, 0, 4, -5, 6, -7, 8);
+ let r = _mm_sign_epi16(a, b);
+ assert_eq_m128i(r, expected);
+ }
+
+ #[simd_test(enable = "ssse3")]
+ unsafe fn test_mm_sign_epi32() {
+ let a = _mm_setr_epi32(-1, 2, 3, 4);
+ let b = _mm_setr_epi32(1, -1, 1, 0);
+ let expected = _mm_setr_epi32(-1, -2, 3, 0);
+ let r = _mm_sign_epi32(a, b);
+ assert_eq_m128i(r, expected);
+ }
+}