summaryrefslogtreecommitdiffstats
path: root/library/stdarch/crates/core_arch/src/x86/sse3.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86/sse3.rs')
-rw-r--r--library/stdarch/crates/core_arch/src/x86/sse3.rs260
1 files changed, 260 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/sse3.rs b/library/stdarch/crates/core_arch/src/x86/sse3.rs
new file mode 100644
index 000000000..ab0dd38fe
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86/sse3.rs
@@ -0,0 +1,260 @@
+//! Streaming SIMD Extensions 3 (SSE3)
+
+use crate::{
+ core_arch::{
+ simd::*,
+ simd_llvm::{simd_shuffle2, simd_shuffle4},
+ x86::*,
+ },
+ mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Alternatively add and subtract packed single-precision (32-bit)
+/// floating-point elements in `a` to/from packed elements in `b`.
+///
+/// Even-indexed lanes of the result hold `a - b` and odd-indexed lanes hold
+/// `a + b` (this is the layout checked by the unit test in this file).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_ps)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(addsubps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_addsub_ps(a: __m128, b: __m128) -> __m128 {
+ // Forwards to the `llvm.x86.sse3.addsub.ps` binding declared in this file.
+ addsubps(a, b)
+}
+
+/// Alternatively add and subtract packed double-precision (64-bit)
+/// floating-point elements in `a` to/from packed elements in `b`.
+///
+/// Lane 0 of the result holds `a[0] - b[0]` and lane 1 holds `a[1] + b[1]`
+/// (this is the layout checked by the unit test in this file).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_addsub_pd)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(addsubpd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_addsub_pd(a: __m128d, b: __m128d) -> __m128d {
+ // Forwards to the `llvm.x86.sse3.addsub.pd` binding declared in this file.
+ addsubpd(a, b)
+}
+
+/// Horizontally adds adjacent pairs of double-precision (64-bit)
+/// floating-point elements in `a` and `b`, and packs the results.
+///
+/// The result is `[a[0] + a[1], b[0] + b[1]]` (this is the layout checked by
+/// the unit test in this file).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_pd)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(haddpd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hadd_pd(a: __m128d, b: __m128d) -> __m128d {
+ // Forwards to the `llvm.x86.sse3.hadd.pd` binding declared in this file.
+ haddpd(a, b)
+}
+
+/// Horizontally adds adjacent pairs of single-precision (32-bit)
+/// floating-point elements in `a` and `b`, and packs the results.
+///
+/// The result is `[a[0] + a[1], a[2] + a[3], b[0] + b[1], b[2] + b[3]]`
+/// (this is the layout checked by the unit test in this file).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hadd_ps)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(haddps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hadd_ps(a: __m128, b: __m128) -> __m128 {
+ // Forwards to the `llvm.x86.sse3.hadd.ps` binding declared in this file.
+ haddps(a, b)
+}
+
+/// Horizontally subtracts adjacent pairs of double-precision (64-bit)
+/// floating-point elements in `a` and `b`, and packs the results.
+///
+/// The result is `[a[0] - a[1], b[0] - b[1]]` (this is the layout checked by
+/// the unit test in this file).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_pd)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(hsubpd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hsub_pd(a: __m128d, b: __m128d) -> __m128d {
+ // Forwards to the `llvm.x86.sse3.hsub.pd` binding declared in this file.
+ hsubpd(a, b)
+}
+
+/// Horizontally subtracts adjacent pairs of single-precision (32-bit)
+/// floating-point elements in `a` and `b`, and packs the results.
+///
+/// The result is `[a[0] - a[1], a[2] - a[3], b[0] - b[1], b[2] - b[3]]`
+/// (this is the layout checked by the unit test in this file).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_hsub_ps)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(hsubps))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_hsub_ps(a: __m128, b: __m128) -> __m128 {
+ // Forwards to the `llvm.x86.sse3.hsub.ps` binding declared in this file.
+ hsubps(a, b)
+}
+
+/// Performs an unaligned load of 128 bits of integer data from `mem_addr`.
+///
+/// Compared with `_mm_loadu_si128`, this intrinsic may be faster when the
+/// loaded data straddles a cache line boundary.
+///
+/// NOTE(review): `mem_addr` presumably must be valid for a 16-byte read;
+/// confirm against the Intel documentation linked below.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lddqu_si128)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(lddqu))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_lddqu_si128(mem_addr: *const __m128i) -> __m128i {
+    // The LLVM binding takes a byte pointer and yields sixteen `i8` lanes.
+    let byte_ptr = mem_addr as *const i8;
+    let lanes: i8x16 = lddqu(byte_ptr);
+    // Reinterpret the lanes as the opaque 128-bit integer vector type.
+    transmute::<i8x16, __m128i>(lanes)
+}
+
+/// Duplicate the low double-precision (64-bit) floating-point element
+/// from `a`.
+///
+/// The result is `[a[0], a[0]]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movedup_pd)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(movddup))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_movedup_pd(a: __m128d) -> __m128d {
+ // Both output lanes select index 0, i.e. the low element of `a`.
+ simd_shuffle2!(a, a, [0, 0])
+}
+
+/// Loads a double-precision (64-bit) floating-point element from memory
+/// into both elements of return vector.
+///
+/// The result is `[*mem_addr, *mem_addr]` (this is what the unit test in
+/// this file checks).
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loaddup_pd)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(movddup))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_loaddup_pd(mem_addr: *const f64) -> __m128d {
+ // Implemented by delegating to `_mm_load1_pd`.
+ _mm_load1_pd(mem_addr)
+}
+
+/// Duplicate odd-indexed single-precision (32-bit) floating-point elements
+/// from `a`.
+///
+/// The result is `[a[1], a[1], a[3], a[3]]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movehdup_ps)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(movshdup))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_movehdup_ps(a: __m128) -> __m128 {
+ // Each pair of output lanes selects the odd-indexed element of that pair.
+ simd_shuffle4!(a, a, [1, 1, 3, 3])
+}
+
+/// Duplicate even-indexed single-precision (32-bit) floating-point elements
+/// from `a`.
+///
+/// The result is `[a[0], a[0], a[2], a[2]]`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_moveldup_ps)
+#[inline]
+#[target_feature(enable = "sse3")]
+#[cfg_attr(test, assert_instr(movsldup))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_moveldup_ps(a: __m128) -> __m128 {
+ // Each pair of output lanes selects the even-indexed element of that pair.
+ simd_shuffle4!(a, a, [0, 0, 2, 2])
+}
+
+// Bindings to the LLVM SSE3 intrinsics called by the wrappers in this file.
+// Each `link_name` selects the corresponding `llvm.x86.sse3.*` intrinsic.
+// NOTE(review): `improper_ctypes` is presumably allowed because the SIMD
+// vector types used in these signatures are not seen as FFI-safe by the
+// lint — confirm.
+#[allow(improper_ctypes)]
+extern "C" {
+ // Alternating subtract/add, single and double precision.
+ #[link_name = "llvm.x86.sse3.addsub.ps"]
+ fn addsubps(a: __m128, b: __m128) -> __m128;
+ #[link_name = "llvm.x86.sse3.addsub.pd"]
+ fn addsubpd(a: __m128d, b: __m128d) -> __m128d;
+ // Horizontal add, double and single precision.
+ #[link_name = "llvm.x86.sse3.hadd.pd"]
+ fn haddpd(a: __m128d, b: __m128d) -> __m128d;
+ #[link_name = "llvm.x86.sse3.hadd.ps"]
+ fn haddps(a: __m128, b: __m128) -> __m128;
+ // Horizontal subtract, double and single precision.
+ #[link_name = "llvm.x86.sse3.hsub.pd"]
+ fn hsubpd(a: __m128d, b: __m128d) -> __m128d;
+ #[link_name = "llvm.x86.sse3.hsub.ps"]
+ fn hsubps(a: __m128, b: __m128) -> __m128;
+ // Unaligned 128-bit integer load; takes a byte pointer, returns 16 x i8.
+ #[link_name = "llvm.x86.sse3.ldu.dq"]
+ fn lddqu(mem_addr: *const i8) -> i8x16;
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::x86::*;
+    use stdarch_test::simd_test;
+
+    // Even lanes are `lhs - rhs`, odd lanes are `lhs + rhs`.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_addsub_ps() {
+        let lhs = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let rhs = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let expected = _mm_setr_ps(99.0, 25.0, 0.0, -15.0);
+        assert_eq_m128(_mm_addsub_ps(lhs, rhs), expected);
+    }
+
+    // Lane 0 is `lhs - rhs`, lane 1 is `lhs + rhs`.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_addsub_pd() {
+        let lhs = _mm_setr_pd(-1.0, 5.0);
+        let rhs = _mm_setr_pd(-100.0, 20.0);
+        let expected = _mm_setr_pd(99.0, 25.0);
+        assert_eq_m128d(_mm_addsub_pd(lhs, rhs), expected);
+    }
+
+    // Lane 0 sums the pair in `lhs`, lane 1 sums the pair in `rhs`.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_hadd_pd() {
+        let lhs = _mm_setr_pd(-1.0, 5.0);
+        let rhs = _mm_setr_pd(-100.0, 20.0);
+        let expected = _mm_setr_pd(4.0, -80.0);
+        assert_eq_m128d(_mm_hadd_pd(lhs, rhs), expected);
+    }
+
+    // Lanes 0-1 sum the pairs in `lhs`, lanes 2-3 sum the pairs in `rhs`.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_hadd_ps() {
+        let lhs = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let rhs = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let expected = _mm_setr_ps(4.0, -10.0, -80.0, -5.0);
+        assert_eq_m128(_mm_hadd_ps(lhs, rhs), expected);
+    }
+
+    // Lane 0 is `lhs[0] - lhs[1]`, lane 1 is `rhs[0] - rhs[1]`.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_hsub_pd() {
+        let lhs = _mm_setr_pd(-1.0, 5.0);
+        let rhs = _mm_setr_pd(-100.0, 20.0);
+        let expected = _mm_setr_pd(-6.0, -120.0);
+        assert_eq_m128d(_mm_hsub_pd(lhs, rhs), expected);
+    }
+
+    // Lanes 0-1 are pair differences of `lhs`, lanes 2-3 those of `rhs`.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_hsub_ps() {
+        let lhs = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let rhs = _mm_setr_ps(-100.0, 20.0, 0.0, -5.0);
+        let expected = _mm_setr_ps(-6.0, 10.0, -120.0, 5.0);
+        assert_eq_m128(_mm_hsub_ps(lhs, rhs), expected);
+    }
+
+    // Loading through a pointer to a vector must reproduce that vector.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_lddqu_si128() {
+        #[rustfmt::skip]
+        let source = _mm_setr_epi8(
+            1, 2, 3, 4,
+            5, 6, 7, 8,
+            9, 10, 11, 12,
+            13, 14, 15, 16,
+        );
+        let loaded = _mm_lddqu_si128(&source);
+        assert_eq_m128i(source, loaded);
+    }
+
+    // The low element is copied into both lanes.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_movedup_pd() {
+        let input = _mm_setr_pd(-1.0, 5.0);
+        let expected = _mm_setr_pd(-1.0, -1.0);
+        assert_eq_m128d(_mm_movedup_pd(input), expected);
+    }
+
+    // Odd-indexed elements (1 and 3) are duplicated.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_movehdup_ps() {
+        let input = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let expected = _mm_setr_ps(5.0, 5.0, -10.0, -10.0);
+        assert_eq_m128(_mm_movehdup_ps(input), expected);
+    }
+
+    // Even-indexed elements (0 and 2) are duplicated.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_moveldup_ps() {
+        let input = _mm_setr_ps(-1.0, 5.0, 0.0, -10.0);
+        let expected = _mm_setr_ps(-1.0, -1.0, 0.0, 0.0);
+        assert_eq_m128(_mm_moveldup_ps(input), expected);
+    }
+
+    // The scalar behind the pointer is broadcast to both lanes.
+    #[simd_test(enable = "sse3")]
+    unsafe fn test_mm_loaddup_pd() {
+        let value = -5.0;
+        let broadcast = _mm_loaddup_pd(&value);
+        assert_eq_m128d(broadcast, _mm_setr_pd(value, value));
+    }
+}