summaryrefslogtreecommitdiffstats
path: root/third_party/rust/packed_simd/src/codegen/reductions/mask/x86
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/packed_simd/src/codegen/reductions/mask/x86')
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs95
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs35
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs35
-rw-r--r--third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs68
4 files changed, 233 insertions, 0 deletions
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
new file mode 100644
index 0000000000..61f352d228
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs
@@ -0,0 +1,95 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX`
+
+/// `x86`/`x86_64` 256-bit `AVX` implementation
+/// FIXME: it might be faster here to do two `_mm_movmask_epi8`
+#[cfg(target_feature = "avx")]
+macro_rules! x86_m8x32_avx_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "avx")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_testc_si256;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_testc_si256;
+ _mm256_testc_si256(crate::mem::transmute(self), crate::mem::transmute($id::splat(true))) != 0
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "avx")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_testz_si256;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_testz_si256;
+ _mm256_testz_si256(crate::mem::transmute(self), crate::mem::transmute(self)) == 0
+ }
+ }
+ };
+}
+
+/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation
+macro_rules! x86_m32x8_avx_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_ps;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_ps;
+ // _mm256_movemask_ps(a) creates a 8bit mask containing the
+ // most significant bit of each lane of `a`. If all bits are
+ // set, then all 8 lanes of the mask are true.
+ _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_ps;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_ps;
+
+ _mm256_movemask_ps(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
+
+/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation
+macro_rules! x86_m64x4_avx_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_pd;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_pd;
+ // _mm256_movemask_pd(a) creates a 4bit mask containing the
+ // most significant bit of each lane of `a`. If all bits are
+ // set, then all 4 lanes of the mask are true.
+ _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_pd;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_pd;
+
+ _mm256_movemask_pd(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs
new file mode 100644
index 0000000000..d37d023420
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs
@@ -0,0 +1,35 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`.
+#![allow(unused)]
+
+/// x86/x86_64 256-bit m8x32 AVX2 implementation
+macro_rules! x86_m8x32_avx2_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_epi8;
+ // _mm256_movemask_epi8(a) creates a 32bit mask containing the
+ // most significant bit of each byte of `a`. If all
+ // bits are set, then all 32 lanes of the mask are
+ // true.
+ _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm256_movemask_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm256_movemask_epi8;
+
+ _mm256_movemask_epi8(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
new file mode 100644
index 0000000000..e0c9aee92b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs
@@ -0,0 +1,35 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`.
+#![allow(unused)]
+
+/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation
+macro_rules! x86_m32x4_sse_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_ps;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_ps;
+ // _mm_movemask_ps(a) creates a 4bit mask containing the
+ // most significant bit of each lane of `a`. If all
+ // bits are set, then all 4 lanes of the mask are
+ // true.
+ _mm_movemask_ps(crate::mem::transmute(self)) == 0b_1111_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_ps;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_ps;
+
+ _mm_movemask_ps(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs
new file mode 100644
index 0000000000..bbb52fa47e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs
@@ -0,0 +1,68 @@
+//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`.
+#![allow(unused)]
+
+/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation
+macro_rules! x86_m64x2_sse2_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_pd;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_pd;
+ // _mm_movemask_pd(a) creates a 2bit mask containing the
+ // most significant bit of each lane of `a`. If all
+ // bits are set, then all 2 lanes of the mask are
+ // true.
+ _mm_movemask_pd(crate::mem::transmute(self)) == 0b_11_i32
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_pd;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_pd;
+
+ _mm_movemask_pd(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}
+
+/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation
+macro_rules! x86_m8x16_sse2_impl {
+ ($id:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ unsafe fn all(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_epi8;
+ // _mm_movemask_epi8(a) creates a 16bit mask containing the
+ // most significant bit of each byte of `a`. If all
+ // bits are set, then all 16 lanes of the mask are
+ // true.
+ _mm_movemask_epi8(crate::mem::transmute(self)) == i32::from(u16::max_value())
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "sse2")]
+ unsafe fn any(self) -> bool {
+ #[cfg(target_arch = "x86")]
+ use crate::arch::x86::_mm_movemask_epi8;
+ #[cfg(target_arch = "x86_64")]
+ use crate::arch::x86_64::_mm_movemask_epi8;
+
+ _mm_movemask_epi8(crate::mem::transmute(self)) != 0
+ }
+ }
+ };
+}