summary refs log tree commit diff stats
path: root/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
diff options
context:
space:
mode:
Diffstat (limited to 'third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs')
-rw-r--r-- third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs | 81
1 files changed, 81 insertions, 0 deletions
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
new file mode 100644
index 0000000000..b2db52c891
--- /dev/null
+++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs
@@ -0,0 +1,81 @@
+//! Mask reductions implementation for `aarch64` targets
+
+/// 128-bit wide vectors: implements `All`/`Any` for mask `$id` by testing the result of `$vmin`/`$vmax` against 0.
+macro_rules! aarch64_128_neon_impl {
+ ($id:ident, $vmin:ident, $vmax:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn all(self) -> bool {
+ use crate::arch::aarch64::$vmin;
+ $vmin(crate::mem::transmute(self)) != 0 // presumably `$vmin` yields the smallest lane, so non-zero => no lane is zero
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn any(self) -> bool {
+ use crate::arch::aarch64::$vmax;
+ $vmax(crate::mem::transmute(self)) != 0 // presumably `$vmax` yields the largest lane, so non-zero => some lane is non-zero
+ }
+ }
+ };
+}
+
+/// 64-bit wide vectors: implements `All`/`Any` for mask `$id` by delegating to the `$vec128` implementation.
+macro_rules! aarch64_64_neon_impl {
+ ($id:ident, $vec128:ident) => {
+ impl All for $id {
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn all(self) -> bool {
+ // Stores `self` twice in a union and reads the bytes back as a
+ // `$vec128`; both halves are equal, so `all` gives the same answer.
+ union U {
+ halves: ($id, $id), // NOTE(review): assumes two `$id` occupy exactly the bytes of one `$vec128` - confirm
+ vec: $vec128,
+ }
+ U { halves: (self, self) }.vec.all()
+ }
+ }
+ impl Any for $id {
+ #[inline]
+ #[target_feature(enable = "neon")]
+ unsafe fn any(self) -> bool {
+ union U { // same duplicate-and-reinterpret union as in `all`
+ halves: ($id, $id),
+ vec: $vec128,
+ }
+ U { halves: (self, self) }.vec.any()
+ }
+ }
+ };
+}
+
+/// Mask reduction implementation for `aarch64` targets: picks the `All`/`Any` impl by matching the mask type name.
+macro_rules! impl_mask_reductions {
+ // 64-bit wide masks: the second argument is the 128-bit mask type the reduction is delegated to.
+ (m8x8) => {
+ aarch64_64_neon_impl!(m8x8, m8x16);
+ };
+ (m16x4) => {
+ aarch64_64_neon_impl!(m16x4, m16x8);
+ };
+ (m32x2) => {
+ aarch64_64_neon_impl!(m32x2, m32x4);
+ };
+ // 128-bit wide masks: the last two arguments are the intrinsics used for `all` and `any` respectively.
+ (m8x16) => {
+ aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8);
+ };
+ (m16x8) => {
+ aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16);
+ };
+ (m32x4) => {
+ aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32);
+ };
+ // Any other mask type: fallback to LLVM's default code-generation via `fallback_impl!`.
+ ($id:ident) => { // must stay last: arms are tried in order and this one matches any identifier
+ fallback_impl!($id);
+ };
+}