From 698f8c2f01ea549d77d7dc3338a12e04c11057b9 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Wed, 17 Apr 2024 14:02:58 +0200
Subject: Adding upstream version 1.64.0+dfsg1.

Signed-off-by: Daniel Baumann
---
 .../packed_simd_2/src/codegen/reductions/mask.rs   |  69 ++++++
 .../src/codegen/reductions/mask/aarch64.rs         |  71 ++++++
 .../src/codegen/reductions/mask/arm.rs             |  54 +++++
 .../src/codegen/reductions/mask/fallback.rs        |   6 +
 .../src/codegen/reductions/mask/fallback_impl.rs   | 237 +++++++++++++++++++++
 .../src/codegen/reductions/mask/x86.rs             | 188 ++++++++++++++++
 .../src/codegen/reductions/mask/x86/avx.rs         | 101 +++++++++
 .../src/codegen/reductions/mask/x86/avx2.rs        |  35 +++
 .../src/codegen/reductions/mask/x86/sse.rs         |  36 ++++
 .../src/codegen/reductions/mask/x86/sse2.rs        |  70 ++++++
 10 files changed, 867 insertions(+)
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/fallback_impl.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx2.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs
 create mode 100644 vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs

diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask.rs b/vendor/packed_simd_2/src/codegen/reductions/mask.rs
new file mode 100644
index 000000000..97260c6d4
--- /dev/null
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask.rs
@@ -0,0 +1,69 @@
+//! Code generation workaround for the `all()` and `any()` mask horizontal reductions.
+//!
+//! Works around [LLVM bug 36702].
+//!
+//! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702
+#![allow(unused_macros)]
+
+use crate::*;
+
+crate trait All: crate::marker::Sized {
+    unsafe fn all(self) -> bool;
+}
+
+crate trait Any: crate::marker::Sized {
+    unsafe fn any(self) -> bool;
+}
+
+#[macro_use]
+mod fallback_impl;
+
+cfg_if!
{ + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[macro_use] + mod x86; + } else if #[cfg(all(target_arch = "arm", target_feature = "v7", + target_feature = "neon", + any(feature = "core_arch", libcore_neon)))] { + #[macro_use] + mod arm; + } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { + #[macro_use] + mod aarch64; + } else { + #[macro_use] + mod fallback; + } +} + +impl_mask_reductions!(m8x2); +impl_mask_reductions!(m8x4); +impl_mask_reductions!(m8x8); +impl_mask_reductions!(m8x16); +impl_mask_reductions!(m8x32); +impl_mask_reductions!(m8x64); + +impl_mask_reductions!(m16x2); +impl_mask_reductions!(m16x4); +impl_mask_reductions!(m16x8); +impl_mask_reductions!(m16x16); +impl_mask_reductions!(m16x32); + +impl_mask_reductions!(m32x2); +impl_mask_reductions!(m32x4); +impl_mask_reductions!(m32x8); +impl_mask_reductions!(m32x16); + +// FIXME: 64-bit single element vector +// impl_mask_reductions!(m64x1); +impl_mask_reductions!(m64x2); +impl_mask_reductions!(m64x4); +impl_mask_reductions!(m64x8); + +impl_mask_reductions!(m128x1); +impl_mask_reductions!(m128x2); +impl_mask_reductions!(m128x4); + +impl_mask_reductions!(msizex2); +impl_mask_reductions!(msizex4); +impl_mask_reductions!(msizex8); diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs new file mode 100644 index 000000000..e9586eace --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs @@ -0,0 +1,71 @@ +//! Mask reductions implementation for `aarch64` targets + +/// 128-bit wide vectors +macro_rules! aarch64_128_neon_impl { + ($id:ident, $vmin:ident, $vmax:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn all(self) -> bool { + use crate::arch::aarch64::$vmin; + $vmin(crate::mem::transmute(self)) != 0 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn any(self) -> bool { + use crate::arch::aarch64::$vmax; + $vmax(crate::mem::transmute(self)) != 0 + } + } + } +} + +/// 64-bit wide vectors +macro_rules! aarch64_64_neon_impl { + ($id:ident, $vec128:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn all(self) -> bool { + // Duplicates the 64-bit vector into a 128-bit one and + // calls all on that. + union U { + halves: ($id, $id), + vec: $vec128, + } + U { + halves: (self, self), + }.vec.all() + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn any(self) -> bool { + union U { + halves: ($id, $id), + vec: $vec128, + } + U { + halves: (self, self), + }.vec.any() + } + } + }; +} + +/// Mask reduction implementation for `aarch64` targets +macro_rules! 
impl_mask_reductions { + // 64-bit wide masks + (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); }; + (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); }; + (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); }; + // 128-bit wide masks + (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); }; + (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); }; + (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); }; + // Fallback to LLVM's default code-generation: + ($id:ident) => { fallback_impl!($id); }; +} diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs new file mode 100644 index 000000000..1987af7a9 --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs @@ -0,0 +1,54 @@ +//! Mask reductions implementation for `arm` targets + +/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with +/// more than two elements. +macro_rules! arm_128_v7_neon_impl { + ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "v7,neon")] + unsafe fn all(self) -> bool { + use crate::arch::arm::$vpmin; + use crate::mem::transmute; + union U { + halves: ($half, $half), + vec: $id, + } + let halves = U { vec: self }.halves; + let h: $half = transmute($vpmin( + transmute(halves.0), + transmute(halves.1), + )); + h.all() + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "v7,neon")] + unsafe fn any(self) -> bool { + use crate::arch::arm::$vpmax; + use crate::mem::transmute; + union U { + halves: ($half, $half), + vec: $id, + } + let halves = U { vec: self }.halves; + let h: $half = transmute($vpmax( + transmute(halves.0), + transmute(halves.1), + )); + h.any() + } + } + }; +} + +/// Mask reduction implementation for `arm` targets +macro_rules! impl_mask_reductions { + // 128-bit wide masks + (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); }; + (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); }; + (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); }; + // Fallback to LLVM's default code-generation: + ($id:ident) => { fallback_impl!($id); }; +} diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs new file mode 100644 index 000000000..25e5c813a --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs @@ -0,0 +1,6 @@ +//! Default mask reduction implementations. + +/// Default mask reduction implementation +macro_rules! impl_mask_reductions { + ($id:ident) => { fallback_impl!($id); }; +} diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/fallback_impl.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback_impl.rs new file mode 100644 index 000000000..0d246e2fd --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback_impl.rs @@ -0,0 +1,237 @@ +//! Default implementation of a mask reduction for any target. + +macro_rules! fallback_to_other_impl { + ($id:ident, $other:ident) => { + impl All for $id { + #[inline] + unsafe fn all(self) -> bool { + let m: $other = crate::mem::transmute(self); + m.all() + } + } + impl Any for $id { + #[inline] + unsafe fn any(self) -> bool { + let m: $other = crate::mem::transmute(self); + m.any() + } + } + }; +} + +/// Fallback implementation. +macro_rules! 
fallback_impl { + // 16-bit wide masks: + (m8x2) => { + impl All for m8x2 { + #[inline] + unsafe fn all(self) -> bool { + let i: u16 = crate::mem::transmute(self); + i == u16::max_value() + } + } + impl Any for m8x2 { + #[inline] + unsafe fn any(self) -> bool { + let i: u16 = crate::mem::transmute(self); + i != 0 + } + } + }; + // 32-bit wide masks + (m8x4) => { + impl All for m8x4 { + #[inline] + unsafe fn all(self) -> bool { + let i: u32 = crate::mem::transmute(self); + i == u32::max_value() + } + } + impl Any for m8x4 { + #[inline] + unsafe fn any(self) -> bool { + let i: u32 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x2) => { + fallback_to_other_impl!(m16x2, m8x4); + }; + // 64-bit wide masks: + (m8x8) => { + impl All for m8x8 { + #[inline] + unsafe fn all(self) -> bool { + let i: u64 = crate::mem::transmute(self); + i == u64::max_value() + } + } + impl Any for m8x8 { + #[inline] + unsafe fn any(self) -> bool { + let i: u64 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x4) => { + fallback_to_other_impl!(m16x4, m8x8); + }; + (m32x2) => { + fallback_to_other_impl!(m32x2, m16x4); + }; + // FIXME: 64x1 maxk + // 128-bit wide masks: + (m8x16) => { + impl All for m8x16 { + #[inline] + unsafe fn all(self) -> bool { + let i: u128 = crate::mem::transmute(self); + i == u128::max_value() + } + } + impl Any for m8x16 { + #[inline] + unsafe fn any(self) -> bool { + let i: u128 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x8) => { + fallback_to_other_impl!(m16x8, m8x16); + }; + (m32x4) => { + fallback_to_other_impl!(m32x4, m16x8); + }; + (m64x2) => { + fallback_to_other_impl!(m64x2, m32x4); + }; + (m128x1) => { + fallback_to_other_impl!(m128x1, m64x2); + }; + // 256-bit wide masks + (m8x32) => { + impl All for m8x32 { + #[inline] + unsafe fn all(self) -> bool { + let i: [u128; 2] = crate::mem::transmute(self); + let o: [u128; 2] = [u128::max_value(); 2]; + i == o + } + } + impl Any for m8x32 { + #[inline] + unsafe fn any(self) -> bool { + let i: [u128; 2] = crate::mem::transmute(self); + let o: [u128; 2] = [0; 2]; + i != o + } + } + }; + (m16x16) => { + fallback_to_other_impl!(m16x16, m8x32); + }; + (m32x8) => { + fallback_to_other_impl!(m32x8, m16x16); + }; + (m64x4) => { + fallback_to_other_impl!(m64x4, m32x8); + }; + (m128x2) => { + fallback_to_other_impl!(m128x2, m64x4); + }; + // 512-bit wide masks + (m8x64) => { + impl All for m8x64 { + #[inline] + unsafe fn all(self) -> bool { + let i: [u128; 4] = crate::mem::transmute(self); + let o: [u128; 4] = [u128::max_value(); 4]; + i == o + } + } + impl Any for m8x64 { + #[inline] + unsafe fn any(self) -> bool { + let i: [u128; 4] = crate::mem::transmute(self); + let o: [u128; 4] = [0; 4]; + i != o + } + } + }; + (m16x32) => { + fallback_to_other_impl!(m16x32, m8x64); + }; + (m32x16) => { + fallback_to_other_impl!(m32x16, m16x32); + }; + (m64x8) => { + fallback_to_other_impl!(m64x8, m32x16); + }; + (m128x4) => { + fallback_to_other_impl!(m128x4, m64x8); + }; + // Masks with pointer-sized elements64 + (msizex2) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex2, m64x2); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex2, m32x2); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex4) => { + cfg_if! 
{ + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex4, m64x4); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex4, m32x4); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex8) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex8, m64x8); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex8, m32x8); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; +} + +macro_rules! recurse_half { + ($vid:ident, $vid_h:ident) => { + impl All for $vid { + #[inline] + unsafe fn all(self) -> bool { + union U { + halves: ($vid_h, $vid_h), + vec: $vid, + } + let halves = U { vec: self }.halves; + halves.0.all() && halves.1.all() + } + } + impl Any for $vid { + #[inline] + unsafe fn any(self) -> bool { + union U { + halves: ($vid_h, $vid_h), + vec: $vid, + } + let halves = U { vec: self }.halves; + halves.0.any() || halves.1.any() + } + } + }; +} diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs new file mode 100644 index 000000000..bcfb1a6e1 --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs @@ -0,0 +1,188 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets + +#[cfg(target_feature = "sse")] +#[macro_use] +mod sse; + +#[cfg(target_feature = "sse2")] +#[macro_use] +mod sse2; + +#[cfg(target_feature = "avx")] +#[macro_use] +mod avx; + +#[cfg(target_feature = "avx2")] +#[macro_use] +mod avx2; + +/// x86 64-bit m8x8 implementation +macro_rules! x86_m8x8_impl { + ($id:ident) => { + fallback_impl!($id); + }; +} + +/// x86 128-bit m8x16 implementation +macro_rules! x86_m8x16_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(target_feature = "sse2")] { + x86_m8x16_sse2_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 128-bit m32x4 implementation +macro_rules! x86_m32x4_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(target_feature = "sse")] { + x86_m32x4_sse_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 128-bit m64x2 implementation +macro_rules! x86_m64x2_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(target_feature = "sse2")] { + x86_m64x2_sse2_impl!($id); + } else if #[cfg(target_feature = "sse")] { + x86_m32x4_sse_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m8x32 implementation +macro_rules! x86_m8x32_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx2")] { + x86_m8x32_avx2_impl!($id); + } else if #[cfg(target_feature = "avx")] { + x86_m8x32_avx_impl!($id); + } else if #[cfg(target_feature = "sse2")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m32x8 implementation +macro_rules! x86_m32x8_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx")] { + x86_m32x8_avx_impl!($id); + } else if #[cfg(target_feature = "sse")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m64x4 implementation +macro_rules! x86_m64x4_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx")] { + x86_m64x4_avx_impl!($id); + } else if #[cfg(target_feature = "sse")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// Fallback implementation. +macro_rules! 
x86_intr_impl { + ($id:ident) => { + impl All for $id { + #[inline] + unsafe fn all(self) -> bool { + use crate::llvm::simd_reduce_all; + simd_reduce_all(self.0) + } + } + impl Any for $id { + #[inline] + unsafe fn any(self) -> bool { + use crate::llvm::simd_reduce_any; + simd_reduce_any(self.0) + } + } + }; +} + +/// Mask reduction implementation for `x86` and `x86_64` targets +macro_rules! impl_mask_reductions { + // 64-bit wide masks + (m8x8) => { x86_m8x8_impl!(m8x8); }; + (m16x4) => { x86_m8x8_impl!(m16x4); }; + (m32x2) => { x86_m8x8_impl!(m32x2); }; + // 128-bit wide masks + (m8x16) => { x86_m8x16_impl!(m8x16); }; + (m16x8) => { x86_m8x16_impl!(m16x8); }; + (m32x4) => { x86_m32x4_impl!(m32x4); }; + (m64x2) => { x86_m64x2_impl!(m64x2); }; + (m128x1) => { x86_intr_impl!(m128x1); }; + // 256-bit wide masks: + (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); }; + (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); }; + (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); }; + (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); }; + (m128x2) => { x86_intr_impl!(m128x2); }; + (msizex2) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex2, m64x2); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex2, m32x2); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex4) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex4, m64x4); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex4, m32x4); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex8) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex8, m64x8); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex8, m32x8); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + + // Fallback to LLVM's default code-generation: + ($id:ident) => { fallback_impl!($id); }; +} diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs new file mode 100644 index 000000000..d18736fb0 --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs @@ -0,0 +1,101 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX` + +/// `x86`/`x86_64` 256-bit `AVX` implementation +/// FIXME: it might be faster here to do two `_mm_movmask_epi8` +#[cfg(target_feature = "avx")] +macro_rules! x86_m8x32_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "avx")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_testc_si256; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_testc_si256; + _mm256_testc_si256( + crate::mem::transmute(self), + crate::mem::transmute($id::splat(true)), + ) != 0 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "avx")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_testz_si256; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_testz_si256; + _mm256_testz_si256( + crate::mem::transmute(self), + crate::mem::transmute(self), + ) == 0 + } + } + }; +} + +/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation +macro_rules! 
x86_m32x8_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_ps; + // _mm256_movemask_ps(a) creates a 8bit mask containing the + // most significant bit of each lane of `a`. If all bits are + // set, then all 8 lanes of the mask are true. + _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_ps; + + _mm256_movemask_ps(crate::mem::transmute(self)) != 0 + } + } + }; +} + +/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation +macro_rules! x86_m64x4_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_pd; + // _mm256_movemask_pd(a) creates a 4bit mask containing the + // most significant bit of each lane of `a`. If all bits are + // set, then all 4 lanes of the mask are true. + _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_pd; + + _mm256_movemask_pd(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx2.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx2.rs new file mode 100644 index 000000000..d37d02342 --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx2.rs @@ -0,0 +1,35 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`. +#![allow(unused)] + +/// x86/x86_64 256-bit m8x32 AVX2 implementation +macro_rules! x86_m8x32_avx2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_epi8; + // _mm256_movemask_epi8(a) creates a 32bit mask containing the + // most significant bit of each byte of `a`. If all + // bits are set, then all 32 lanes of the mask are + // true. + _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_epi8; + + _mm256_movemask_epi8(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs new file mode 100644 index 000000000..eb1ef7fac --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs @@ -0,0 +1,36 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`. 
+#![allow(unused)] + +/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation +macro_rules! x86_m32x4_sse_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_ps; + // _mm_movemask_ps(a) creates a 4bit mask containing the + // most significant bit of each lane of `a`. If all + // bits are set, then all 4 lanes of the mask are + // true. + _mm_movemask_ps(crate::mem::transmute(self)) + == 0b_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_ps; + + _mm_movemask_ps(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs new file mode 100644 index 000000000..a99c606f5 --- /dev/null +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs @@ -0,0 +1,70 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`. +#![allow(unused)] + +/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation +macro_rules! x86_m64x2_sse2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pd; + // _mm_movemask_pd(a) creates a 2bit mask containing the + // most significant bit of each lane of `a`. If all + // bits are set, then all 2 lanes of the mask are + // true. + _mm_movemask_pd(crate::mem::transmute(self)) + == 0b_11_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pd; + + _mm_movemask_pd(crate::mem::transmute(self)) != 0 + } + } + }; +} + +/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation +macro_rules! x86_m8x16_sse2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_epi8; + // _mm_movemask_epi8(a) creates a 16bit mask containing the + // most significant bit of each byte of `a`. If all + // bits are set, then all 16 lanes of the mask are + // true. + _mm_movemask_epi8(crate::mem::transmute(self)) + == i32::from(u16::max_value()) + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_epi8; + + _mm_movemask_epi8(crate::mem::transmute(self)) != 0 + } + } + }; +} -- cgit v1.2.3
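
Editor's note, appended after the patch: the standalone sketch below illustrates the reduction strategy that the x86 macros in this patch generate. A vector mask whose lanes are all-ones or all-zeros is packed into an integer bitmask with a movemask instruction, after which `all()` is a comparison against the all-set value and `any()` a comparison against zero. This is an illustrative example only, not code from the patch or from `packed_simd_2`'s API; the function names `mask_all`/`mask_any` and the `[u8; 16]` mask representation are assumptions made for the sketch, which mirrors `x86_m8x16_sse2_impl` on an `x86_64` target where SSE2 is the baseline.

// Illustrative sketch only (not part of the upstream patch): a scalar-callable
// model of the SSE2 strategy generated by `x86_m8x16_sse2_impl`.
// Assumed lane convention: each of the 16 mask lanes is 0xFF (true) or 0x00 (false).
#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
mod movemask_sketch {
    use core::arch::x86_64::{__m128i, _mm_loadu_si128, _mm_movemask_epi8};

    // `all()`: movemask packs the most significant bit of every byte into a
    // 16-bit integer, so "all lanes true" means the packed mask is 0xFFFF.
    pub fn mask_all(lanes: &[u8; 16]) -> bool {
        // SAFETY: SSE2 is statically enabled by the cfg above; the unaligned
        // load reads exactly 16 bytes from a valid, properly sized array.
        unsafe {
            let v: __m128i = _mm_loadu_si128(lanes.as_ptr() as *const __m128i);
            _mm_movemask_epi8(v) == 0xFFFF
        }
    }

    // `any()`: at least one most significant bit is set, i.e. the packed mask is non-zero.
    pub fn mask_any(lanes: &[u8; 16]) -> bool {
        // SAFETY: as above.
        unsafe {
            let v: __m128i = _mm_loadu_si128(lanes.as_ptr() as *const __m128i);
            _mm_movemask_epi8(v) != 0
        }
    }
}

#[cfg(all(target_arch = "x86_64", target_feature = "sse2"))]
fn main() {
    let all_true = [0xFF_u8; 16];
    let mut one_false = all_true;
    one_false[7] = 0x00;
    assert!(movemask_sketch::mask_all(&all_true));
    assert!(!movemask_sketch::mask_all(&one_false));
    assert!(movemask_sketch::mask_any(&one_false));
    assert!(!movemask_sketch::mask_any(&[0_u8; 16]));
    println!("movemask-based all()/any() behave as expected");
}

#[cfg(not(all(target_arch = "x86_64", target_feature = "sse2")))]
fn main() {}

The AVX, AVX2, and NEON paths in the patch follow the same shape with different packing instructions (`_mm256_movemask_*`, `_mm256_testc_si256`/`_mm256_testz_si256`, and `vminvq_*`/`vmaxvq_*` respectively), while the generic fallback transmutes the mask to an integer (or an array of `u128`) and compares it against the all-ones and zero patterns.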