5 files changed, 1106 insertions, 0 deletions
diff --git a/third_party/rust/packed_simd/src/api/reductions/bitwise.rs b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs
new file mode 100644
index 0000000000..5bad4f474b
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs
@@ -0,0 +1,151 @@
+//! Implements portable horizontal bitwise vector reductions.
+#![allow(unused)]
+
+macro_rules! impl_reduction_bitwise {
+    (
+        [$elem_ty:ident; $elem_count:expr]:
+        $id:ident | $ielem_ty:ident | $test_tt:tt |
+        ($convert:expr) |
+        ($true:expr, $false:expr)
+    ) => {
+        impl $id {
+            /// Horizontal bitwise `and` reduction: folds all vector lanes with `&`.
+            ///
+            /// Note: if the vector has one lane, the first element of the
+            /// vector is returned.
+            #[inline]
+            pub fn and(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_and;
+                    let r: $ielem_ty = unsafe { simd_reduce_and(self.0) };
+                    $convert(r) // map the `$ielem_ty` result to the returned `$elem_ty`
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: intrinsic path disabled on aarch64 (broken); fold lane by lane instead.
+                    // See https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x &= self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+
+            /// Horizontal bitwise `or` reduction: folds all vector lanes with `|`.
+            ///
+            /// Note: if the vector has one lane, the first element of the
+            /// vector is returned.
+            #[inline]
+            pub fn or(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_or;
+                    let r: $ielem_ty = unsafe { simd_reduce_or(self.0) };
+                    $convert(r) // map the `$ielem_ty` result to the returned `$elem_ty`
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: intrinsic path disabled on aarch64 (broken); fold lane by lane instead.
+                    // See https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x |= self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+
+            /// Horizontal bitwise `xor` reduction: folds all vector lanes with `^`.
+            ///
+            /// Note: if the vector has one lane, the first element of the
+            /// vector is returned.
+            #[inline]
+            pub fn xor(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_xor;
+                    let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) };
+                    $convert(r) // map the `$ielem_ty` result to the returned `$elem_ty`
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: intrinsic path disabled on aarch64 (broken); fold lane by lane instead.
+                    // See https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x ^= self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+        }
+
+        test_if!{
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _reduction_bitwise>] {
+                    use super::*;
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn and() {
+                        let v = $id::splat($false);
+                        assert_eq!(v.and(), $false);
+                        let v = $id::splat($true);
+                        assert_eq!(v.and(), $true);
+                        let v = $id::splat($false);
+                        let v = v.replace(0, $true); // only lane 0 is `$true`
+                        if $id::lanes() > 1 {
+                            assert_eq!(v.and(), $false);
+                        } else { // single lane: the result is lane 0 itself
+                            assert_eq!(v.and(), $true);
+                        }
+                        let v = $id::splat($true);
+                        let v = v.replace(0, $false); // only lane 0 is `$false`
+                        assert_eq!(v.and(), $false);
+
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn or() {
+                        let v = $id::splat($false);
+                        assert_eq!(v.or(), $false);
+                        let v = $id::splat($true);
+                        assert_eq!(v.or(), $true);
+                        let v = $id::splat($false);
+                        let v = v.replace(0, $true); // only lane 0 is `$true`
+                        assert_eq!(v.or(), $true);
+                        let v = $id::splat($true);
+                        let v = v.replace(0, $false); // only lane 0 is `$false`
+                        if $id::lanes() > 1 {
+                            assert_eq!(v.or(), $true);
+                        } else { // single lane: the result is lane 0 itself
+                            assert_eq!(v.or(), $false);
+                        }
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn xor() {
+                        let v = $id::splat($false);
+                        assert_eq!(v.xor(), $false);
+                        let v = $id::splat($true);
+                        if $id::lanes() > 1 {
+                            assert_eq!(v.xor(), $false);
+                        } else { // single lane: the result is lane 0 itself
+                            assert_eq!(v.xor(), $true);
+                        }
+                        let v = $id::splat($false);
+                        let v = v.replace(0, $true); // only lane 0 is `$true`
+                        assert_eq!(v.xor(), $true);
+                        let v = $id::splat($true);
+                        let v = v.replace(0, $false); // only lane 0 is `$false`
+                        if $id::lanes() > 1 {
+                            assert_eq!(v.xor(), $true);
+                        } else { // single lane: the result is lane 0 itself
+                            assert_eq!(v.xor(), $false);
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
new file mode 100644
index 0000000000..9dc8783dbb
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs
@@ -0,0 +1,313 @@
+//! Implements portable horizontal float vector arithmetic reductions.
+
+macro_rules! impl_reduction_float_arithmetic {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Horizontal sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[inline]
+            pub fn sum(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_add_ordered;
+                    unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) }
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: intrinsic path disabled on AArch64 (broken); add lanes left-to-right.
+                    // See https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x += self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+
+            /// Horizontal product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// If one of the vector elements is `NaN` the reduction returns
+            /// `NaN`. The resulting `NaN` is not required to be equal to any
+            /// of the `NaN`s in the vector.
+            #[inline]
+            pub fn product(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_mul_ordered;
+                    unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) }
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: intrinsic path disabled on AArch64 (broken); multiply lanes left-to-right.
+                    // See https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x *= self.extract(i) as $elem_ty;
+                    }
+                    x
+                }
+            }
+        }
+
+        impl crate::iter::Sum for $id {
+            #[inline]
+            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
+                iter.fold($id::splat(0.), crate::ops::Add::add)
+            }
+        }
+
+        impl crate::iter::Product for $id {
+            #[inline]
+            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
+                iter.fold($id::splat(1.), crate::ops::Mul::mul)
+            }
+        }
+
+        impl<'a> crate::iter::Sum<&'a $id> for $id {
+            #[inline]
+            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+                iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b))
+            }
+        }
+
+        impl<'a> crate::iter::Product<&'a $id> for $id {
+            #[inline]
+            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+                iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b))
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                // Exact float comparisons are intended: expected values are integers cast to `$elem_ty`.
+                #[allow(clippy::float_cmp)]
+                pub mod [<$id _reduction_float_arith>] {
+                    use super::*;
+                    fn alternating(x: usize) -> $id {
+                        let mut v = $id::splat(1 as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            if i % x == 0 {
+                                v = v.replace(i, 2 as $elem_ty);
+                            }
+                        }
+                        v
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn sum() {
+                        let v = $id::splat(0 as $elem_ty);
+                        assert_eq!(v.sum(), 0 as $elem_ty);
+                        let v = $id::splat(1 as $elem_ty);
+                        assert_eq!(v.sum(), $id::lanes() as $elem_ty);
+                        let v = alternating(2);
+                        assert_eq!(
+                            v.sum(),
+                            ($id::lanes() / 2 + $id::lanes()) as $elem_ty
+                        );
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn product() {
+                        let v = $id::splat(0 as $elem_ty);
+                        assert_eq!(v.product(), 0 as $elem_ty);
+                        let v = $id::splat(1 as $elem_ty);
+                        assert_eq!(v.product(), 1 as $elem_ty);
+                        let f = match $id::lanes() {
+                            64 => 16,
+                            32 => 8,
+                            16 => 4,
+                            _ => 2,
+                        };
+                        let v = alternating(f);
+                        assert_eq!(
+                            v.product(),
+                            (2_usize.pow(($id::lanes() / f) as u32)
+                             as $elem_ty)
+                        );
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[allow(unreachable_code)]
+                    fn sum_nan() {
+                        // FIXME: disabled by the early `return` below: https://bugs.llvm.org/show_bug.cgi?id=36732
+                        // See also https://github.com/rust-lang-nursery/packed_simd/issues/6
+                        return;
+
+                        let n0 = crate::$elem_ty::NAN;
+                        let v0 = $id::splat(-3.0);
+                        for i in 0..$id::lanes() {
+                            let mut v = v0.replace(i, n0);
+                            // If the vector contains a NaN the result is NaN:
+                            assert!(
+                                v.sum().is_nan(),
+                                "nan at {} => {} | {:?}",
+                                i,
+                                v.sum(),
+                                v
+                            );
+                            for j in 0..i {
+                                v = v.replace(j, n0);
+                                assert!(v.sum().is_nan());
+                            }
+                        }
+                        let v = $id::splat(n0);
+                        assert!(v.sum().is_nan(), "all nans | {:?}", v);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[allow(unreachable_code)]
+                    fn product_nan() {
+                        // FIXME: disabled by the early `return` below: https://bugs.llvm.org/show_bug.cgi?id=36732
+                        // See also https://github.com/rust-lang-nursery/packed_simd/issues/6
+                        return;
+
+                        let n0 = crate::$elem_ty::NAN;
+                        let v0 = $id::splat(-3.0);
+                        for i in 0..$id::lanes() {
+                            let mut v = v0.replace(i, n0);
+                            // If the vector contains a NaN the result is NaN:
+                            assert!(
+                                v.product().is_nan(),
+                                "nan at {} => {} | {:?}",
+                                i,
+                                v.product(),
+                                v
+                            );
+                            for j in 0..i {
+                                v = v.replace(j, n0);
+                                assert!(v.product().is_nan());
+                            }
+                        }
+                        let v = $id::splat(n0);
+                        assert!(v.product().is_nan(), "all nans | {:?}", v);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[allow(unused, dead_code)]
+                    fn sum_roundoff() {
+                        // Reference implementation: recursive pairwise (tree) sum of the slice.
+                        fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty {
+                            assert!(!a.is_empty());
+                            if a.len() == 1 {
+                                a[0]
+                            } else if a.len() == 2 {
+                                a[0] + a[1]
+                            } else {
+                                let mid = a.len() / 2;
+                                let (left, right) = a.split_at(mid);
+                                tree_reduce_sum(left) + tree_reduce_sum(right)
+                            }
+                        }
+
+                        let mut start = crate::$elem_ty::EPSILON;
+                        let mut scalar_reduction = 0. as $elem_ty;
+
+                        let mut v = $id::splat(0. as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            let c = if i % 2 == 0 { 1e3 } else { -1. };
+                            start *= ::core::$elem_ty::consts::PI * c;
+                            scalar_reduction += start;
+                            v = v.replace(i, start);
+                        }
+                        let simd_reduction = v.sum();
+
+                        let mut a = [0. as $elem_ty; $id::lanes()];
+                        v.write_to_slice_unaligned(&mut a);
+                        let tree_reduction = tree_reduce_sum(&a);
+
+                        // Tolerate at most 1 ULP: the two bit patterns must differ by less than 2.
+                        let red_bits = simd_reduction.to_bits();
+                        let tree_bits = tree_reduction.to_bits();
+                        assert!(
+                            if red_bits > tree_bits {
+                                red_bits - tree_bits
+                            } else {
+                                tree_bits - red_bits
+                            } < 2,
+                            "vector: {:?} | simd_reduction: {:?} | \
+tree_reduction: {} | scalar_reduction: {}",
+                            v,
+                            simd_reduction,
+                            tree_reduction,
+                            scalar_reduction
+                        );
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    #[allow(unused, dead_code)]
+                    fn product_roundoff() {
+                        use ::core::convert::TryInto;
+                        // Reference implementation: recursive pairwise (tree) product of the slice.
+                        fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty {
+                            assert!(!a.is_empty());
+                            if a.len() == 1 {
+                                a[0]
+                            } else if a.len() == 2 {
+                                a[0] * a[1]
+                            } else {
+                                let mid = a.len() / 2;
+                                let (left, right) = a.split_at(mid);
+                                tree_reduce_product(left)
+                                    * tree_reduce_product(right)
+                            }
+                        }
+
+                        let mut start = crate::$elem_ty::EPSILON;
+                        let mut scalar_reduction = 1. as $elem_ty;
+
+                        let mut v = $id::splat(0. as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            let c = if i % 2 == 0 { 1e3 } else { -1. };
+                            start *= ::core::$elem_ty::consts::PI * c;
+                            scalar_reduction *= start;
+                            v = v.replace(i, start);
+                        }
+                        let simd_reduction = v.product();
+
+                        let mut a = [0. as $elem_ty; $id::lanes()];
+                        v.write_to_slice_unaligned(&mut a);
+                        let tree_reduction = tree_reduce_product(&a);
+
+                        // FIXME: Too imprecise, even only for product(f32x8), so the bound is lanes / 2 ULPs.
+                        // Figure out how to narrow this down.
+                        let ulp_limit = $id::lanes() / 2;
+                        let red_bits = simd_reduction.to_bits();
+                        let tree_bits = tree_reduction.to_bits();
+                        assert!(
+                            if red_bits > tree_bits {
+                                red_bits - tree_bits
+                            } else {
+                                tree_bits - red_bits
+                            } < ulp_limit.try_into().unwrap(),
+                            "vector: {:?} | simd_reduction: {:?} | \
+tree_reduction: {} | scalar_reduction: {}",
+                            v,
+                            simd_reduction,
+                            tree_reduction,
+                            scalar_reduction
+                        );
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs
new file mode 100644
index 0000000000..e99e6cb5d7
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs
@@ -0,0 +1,193 @@
+//! Implements portable horizontal integer vector arithmetic reductions.
+
+macro_rules! impl_reduction_integer_arithmetic {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident
+     | $test_tt:tt) => {
+        impl $id {
+            /// Horizontal wrapping sum of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7))
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            #[inline]
+            pub fn wrapping_sum(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_add_ordered;
+                    let v: $ielem_ty = unsafe { simd_reduce_add_ordered(self.0, 0 as $ielem_ty) };
+                    v as $elem_ty
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: intrinsic path disabled on AArch64 (broken); wrapping-add lane by lane.
+                    // See https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x = x.wrapping_add(self.extract(i) as $elem_ty);
+                    }
+                    x
+                }
+            }
+
+            /// Horizontal wrapping product of the vector elements.
+            ///
+            /// The intrinsic performs a tree-reduction of the vector elements.
+            /// That is, for an 8 element vector:
+            ///
+            /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7))
+            ///
+            /// If an operation overflows it returns the mathematical result
+            /// modulo `2^n` where `n` is the bit width of the element type.
+            #[inline]
+            pub fn wrapping_product(self) -> $elem_ty {
+                #[cfg(not(target_arch = "aarch64"))]
+                {
+                    use crate::llvm::simd_reduce_mul_ordered;
+                    let v: $ielem_ty = unsafe { simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) };
+                    v as $elem_ty
+                }
+                #[cfg(target_arch = "aarch64")]
+                {
+                    // FIXME: intrinsic path disabled on AArch64 (broken); wrapping-multiply lane by lane.
+                    // See https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    let mut x = self.extract(0) as $elem_ty;
+                    for i in 1..$id::lanes() {
+                        x = x.wrapping_mul(self.extract(i) as $elem_ty);
+                    }
+                    x
+                }
+            }
+        }
+
+        impl crate::iter::Sum for $id {
+            #[inline]
+            fn sum<I: Iterator<Item = $id>>(iter: I) -> $id {
+                iter.fold($id::splat(0), crate::ops::Add::add)
+            }
+        }
+
+        impl crate::iter::Product for $id {
+            #[inline]
+            fn product<I: Iterator<Item = $id>>(iter: I) -> $id {
+                iter.fold($id::splat(1), crate::ops::Mul::mul)
+            }
+        }
+
+        impl<'a> crate::iter::Sum<&'a $id> for $id {
+            #[inline]
+            fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+                iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b))
+            }
+        }
+
+        impl<'a> crate::iter::Product<&'a $id> for $id {
+            #[inline]
+            fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id {
+                iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b))
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _reduction_int_arith>] {
+                    use super::*;
+
+                    fn alternating(x: usize) -> $id {
+                        let mut v = $id::splat(1 as $elem_ty);
+                        for i in 0..$id::lanes() {
+                            if i % x == 0 {
+                                v = v.replace(i, 2 as $elem_ty);
+                            }
+                        }
+                        v
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn wrapping_sum() {
+                        let v = $id::splat(0 as $elem_ty);
+                        assert_eq!(v.wrapping_sum(), 0 as $elem_ty);
+                        let v = $id::splat(1 as $elem_ty);
+                        assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty);
+                        let v = alternating(2);
+                        if $id::lanes() > 1 {
+                            assert_eq!(
+                                v.wrapping_sum(),
+                                ($id::lanes() / 2 + $id::lanes()) as $elem_ty
+                            );
+                        } else { // single lane: `alternating` leaves just the value 2
+                            assert_eq!(
+                                v.wrapping_sum(),
+                                2 as $elem_ty
+                            );
+                        }
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn wrapping_sum_overflow() {
+                        let start = $elem_ty::max_value()
+                            - ($id::lanes() as $elem_ty / 2);
+
+                        let v = $id::splat(start as $elem_ty);
+                        let vwrapping_sum = v.wrapping_sum();
+
+                        let mut wrapping_sum = start;
+                        for _ in 1..$id::lanes() {
+                            wrapping_sum = wrapping_sum.wrapping_add(start);
+                        }
+                        assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v);
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn wrapping_product() {
+                        let v = $id::splat(0 as $elem_ty);
+                        assert_eq!(v.wrapping_product(), 0 as $elem_ty);
+                        let v = $id::splat(1 as $elem_ty);
+                        assert_eq!(v.wrapping_product(), 1 as $elem_ty);
+                        let f = match $id::lanes() {
+                            64 => 16,
+                            32 => 8,
+                            16 => 4,
+                            _ => 2,
+                        };
+                        let v = alternating(f);
+                        if $id::lanes() > 1 {
+                            assert_eq!(
+                                v.wrapping_product(),
+                                (2_usize.pow(($id::lanes() / f) as u32)
+                                 as $elem_ty)
+                            );
+                        } else { // single lane: `alternating` leaves just the value 2
+                            assert_eq!(
+                                v.wrapping_product(),
+                                2 as $elem_ty
+                            );
+                        }
+                    }
+
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn wrapping_product_overflow() {
+                        let start = $elem_ty::max_value()
+                            - ($id::lanes() as $elem_ty / 2);
+
+                        let v = $id::splat(start as $elem_ty);
+                        let vmul = v.wrapping_product();
+
+                        let mut mul = start;
+                        for _ in 1..$id::lanes() {
+                            mul = mul.wrapping_mul(start);
+                        }
+                        assert_eq!(mul, vmul, "v = {:?}", v);
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions/mask.rs b/third_party/rust/packed_simd/src/api/reductions/mask.rs
new file mode 100644
index 0000000000..0dd6a84e7e
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/mask.rs
@@ -0,0 +1,89 @@
+//! Implements portable horizontal mask reductions.
+
+macro_rules! impl_reduction_mask {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        impl $id {
+            /// Returns `true` if every lane of the mask is `true`.
+            #[inline]
+            pub fn all(self) -> bool {
+                unsafe { crate::codegen::reductions::mask::All::all(self) }
+            }
+            /// Returns `true` if at least one lane of the mask is `true`.
+            #[inline]
+            pub fn any(self) -> bool {
+                unsafe { crate::codegen::reductions::mask::Any::any(self) }
+            }
+            /// Returns `true` if no lane of the mask is `true` (the negation of `any`).
+            #[inline]
+            pub fn none(self) -> bool {
+                !self.any()
+            }
+        }
+
+        test_if! {
+            $test_tt:
+            paste::item! {
+                pub mod [<$id _reduction>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn all() {
+                        let a = $id::splat(true);
+                        assert!(a.all());
+                        let a = $id::splat(false);
+                        assert!(!a.all());
+
+                        if $id::lanes() > 1 { // mixed-lane checks need at least two lanes
+                            for i in 0..$id::lanes() {
+                                let mut a = $id::splat(true);
+                                a = a.replace(i, false);
+                                assert!(!a.all());
+                                let mut a = $id::splat(false);
+                                a = a.replace(i, true);
+                                assert!(!a.all());
+                            }
+                        }
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn any() {
+                        let a = $id::splat(true);
+                        assert!(a.any());
+                        let a = $id::splat(false);
+                        assert!(!a.any());
+
+                        if $id::lanes() > 1 { // mixed-lane checks need at least two lanes
+                            for i in 0..$id::lanes() {
+                                let mut a = $id::splat(true);
+                                a = a.replace(i, false);
+                                assert!(a.any());
+                                let mut a = $id::splat(false);
+                                a = a.replace(i, true);
+                                assert!(a.any());
+                            }
+                        }
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn none() {
+                        let a = $id::splat(true);
+                        assert!(!a.none());
+                        let a = $id::splat(false);
+                        assert!(a.none());
+
+                        if $id::lanes() > 1 { // mixed-lane checks need at least two lanes
+                            for i in 0..$id::lanes() {
+                                let mut a = $id::splat(true);
+                                a = a.replace(i, false);
+                                assert!(!a.none());
+                                let mut a = $id::splat(false);
+                                a = a.replace(i, true);
+                                assert!(!a.none());
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    };
+}
diff --git a/third_party/rust/packed_simd/src/api/reductions/min_max.rs b/third_party/rust/packed_simd/src/api/reductions/min_max.rs
new file mode 100644
index 0000000000..a3ce13a451
--- /dev/null
+++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs
@@ -0,0 +1,360 @@
+//! Implements portable horizontal vector min/max reductions.
+
+/// Implements the horizontal `max_element` / `min_element` reductions for the
+/// vector type `$id` and, when enabled through `$test_tt`, their unit tests.
+///
+/// `$elem_ty` is the element type returned by the reductions and `$ielem_ty`
+/// is the type produced by the LLVM reduction intrinsics before it is cast
+/// back to `$elem_ty`.
+macro_rules! impl_reduction_min_max {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident
+     | $ielem_ty:ident | $test_tt:tt) => {
+        impl $id {
+            /// Returns the largest element of the vector.
+            #[inline]
+            pub fn max_element(self) -> $elem_ty {
+                #[cfg(not(any(
+                    target_arch = "aarch64",
+                    target_arch = "arm",
+                    target_arch = "powerpc64",
+                    target_arch = "wasm32",
+                )))]
+                {
+                    // One horizontal reduction through the LLVM intrinsic.
+                    use crate::llvm::simd_reduce_max;
+                    let reduced: $ielem_ty =
+                        unsafe { simd_reduce_max(self.0) };
+                    reduced as $elem_ty
+                }
+                #[cfg(any(
+                    target_arch = "aarch64",
+                    target_arch = "arm",
+                    target_arch = "powerpc64",
+                    target_arch = "wasm32",
+                ))]
+                {
+                    // Scalar fallback: combine the lanes one by one, starting
+                    // from lane 0.
+                    //
+                    // FIXME: broken on AArch64
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    // FIXME: broken on WASM32
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/91
+                    (1..$id::lanes()).fold(self.extract(0), |best, lane| {
+                        best.max(self.extract(lane))
+                    })
+                }
+            }
+
+            /// Returns the smallest element of the vector.
+            #[inline]
+            pub fn min_element(self) -> $elem_ty {
+                #[cfg(not(any(
+                    target_arch = "aarch64",
+                    target_arch = "arm",
+                    all(target_arch = "x86", not(target_feature = "sse2")),
+                    target_arch = "powerpc64",
+                    target_arch = "wasm32",
+                )))]
+                {
+                    // One horizontal reduction through the LLVM intrinsic.
+                    use crate::llvm::simd_reduce_min;
+                    let reduced: $ielem_ty =
+                        unsafe { simd_reduce_min(self.0) };
+                    reduced as $elem_ty
+                }
+                #[cfg(any(
+                    target_arch = "aarch64",
+                    target_arch = "arm",
+                    all(target_arch = "x86", not(target_feature = "sse2")),
+                    target_arch = "powerpc64",
+                    target_arch = "wasm32",
+                ))]
+                {
+                    // Scalar fallback: combine the lanes one by one, starting
+                    // from lane 0.
+                    //
+                    // FIXME: broken on AArch64
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/15
+                    // FIXME: broken on i586-unknown-linux-gnu
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/22
+                    // FIXME: broken on WASM32
+                    // https://github.com/rust-lang-nursery/packed_simd/issues/91
+                    (1..$id::lanes()).fold(self.extract(0), |best, lane| {
+                        best.min(self.extract(lane))
+                    })
+                }
+            }
+        }
+        test_if! {$test_tt:
+        paste::item! {
+            // Every compared value is one of 0, 1 or 2 cast to the element
+            // type, so exact equality is intended even for float vectors.
+            #[allow(clippy::float_cmp)]
+            pub mod [<$id _reduction_min_max>] {
+                use super::*;
+                #[cfg_attr(not(target_arch = "wasm32"), test)]
+                #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                pub fn max_element() {
+                    // All lanes equal: the maximum is that value.
+                    let zeros = $id::splat(0 as $elem_ty);
+                    assert_eq!(zeros.max_element(), 0 as $elem_ty);
+
+                    // A larger value in lane 1 (only if that lane exists).
+                    if $id::lanes() > 1 {
+                        let one_in_lane1 = zeros.replace(1, 1 as $elem_ty);
+                        assert_eq!(one_in_lane1.max_element(), 1 as $elem_ty);
+                    }
+
+                    // A larger value in lane 0.
+                    let two_in_lane0 = zeros.replace(0, 2 as $elem_ty);
+                    assert_eq!(two_in_lane0.max_element(), 2 as $elem_ty);
+                }
+
+                #[cfg_attr(not(target_arch = "wasm32"), test)]
+                #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                pub fn min_element() {
+                    // All lanes equal: the minimum is that value.
+                    let zeros = $id::splat(0 as $elem_ty);
+                    assert_eq!(zeros.min_element(), 0 as $elem_ty);
+
+                    // A larger value in lane 1 must not change the minimum.
+                    if $id::lanes() > 1 {
+                        let one_in_lane1 = zeros.replace(1, 1 as $elem_ty);
+                        assert_eq!(one_in_lane1.min_element(), 0 as $elem_ty);
+                    }
+
+                    // Lane 0 holds 2 and every other lane holds 1; with a
+                    // single lane the only element is 2.
+                    let two_in_lane0 =
+                        $id::splat(1 as $elem_ty).replace(0, 2 as $elem_ty);
+                    let expected = if $id::lanes() > 1 {
+                        1 as $elem_ty
+                    } else {
+                        2 as $elem_ty
+                    };
+                    assert_eq!(two_in_lane0.min_element(), expected);
+
+                    // A smaller value in lane 1 becomes the minimum.
+                    if $id::lanes() > 1 {
+                        let one_in_lane1 =
+                            $id::splat(2 as $elem_ty).replace(1, 1 as $elem_ty);
+                        assert_eq!(one_in_lane1.min_element(), 1 as $elem_ty);
+                    }
+                }
+            }
+        }
+        }
+    };
+}
+
+/// Generates the NaN-handling tests for the `min_element` / `max_element`
+/// reductions of the floating-point vector type `$id` (element type
+/// `$elem_ty`). The tests are only emitted when enabled through `$test_tt`.
+///
+/// Both tests start from a vector of `-3.`s, place a `NaN` in lane `i`, and
+/// then overwrite lanes `0..i` with `NaN`s one at a time, checking that the
+/// reduction ignores `NaN`s until every lane is `NaN`.
+macro_rules! test_reduction_float_min_max {
+    ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
+        test_if! {
+            $test_tt:
+            paste::item! {
+                // Exact float equality is intended: every expected value is
+                // either the literal `-3.` or is checked through `is_nan()`.
+                #[allow(clippy::float_cmp)]
+                pub mod [<$id _reduction_min_max_nan>] {
+                    use super::*;
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn min_element_test() {
+                        let n = crate::$elem_ty::NAN;
+
+                        // Scalar reference behavior: `min` ignores a NaN
+                        // operand on either side.
+                        assert_eq!(n.min(-3.), -3.);
+                        assert_eq!((-3. as $elem_ty).min(n), -3.);
+
+                        let v0 = $id::splat(-3.);
+
+                        // `true` on every target that is NOT in this list.
+                        // The list mirrors the targets for which
+                        // `min_element` uses the scalar fallback loop instead
+                        // of the LLVM reduction intrinsic.
+                        let target_with_broken_last_lane_nan = !cfg!(any(
+                            target_arch = "arm", target_arch = "aarch64",
+                            all(target_arch = "x86",
+                                not(target_feature = "sse2")
+                            ),
+                            target_arch = "powerpc64",
+                            target_arch = "wasm32",
+                        ));
+
+                        // The vector is initialized to `-3.`s: [-3, -3, -3, -3]
+                        for i in 0..$id::lanes() {
+                            // We replace the i-th element of the vector with
+                            // `NaN`: [-3, -3, -3, NaN]
+                            let mut v = v0.replace(i, n);
+
+                            // If the NaN is in the last place, the LLVM
+                            // implementation of these methods is broken on some
+                            // targets:
+                            if i == $id::lanes() - 1 &&
+                                target_with_broken_last_lane_nan {
+                                assert_eq!(v.min_element(), -3.,
+                                           "[A]: nan at {} => {} | {:?}",
+                                           i, v.min_element(), v);
+
+                                // If we replace all the elements in the vector
+                                // up-to the `i-th` lane with `NaN`s, the result
+                                // is still always `-3.` unless all elements of
+                                // the vector are `NaN`s:
+                                for j in 0..i {
+                                    v = v.replace(j, n);
+                                    if j == i-1 {
+                                        // Lanes `0..=i` are now all `NaN`.
+                                        assert!(v.min_element().is_nan(),
+                                            "[B]: nan at {} => {} | {:?}",
+                                            i, v.min_element(), v);
+                                    } else {
+                                        assert_eq!(v.min_element(), -3.,
+                                            "[B]: nan at {} => {} | {:?}",
+                                            i, v.min_element(), v);
+                                    }
+                                }
+
+                                // We are done here, since we were in the last
+                                // lane which is the last iteration of the loop.
+                                break
+                            }
+
+                            // We are not in the last lane, and there is only
+                            // one `NaN` in the vector.
+
+                            // If the vector has one lane, the result is `NaN`:
+                            if $id::lanes() == 1 {
+                                assert!(v.min_element().is_nan(),
+                                        "[C]: all nans | v={:?} | min={} | \
+is_nan: {}",
+                                        v, v.min_element(),
+                                        v.min_element().is_nan()
+                                );
+
+                                // And we are done, since the vector only has
+                                // one lane anyways.
+                                break;
+                            }
+
+                            // The vector has more than one lane, since there is
+                            // only one `NaN` in the vector, the result is
+                            // always `-3`.
+                            assert_eq!(v.min_element(), -3.,
+                                       "[D]: nan at {} => {} | {:?}",
+                                       i, v.min_element(), v);
+
+                            // If we replace all the elements in the vector
+                            // up-to the `i-th` lane with `NaN`s, the result is
+                            // still always `-3.` unless all elements of the
+                            // vector are `NaN`s:
+                            for j in 0..i {
+                                v = v.replace(j, n);
+
+                                if i == $id::lanes() - 1 && j == i - 1 {
+                                    // All elements of the vector are `NaN`s,
+                                    // therefore the result is NaN as well.
+                                    //
+                                    // Note: the #lanes of the vector is > 1, so
+                                    // "i - 1" does not overflow.
+                                    assert!(v.min_element().is_nan(),
+                                            "[E]: all nans | v={:?} | min={} | \
+is_nan: {}",
+                                            v, v.min_element(),
+                                            v.min_element().is_nan());
+                                } else {
+                                    // There are non-`NaN` elements in the
+                                    // vector, therefore the result is `-3.`:
+                                    assert_eq!(v.min_element(), -3.,
+                                               "[F]: nan at {} => {} | {:?}",
+                                               i, v.min_element(), v);
+                                }
+                            }
+                        }
+
+                        // If the vector contains all NaNs the result is NaN:
+                        assert!($id::splat(n).min_element().is_nan(),
+                                "all nans | v={:?} | min={} | is_nan: {}",
+                                $id::splat(n), $id::splat(n).min_element(),
+                                $id::splat(n).min_element().is_nan());
+                    }
+                    #[cfg_attr(not(target_arch = "wasm32"), test)]
+                    #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
+                    fn max_element_test() {
+                        let n = crate::$elem_ty::NAN;
+
+                        // Scalar reference behavior: `max` ignores a NaN
+                        // operand on either side.
+                        assert_eq!(n.max(-3.), -3.);
+                        assert_eq!((-3. as $elem_ty).max(n), -3.);
+
+                        let v0 = $id::splat(-3.);
+
+                        // `true` on every target that is NOT in this list.
+                        // The list mirrors the targets for which
+                        // `max_element` uses the scalar fallback loop instead
+                        // of the LLVM reduction intrinsic.
+                        let target_with_broken_last_lane_nan = !cfg!(any(
+                            target_arch = "arm", target_arch = "aarch64",
+                            target_arch = "powerpc64", target_arch = "wasm32",
+                        ));
+
+                        // The vector is initialized to `-3.`s: [-3, -3, -3, -3]
+                        for i in 0..$id::lanes() {
+                            // We replace the i-th element of the vector with
+                            // `NaN`: [-3, -3, -3, NaN]
+                            let mut v = v0.replace(i, n);
+
+                            // If the NaN is in the last place, the LLVM
+                            // implementation of these methods is broken on some
+                            // targets:
+                            if i == $id::lanes() - 1 &&
+                                target_with_broken_last_lane_nan {
+                                assert_eq!(v.max_element(), -3.,
+                                           "[A]: nan at {} => {} | {:?}",
+                                           i, v.max_element(), v);
+
+                                // If we replace all the elements in the vector
+                                // up-to the `i-th` lane with `NaN`s, the result
+                                // is still always `-3.` unless all elements of
+                                // the vector are `NaN`s:
+                                for j in 0..i {
+                                    v = v.replace(j, n);
+                                    if j == i-1 {
+                                        // Lanes `0..=i` are now all `NaN`.
+                                        // This must check `max_element`, the
+                                        // reduction under test here.
+                                        assert!(v.max_element().is_nan(),
+                                            "[B]: nan at {} => {} | {:?}",
+                                            i, v.max_element(), v);
+                                    } else {
+                                        assert_eq!(v.max_element(), -3.,
+                                            "[B]: nan at {} => {} | {:?}",
+                                            i, v.max_element(), v);
+                                    }
+                                }
+
+                                // We are done here, since we were in the last
+                                // lane which is the last iteration of the loop.
+                                break
+                            }
+
+                            // We are not in the last lane, and there is only
+                            // one `NaN` in the vector.
+
+                            // If the vector has one lane, the result is `NaN`:
+                            if $id::lanes() == 1 {
+                                assert!(v.max_element().is_nan(),
+                                        "[C]: all nans | v={:?} | max={} | \
+is_nan: {}",
+                                        v, v.max_element(),
+                                        v.max_element().is_nan());
+
+                                // And we are done, since the vector only has
+                                // one lane anyways.
+                                break;
+                            }
+
+                            // The vector has more than one lane, since there is
+                            // only one `NaN` in the vector, the result is
+                            // always `-3`.
+                            assert_eq!(v.max_element(), -3.,
+                                       "[D]: nan at {} => {} | {:?}",
+                                       i, v.max_element(), v);
+
+                            // If we replace all the elements in the vector
+                            // up-to the `i-th` lane with `NaN`s, the result is
+                            // still always `-3.` unless all elements of the
+                            // vector are `NaN`s:
+                            for j in 0..i {
+                                v = v.replace(j, n);
+
+                                if i == $id::lanes() - 1 && j == i - 1 {
+                                    // All elements of the vector are `NaN`s,
+                                    // therefore the result is NaN as well.
+                                    //
+                                    // Note: the #lanes of the vector is > 1, so
+                                    // "i - 1" does not overflow.
+                                    assert!(v.max_element().is_nan(),
+                                            "[E]: all nans | v={:?} | max={} | \
+is_nan: {}",
+                                            v, v.max_element(),
+                                            v.max_element().is_nan());
+                                } else {
+                                    // There are non-`NaN` elements in the
+                                    // vector, therefore the result is `-3.`:
+                                    assert_eq!(v.max_element(), -3.,
+                                               "[F]: nan at {} => {} | {:?}",
+                                               i, v.max_element(), v);
+                                }
+                            }
+                        }
+
+                        // If the vector contains all NaNs the result is NaN:
+                        assert!($id::splat(n).max_element().is_nan(),
+                                "all nans | v={:?} | max={} | is_nan: {}",
+                                $id::splat(n), $id::splat(n).max_element(),
+                                $id::splat(n).max_element().is_nan());
+                    }
+                }
+            }
+        }
+    };
+}