From ef24de24a82fe681581cc130f342363c47c0969a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Fri, 7 Jun 2024 07:48:48 +0200 Subject: Merging upstream version 1.75.0+dfsg1. Signed-off-by: Daniel Baumann --- vendor/packed_simd/src/api/reductions/bitwise.rs | 151 +++++++++ .../src/api/reductions/float_arithmetic.rs | 313 ++++++++++++++++++ .../src/api/reductions/integer_arithmetic.rs | 193 +++++++++++ vendor/packed_simd/src/api/reductions/mask.rs | 89 +++++ vendor/packed_simd/src/api/reductions/min_max.rs | 360 +++++++++++++++++++++ 5 files changed, 1106 insertions(+) create mode 100644 vendor/packed_simd/src/api/reductions/bitwise.rs create mode 100644 vendor/packed_simd/src/api/reductions/float_arithmetic.rs create mode 100644 vendor/packed_simd/src/api/reductions/integer_arithmetic.rs create mode 100644 vendor/packed_simd/src/api/reductions/mask.rs create mode 100644 vendor/packed_simd/src/api/reductions/min_max.rs (limited to 'vendor/packed_simd/src/api/reductions') diff --git a/vendor/packed_simd/src/api/reductions/bitwise.rs b/vendor/packed_simd/src/api/reductions/bitwise.rs new file mode 100644 index 000000000..5bad4f474 --- /dev/null +++ b/vendor/packed_simd/src/api/reductions/bitwise.rs @@ -0,0 +1,151 @@ +//! Implements portable horizontal bitwise vector reductions. +#![allow(unused)] + +macro_rules! impl_reduction_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $ielem_ty:ident | $test_tt:tt | + ($convert:expr) | + ($true:expr, $false:expr) + ) => { + impl $id { + /// Lane-wise bitwise `and` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. + #[inline] + pub fn and(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_and; + let r: $ielem_ty = unsafe { simd_reduce_and(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x &= self.extract(i) as $elem_ty; + } + x + } + } + + /// Lane-wise bitwise `or` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. + #[inline] + pub fn or(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_or; + let r: $ielem_ty = unsafe { simd_reduce_or(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x |= self.extract(i) as $elem_ty; + } + x + } + } + + /// Lane-wise bitwise `xor` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. + #[inline] + pub fn xor(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_xor; + let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x ^= self.extract(i) as $elem_ty; + } + x + } + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _reduction_bitwise>] { + use super::*; + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn and() { + let v = $id::splat($false); + assert_eq!(v.and(), $false); + let v = $id::splat($true); + assert_eq!(v.and(), $true); + let v = $id::splat($false); + let v = v.replace(0, $true); + if $id::lanes() > 1 { + assert_eq!(v.and(), $false); + } else { + assert_eq!(v.and(), $true); + } + let v = $id::splat($true); + let v = v.replace(0, $false); + assert_eq!(v.and(), $false); + + } + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn or() { + let v = $id::splat($false); + assert_eq!(v.or(), $false); + let v = $id::splat($true); + assert_eq!(v.or(), $true); + let v = $id::splat($false); + let v = v.replace(0, $true); + assert_eq!(v.or(), $true); + let v = $id::splat($true); + let v = v.replace(0, $false); + if $id::lanes() > 1 { + assert_eq!(v.or(), $true); + } else { + assert_eq!(v.or(), $false); + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn xor() { + let v = $id::splat($false); + assert_eq!(v.xor(), $false); + let v = $id::splat($true); + if $id::lanes() > 1 { + assert_eq!(v.xor(), $false); + } else { + assert_eq!(v.xor(), $true); + } + let v = $id::splat($false); + let v = v.replace(0, $true); + assert_eq!(v.xor(), $true); + let v = $id::splat($true); + let v = v.replace(0, $false); + if $id::lanes() > 1 { + assert_eq!(v.xor(), $true); + } else { + assert_eq!(v.xor(), $false); + } + } + } + } + } + }; +} diff --git a/vendor/packed_simd/src/api/reductions/float_arithmetic.rs b/vendor/packed_simd/src/api/reductions/float_arithmetic.rs new file mode 100644 index 000000000..9dc8783db --- /dev/null +++ b/vendor/packed_simd/src/api/reductions/float_arithmetic.rs @@ -0,0 +1,313 @@ +//! Implements portable horizontal float vector arithmetic reductions. + +macro_rules! impl_reduction_float_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Horizontal sum of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) + /// + /// If one of the vector element is `NaN` the reduction returns + /// `NaN`. The resulting `NaN` is not required to be equal to any + /// of the `NaN`s in the vector. + #[inline] + pub fn sum(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_add_ordered; + unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) } + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x += self.extract(i) as $elem_ty; + } + x + } + } + + /// Horizontal product of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) + /// + /// If one of the vector element is `NaN` the reduction returns + /// `NaN`. The resulting `NaN` is not required to be equal to any + /// of the `NaN`s in the vector. + #[inline] + pub fn product(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_mul_ordered; + unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) } + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x *= self.extract(i) as $elem_ty; + } + x + } + } + } + + impl crate::iter::Sum for $id { + #[inline] + fn sum>(iter: I) -> $id { + iter.fold($id::splat(0.), crate::ops::Add::add) + } + } + + impl crate::iter::Product for $id { + #[inline] + fn product>(iter: I) -> $id { + iter.fold($id::splat(1.), crate::ops::Mul::mul) + } + } + + impl<'a> crate::iter::Sum<&'a $id> for $id { + #[inline] + fn sum>(iter: I) -> $id { + iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b)) + } + } + + impl<'a> crate::iter::Product<&'a $id> for $id { + #[inline] + fn product>(iter: I) -> $id { + iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b)) + } + } + + test_if! { + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. + #[allow(clippy::float_cmp)] + pub mod [<$id _reduction_float_arith>] { + use super::*; + fn alternating(x: usize) -> $id { + let mut v = $id::splat(1 as $elem_ty); + for i in 0..$id::lanes() { + if i % x == 0 { + v = v.replace(i, 2 as $elem_ty); + } + } + v + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sum() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.sum(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.sum(), $id::lanes() as $elem_ty); + let v = alternating(2); + assert_eq!( + v.sum(), + ($id::lanes() / 2 + $id::lanes()) as $elem_ty + ); + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn product() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.product(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.product(), 1 as $elem_ty); + let f = match $id::lanes() { + 64 => 16, + 32 => 8, + 16 => 4, + _ => 2, + }; + let v = alternating(f); + assert_eq!( + v.product(), + (2_usize.pow(($id::lanes() / f) as u32) + as $elem_ty) + ); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unreachable_code)] + fn sum_nan() { + // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 + // https://github.com/rust-lang-nursery/packed_simd/issues/6 + return; + + let n0 = crate::$elem_ty::NAN; + let v0 = $id::splat(-3.0); + for i in 0..$id::lanes() { + let mut v = v0.replace(i, n0); + // If the vector contains a NaN the result is NaN: + assert!( + v.sum().is_nan(), + "nan at {} => {} | {:?}", + i, + v.sum(), + v + ); + for j in 0..i { + v = v.replace(j, n0); + assert!(v.sum().is_nan()); + } + } + let v = $id::splat(n0); + assert!(v.sum().is_nan(), "all nans | {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unreachable_code)] + fn product_nan() { + // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 + // https://github.com/rust-lang-nursery/packed_simd/issues/6 + return; + + let n0 = crate::$elem_ty::NAN; + let v0 = $id::splat(-3.0); + for i in 0..$id::lanes() { + let mut v = v0.replace(i, n0); + // If the vector contains a NaN the result is NaN: + assert!( + v.product().is_nan(), + "nan at {} => {} | {:?}", + i, + v.product(), + v + ); + for j in 0..i { + v = v.replace(j, n0); + assert!(v.product().is_nan()); + } + } + let v = $id::splat(n0); + assert!(v.product().is_nan(), "all nans | {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unused, dead_code)] + fn sum_roundoff() { + // Performs a tree-reduction + fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty { + assert!(!a.is_empty()); + if a.len() == 1 { + a[0] + } else if a.len() == 2 { + a[0] + a[1] + } else { + let mid = a.len() / 2; + let (left, right) = a.split_at(mid); + tree_reduce_sum(left) + tree_reduce_sum(right) + } + } + + let mut start = crate::$elem_ty::EPSILON; + let mut scalar_reduction = 0. as $elem_ty; + + let mut v = $id::splat(0. as $elem_ty); + for i in 0..$id::lanes() { + let c = if i % 2 == 0 { 1e3 } else { -1. }; + start *= ::core::$elem_ty::consts::PI * c; + scalar_reduction += start; + v = v.replace(i, start); + } + let simd_reduction = v.sum(); + + let mut a = [0. as $elem_ty; $id::lanes()]; + v.write_to_slice_unaligned(&mut a); + let tree_reduction = tree_reduce_sum(&a); + + // tolerate 1 ULP difference: + let red_bits = simd_reduction.to_bits(); + let tree_bits = tree_reduction.to_bits(); + assert!( + if red_bits > tree_bits { + red_bits - tree_bits + } else { + tree_bits - red_bits + } < 2, + "vector: {:?} | simd_reduction: {:?} | \ +tree_reduction: {} | scalar_reduction: {}", + v, + simd_reduction, + tree_reduction, + scalar_reduction + ); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unused, dead_code)] + fn product_roundoff() { + use ::core::convert::TryInto; + // Performs a tree-reduction + fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty { + assert!(!a.is_empty()); + if a.len() == 1 { + a[0] + } else if a.len() == 2 { + a[0] * a[1] + } else { + let mid = a.len() / 2; + let (left, right) = a.split_at(mid); + tree_reduce_product(left) + * tree_reduce_product(right) + } + } + + let mut start = crate::$elem_ty::EPSILON; + let mut scalar_reduction = 1. as $elem_ty; + + let mut v = $id::splat(0. as $elem_ty); + for i in 0..$id::lanes() { + let c = if i % 2 == 0 { 1e3 } else { -1. }; + start *= ::core::$elem_ty::consts::PI * c; + scalar_reduction *= start; + v = v.replace(i, start); + } + let simd_reduction = v.product(); + + let mut a = [0. as $elem_ty; $id::lanes()]; + v.write_to_slice_unaligned(&mut a); + let tree_reduction = tree_reduce_product(&a); + + // FIXME: Too imprecise, even only for product(f32x8). + // Figure out how to narrow this down. + let ulp_limit = $id::lanes() / 2; + let red_bits = simd_reduction.to_bits(); + let tree_bits = tree_reduction.to_bits(); + assert!( + if red_bits > tree_bits { + red_bits - tree_bits + } else { + tree_bits - red_bits + } < ulp_limit.try_into().unwrap(), + "vector: {:?} | simd_reduction: {:?} | \ +tree_reduction: {} | scalar_reduction: {}", + v, + simd_reduction, + tree_reduction, + scalar_reduction + ); + } + } + } + } + }; +} diff --git a/vendor/packed_simd/src/api/reductions/integer_arithmetic.rs b/vendor/packed_simd/src/api/reductions/integer_arithmetic.rs new file mode 100644 index 000000000..e99e6cb5d --- /dev/null +++ b/vendor/packed_simd/src/api/reductions/integer_arithmetic.rs @@ -0,0 +1,193 @@ +//! Implements portable horizontal integer vector arithmetic reductions. + +macro_rules! impl_reduction_integer_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident + | $test_tt:tt) => { + impl $id { + /// Horizontal wrapping sum of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) + /// + /// If an operation overflows it returns the mathematical result + /// modulo `2^n` where `n` is the number of times it overflows. + #[inline] + pub fn wrapping_sum(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_add_ordered; + let v: $ielem_ty = unsafe { simd_reduce_add_ordered(self.0, 0 as $ielem_ty) }; + v as $elem_ty + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x = x.wrapping_add(self.extract(i) as $elem_ty); + } + x + } + } + + /// Horizontal wrapping product of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) + /// + /// If an operation overflows it returns the mathematical result + /// modulo `2^n` where `n` is the number of times it overflows. + #[inline] + pub fn wrapping_product(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_mul_ordered; + let v: $ielem_ty = unsafe { simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) }; + v as $elem_ty + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x = x.wrapping_mul(self.extract(i) as $elem_ty); + } + x + } + } + } + + impl crate::iter::Sum for $id { + #[inline] + fn sum>(iter: I) -> $id { + iter.fold($id::splat(0), crate::ops::Add::add) + } + } + + impl crate::iter::Product for $id { + #[inline] + fn product>(iter: I) -> $id { + iter.fold($id::splat(1), crate::ops::Mul::mul) + } + } + + impl<'a> crate::iter::Sum<&'a $id> for $id { + #[inline] + fn sum>(iter: I) -> $id { + iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b)) + } + } + + impl<'a> crate::iter::Product<&'a $id> for $id { + #[inline] + fn product>(iter: I) -> $id { + iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b)) + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _reduction_int_arith>] { + use super::*; + + fn alternating(x: usize) -> $id { + let mut v = $id::splat(1 as $elem_ty); + for i in 0..$id::lanes() { + if i % x == 0 { + v = v.replace(i, 2 as $elem_ty); + } + } + v + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_sum() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.wrapping_sum(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty); + let v = alternating(2); + if $id::lanes() > 1 { + assert_eq!( + v.wrapping_sum(), + ($id::lanes() / 2 + $id::lanes()) as $elem_ty + ); + } else { + assert_eq!( + v.wrapping_sum(), + 2 as $elem_ty + ); + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_sum_overflow() { + let start = $elem_ty::max_value() + - ($id::lanes() as $elem_ty / 2); + + let v = $id::splat(start as $elem_ty); + let vwrapping_sum = v.wrapping_sum(); + + let mut wrapping_sum = start; + for _ in 1..$id::lanes() { + wrapping_sum = wrapping_sum.wrapping_add(start); + } + assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_product() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.wrapping_product(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.wrapping_product(), 1 as $elem_ty); + let f = match $id::lanes() { + 64 => 16, + 32 => 8, + 16 => 4, + _ => 2, + }; + let v = alternating(f); + if $id::lanes() > 1 { + assert_eq!( + v.wrapping_product(), + (2_usize.pow(($id::lanes() / f) as u32) + as $elem_ty) + ); + } else { + assert_eq!( + v.wrapping_product(), + 2 as $elem_ty + ); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_product_overflow() { + let start = $elem_ty::max_value() + - ($id::lanes() as $elem_ty / 2); + + let v = $id::splat(start as $elem_ty); + let vmul = v.wrapping_product(); + + let mut mul = start; + for _ in 1..$id::lanes() { + mul = mul.wrapping_mul(start); + } + assert_eq!(mul, vmul, "v = {:?}", v); + } + } + } + } + }; +} diff --git a/vendor/packed_simd/src/api/reductions/mask.rs b/vendor/packed_simd/src/api/reductions/mask.rs new file mode 100644 index 000000000..0dd6a84e7 --- /dev/null +++ b/vendor/packed_simd/src/api/reductions/mask.rs @@ -0,0 +1,89 @@ +//! Implements portable horizontal mask reductions. + +macro_rules! impl_reduction_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Are `all` vector lanes `true`? + #[inline] + pub fn all(self) -> bool { + unsafe { crate::codegen::reductions::mask::All::all(self) } + } + /// Is `any` vector lane `true`? + #[inline] + pub fn any(self) -> bool { + unsafe { crate::codegen::reductions::mask::Any::any(self) } + } + /// Are `all` vector lanes `false`? + #[inline] + pub fn none(self) -> bool { + !self.any() + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _reduction>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn all() { + let a = $id::splat(true); + assert!(a.all()); + let a = $id::splat(false); + assert!(!a.all()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(!a.all()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(!a.all()); + } + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn any() { + let a = $id::splat(true); + assert!(a.any()); + let a = $id::splat(false); + assert!(!a.any()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(a.any()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(a.any()); + } + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn none() { + let a = $id::splat(true); + assert!(!a.none()); + let a = $id::splat(false); + assert!(a.none()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(!a.none()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(!a.none()); + } + } + } + } + } + } + }; +} diff --git a/vendor/packed_simd/src/api/reductions/min_max.rs b/vendor/packed_simd/src/api/reductions/min_max.rs new file mode 100644 index 000000000..a3ce13a45 --- /dev/null +++ b/vendor/packed_simd/src/api/reductions/min_max.rs @@ -0,0 +1,360 @@ +//! Implements portable horizontal vector min/max reductions. + +macro_rules! impl_reduction_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident + | $ielem_ty:ident | $test_tt:tt) => { + impl $id { + /// Largest vector element value. + #[inline] + pub fn max_element(self) -> $elem_ty { + #[cfg(not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "wasm32", + )))] + { + use crate::llvm::simd_reduce_max; + let v: $ielem_ty = unsafe { simd_reduce_max(self.0) }; + v as $elem_ty + } + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "wasm32", + ))] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + // FIXME: broken on WASM32 + // https://github.com/rust-lang-nursery/packed_simd/issues/91 + let mut x = self.extract(0); + for i in 1..$id::lanes() { + x = x.max(self.extract(i)); + } + x + } + } + + /// Smallest vector element value. + #[inline] + pub fn min_element(self) -> $elem_ty { + #[cfg(not(any( + target_arch = "aarch64", + target_arch = "arm", + all(target_arch = "x86", not(target_feature = "sse2")), + target_arch = "powerpc64", + target_arch = "wasm32", + ),))] + { + use crate::llvm::simd_reduce_min; + let v: $ielem_ty = unsafe { simd_reduce_min(self.0) }; + v as $elem_ty + } + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + all(target_arch = "x86", not(target_feature = "sse2")), + target_arch = "powerpc64", + target_arch = "wasm32", + ))] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + // FIXME: broken on i586-unknown-linux-gnu + // https://github.com/rust-lang-nursery/packed_simd/issues/22 + // FIXME: broken on WASM32 + // https://github.com/rust-lang-nursery/packed_simd/issues/91 + let mut x = self.extract(0); + for i in 1..$id::lanes() { + x = x.min(self.extract(i)); + } + x + } + } + } + test_if! {$test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. + #[allow(clippy::float_cmp)] + pub mod [<$id _reduction_min_max>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + pub fn max_element() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.max_element(), 0 as $elem_ty); + if $id::lanes() > 1 { + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.max_element(), 1 as $elem_ty); + } + let v = v.replace(0, 2 as $elem_ty); + assert_eq!(v.max_element(), 2 as $elem_ty); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + pub fn min_element() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.min_element(), 0 as $elem_ty); + if $id::lanes() > 1 { + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.min_element(), 0 as $elem_ty); + } + let v = $id::splat(1 as $elem_ty); + let v = v.replace(0, 2 as $elem_ty); + if $id::lanes() > 1 { + assert_eq!(v.min_element(), 1 as $elem_ty); + } else { + assert_eq!(v.min_element(), 2 as $elem_ty); + } + if $id::lanes() > 1 { + let v = $id::splat(2 as $elem_ty); + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.min_element(), 1 as $elem_ty); + } + } + } + } + } + }; +} + +macro_rules! test_reduction_float_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if! { + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. + #[allow(clippy::float_cmp)] + pub mod [<$id _reduction_min_max_nan>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn min_element_test() { + let n = crate::$elem_ty::NAN; + + assert_eq!(n.min(-3.), -3.); + assert_eq!((-3. as $elem_ty).min(n), -3.); + + let v0 = $id::splat(-3.); + + let target_with_broken_last_lane_nan = !cfg!(any( + target_arch = "arm", target_arch = "aarch64", + all(target_arch = "x86", + not(target_feature = "sse2") + ), + target_arch = "powerpc64", + target_arch = "wasm32", + )); + + // The vector is initialized to `-3.`s: [-3, -3, -3, -3] + for i in 0..$id::lanes() { + // We replace the i-th element of the vector with + // `NaN`: [-3, -3, -3, NaN] + let mut v = v0.replace(i, n); + + // If the NaN is in the last place, the LLVM + // implementation of these methods is broken on some + // targets: + if i == $id::lanes() - 1 && + target_with_broken_last_lane_nan { + assert_eq!(v.min_element(), -3., + "[A]: nan at {} => {} | {:?}", + i, v.min_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result + // is still always `-3.` unless all elements of + // the vector are `NaN`s: + for j in 0..i { + v = v.replace(j, n); + if j == i-1 { + assert!(v.min_element().is_nan(), + "[B]: nan at {} => {} | {:?}", + i, v.min_element(), v); + } else { + assert_eq!(v.min_element(), -3., + "[B]: nan at {} => {} | {:?}", + i, v.min_element(), v); + } + } + + // We are done here, since we were in the last + // lane which is the last iteration of the loop. + break + } + + // We are not in the last lane, and there is only + // one `NaN` in the vector. + + // If the vector has one lane, the result is `NaN`: + if $id::lanes() == 1 { + assert!(v.min_element().is_nan(), + "[C]: all nans | v={:?} | min={} | \ +is_nan: {}", + v, v.min_element(), + v.min_element().is_nan() + ); + + // And we are done, since the vector only has + // one lane anyways. + break; + } + + // The vector has more than one lane, since there is + // only one `NaN` in the vector, the result is + // always `-3`. + assert_eq!(v.min_element(), -3., + "[D]: nan at {} => {} | {:?}", + i, v.min_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result is + // still always `-3.` unless all elements of the + // vector are `NaN`s: + for j in 0..i { + v = v.replace(j, n); + + if i == $id::lanes() - 1 && j == i - 1 { + // All elements of the vector are `NaN`s, + // therefore the result is NaN as well. + // + // Note: the #lanes of the vector is > 1, so + // "i - 1" does not overflow. + assert!(v.min_element().is_nan(), + "[E]: all nans | v={:?} | min={} | \ +is_nan: {}", + v, v.min_element(), + v.min_element().is_nan()); + } else { + // There are non-`NaN` elements in the + // vector, therefore the result is `-3.`: + assert_eq!(v.min_element(), -3., + "[F]: nan at {} => {} | {:?}", + i, v.min_element(), v); + } + } + } + + // If the vector contains all NaNs the result is NaN: + assert!($id::splat(n).min_element().is_nan(), + "all nans | v={:?} | min={} | is_nan: {}", + $id::splat(n), $id::splat(n).min_element(), + $id::splat(n).min_element().is_nan()); + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn max_element_test() { + let n = crate::$elem_ty::NAN; + + assert_eq!(n.max(-3.), -3.); + assert_eq!((-3. as $elem_ty).max(n), -3.); + + let v0 = $id::splat(-3.); + + let target_with_broken_last_lane_nan = !cfg!(any( + target_arch = "arm", target_arch = "aarch64", + target_arch = "powerpc64", target_arch = "wasm32", + )); + + // The vector is initialized to `-3.`s: [-3, -3, -3, -3] + for i in 0..$id::lanes() { + // We replace the i-th element of the vector with + // `NaN`: [-3, -3, -3, NaN] + let mut v = v0.replace(i, n); + + // If the NaN is in the last place, the LLVM + // implementation of these methods is broken on some + // targets: + if i == $id::lanes() - 1 && + target_with_broken_last_lane_nan { + assert_eq!(v.max_element(), -3., + "[A]: nan at {} => {} | {:?}", + i, v.max_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result + // is still always `-3.` unless all elements of + // the vector are `NaN`s: + for j in 0..i { + v = v.replace(j, n); + if j == i-1 { + assert!(v.min_element().is_nan(), + "[B]: nan at {} => {} | {:?}", + i, v.min_element(), v); + } else { + assert_eq!(v.max_element(), -3., + "[B]: nan at {} => {} | {:?}", + i, v.max_element(), v); + } + } + + // We are done here, since we were in the last + // lane which is the last iteration of the loop. + break + } + + // We are not in the last lane, and there is only + // one `NaN` in the vector. + + // If the vector has one lane, the result is `NaN`: + if $id::lanes() == 1 { + assert!(v.max_element().is_nan(), + "[C]: all nans | v={:?} | min={} | \ +is_nan: {}", + v, v.max_element(), + v.max_element().is_nan()); + + // And we are done, since the vector only has + // one lane anyways. + break; + } + + // The vector has more than one lane, since there is + // only one `NaN` in the vector, the result is + // always `-3`. + assert_eq!(v.max_element(), -3., + "[D]: nan at {} => {} | {:?}", + i, v.max_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result is + // still always `-3.` unless all elements of the + // vector are `NaN`s: + for j in 0..i { + v = v.replace(j, n); + + if i == $id::lanes() - 1 && j == i - 1 { + // All elements of the vector are `NaN`s, + // therefore the result is NaN as well. + // + // Note: the #lanes of the vector is > 1, so + // "i - 1" does not overflow. + assert!(v.max_element().is_nan(), + "[E]: all nans | v={:?} | max={} | \ +is_nan: {}", + v, v.max_element(), + v.max_element().is_nan()); + } else { + // There are non-`NaN` elements in the + // vector, therefore the result is `-3.`: + assert_eq!(v.max_element(), -3., + "[F]: nan at {} => {} | {:?}", + i, v.max_element(), v); + } + } + } + + // If the vector contains all NaNs the result is NaN: + assert!($id::splat(n).max_element().is_nan(), + "all nans | v={:?} | max={} | is_nan: {}", + $id::splat(n), $id::splat(n).max_element(), + $id::splat(n).max_element().is_nan()); + } + } + } + } + }; +} -- cgit v1.2.3