diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 14:29:10 +0000 |
commit | 2aa4a82499d4becd2284cdb482213d541b8804dd (patch) | |
tree | b80bf8bf13c3766139fbacc530efd0dd9d54394c /third_party/rust/packed_simd/src/api | |
parent | Initial commit. (diff) | |
download | firefox-upstream.tar.xz firefox-upstream.zip |
Adding upstream version 86.0.1.upstream/86.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/packed_simd/src/api')
83 files changed, 8425 insertions, 0 deletions
diff --git a/third_party/rust/packed_simd/src/api/bit_manip.rs b/third_party/rust/packed_simd/src/api/bit_manip.rs new file mode 100644 index 0000000000..6d8865706d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/bit_manip.rs @@ -0,0 +1,129 @@ +//! Bit manipulations. + +macro_rules! impl_bit_manip { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Returns the number of ones in the binary representation of + /// the lanes of `self`. + #[inline] + pub fn count_ones(self) -> Self { + super::codegen::bit_manip::BitManip::ctpop(self) + } + + /// Returns the number of zeros in the binary representation of + /// the lanes of `self`. + #[inline] + pub fn count_zeros(self) -> Self { + super::codegen::bit_manip::BitManip::ctpop(!self) + } + + /// Returns the number of leading zeros in the binary + /// representation of the lanes of `self`. + #[inline] + pub fn leading_zeros(self) -> Self { + super::codegen::bit_manip::BitManip::ctlz(self) + } + + /// Returns the number of trailing zeros in the binary + /// representation of the lanes of `self`. + #[inline] + pub fn trailing_zeros(self) -> Self { + super::codegen::bit_manip::BitManip::cttz(self) + } + } + + test_if! { + $test_tt: + paste::item_with_macros! { + #[allow(overflowing_literals)] + pub mod [<$id _bit_manip>] { + #![allow(const_item_mutation)] + use super::*; + + const LANE_WIDTH: usize = mem::size_of::<$elem_ty>() * 8; + + macro_rules! 
test_func { + ($x:expr, $func:ident) => {{ + let mut actual = $x; + for i in 0..$id::lanes() { + actual = actual.replace( + i, + $x.extract(i).$func() as $elem_ty + ); + } + let expected = $x.$func(); + assert_eq!(actual, expected); + }}; + } + + const BYTES: [u8; 64] = [ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + ]; + + fn load_bytes() -> $id { + let elems: &mut [$elem_ty] = unsafe { + slice::from_raw_parts_mut( + BYTES.as_mut_ptr() as *mut $elem_ty, + $id::lanes(), + ) + }; + $id::from_slice_unaligned(elems) + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn count_ones() { + test_func!($id::splat(0), count_ones); + test_func!($id::splat(!0), count_ones); + test_func!(load_bytes(), count_ones); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn count_zeros() { + test_func!($id::splat(0), count_zeros); + test_func!($id::splat(!0), count_zeros); + test_func!(load_bytes(), count_zeros); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn leading_zeros() { + test_func!($id::splat(0), leading_zeros); + test_func!($id::splat(1), leading_zeros); + // some implementations use `pshufb` which has unique + // behavior when the 8th bit is set. 
+ test_func!($id::splat(0b1000_0010), leading_zeros); + test_func!($id::splat(!0), leading_zeros); + test_func!( + $id::splat(1 << (LANE_WIDTH - 1)), + leading_zeros + ); + test_func!(load_bytes(), leading_zeros); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn trailing_zeros() { + test_func!($id::splat(0), trailing_zeros); + test_func!($id::splat(1), trailing_zeros); + test_func!($id::splat(0b1000_0010), trailing_zeros); + test_func!($id::splat(!0), trailing_zeros); + test_func!( + $id::splat(1 << (LANE_WIDTH - 1)), + trailing_zeros + ); + test_func!(load_bytes(), trailing_zeros); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/bitmask.rs b/third_party/rust/packed_simd/src/api/bitmask.rs new file mode 100644 index 0000000000..a06ff0fab1 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/bitmask.rs @@ -0,0 +1,82 @@ +//! Bitmask API + +macro_rules! impl_bitmask { + ($id:ident | $ibitmask_ty:ident | ($set:expr, $clear:expr) + | $test_tt:tt) => { + impl $id { + /// Creates a bitmask with the MSB of each vector lane. + /// + /// If the vector has less than 8 lanes, the bits that do not + /// correspond to any vector lanes are cleared. + #[inline] + pub fn bitmask(self) -> $ibitmask_ty { + unsafe { codegen::llvm::simd_bitmask(self.0) } + } + } + + test_if! { + $test_tt: + paste::item! 
{ + #[cfg(not(any( + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/210 + all(target_arch = "mips", target_endian = "big"), + all(target_arch = "mips64", target_endian = "big"), + target_arch = "sparc64", + target_arch = "s390x", + )))] + pub mod [<$id _bitmask>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn bitmask() { + // clear all lanes + let vec = $id::splat($clear as _); + let bitmask: $ibitmask_ty = 0; + assert_eq!(vec.bitmask(), bitmask); + + // set even lanes + let mut vec = $id::splat($clear as _); + for i in 0..$id::lanes() { + if i % 2 == 0 { + vec = vec.replace(i, $set as _); + } + } + // create bitmask with even lanes set: + let mut bitmask: $ibitmask_ty = 0; + for i in 0..$id::lanes() { + if i % 2 == 0 { + bitmask |= 1 << i; + } + } + assert_eq!(vec.bitmask(), bitmask); + + + // set odd lanes + let mut vec = $id::splat($clear as _); + for i in 0..$id::lanes() { + if i % 2 != 0 { + vec = vec.replace(i, $set as _); + } + } + // create bitmask with odd lanes set: + let mut bitmask: $ibitmask_ty = 0; + for i in 0..$id::lanes() { + if i % 2 != 0 { + bitmask |= 1 << i; + } + } + assert_eq!(vec.bitmask(), bitmask); + + // set all lanes + let vec = $id::splat($set as _); + let mut bitmask: $ibitmask_ty = 0; + for i in 0..$id::lanes() { + bitmask |= 1 << i; + } + assert_eq!(vec.bitmask(), bitmask); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cast.rs b/third_party/rust/packed_simd/src/api/cast.rs new file mode 100644 index 0000000000..f1c32ca1a3 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast.rs @@ -0,0 +1,108 @@ +//! Implementation of `FromCast` and `IntoCast`. +#![allow(clippy::module_name_repetitions)] + +/// Numeric cast from `T` to `Self`. +/// +/// > Note: This is a temporary workaround until the conversion traits +/// specified > in [RFC2484] are implemented. 
+/// +/// Numeric cast between vectors with the same number of lanes, such that: +/// +/// * casting integer vectors whose lane types have the same size (e.g. `i32xN` +/// -> `u32xN`) is a **no-op**, +/// +/// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> +/// `u8xN`) will **truncate**, +/// +/// * casting from a smaller integer to a larger integer (e.g. `u8xN` -> +/// `u32xN`) will: +/// * **zero-extend** if the source is unsigned, or +/// * **sign-extend** if the source is signed, +/// +/// * casting from a float to an integer will **round the float towards zero**, +/// +/// * casting from an integer to float will produce the floating point +/// representation of the integer, **rounding to nearest, ties to even**, +/// +/// * casting from an `f32` to an `f64` is perfect and lossless, +/// +/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. +/// +/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 +pub trait FromCast<T>: crate::marker::Sized { + /// Numeric cast from `T` to `Self`. + fn from_cast(_: T) -> Self; +} + +/// Numeric cast from `Self` to `T`. +/// +/// > Note: This is a temporary workaround until the conversion traits +/// specified > in [RFC2484] are implemented. +/// +/// Numeric cast between vectors with the same number of lanes, such that: +/// +/// * casting integer vectors whose lane types have the same size (e.g. `i32xN` +/// -> `u32xN`) is a **no-op**, +/// +/// * casting from a larger integer to a smaller integer (e.g. `u32xN` -> +/// `u8xN`) will **truncate**, +/// +/// * casting from a smaller integer to a larger integer (e.g. 
`u8xN` -> +/// `u32xN`) will: +/// * **zero-extend** if the source is unsigned, or +/// * **sign-extend** if the source is signed, +/// +/// * casting from a float to an integer will **round the float towards zero**, +/// +/// * casting from an integer to float will produce the floating point +/// representation of the integer, **rounding to nearest, ties to even**, +/// +/// * casting from an `f32` to an `f64` is perfect and lossless, +/// +/// * casting from an `f64` to an `f32` **rounds to nearest, ties to even**. +/// +/// [RFC2484]: https://github.com/rust-lang/rfcs/pull/2484 +pub trait Cast<T>: crate::marker::Sized { + /// Numeric cast from `self` to `T`. + fn cast(self) -> T; +} + +/// `FromCast` implies `Cast`. +impl<T, U> Cast<U> for T +where + U: FromCast<T>, +{ + #[inline] + fn cast(self) -> U { + U::from_cast(self) + } +} + +/// `FromCast` and `Cast` are reflexive +impl<T> FromCast<T> for T { + #[inline] + fn from_cast(t: Self) -> Self { + t + } +} + +#[macro_use] +mod macros; + +mod v16; +pub use self::v16::*; + +mod v32; +pub use self::v32::*; + +mod v64; +pub use self::v64::*; + +mod v128; +pub use self::v128::*; + +mod v256; +pub use self::v256::*; + +mod v512; +pub use self::v512::*; diff --git a/third_party/rust/packed_simd/src/api/cast/macros.rs b/third_party/rust/packed_simd/src/api/cast/macros.rs new file mode 100644 index 0000000000..3bb29f0b80 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/macros.rs @@ -0,0 +1,82 @@ +//! Macros implementing `FromCast` + +macro_rules! impl_from_cast_ { + ($id:ident[$test_tt:tt]: $from_ty:ident) => { + impl crate::api::cast::FromCast<$from_ty> for $id { + #[inline] + fn from_cast(x: $from_ty) -> Self { + use crate::llvm::simd_cast; + debug_assert_eq!($from_ty::lanes(), $id::lanes()); + Simd(unsafe { simd_cast(x.0) }) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _from_cast_ $from_ty>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test() { + assert_eq!($id::lanes(), $from_ty::lanes()); + } + } + } + } + }; +} + +macro_rules! impl_from_cast { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_cast_!($id[$test_tt]: $from_ty); + )* + } +} + +macro_rules! impl_from_cast_mask_ { + ($id:ident[$test_tt:tt]: $from_ty:ident) => { + impl crate::api::cast::FromCast<$from_ty> for $id { + #[inline] + fn from_cast(x: $from_ty) -> Self { + debug_assert_eq!($from_ty::lanes(), $id::lanes()); + x.ne($from_ty::default()) + .select($id::splat(true), $id::splat(false)) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _from_cast_ $from_ty>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test() { + assert_eq!($id::lanes(), $from_ty::lanes()); + + let x = $from_ty::default(); + let m: $id = x.cast(); + assert!(m.none()); + } + } + } + } + }; +} + +macro_rules! impl_from_cast_mask { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_cast_mask_!($id[$test_tt]: $from_ty); + )* + } +} + +#[allow(unused)] +macro_rules! impl_into_cast { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_cast_!($from_ty[$test_tt]: $id); + )* + } +} diff --git a/third_party/rust/packed_simd/src/api/cast/v128.rs b/third_party/rust/packed_simd/src/api/cast/v128.rs new file mode 100644 index 0000000000..ab47ddc006 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v128.rs @@ -0,0 +1,79 @@ +//! 
`FromCast` and `IntoCast` implementations for portable 128-bit wide vectors +#[rustfmt::skip] + +use crate::*; + +impl_from_cast!( + i8x16[test_v128]: u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 +); +impl_from_cast!( + u8x16[test_v128]: i8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 +); +impl_from_cast_mask!( + m8x16[test_v128]: i8x16, u8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16, m32x16 +); + +impl_from_cast!( + i16x8[test_v128]: i8x8, u8x8, m8x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + u16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast_mask!( + m16x8[test_v128]: i8x8, u8x8, m8x8, i16x8, u16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); + +impl_from_cast!( + i32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + f32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m32x4[test_v128]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); + +impl_from_cast!( + i64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, f64x2, m64x2, i128x2, u128x2, 
m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + f64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m64x2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); + +impl_from_cast!( + isizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, usizex2, msizex2 +); +impl_from_cast!( + usizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, msizex2 +); +impl_from_cast_mask!( + msizex2[test_v128]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2 +); + +// FIXME[test_v128]: 64-bit single element vectors into_cast impls +impl_from_cast!(i128x1[test_v128]: u128x1, m128x1); +impl_from_cast!(u128x1[test_v128]: i128x1, m128x1); +impl_from_cast!(m128x1[test_v128]: i128x1, u128x1); diff --git a/third_party/rust/packed_simd/src/api/cast/v16.rs b/third_party/rust/packed_simd/src/api/cast/v16.rs new file mode 100644 index 0000000000..cf974bb08e --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v16.rs @@ -0,0 +1,17 @@ +//! 
`FromCast` and `IntoCast` implementations for portable 16-bit wide vectors +#[rustfmt::skip] + +use crate::*; + +impl_from_cast!( + i8x2[test_v16]: u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u8x2[test_v16]: i8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m8x2[test_v16]: i8x2, u8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); diff --git a/third_party/rust/packed_simd/src/api/cast/v256.rs b/third_party/rust/packed_simd/src/api/cast/v256.rs new file mode 100644 index 0000000000..9389dcb4c7 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v256.rs @@ -0,0 +1,81 @@ +//! `FromCast` and `IntoCast` implementations for portable 256-bit wide vectors +#[rustfmt::skip] + +use crate::*; + +impl_from_cast!(i8x32[test_v256]: u8x32, m8x32, i16x32, u16x32, m16x32); +impl_from_cast!(u8x32[test_v256]: i8x32, m8x32, i16x32, u16x32, m16x32); +impl_from_cast_mask!(m8x32[test_v256]: i8x32, u8x32, i16x32, u16x32, m16x32); + +impl_from_cast!( + i16x16[test_v256]: i8x16, u8x16, m8x16, u16x16, m16x16, + i32x16, u32x16, f32x16, m32x16 +); +impl_from_cast!( + u16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, m16x16, + i32x16, u32x16, f32x16, m32x16 +); +impl_from_cast_mask!( + m16x16[test_v256]: i8x16, u8x16, m8x16, i16x16, u16x16, + i32x16, u32x16, f32x16, m32x16 +); + +impl_from_cast!( + i32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + u32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + f32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, 
i32x8, u32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast_mask!( + m32x8[test_v256]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); + +impl_from_cast!( + i64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + f64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m64x4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); + +impl_from_cast!( + i128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m128x2[test_v256]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, m64x2, f64x2, i128x2, u128x2, isizex2, usizex2, msizex2 +); + +impl_from_cast!( + isizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, usizex4, msizex4 +); +impl_from_cast!( + usizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, msizex4 +); +impl_from_cast_mask!( + msizex4[test_v256]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, 
f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4 +); diff --git a/third_party/rust/packed_simd/src/api/cast/v32.rs b/third_party/rust/packed_simd/src/api/cast/v32.rs new file mode 100644 index 0000000000..2b254ba0cf --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v32.rs @@ -0,0 +1,30 @@ +//! `FromCast` and `IntoCast` implementations for portable 32-bit wide vectors +#[rustfmt::skip] + +use crate::*; + +impl_from_cast!( + i8x4[test_v32]: u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u8x4[test_v32]: i8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m8x4[test_v32]: i8x4, u8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); + +impl_from_cast!( + i16x2[test_v32]: i8x2, u8x2, m8x2, u16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, m16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m16x2[test_v32]: i8x2, u8x2, m8x2, i16x2, u16x2, i32x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); diff --git a/third_party/rust/packed_simd/src/api/cast/v512.rs b/third_party/rust/packed_simd/src/api/cast/v512.rs new file mode 100644 index 0000000000..5a10ab0666 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v512.rs @@ -0,0 +1,68 @@ +//! 
`FromCast` and `IntoCast` implementations for portable 512-bit wide vectors +#[rustfmt::skip] + +use crate::*; + +impl_from_cast!(i8x64[test_v512]: u8x64, m8x64); +impl_from_cast!(u8x64[test_v512]: i8x64, m8x64); +impl_from_cast_mask!(m8x64[test_v512]: i8x64, u8x64); + +impl_from_cast!(i16x32[test_v512]: i8x32, u8x32, m8x32, u16x32, m16x32); +impl_from_cast!(u16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, m16x32); +impl_from_cast_mask!(m16x32[test_v512]: i8x32, u8x32, m8x32, i16x32, u16x32); + +impl_from_cast!( + i32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, u32x16, f32x16, m32x16 +); +impl_from_cast!( + u32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, f32x16, m32x16 +); +impl_from_cast!( + f32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, m32x16 +); +impl_from_cast_mask!( + m32x16[test_v512]: i8x16, u8x16, m8x16, i16x16, u16x16, m16x16, i32x16, u32x16, f32x16 +); + +impl_from_cast!( + i64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + u64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + f64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast_mask!( + m64x8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, isizex8, usizex8, msizex8 +); + +impl_from_cast!( + i128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u128x4[test_v512]: i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m128x4[test_v512]: 
i8x4, u8x4, m8x4, i16x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, m64x4, f64x4, i128x4, u128x4, isizex4, usizex4, msizex4 +); + +impl_from_cast!( + isizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, usizex8, msizex8 +); +impl_from_cast!( + usizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, msizex8 +); +impl_from_cast_mask!( + msizex8[test_v512]: i8x8, u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8 +); diff --git a/third_party/rust/packed_simd/src/api/cast/v64.rs b/third_party/rust/packed_simd/src/api/cast/v64.rs new file mode 100644 index 0000000000..192a4638a3 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cast/v64.rs @@ -0,0 +1,47 @@ +//! `FromCast` and `IntoCast` implementations for portable 64-bit wide vectors +#[rustfmt::skip] + +use crate::*; + +impl_from_cast!( + i8x8[test_v64]: u8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast!( + u8x8[test_v64]: i8x8, m8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); +impl_from_cast_mask!( + m8x8[test_v64]: i8x8, u8x8, i16x8, u16x8, m16x8, i32x8, u32x8, f32x8, m32x8, + i64x8, u64x8, f64x8, m64x8, isizex8, usizex8, msizex8 +); + +impl_from_cast!( + i16x4[test_v64]: i8x4, u8x4, m8x4, u16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast!( + u16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, m16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, usizex4, msizex4 +); +impl_from_cast_mask!( + m16x4[test_v64]: i8x4, u8x4, m8x4, i16x4, u16x4, i32x4, u32x4, f32x4, m32x4, + i64x4, u64x4, f64x4, m64x4, i128x4, u128x4, m128x4, isizex4, 
usizex4, msizex4 +); + +impl_from_cast!( + i32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, u32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + u32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, f32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast!( + f32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, m32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); +impl_from_cast_mask!( + m32x2[test_v64]: i8x2, u8x2, m8x2, i16x2, u16x2, m16x2, i32x2, u32x2, f32x2, + i64x2, u64x2, f64x2, m64x2, i128x2, u128x2, m128x2, isizex2, usizex2, msizex2 +); diff --git a/third_party/rust/packed_simd/src/api/cmp.rs b/third_party/rust/packed_simd/src/api/cmp.rs new file mode 100644 index 0000000000..6d5301dddd --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp.rs @@ -0,0 +1,16 @@ +//! Implement cmp traits for vector types + +#[macro_use] +mod partial_eq; + +#[macro_use] +mod eq; + +#[macro_use] +mod partial_ord; + +#[macro_use] +mod ord; + +#[macro_use] +mod vertical; diff --git a/third_party/rust/packed_simd/src/api/cmp/eq.rs b/third_party/rust/packed_simd/src/api/cmp/eq.rs new file mode 100644 index 0000000000..3c55d0dce5 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/eq.rs @@ -0,0 +1,27 @@ +//! Implements `Eq` for vector types. + +macro_rules! impl_cmp_eq { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::cmp::Eq for $id {} + impl crate::cmp::Eq for LexicographicallyOrdered<$id> {} + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _cmp_eq>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn eq() { + fn foo<E: crate::cmp::Eq>(_: E) {} + let a = $id::splat($false); + foo(a); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cmp/ord.rs b/third_party/rust/packed_simd/src/api/cmp/ord.rs new file mode 100644 index 0000000000..e54ba3bfde --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/ord.rs @@ -0,0 +1,43 @@ +//! Implements `Ord` for vector types. + +macro_rules! impl_cmp_ord { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl $id { + /// Returns a wrapper that implements `Ord`. + #[inline] + pub fn lex_ord(&self) -> LexicographicallyOrdered<$id> { + LexicographicallyOrdered(*self) + } + } + + impl crate::cmp::Ord for LexicographicallyOrdered<$id> { + #[inline] + fn cmp(&self, other: &Self) -> crate::cmp::Ordering { + match self.partial_cmp(other) { + Some(x) => x, + None => unsafe { crate::hint::unreachable_unchecked() }, + } + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _cmp_ord>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn eq() { + fn foo<E: crate::cmp::Ord>(_: E) {} + let a = $id::splat($false); + foo(a.partial_lex_ord()); + foo(a.lex_ord()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs new file mode 100644 index 0000000000..1712a0de56 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/partial_eq.rs @@ -0,0 +1,67 @@ +//! Implements `PartialEq` for vector types. + +macro_rules! 
impl_cmp_partial_eq { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 + #[allow(clippy::partialeq_ne_impl)] + impl crate::cmp::PartialEq<$id> for $id { + #[inline] + fn eq(&self, other: &Self) -> bool { + $id::eq(*self, *other).all() + } + #[inline] + fn ne(&self, other: &Self) -> bool { + $id::ne(*self, *other).any() + } + } + + // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 + #[allow(clippy::partialeq_ne_impl)] + impl crate::cmp::PartialEq<LexicographicallyOrdered<$id>> + for LexicographicallyOrdered<$id> + { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + #[inline] + fn ne(&self, other: &Self) -> bool { + self.0 != other.0 + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _cmp_PartialEq>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn partial_eq() { + let a = $id::splat($false); + let b = $id::splat($true); + + assert!(a != b); + assert!(!(a == b)); + assert!(a == a); + assert!(!(a != a)); + + if $id::lanes() > 1 { + let a = $id::splat($false).replace(0, $true); + let b = $id::splat($true); + + assert!(a != b); + assert!(!(a == b)); + assert!(a == a); + assert!(!(a != a)); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs new file mode 100644 index 0000000000..a2292918ba --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/partial_ord.rs @@ -0,0 +1,234 @@ +//! Implements `PartialOrd` for vector types. +//! +//! This implements a lexicographical order. + +macro_rules! impl_cmp_partial_ord { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Returns a wrapper that implements `PartialOrd`. 
+ #[inline] + pub fn partial_lex_ord(&self) -> LexicographicallyOrdered<$id> { + LexicographicallyOrdered(*self) + } + } + + impl crate::cmp::PartialOrd<LexicographicallyOrdered<$id>> + for LexicographicallyOrdered<$id> + { + #[inline] + fn partial_cmp( + &self, other: &Self, + ) -> Option<crate::cmp::Ordering> { + if PartialEq::eq(self, other) { + Some(crate::cmp::Ordering::Equal) + } else if PartialOrd::lt(self, other) { + Some(crate::cmp::Ordering::Less) + } else if PartialOrd::gt(self, other) { + Some(crate::cmp::Ordering::Greater) + } else { + None + } + } + #[inline] + fn lt(&self, other: &Self) -> bool { + let m_lt = self.0.lt(other.0); + let m_eq = self.0.eq(other.0); + for i in 0..$id::lanes() { + if m_eq.extract(i) { + continue; + } + return m_lt.extract(i); + } + false + } + #[inline] + fn le(&self, other: &Self) -> bool { + self.lt(other) | PartialEq::eq(self, other) + } + #[inline] + fn ge(&self, other: &Self) -> bool { + self.gt(other) | PartialEq::eq(self, other) + } + #[inline] + fn gt(&self, other: &Self) -> bool { + let m_gt = self.0.gt(other.0); + let m_eq = self.0.eq(other.0); + for i in 0..$id::lanes() { + if m_eq.extract(i) { + continue; + } + return m_gt.extract(i); + } + false + } + } + }; +} + +macro_rules! test_cmp_partial_ord_int { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _cmp_PartialOrd>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn partial_lex_ord() { + use crate::testing::utils::{test_cmp}; + // constant values + let a = $id::splat(0); + let b = $id::splat(1); + + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + + // variable values: a = [0, 1, 2, 3]; b = [3, 2, 1, 0] + let mut a = $id::splat(0); + let mut b = $id::splat(0); + for i in 0..$id::lanes() { + a = a.replace(i, i as $elem_ty); + b = b.replace(i, ($id::lanes() - i) as $elem_ty); + } + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + + // variable values: a = [0, 1, 2, 3]; b = [0, 1, 2, 4] + let mut b = a; + b = b.replace( + $id::lanes() - 1, + a.extract($id::lanes() - 1) + 1 as $elem_ty + ); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + + if $id::lanes() > 2 { + // variable values a = [0, 1, 0, 0]; b = [0, 1, 2, 3] + let b = a; + let mut a = $id::splat(0); + a = a.replace(1, 1 as $elem_ty); + test_cmp(a.partial_lex_ord(), 
b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + + // variable values: a = [0, 1, 2, 3]; b = [0, 1, 3, 2] + let mut b = a; + b = b.replace( + 2, a.extract($id::lanes() - 1) + 1 as $elem_ty + ); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(crate::cmp::Ordering::Equal)); + } + } + } + } + } + }; +} + +macro_rules! test_cmp_partial_ord_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _cmp_PartialOrd>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn partial_lex_ord() { + use crate::testing::utils::{test_cmp}; + use crate::cmp::Ordering; + + // constant values + let a = $id::splat(false); + let b = $id::splat(true); + + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Equal)); + + // variable values: + // a = [false, false, false, false]; + // b = [false, false, false, true] + let a = $id::splat(false); + let mut b = $id::splat(false); + b = b.replace($id::lanes() - 1, true); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Equal)); + + // variable values: + // a = [true, true, true, false]; + // b = [true, true, true, true] + let mut a = $id::splat(true); + let b = $id::splat(true); + a = a.replace($id::lanes() - 1, false); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Less)); + test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Equal)); + + if $id::lanes() > 2 { + // variable values + // a = [false, true, false, false]; + // b = [false, true, true, true] + let mut a = $id::splat(false); + let mut b = $id::splat(true); + a = a.replace(1, true); + b = b.replace(0, false); + test_cmp(a.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Less)); 
+ test_cmp(b.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Greater)); + test_cmp(a.partial_lex_ord(), a.partial_lex_ord(), + Some(Ordering::Equal)); + test_cmp(b.partial_lex_ord(), b.partial_lex_ord(), + Some(Ordering::Equal)); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/cmp/vertical.rs b/third_party/rust/packed_simd/src/api/cmp/vertical.rs new file mode 100644 index 0000000000..ea4a0d1a34 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/cmp/vertical.rs @@ -0,0 +1,114 @@ +//! Vertical (lane-wise) vector comparisons returning vector masks. + +macro_rules! impl_cmp_vertical { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident, + $mask_ty:ident, + $is_mask:expr,($true:expr, $false:expr) | $test_tt:tt + ) => { + impl $id { + /// Lane-wise equality comparison. + #[inline] + pub fn eq(self, other: Self) -> $mask_ty { + use crate::llvm::simd_eq; + Simd(unsafe { simd_eq(self.0, other.0) }) + } + + /// Lane-wise inequality comparison. + #[inline] + pub fn ne(self, other: Self) -> $mask_ty { + use crate::llvm::simd_ne; + Simd(unsafe { simd_ne(self.0, other.0) }) + } + + /// Lane-wise less-than comparison. + #[inline] + pub fn lt(self, other: Self) -> $mask_ty { + use crate::llvm::{simd_gt, simd_lt}; + if $is_mask { + Simd(unsafe { simd_gt(self.0, other.0) }) + } else { + Simd(unsafe { simd_lt(self.0, other.0) }) + } + } + + /// Lane-wise less-than-or-equals comparison. + #[inline] + pub fn le(self, other: Self) -> $mask_ty { + use crate::llvm::{simd_ge, simd_le}; + if $is_mask { + Simd(unsafe { simd_ge(self.0, other.0) }) + } else { + Simd(unsafe { simd_le(self.0, other.0) }) + } + } + + /// Lane-wise greater-than comparison. + #[inline] + pub fn gt(self, other: Self) -> $mask_ty { + use crate::llvm::{simd_gt, simd_lt}; + if $is_mask { + Simd(unsafe { simd_lt(self.0, other.0) }) + } else { + Simd(unsafe { simd_gt(self.0, other.0) }) + } + } + + /// Lane-wise greater-than-or-equals comparison. 
+ #[inline] + pub fn ge(self, other: Self) -> $mask_ty { + use crate::llvm::{simd_ge, simd_le}; + if $is_mask { + Simd(unsafe { simd_le(self.0, other.0) }) + } else { + Simd(unsafe { simd_ge(self.0, other.0) }) + } + } + } + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _cmp_vertical>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn cmp() { + let a = $id::splat($false); + let b = $id::splat($true); + + let r = a.lt(b); + let e = $mask_ty::splat(true); + assert!(r == e); + let r = a.le(b); + assert!(r == e); + + let e = $mask_ty::splat(false); + let r = a.gt(b); + assert!(r == e); + let r = a.ge(b); + assert!(r == e); + let r = a.eq(b); + assert!(r == e); + + let mut a = a; + let mut b = b; + let mut e = e; + for i in 0..$id::lanes() { + if i % 2 == 0 { + a = a.replace(i, $false); + b = b.replace(i, $true); + e = e.replace(i, true); + } else { + a = a.replace(i, $true); + b = b.replace(i, $false); + e = e.replace(i, false); + } + } + let r = a.lt(b); + assert!(r == e); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/default.rs b/third_party/rust/packed_simd/src/api/default.rs new file mode 100644 index 0000000000..7af55ea77a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/default.rs @@ -0,0 +1,30 @@ +//! Implements `Default` for vector types. + +macro_rules! impl_default { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl Default for $id { + #[inline] + fn default() -> Self { + Self::splat($elem_ty::default()) + } + } + + test_if!{ + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. 
+ #[allow(clippy::float_cmp)] + pub mod [<$id _default>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn default() { + let a = $id::default(); + for i in 0..$id::lanes() { + assert_eq!(a.extract(i), $elem_ty::default()); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt.rs b/third_party/rust/packed_simd/src/api/fmt.rs new file mode 100644 index 0000000000..f3f55c4015 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt.rs @@ -0,0 +1,12 @@ +//! Implements formatting APIs + +#[macro_use] +mod debug; +#[macro_use] +mod lower_hex; +#[macro_use] +mod upper_hex; +#[macro_use] +mod octal; +#[macro_use] +mod binary; diff --git a/third_party/rust/packed_simd/src/api/fmt/binary.rs b/third_party/rust/packed_simd/src/api/fmt/binary.rs new file mode 100644 index 0000000000..b60769082d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/binary.rs @@ -0,0 +1,56 @@ +//! Implement Octal formatting + +macro_rules! impl_fmt_binary { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::Binary for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _fmt_binary>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn binary() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::splat($elem_ty::default()); + let mut s = TinyString::new(); + write!(&mut s, "{:#b}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "") + .replace(")", "").split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:#b}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt/debug.rs b/third_party/rust/packed_simd/src/api/fmt/debug.rs new file mode 100644 index 0000000000..ad0b8a59a1 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/debug.rs @@ -0,0 +1,62 @@ +//! Implement debug formatting + +macro_rules! impl_fmt_debug_tests { + ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _fmt_debug>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn debug() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::default(); + let mut s = TinyString::new(); + write!(&mut s, "{:?}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "") + .replace(")", "").split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:?}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} + +macro_rules! impl_fmt_debug { + ([$elem_ty:ty; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::Debug for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + impl_fmt_debug_tests!([$elem_ty; $elem_count]: $id | $test_tt); + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs new file mode 100644 index 0000000000..5a7aa14b5b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/lower_hex.rs @@ -0,0 +1,56 @@ +//! Implement `LowerHex` formatting + +macro_rules! 
impl_fmt_lower_hex { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::LowerHex for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _fmt_lower_hex>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn lower_hex() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::splat($elem_ty::default()); + let mut s = TinyString::new(); + write!(&mut s, "{:#x}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "").replace(")", "") + .split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:#x}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt/octal.rs b/third_party/rust/packed_simd/src/api/fmt/octal.rs new file mode 100644 index 0000000000..83ac8abc7d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/octal.rs @@ -0,0 +1,56 @@ +//! Implement Octal formatting + +macro_rules! 
impl_fmt_octal { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::Octal for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _fmt_octal>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn octal_hex() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::splat($elem_ty::default()); + let mut s = TinyString::new(); + write!(&mut s, "{:#o}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "").replace(")", "") + .split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:#o}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs new file mode 100644 index 0000000000..aa88f673ab --- /dev/null +++ b/third_party/rust/packed_simd/src/api/fmt/upper_hex.rs @@ -0,0 +1,56 @@ +//! Implement `UpperHex` formatting + +macro_rules! 
impl_fmt_upper_hex { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::fmt::UpperHex for $id { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt( + &self, f: &mut crate::fmt::Formatter<'_>, + ) -> crate::fmt::Result { + write!(f, "{}(", stringify!($id))?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _fmt_upper_hex>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn upper_hex() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::splat($elem_ty::default()); + let mut s = TinyString::new(); + write!(&mut s, "{:#X}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}(", stringify!($id)).unwrap(); + assert!(s.starts_with(beg.as_str())); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "").replace(")", "") + .split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:#X}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/from.rs b/third_party/rust/packed_simd/src/api/from.rs new file mode 100644 index 0000000000..c30c4d6e21 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/from.rs @@ -0,0 +1,7 @@ +//! 
Implementations of the `From` and `Into` traits + +#[macro_use] +mod from_array; + +#[macro_use] +mod from_vector; diff --git a/third_party/rust/packed_simd/src/api/from/from_array.rs b/third_party/rust/packed_simd/src/api/from/from_array.rs new file mode 100644 index 0000000000..b83f938162 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/from/from_array.rs @@ -0,0 +1,123 @@ +//! Implements `From<[T; N]>` and `Into<[T; N]>` for vector types. + +macro_rules! impl_from_array { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt + | ($non_default_array:expr, $non_default_vec:expr)) => { + impl From<[$elem_ty; $elem_count]> for $id { + #[inline] + fn from(array: [$elem_ty; $elem_count]) -> Self { + union U { + array: [$elem_ty; $elem_count], + vec: $id, + } + unsafe { U { array }.vec } + } + } + + impl From<$id> for [$elem_ty; $elem_count] { + #[inline] + fn from(vec: $id) -> Self { + union U { + array: [$elem_ty; $elem_count], + vec: $id, + } + unsafe { U { vec }.array } + } + } + + // FIXME: `Into::into` is not inline, but due to + // the blanket impl in `std`, which is not + // marked `default`, we cannot override it here with + // specialization. + /* + impl Into<[$elem_ty; $elem_count]> for $id { + #[inline] + fn into(self) -> [$elem_ty; $elem_count] { + union U { + array: [$elem_ty; $elem_count], + vec: $id, + } + unsafe { U { vec: self }.array } + } + } + + impl Into<$id> for [$elem_ty; $elem_count] { + #[inline] + fn into(self) -> $id { + union U { + array: [$elem_ty; $elem_count], + vec: $id, + } + unsafe { U { array: self }.vec } + } + } + */ + + test_if! { + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. + #[allow(clippy::float_cmp)] + mod [<$id _from>] { + use super::*; + #[test] + fn array() { + let vec: $id = Default::default(); + + // FIXME: Workaround for arrays with more than 32 + // elements. + // + // Safe because we never take a reference to any + // uninitialized element. 
+ union W { + array: [$elem_ty; $elem_count], + other: () + } + let mut array = W { other: () }; + for i in 0..$elem_count { + let default: $elem_ty = Default::default(); + // note: array.other is the active member and + // initialized so we can take a reference to it: + let p = unsafe { + &mut array.other as *mut () as *mut $elem_ty + }; + // note: default is a valid bit-pattern for + // $elem_ty: + unsafe { + crate::ptr::write(p.wrapping_add(i), default) + }; + } + // note: the array variant of the union is properly + // initialized: + let mut array = unsafe { + array.array + }; + + array[0] = $non_default_array; + let vec = vec.replace(0, $non_default_vec); + + let vec_from_array = $id::from(array); + assert_eq!(vec_from_array, vec); + let array_from_vec + = <[$elem_ty; $elem_count]>::from(vec); + // FIXME: Workaround for arrays with more than 32 + // elements. + for i in 0..$elem_count { + assert_eq!(array_from_vec[i], array[i]); + } + + let vec_from_into_array: $id = array.into(); + assert_eq!(vec_from_into_array, vec); + let array_from_into_vec: [$elem_ty; $elem_count] + = vec.into(); + // FIXME: Workaround for arrays with more than 32 + // elements. + for i in 0..$elem_count { + assert_eq!(array_from_into_vec[i], array[i]); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/from/from_vector.rs b/third_party/rust/packed_simd/src/api/from/from_vector.rs new file mode 100644 index 0000000000..55f70016d5 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/from/from_vector.rs @@ -0,0 +1,67 @@ +//! Implements `From` and `Into` for vector types. + +macro_rules! 
impl_from_vector { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt + | $source:ident) => { + impl From<$source> for $id { + #[inline] + fn from(source: $source) -> Self { + fn static_assert_same_number_of_lanes<T, U>() + where + T: crate::sealed::Simd, + U: crate::sealed::Simd<LanesType = T::LanesType>, + { + } + use crate::llvm::simd_cast; + static_assert_same_number_of_lanes::<$id, $source>(); + Simd(unsafe { simd_cast(source.0) }) + } + } + + // FIXME: `Into::into` is not inline, but due to the blanket impl in + // `std`, which is not marked `default`, we cannot override it here + // with specialization. + + /* + impl Into<$id> for $source { + #[inline] + fn into(self) -> $id { + unsafe { simd_cast(self) } + } + } + */ + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _from_ $source>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from() { + assert_eq!($id::lanes(), $source::lanes()); + let source: $source = Default::default(); + let vec: $id = Default::default(); + + let e = $id::from(source); + assert_eq!(e, vec); + + let e: $id = source.into(); + assert_eq!(e, vec); + } + } + } + } + }; +} + +macro_rules! impl_from_vectors { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt + | $($source:ident),*) => { + $( + impl_from_vector!( + [$elem_ty; $elem_count]: $id | $test_tt | $source + ); + )* + } +} diff --git a/third_party/rust/packed_simd/src/api/hash.rs b/third_party/rust/packed_simd/src/api/hash.rs new file mode 100644 index 0000000000..ee80eff939 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/hash.rs @@ -0,0 +1,49 @@ +//! Implements `Hash` for vector types. + +macro_rules! 
impl_hash { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::hash::Hash for $id { + #[inline] + fn hash<H: crate::hash::Hasher>(&self, state: &mut H) { + unsafe { + union A { + data: [$elem_ty; $id::lanes()], + vec: $id, + } + A { vec: *self }.data.hash(state) + } + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _hash>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn hash() { + use crate::hash::{Hash, Hasher}; + #[allow(deprecated)] + use crate::hash::{SipHasher13}; + type A = [$elem_ty; $id::lanes()]; + let a: A = [42 as $elem_ty; $id::lanes()]; + assert_eq!( + crate::mem::size_of::<A>(), + crate::mem::size_of::<$id>() + ); + #[allow(deprecated)] + let mut a_hash = SipHasher13::new(); + let mut v_hash = a_hash.clone(); + a.hash(&mut a_hash); + + // Integer within mantissa^1 range. + #[allow(clippy::float_cmp)] + let v = $id::splat(42 as $elem_ty); + v.hash(&mut v_hash); + assert_eq!(a_hash.finish(), v_hash.finish()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/into_bits.rs b/third_party/rust/packed_simd/src/api/into_bits.rs new file mode 100644 index 0000000000..f2cc1bae53 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits.rs @@ -0,0 +1,59 @@ +//! Implementation of `FromBits` and `IntoBits`. + +/// Safe lossless bitwise conversion from `T` to `Self`. +pub trait FromBits<T>: crate::marker::Sized { + /// Safe lossless bitwise transmute from `T` to `Self`. + fn from_bits(t: T) -> Self; +} + +/// Safe lossless bitwise conversion from `Self` to `T`. +pub trait IntoBits<T>: crate::marker::Sized { + /// Safe lossless bitwise transmute from `self` to `T`. + fn into_bits(self) -> T; +} + +/// `FromBits` implies `IntoBits`. 
+impl<T, U> IntoBits<U> for T +where + U: FromBits<T>, +{ + #[inline] + fn into_bits(self) -> U { + debug_assert!( + crate::mem::size_of::<Self>() == crate::mem::size_of::<U>() + ); + U::from_bits(self) + } +} + +/// `FromBits` and `IntoBits` are reflexive +impl<T> FromBits<T> for T { + #[inline] + fn from_bits(t: Self) -> Self { + t + } +} + +#[macro_use] +mod macros; + +mod v16; +pub use self::v16::*; + +mod v32; +pub use self::v32::*; + +mod v64; +pub use self::v64::*; + +mod v128; +pub use self::v128::*; + +mod v256; +pub use self::v256::*; + +mod v512; +pub use self::v512::*; + +mod arch_specific; +pub use self::arch_specific::*; diff --git a/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs new file mode 100644 index 0000000000..fee6140052 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/arch_specific.rs @@ -0,0 +1,189 @@ +//! `FromBits` and `IntoBits` between portable vector types and the +//! architecture-specific vector types. +#[rustfmt::skip] + +// FIXME: MIPS FromBits/IntoBits + +#[allow(unused)] +use crate::*; + +/// This macro implements FromBits for the portable and the architecture +/// specific vector types. +/// +/// The "leaf" case is at the bottom, and the most generic case is at the top. +/// The generic case is split into smaller cases recursively. +macro_rules! 
impl_arch { + ([$arch_head_i:ident[$arch_head_tt:tt]: $($arch_head_ty:ident),*], + $([$arch_tail_i:ident[$arch_tail_tt:tt]: $($arch_tail_ty:ident),*]),* | + from: $($from_ty:ident),* | into: $($into_ty:ident),* | + test: $test_tt:tt) => { + impl_arch!( + [$arch_head_i[$arch_head_tt]: $($arch_head_ty),*] | + from: $($from_ty),* | + into: $($into_ty),* | + test: $test_tt + ); + impl_arch!( + $([$arch_tail_i[$arch_tail_tt]: $($arch_tail_ty),*]),* | + from: $($from_ty),* | + into: $($into_ty),* | + test: $test_tt + ); + }; + ([$arch:ident[$arch_tt:tt]: $($arch_ty:ident),*] | + from: $($from_ty:ident),* | into: $($into_ty:ident),* | + test: $test_tt:tt) => { + // note: if target is "arm", "+v7,+neon" must be enabled + // and the std library must be recompiled with them + #[cfg(any( + not(target_arch = "arm"), + all(target_feature = "v7", target_feature = "neon", + any(feature = "core_arch", libcore_neon))) + )] + // note: if target is "powerpc", "altivec" must be enabled + // and the std library must be recompiled with it + #[cfg(any( + not(target_arch = "powerpc"), + all(target_feature = "altivec", feature = "core_arch"), + ))] + #[cfg(target_arch = $arch_tt)] + use crate::arch::$arch::{ + $($arch_ty),* + }; + + #[cfg(any( + not(target_arch = "arm"), + all(target_feature = "v7", target_feature = "neon", + any(feature = "core_arch", libcore_neon))) + )] + #[cfg(any( + not(target_arch = "powerpc"), + all(target_feature = "altivec", feature = "core_arch"), + ))] + #[cfg(target_arch = $arch_tt)] + impl_arch!($($arch_ty),* | $($from_ty),* | $($into_ty),* | + test: $test_tt); + }; + ($arch_head:ident, $($arch_tail:ident),* | $($from_ty:ident),* + | $($into_ty:ident),* | test: $test_tt:tt) => { + impl_arch!($arch_head | $($from_ty),* | $($into_ty),* | + test: $test_tt); + impl_arch!($($arch_tail),* | $($from_ty),* | $($into_ty),* | + test: $test_tt); + }; + ($arch_head:ident | $($from_ty:ident),* | $($into_ty:ident),* | + test: $test_tt:tt) => { + 
impl_from_bits!($arch_head[$test_tt]: $($from_ty),*); + impl_into_bits!($arch_head[$test_tt]: $($into_ty),*); + }; +} + +//////////////////////////////////////////////////////////////////////////////// +// Implementations for the 64-bit wide vector types: + +// FIXME: 64-bit single element types +// FIXME: arm/aarch float16x4_t missing +impl_arch!( + [arm["arm"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, + poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, + uint64x1_t], + [aarch64["aarch64"]: int8x8_t, uint8x8_t, poly8x8_t, int16x4_t, uint16x4_t, + poly16x4_t, int32x2_t, uint32x2_t, float32x2_t, int64x1_t, uint64x1_t, + float64x1_t] | + from: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2 | + into: i8x8, u8x8, i16x4, u16x4, i32x2, u32x2, f32x2 | + test: test_v64 +); + +//////////////////////////////////////////////////////////////////////////////// +// Implementations for the 128-bit wide vector types: + +// FIXME: arm/aarch float16x8_t missing +// FIXME: ppc vector_pixel missing +// FIXME: ppc64 vector_Float16 missing +// FIXME: ppc64 vector_signed_long_long missing +// FIXME: ppc64 vector_unsigned_long_long missing +// FIXME: ppc64 vector_bool_long_long missing +// FIXME: ppc64 vector_signed___int128 missing +// FIXME: ppc64 vector_unsigned___int128 missing +impl_arch!( + [x86["x86"]: __m128, __m128i, __m128d], + [x86_64["x86_64"]: __m128, __m128i, __m128d], + [arm["arm"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, uint16x8_t, + poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, uint64x2_t], + [aarch64["aarch64"]: int8x16_t, uint8x16_t, poly8x16_t, int16x8_t, + uint16x8_t, poly16x8_t, int32x4_t, uint32x4_t, float32x4_t, int64x2_t, + uint64x2_t, float64x2_t], + [powerpc["powerpc"]: vector_signed_char, vector_unsigned_char, + vector_signed_short, vector_unsigned_short, vector_signed_int, + vector_unsigned_int, vector_float], + [powerpc64["powerpc64"]: vector_signed_char, vector_unsigned_char, + 
vector_signed_short, vector_unsigned_short, vector_signed_int, + vector_unsigned_int, vector_float, vector_signed_long, + vector_unsigned_long, vector_double] | + from: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, + i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, i64x2, u64x2, f64x2, + i128x1, u128x1 | + test: test_v128 +); + +impl_arch!( + [powerpc["powerpc"]: vector_bool_char], + [powerpc64["powerpc64"]: vector_bool_char] | + from: m8x16, m16x8, m32x4, m64x2, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, + i64x2, u64x2, f64x2, i128x1, u128x1, + // Masks: + m8x16 | + test: test_v128 +); + +impl_arch!( + [powerpc["powerpc"]: vector_bool_short], + [powerpc64["powerpc64"]: vector_bool_short] | + from: m16x8, m32x4, m64x2, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, + i64x2, u64x2, f64x2, i128x1, u128x1, + // Masks: + m8x16, m16x8 | + test: test_v128 +); + +impl_arch!( + [powerpc["powerpc"]: vector_bool_int], + [powerpc64["powerpc64"]: vector_bool_int] | + from: m32x4, m64x2, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, + i64x2, u64x2, f64x2, i128x1, u128x1, + // Masks: + m8x16, m16x8, m32x4 | + test: test_v128 +); + +impl_arch!( + [powerpc64["powerpc64"]: vector_bool_long] | + from: m64x2, m128x1 | + into: i8x16, u8x16, i16x8, u16x8, i32x4, u32x4, f32x4, + i64x2, u64x2, f64x2, i128x1, u128x1, + // Masks: + m8x16, m16x8, m32x4, m64x2 | + test: test_v128 +); + +//////////////////////////////////////////////////////////////////////////////// +// Implementations for the 256-bit wide vector types + +impl_arch!( + [x86["x86"]: __m256, __m256i, __m256d], + [x86_64["x86_64"]: __m256, __m256i, __m256d] | + from: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, + i32x8, u32x8, f32x8, m32x8, + i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2 | + into: i8x32, u8x32, i16x16, u16x16, i32x8, u32x8, f32x8, + i64x4, u64x4, f64x4, i128x2, 
u128x2 | + test: test_v256 +); + +//////////////////////////////////////////////////////////////////////////////// +// FIXME: Implementations for the 512-bit wide vector types diff --git a/third_party/rust/packed_simd/src/api/into_bits/macros.rs b/third_party/rust/packed_simd/src/api/into_bits/macros.rs new file mode 100644 index 0000000000..8cec5b0047 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/macros.rs @@ -0,0 +1,74 @@ +//! Macros implementing `FromBits` + +macro_rules! impl_from_bits_ { + ($id:ident[$test_tt:tt]: $from_ty:ident) => { + impl crate::api::into_bits::FromBits<$from_ty> for $id { + #[inline] + fn from_bits(x: $from_ty) -> Self { + unsafe { crate::mem::transmute(x) } + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _from_bits_ $from_ty>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn test() { + use crate::{ + ptr::{read_unaligned}, + mem::{size_of, zeroed} + }; + use crate::IntoBits; + assert_eq!(size_of::<$id>(), + size_of::<$from_ty>()); + // This is safe becasue we never create a reference to + // uninitialized memory: + let a: $from_ty = unsafe { zeroed() }; + + let b_0: $id = crate::FromBits::from_bits(a); + let b_1: $id = a.into_bits(); + + // Check that these are byte-wise equal, that is, + // that the bit patterns are identical: + for i in 0..size_of::<$id>() { + // This is safe because we only read initialized + // memory in bounds. Also, taking a reference to + // `b_i` is ok because the fields are initialized. + unsafe { + let b_0_v: u8 = read_unaligned( + (&b_0 as *const $id as *const u8) + .wrapping_add(i) + ); + let b_1_v: u8 = read_unaligned( + (&b_1 as *const $id as *const u8) + .wrapping_add(i) + ); + assert_eq!(b_0_v, b_1_v); + } + } + } + } + } + } + }; +} + +macro_rules! 
impl_from_bits { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_bits_!($id[$test_tt]: $from_ty); + )* + } +} + +#[allow(unused)] +macro_rules! impl_into_bits { + ($id:ident[$test_tt:tt]: $($from_ty:ident),*) => { + $( + impl_from_bits_!($from_ty[$test_tt]: $id); + )* + } +} diff --git a/third_party/rust/packed_simd/src/api/into_bits/v128.rs b/third_party/rust/packed_simd/src/api/into_bits/v128.rs new file mode 100644 index 0000000000..e32cd7f9f0 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v128.rs @@ -0,0 +1,28 @@ +//! `FromBits` and `IntoBits` implementations for portable 128-bit wide vectors +#[rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x16[test_v128]: u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(u8x16[test_v128]: i8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(m8x16[test_v128]: m16x8, m32x4, m64x2, m128x1); + +impl_from_bits!(i16x8[test_v128]: i8x16, u8x16, m8x16, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(u16x8[test_v128]: i8x16, u8x16, m8x16, i16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(m16x8[test_v128]: m32x4, m64x2, m128x1); + +impl_from_bits!(i32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(u32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(f32x4[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(m32x4[test_v128]: m64x2, m128x1); + +impl_from_bits!(i64x2[test_v128]: 
i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, u64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(u64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, f64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(f64x2[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, m64x2, i128x1, u128x1, m128x1); +impl_from_bits!(m64x2[test_v128]: m128x1); + +impl_from_bits!(i128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, u128x1, m128x1); +impl_from_bits!(u128x1[test_v128]: i8x16, u8x16, m8x16, i16x8, u16x8, m16x8, i32x4, u32x4, f32x4, m32x4, i64x2, u64x2, f64x2, m64x2, i128x1, m128x1); +// note: m128x1 cannot be constructed from all the other masks bit patterns in here + diff --git a/third_party/rust/packed_simd/src/api/into_bits/v16.rs b/third_party/rust/packed_simd/src/api/into_bits/v16.rs new file mode 100644 index 0000000000..e44d0e7f9a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v16.rs @@ -0,0 +1,9 @@ +//! `FromBits` and `IntoBits` implementations for portable 16-bit wide vectors +#[rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x2[test_v16]: u8x2, m8x2); +impl_from_bits!(u8x2[test_v16]: i8x2, m8x2); +// note: m8x2 cannot be constructed from all i8x2 or u8x2 bit patterns diff --git a/third_party/rust/packed_simd/src/api/into_bits/v256.rs b/third_party/rust/packed_simd/src/api/into_bits/v256.rs new file mode 100644 index 0000000000..c4c373e0d0 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v256.rs @@ -0,0 +1,27 @@ +//! 
`FromBits` and `IntoBits` implementations for portable 256-bit wide vectors +#[rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x32[test_v256]: u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(u8x32[test_v256]: i8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(m8x32[test_v256]: m16x16, m32x8, m64x4, m128x2); + +impl_from_bits!(i16x16[test_v256]: i8x32, u8x32, m8x32, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(u16x16[test_v256]: i8x32, u8x32, m8x32, i16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(m16x16[test_v256]: m32x8, m64x4, m128x2); + +impl_from_bits!(i32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(u32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(f32x8[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(m32x8[test_v256]: m64x4, m128x2); + +impl_from_bits!(i64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, u64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(u64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, f64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(f64x4[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, m64x4, i128x2, u128x2, m128x2); +impl_from_bits!(m64x4[test_v256]: m128x2); + +impl_from_bits!(i128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, 
m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, u128x2, m128x2); +impl_from_bits!(u128x2[test_v256]: i8x32, u8x32, m8x32, i16x16, u16x16, m16x16, i32x8, u32x8, f32x8, m32x8, i64x4, u64x4, f64x4, m64x4, i128x2, m128x2); +// note: m128x2 cannot be constructed from all the other masks bit patterns in here diff --git a/third_party/rust/packed_simd/src/api/into_bits/v32.rs b/third_party/rust/packed_simd/src/api/into_bits/v32.rs new file mode 100644 index 0000000000..5dba38a179 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v32.rs @@ -0,0 +1,13 @@ +//! `FromBits` and `IntoBits` implementations for portable 32-bit wide vectors +#[rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x4[test_v32]: u8x4, m8x4, i16x2, u16x2, m16x2); +impl_from_bits!(u8x4[test_v32]: i8x4, m8x4, i16x2, u16x2, m16x2); +impl_from_bits!(m8x4[test_v32]: m16x2); + +impl_from_bits!(i16x2[test_v32]: i8x4, u8x4, m8x4, u16x2, m16x2); +impl_from_bits!(u16x2[test_v32]: i8x4, u8x4, m8x4, i16x2, m16x2); +// note: m16x2 cannot be constructed from all m8x4 bit patterns diff --git a/third_party/rust/packed_simd/src/api/into_bits/v512.rs b/third_party/rust/packed_simd/src/api/into_bits/v512.rs new file mode 100644 index 0000000000..4a771962c3 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v512.rs @@ -0,0 +1,27 @@ +//! 
`FromBits` and `IntoBits` implementations for portable 512-bit wide vectors +#[rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x64[test_v512]: u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(u8x64[test_v512]: i8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(m8x64[test_v512]: m16x32, m32x16, m64x8, m128x4); + +impl_from_bits!(i16x32[test_v512]: i8x64, u8x64, m8x64, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(u16x32[test_v512]: i8x64, u8x64, m8x64, i16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(m16x32[test_v512]: m32x16, m64x8, m128x4); + +impl_from_bits!(i32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(u32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(f32x16[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(m32x16[test_v512]: m64x8, m128x4); + +impl_from_bits!(i64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, u64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(u64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, f64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(f64x8[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, m64x8, i128x4, u128x4, m128x4); +impl_from_bits!(m64x8[test_v512]: m128x4); + +impl_from_bits!(i128x4[test_v512]: 
i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, u128x4, m128x4); +impl_from_bits!(u128x4[test_v512]: i8x64, u8x64, m8x64, i16x32, u16x32, m16x32, i32x16, u32x16, f32x16, m32x16, i64x8, u64x8, f64x8, m64x8, i128x4, m128x4); +// note: m128x4 cannot be constructed from all the other masks bit patterns in here diff --git a/third_party/rust/packed_simd/src/api/into_bits/v64.rs b/third_party/rust/packed_simd/src/api/into_bits/v64.rs new file mode 100644 index 0000000000..5b065f1bd5 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/into_bits/v64.rs @@ -0,0 +1,18 @@ +//! `FromBits` and `IntoBits` implementations for portable 64-bit wide vectors +#[rustfmt::skip] + +#[allow(unused)] // wasm_bindgen_test +use crate::*; + +impl_from_bits!(i8x8[test_v64]: u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); +impl_from_bits!(u8x8[test_v64]: i8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); +impl_from_bits!(m8x8[test_v64]: m16x4, m32x2); + +impl_from_bits!(i16x4[test_v64]: i8x8, u8x8, m8x8, u16x4, m16x4, i32x2, u32x2, f32x2, m32x2); +impl_from_bits!(u16x4[test_v64]: i8x8, u8x8, m8x8, i16x4, m16x4, i32x2, u32x2, f32x2, m32x2); +impl_from_bits!(m16x4[test_v64]: m32x2); + +impl_from_bits!(i32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, u32x2, f32x2, m32x2); +impl_from_bits!(u32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, f32x2, m32x2); +impl_from_bits!(f32x2[test_v64]: i8x8, u8x8, m8x8, i16x4, u16x4, m16x4, i32x2, u32x2, m32x2); +// note: m32x2 cannot be constructed from all m16x4 or m8x8 bit patterns diff --git a/third_party/rust/packed_simd/src/api/math.rs b/third_party/rust/packed_simd/src/api/math.rs new file mode 100644 index 0000000000..e7a8d256ba --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math.rs @@ -0,0 +1,4 @@ +//! 
Implements vertical math operations + +#[macro_use] +mod float; diff --git a/third_party/rust/packed_simd/src/api/math/float.rs b/third_party/rust/packed_simd/src/api/math/float.rs new file mode 100644 index 0000000000..c0ec46e917 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float.rs @@ -0,0 +1,61 @@ +//! Implements vertical floating-point math operations. + +#[macro_use] +mod abs; + +#[macro_use] +mod consts; + +#[macro_use] +mod cos; + +#[macro_use] +mod exp; + +#[macro_use] +mod powf; + +#[macro_use] +mod ln; + +#[macro_use] +mod mul_add; + +#[macro_use] +mod mul_adde; + +#[macro_use] +mod recpre; + +#[macro_use] +mod rsqrte; + +#[macro_use] +mod sin; + +#[macro_use] +mod sqrt; + +#[macro_use] +mod sqrte; + +macro_rules! impl_float_category { + ([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => { + impl $id { + #[inline] + pub fn is_nan(self) -> $mask_ty { + self.ne(self) + } + + #[inline] + pub fn is_infinite(self) -> $mask_ty { + self.eq(Self::INFINITY) | self.eq(Self::NEG_INFINITY) + } + + #[inline] + pub fn is_finite(self) -> $mask_ty { + !(self.is_nan() | self.is_infinite()) + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/abs.rs b/third_party/rust/packed_simd/src/api/math/float/abs.rs new file mode 100644 index 0000000000..1865bdb68e --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/abs.rs @@ -0,0 +1,31 @@ +//! Implements vertical (lane-wise) floating-point `abs`. + +macro_rules! impl_math_float_abs { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Absolute value. + #[inline] + pub fn abs(self) -> Self { + use crate::codegen::math::float::abs::Abs; + Abs::abs(self) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _math_abs>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn abs() { + let o = $id::splat(1 as $elem_ty); + assert_eq!(o, o.abs()); + + let mo = $id::splat(-1 as $elem_ty); + assert_eq!(o, mo.abs()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/consts.rs b/third_party/rust/packed_simd/src/api/math/float/consts.rs new file mode 100644 index 0000000000..89f93a6d69 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/consts.rs @@ -0,0 +1,86 @@ +macro_rules! impl_float_consts { + ([$elem_ty:ident; $elem_count:expr]: $id:ident) => { + impl $id { + /// Machine epsilon value. + pub const EPSILON: $id = $id::splat(core::$elem_ty::EPSILON); + + /// Smallest finite value. + pub const MIN: $id = $id::splat(core::$elem_ty::MIN); + + /// Smallest positive normal value. + pub const MIN_POSITIVE: $id = + $id::splat(core::$elem_ty::MIN_POSITIVE); + + /// Largest finite value. + pub const MAX: $id = $id::splat(core::$elem_ty::MAX); + + /// Not a Number (NaN). + pub const NAN: $id = $id::splat(core::$elem_ty::NAN); + + /// Infinity (∞). + pub const INFINITY: $id = $id::splat(core::$elem_ty::INFINITY); + + /// Negative infinity (-∞). 
+ pub const NEG_INFINITY: $id = + $id::splat(core::$elem_ty::NEG_INFINITY); + + /// Archimedes' constant (π) + pub const PI: $id = $id::splat(core::$elem_ty::consts::PI); + + /// π/2 + pub const FRAC_PI_2: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_2); + + /// π/3 + pub const FRAC_PI_3: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_3); + + /// π/4 + pub const FRAC_PI_4: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_4); + + /// π/6 + pub const FRAC_PI_6: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_6); + + /// π/8 + pub const FRAC_PI_8: $id = + $id::splat(core::$elem_ty::consts::FRAC_PI_8); + + /// 1/π + pub const FRAC_1_PI: $id = + $id::splat(core::$elem_ty::consts::FRAC_1_PI); + + /// 2/π + pub const FRAC_2_PI: $id = + $id::splat(core::$elem_ty::consts::FRAC_2_PI); + + /// 2/sqrt(π) + pub const FRAC_2_SQRT_PI: $id = + $id::splat(core::$elem_ty::consts::FRAC_2_SQRT_PI); + + /// sqrt(2) + pub const SQRT_2: $id = $id::splat(core::$elem_ty::consts::SQRT_2); + + /// 1/sqrt(2) + pub const FRAC_1_SQRT_2: $id = + $id::splat(core::$elem_ty::consts::FRAC_1_SQRT_2); + + /// Euler's number (e) + pub const E: $id = $id::splat(core::$elem_ty::consts::E); + + /// log<sub>2</sub>(e) + pub const LOG2_E: $id = $id::splat(core::$elem_ty::consts::LOG2_E); + + /// log<sub>10</sub>(e) + pub const LOG10_E: $id = + $id::splat(core::$elem_ty::consts::LOG10_E); + + /// ln(2) + pub const LN_2: $id = $id::splat(core::$elem_ty::consts::LN_2); + + /// ln(10) + pub const LN_10: $id = $id::splat(core::$elem_ty::consts::LN_10); + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/cos.rs b/third_party/rust/packed_simd/src/api/math/float/cos.rs new file mode 100644 index 0000000000..e5b8f46036 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/cos.rs @@ -0,0 +1,44 @@ +//! Implements vertical (lane-wise) floating-point `cos`. + +macro_rules! 
impl_math_float_cos { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Cosine. + #[inline] + pub fn cos(self) -> Self { + use crate::codegen::math::float::cos::Cos; + Cos::cos(self) + } + + /// Cosine of `self * PI`. + #[inline] + pub fn cos_pi(self) -> Self { + use crate::codegen::math::float::cos_pi::CosPi; + CosPi::cos_pi(self) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_cos>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn cos() { + use crate::$elem_ty::consts::PI; + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let p = $id::splat(PI as $elem_ty); + let ph = $id::splat(PI as $elem_ty / 2.); + let z_r = $id::splat((PI as $elem_ty / 2.).cos()); + let o_r = $id::splat((PI as $elem_ty).cos()); + + assert_eq!(o, z.cos()); + assert_eq!(z_r, ph.cos()); + assert_eq!(o_r, p.cos()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/exp.rs b/third_party/rust/packed_simd/src/api/math/float/exp.rs new file mode 100644 index 0000000000..e3356d853a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/exp.rs @@ -0,0 +1,33 @@ +//! Implements vertical (lane-wise) floating-point `exp`. + +macro_rules! impl_math_float_exp { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Returns the exponential function of `self`: `e^(self)`. + #[inline] + pub fn exp(self) -> Self { + use crate::codegen::math::float::exp::Exp; + Exp::exp(self) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _math_exp>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn exp() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + assert_eq!(o, z.exp()); + + let e = $id::splat(crate::f64::consts::E as $elem_ty); + let tol = $id::splat(2.4e-4 as $elem_ty); + assert!((e - o.exp()).abs().le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/ln.rs b/third_party/rust/packed_simd/src/api/math/float/ln.rs new file mode 100644 index 0000000000..5ceb9173ae --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/ln.rs @@ -0,0 +1,33 @@ +//! Implements vertical (lane-wise) floating-point `ln`. + +macro_rules! impl_math_float_ln { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Returns the natural logarithm of `self`. + #[inline] + pub fn ln(self) -> Self { + use crate::codegen::math::float::ln::Ln; + Ln::ln(self) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_ln>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ln() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + assert_eq!(z, o.ln()); + + let e = $id::splat(crate::f64::consts::E as $elem_ty); + let tol = $id::splat(2.4e-4 as $elem_ty); + assert!((o - e.ln()).abs().le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_add.rs b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs new file mode 100644 index 0000000000..4b170ee2b7 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/mul_add.rs @@ -0,0 +1,44 @@ +//! Implements vertical (lane-wise) floating-point `mul_add`. + +macro_rules! 
impl_math_float_mul_add { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Fused multiply add: `self * y + z` + #[inline] + pub fn mul_add(self, y: Self, z: Self) -> Self { + use crate::codegen::math::float::mul_add::MulAdd; + MulAdd::mul_add(self, y, z) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_mul_add>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn mul_add() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let t3 = $id::splat(3 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + assert_eq!(z, z.mul_add(z, z)); + assert_eq!(o, o.mul_add(o, z)); + assert_eq!(o, o.mul_add(z, o)); + assert_eq!(o, z.mul_add(o, o)); + + assert_eq!(t, o.mul_add(o, o)); + assert_eq!(t, o.mul_add(t, z)); + assert_eq!(t, t.mul_add(o, z)); + + assert_eq!(f, t.mul_add(t, z)); + assert_eq!(f, t.mul_add(o, t)); + assert_eq!(t3, t.mul_add(o, o)); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs new file mode 100644 index 0000000000..c5b27110f2 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/mul_adde.rs @@ -0,0 +1,48 @@ +//! Implements vertical (lane-wise) floating-point `mul_adde`. + +macro_rules! impl_math_float_mul_adde { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Fused multiply add estimate: ~= `self * y + z` + /// + /// While fused multiply-add (`fma`) has infinite precision, + /// `mul_adde` has _at worst_ the same precision of a multiply followed by an add. + /// This might be more efficient on architectures that do not have an `fma` instruction. 
+ #[inline] + pub fn mul_adde(self, y: Self, z: Self) -> Self { + use crate::codegen::math::float::mul_adde::MulAddE; + MulAddE::mul_adde(self, y, z) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_mul_adde>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn mul_adde() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let t3 = $id::splat(3 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + assert_eq!(z, z.mul_adde(z, z)); + assert_eq!(o, o.mul_adde(o, z)); + assert_eq!(o, o.mul_adde(z, o)); + assert_eq!(o, z.mul_adde(o, o)); + + assert_eq!(t, o.mul_adde(o, o)); + assert_eq!(t, o.mul_adde(t, z)); + assert_eq!(t, t.mul_adde(o, z)); + + assert_eq!(f, t.mul_adde(t, z)); + assert_eq!(f, t.mul_adde(o, t)); + assert_eq!(t3, t.mul_adde(o, o)); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/powf.rs b/third_party/rust/packed_simd/src/api/math/float/powf.rs new file mode 100644 index 0000000000..83dc9ff9c0 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/powf.rs @@ -0,0 +1,36 @@ +//! Implements vertical (lane-wise) floating-point `powf`. + +macro_rules! impl_math_float_powf { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Raises `self` number to the floating point power of `x`. + #[inline] + pub fn powf(self, x: Self) -> Self { + use crate::codegen::math::float::powf::Powf; + Powf::powf(self, x) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _math_powf>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn powf() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + assert_eq!(o, o.powf(z)); + assert_eq!(o, t.powf(z)); + assert_eq!(o, o.powf(o)); + assert_eq!(t, t.powf(o)); + + let f = $id::splat(4 as $elem_ty); + assert_eq!(f, t.powf(t)); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/recpre.rs b/third_party/rust/packed_simd/src/api/math/float/recpre.rs new file mode 100644 index 0000000000..127f0b2ff6 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/recpre.rs @@ -0,0 +1,36 @@ +//! Implements vertical (lane-wise) floating-point `recpre`. + +macro_rules! impl_math_float_recpre { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Reciprocal estimate: `~= 1. / self`. + /// + /// FIXME: The precision of the estimate is currently unspecified. + #[inline] + pub fn recpre(self) -> Self { + $id::splat(1.) / self + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_recpre>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn recpre() { + let tol = $id::splat(2.4e-4 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let error = (o - o.recpre()).abs(); + assert!(error.le(tol).all()); + + let t = $id::splat(2 as $elem_ty); + let e = 0.5; + let error = (e - t.recpre()).abs(); + assert!(error.le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs new file mode 100644 index 0000000000..c77977f7b1 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/rsqrte.rs @@ -0,0 +1,40 @@ +//! Implements vertical (lane-wise) floating-point `rsqrte`. 
+ +macro_rules! impl_math_float_rsqrte { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Reciprocal square-root estimate: `~= 1. / self.sqrt()`. + /// + /// FIXME: The precision of the estimate is currently unspecified. + #[inline] + pub fn rsqrte(self) -> Self { + unsafe { + use crate::llvm::simd_fsqrt; + $id::splat(1.) / Simd(simd_fsqrt(self.0)) + } + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_rsqrte>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn rsqrte() { + use crate::$elem_ty::consts::SQRT_2; + let tol = $id::splat(2.4e-4 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let error = (o - o.rsqrte()).abs(); + assert!(error.le(tol).all()); + + let t = $id::splat(2 as $elem_ty); + let e = 1. / SQRT_2; + let error = (e - t.rsqrte()).abs(); + assert!(error.le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/sin.rs b/third_party/rust/packed_simd/src/api/math/float/sin.rs new file mode 100644 index 0000000000..49908319b1 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/sin.rs @@ -0,0 +1,50 @@ +//! Implements vertical (lane-wise) floating-point `sin`. + +macro_rules! impl_math_float_sin { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Sine. + #[inline] + pub fn sin(self) -> Self { + use crate::codegen::math::float::sin::Sin; + Sin::sin(self) + } + + /// Sine of `self * PI`. + #[inline] + pub fn sin_pi(self) -> Self { + use crate::codegen::math::float::sin_pi::SinPi; + SinPi::sin_pi(self) + } + + /// Sine and cosine of `self * PI`. + #[inline] + pub fn sin_cos_pi(self) -> (Self, Self) { + use crate::codegen::math::float::sin_cos_pi::SinCosPi; + SinCosPi::sin_cos_pi(self) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _math_sin>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sin() { + use crate::$elem_ty::consts::PI; + let z = $id::splat(0 as $elem_ty); + let p = $id::splat(PI as $elem_ty); + let ph = $id::splat(PI as $elem_ty / 2.); + let o_r = $id::splat((PI as $elem_ty / 2.).sin()); + let z_r = $id::splat((PI as $elem_ty).sin()); + + assert_eq!(z, z.sin()); + assert_eq!(o_r, ph.sin()); + assert_eq!(z_r, p.sin()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrt.rs b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs new file mode 100644 index 0000000000..ae624122d0 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/sqrt.rs @@ -0,0 +1,35 @@ +//! Implements vertical (lane-wise) floating-point `sqrt`. + +macro_rules! impl_math_float_sqrt { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + #[inline] + pub fn sqrt(self) -> Self { + use crate::codegen::math::float::sqrt::Sqrt; + Sqrt::sqrt(self) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_sqrt>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sqrt() { + use crate::$elem_ty::consts::SQRT_2; + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + assert_eq!(z, z.sqrt()); + assert_eq!(o, o.sqrt()); + + let t = $id::splat(2 as $elem_ty); + let e = $id::splat(SQRT_2); + assert_eq!(e, t.sqrt()); + + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/math/float/sqrte.rs b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs new file mode 100644 index 0000000000..f7ffad748d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/math/float/sqrte.rs @@ -0,0 +1,44 @@ +//! Implements vertical (lane-wise) floating-point `sqrte`. + +macro_rules! 
impl_math_float_sqrte { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Square-root estimate. + /// + /// FIXME: The precision of the estimate is currently unspecified. + #[inline] + pub fn sqrte(self) -> Self { + use crate::codegen::math::float::sqrte::Sqrte; + Sqrte::sqrte(self) + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _math_sqrte>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sqrte() { + use crate::$elem_ty::consts::SQRT_2; + let tol = $id::splat(2.4e-4 as $elem_ty); + + let z = $id::splat(0 as $elem_ty); + let error = (z - z.sqrte()).abs(); + assert!(error.le(tol).all()); + + let o = $id::splat(1 as $elem_ty); + let error = (o - o.sqrte()).abs(); + assert!(error.le(tol).all()); + + let t = $id::splat(2 as $elem_ty); + let e = $id::splat(SQRT_2 as $elem_ty); + let error = (e - t.sqrte()).abs(); + + assert!(error.le(tol).all()); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/minimal.rs b/third_party/rust/packed_simd/src/api/minimal.rs new file mode 100644 index 0000000000..840d9e3258 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/minimal.rs @@ -0,0 +1,6 @@ +#[macro_use] +mod iuf; +#[macro_use] +mod mask; +#[macro_use] +mod ptr; diff --git a/third_party/rust/packed_simd/src/api/minimal/iuf.rs b/third_party/rust/packed_simd/src/api/minimal/iuf.rs new file mode 100644 index 0000000000..a155ac178a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/minimal/iuf.rs @@ -0,0 +1,169 @@ +//! Minimal API of signed integer, unsigned integer, and floating-point +//! vectors. + +macro_rules! 
impl_minimal_iuf { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident | + $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { + + $(#[$doc])* + pub type $id = Simd<[$elem_ty; $elem_count]>; + + impl sealed::Simd for $id { + type Element = $elem_ty; + const LANES: usize = $elem_count; + type LanesType = [u32; $elem_count]; + } + + impl $id { + /// Creates a new instance with each vector elements initialized + /// with the provided values. + #[inline] + #[allow(clippy::too_many_arguments)] + pub const fn new($($elem_name: $elem_ty),*) -> Self { + Simd(codegen::$id($($elem_name as $ielem_ty),*)) + } + + /// Returns the number of vector lanes. + #[inline] + pub const fn lanes() -> usize { + $elem_count + } + + /// Constructs a new instance with each element initialized to + /// `value`. + #[inline] + pub const fn splat(value: $elem_ty) -> Self { + Simd(codegen::$id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + value as $ielem_ty + }),*)) + } + + /// Extracts the value at `index`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + pub fn extract(self, index: usize) -> $elem_ty { + assert!(index < $elem_count); + unsafe { self.extract_unchecked(index) } + } + + /// Extracts the value at `index`. + /// + /// # Safety + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { + use crate::llvm::simd_extract; + let e: $ielem_ty = simd_extract(self.0, index as u32); + e as $elem_ty + } + + /// Returns a new vector where the value at `index` is replaced by `new_value`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. 
+ #[inline] + #[must_use = "replace does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { + assert!(index < $elem_count); + unsafe { self.replace_unchecked(index, new_value) } + } + + /// Returns a new vector where the value at `index` is replaced by `new_value`. + /// + /// # Safety + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + #[must_use = "replace_unchecked does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub unsafe fn replace_unchecked( + self, + index: usize, + new_value: $elem_ty, + ) -> Self { + use crate::llvm::simd_insert; + Simd(simd_insert(self.0, index as u32, new_value as $ielem_ty)) + } + } + + test_if!{ + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. + #[allow(clippy::float_cmp)] + pub mod [<$id _minimal>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn minimal() { + // lanes: + assert_eq!($elem_count, $id::lanes()); + + // splat and extract / extract_unchecked: + const VAL: $elem_ty = 7 as $elem_ty; + const VEC: $id = $id::splat(VAL); + for i in 0..$id::lanes() { + assert_eq!(VAL, VEC.extract(i)); + assert_eq!( + VAL, unsafe { VEC.extract_unchecked(i) } + ); + } + + // replace / replace_unchecked + let new_vec = VEC.replace(0, 42 as $elem_ty); + for i in 0..$id::lanes() { + if i == 0 { + assert_eq!(42 as $elem_ty, new_vec.extract(i)); + } else { + assert_eq!(VAL, new_vec.extract(i)); + } + } + let new_vec = unsafe { + VEC.replace_unchecked(0, 42 as $elem_ty) + }; + for i in 0..$id::lanes() { + if i == 0 { + assert_eq!(42 as $elem_ty, new_vec.extract(i)); + } else { + assert_eq!(VAL, new_vec.extract(i)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + 
// #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn extract_panic_oob() { + const VAL: $elem_ty = 7 as $elem_ty; + const VEC: $id = $id::splat(VAL); + let _ = VEC.extract($id::lanes()); + } + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn replace_panic_oob() { + const VAL: $elem_ty = 7 as $elem_ty; + const VEC: $id = $id::splat(VAL); + let _ = VEC.replace($id::lanes(), 42 as $elem_ty); + } + } + } + } + } +} diff --git a/third_party/rust/packed_simd/src/api/minimal/mask.rs b/third_party/rust/packed_simd/src/api/minimal/mask.rs new file mode 100644 index 0000000000..a420060b42 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/minimal/mask.rs @@ -0,0 +1,176 @@ +//! Minimal API of mask vectors. + +macro_rules! impl_minimal_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident + | $test_tt:tt | $($elem_name:ident),+ | $(#[$doc:meta])*) => { + $(#[$doc])* + pub type $id = Simd<[$elem_ty; $elem_count]>; + + impl sealed::Simd for $id { + type Element = $elem_ty; + const LANES: usize = $elem_count; + type LanesType = [u32; $elem_count]; + } + + impl $id { + /// Creates a new instance with each vector elements initialized + /// with the provided values. + #[inline] + #[allow(clippy::too_many_arguments)] + pub const fn new($($elem_name: bool),*) -> Self { + Simd(codegen::$id($(Self::bool_to_internal($elem_name)),*)) + } + + /// Converts a boolean type into the type of the vector lanes. + #[inline] + #[allow(clippy::indexing_slicing)] + const fn bool_to_internal(x: bool) -> $ielem_ty { + [0 as $ielem_ty, !(0 as $ielem_ty)][x as usize] + } + + /// Returns the number of vector lanes. 
+ #[inline] + pub const fn lanes() -> usize { + $elem_count + } + + /// Constructs a new instance with each element initialized to + /// `value`. + #[inline] + pub const fn splat(value: bool) -> Self { + Simd(codegen::$id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + Self::bool_to_internal(value) + }),*)) + } + + /// Extracts the value at `index`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + pub fn extract(self, index: usize) -> bool { + assert!(index < $elem_count); + unsafe { self.extract_unchecked(index) } + } + + /// Extracts the value at `index`. + /// + /// # Safety + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn extract_unchecked(self, index: usize) -> bool { + use crate::llvm::simd_extract; + let x: $ielem_ty = simd_extract(self.0, index as u32); + x != 0 + } + + /// Returns a new vector where the value at `index` is replaced by + /// `new_value`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + #[must_use = "replace does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub fn replace(self, index: usize, new_value: bool) -> Self { + assert!(index < $elem_count); + unsafe { self.replace_unchecked(index, new_value) } + } + + /// Returns a new vector where the value at `index` is replaced by + /// `new_value`. + /// + /// # Safety + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + #[must_use = "replace_unchecked does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub unsafe fn replace_unchecked( + self, + index: usize, + new_value: bool, + ) -> Self { + use crate::llvm::simd_insert; + Simd(simd_insert(self.0, index as u32, + Self::bool_to_internal(new_value))) + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _minimal>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn minimal() { + // TODO: test new + + // lanes: + assert_eq!($elem_count, $id::lanes()); + + // splat and extract / extract_unchecked: + let vec = $id::splat(true); + for i in 0..$id::lanes() { + assert_eq!(true, vec.extract(i)); + assert_eq!(true, + unsafe { vec.extract_unchecked(i) } + ); + } + + // replace / replace_unchecked + let new_vec = vec.replace(0, false); + for i in 0..$id::lanes() { + if i == 0 { + assert_eq!(false, new_vec.extract(i)); + } else { + assert_eq!(true, new_vec.extract(i)); + } + } + let new_vec = unsafe { + vec.replace_unchecked(0, false) + }; + for i in 0..$id::lanes() { + if i == 0 { + assert_eq!(false, new_vec.extract(i)); + } else { + assert_eq!(true, new_vec.extract(i)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn extract_panic_oob() { + let vec = $id::splat(false); + let _ = vec.extract($id::lanes()); + } + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn replace_panic_oob() { + let vec = $id::splat(false); + let _ = vec.replace($id::lanes(), true); + } + } + } + } + } +} diff --git a/third_party/rust/packed_simd/src/api/minimal/ptr.rs b/third_party/rust/packed_simd/src/api/minimal/ptr.rs new file mode 100644 index 0000000000..c3d61fbf6d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/minimal/ptr.rs @@ -0,0 +1,1377 @@ +//! Minimal API of pointer vectors. + +macro_rules! 
impl_minimal_p { + ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident, + $usize_ty:ident, $isize_ty:ident | $ref:ident | $test_tt:tt + | $($elem_name:ident),+ | ($true:expr, $false:expr) | + $(#[$doc:meta])*) => { + + $(#[$doc])* + pub type $id<T> = Simd<[$elem_ty; $elem_count]>; + + impl<T> sealed::Simd for $id<T> { + type Element = $elem_ty; + const LANES: usize = $elem_count; + type LanesType = [u32; $elem_count]; + } + + impl<T> $id<T> { + /// Creates a new instance with each vector elements initialized + /// with the provided values. + #[inline] + #[allow(clippy::too_many_arguments)] + pub const fn new($($elem_name: $elem_ty),*) -> Self { + Simd(codegen::$id($($elem_name),*)) + } + + /// Returns the number of vector lanes. + #[inline] + pub const fn lanes() -> usize { + $elem_count + } + + /// Constructs a new instance with each element initialized to + /// `value`. + #[inline] + pub const fn splat(value: $elem_ty) -> Self { + Simd(codegen::$id($({ + #[allow(non_camel_case_types, dead_code)] + struct $elem_name; + value + }),*)) + } + + /// Constructs a new instance with each element initialized to + /// `null`. + #[inline] + pub const fn null() -> Self { + Self::splat(crate::ptr::null_mut() as $elem_ty) + } + + /// Returns a mask that selects those lanes that contain `null` + /// pointers. + #[inline] + pub fn is_null(self) -> $mask_ty { + self.eq(Self::null()) + } + + /// Extracts the value at `index`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + pub fn extract(self, index: usize) -> $elem_ty { + assert!(index < $elem_count); + unsafe { self.extract_unchecked(index) } + } + + /// Extracts the value at `index`. + /// + /// # Safety + /// + /// If `index >= Self::lanes()` the behavior is undefined. 
+ #[inline] + pub unsafe fn extract_unchecked(self, index: usize) -> $elem_ty { + use crate::llvm::simd_extract; + simd_extract(self.0, index as u32) + } + + /// Returns a new vector where the value at `index` is replaced by + /// `new_value`. + /// + /// # Panics + /// + /// If `index >= Self::lanes()`. + #[inline] + #[must_use = "replace does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + #[allow(clippy::not_unsafe_ptr_arg_deref)] + pub fn replace(self, index: usize, new_value: $elem_ty) -> Self { + assert!(index < $elem_count); + unsafe { self.replace_unchecked(index, new_value) } + } + + /// Returns a new vector where the value at `index` is replaced by `new_value`. + /// + /// # Safety + /// + /// If `index >= Self::lanes()` the behavior is undefined. + #[inline] + #[must_use = "replace_unchecked does not modify the original value - \ + it returns a new vector with the value at `index` \ + replaced by `new_value`d" + ] + pub unsafe fn replace_unchecked( + self, + index: usize, + new_value: $elem_ty, + ) -> Self { + use crate::llvm::simd_insert; + Simd(simd_insert(self.0, index as u32, new_value)) + } + } + + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _minimal>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn minimal() { + // lanes: + assert_eq!($elem_count, $id::<i32>::lanes()); + + // splat and extract / extract_unchecked: + let VAL7: <$id<i32> as sealed::Simd>::Element + = $ref!(7); + let VAL42: <$id<i32> as sealed::Simd>::Element + = $ref!(42); + let VEC: $id<i32> = $id::splat(VAL7); + for i in 0..$id::<i32>::lanes() { + assert_eq!(VAL7, VEC.extract(i)); + assert_eq!( + VAL7, unsafe { VEC.extract_unchecked(i) } + ); + } + + // replace / replace_unchecked + let new_vec = VEC.replace(0, VAL42); + for i in 0..$id::<i32>::lanes() { + if i == 0 { + assert_eq!(VAL42, new_vec.extract(i)); + } else { + assert_eq!(VAL7, new_vec.extract(i)); + } + } + let new_vec = unsafe { + VEC.replace_unchecked(0, VAL42) + }; + for i in 0..$id::<i32>::lanes() { + if i == 0 { + assert_eq!(VAL42, new_vec.extract(i)); + } else { + assert_eq!(VAL7, new_vec.extract(i)); + } + } + + let mut n = $id::<i32>::null(); + assert_eq!( + n, + $id::<i32>::splat(unsafe { crate::mem::zeroed() }) + ); + assert!(n.is_null().all()); + n = n.replace( + 0, unsafe { crate::mem::transmute(1_isize) } + ); + assert!(!n.is_null().all()); + if $id::<i32>::lanes() > 1 { + assert!(n.is_null().any()); + } else { + assert!(!n.is_null().any()); + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn extract_panic_oob() { + let VAL: <$id<i32> as sealed::Simd>::Element + = $ref!(7); + let VEC: $id<i32> = $id::splat(VAL); + let _ = VEC.extract($id::<i32>::lanes()); + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = 
"wasm32"))] + #[test] + #[should_panic] + fn replace_panic_oob() { + let VAL: <$id<i32> as sealed::Simd>::Element + = $ref!(7); + let VAL42: <$id<i32> as sealed::Simd>::Element + = $ref!(42); + let VEC: $id<i32> = $id::splat(VAL); + let _ = VEC.replace($id::<i32>::lanes(), VAL42); + } + } + } + } + + impl<T> crate::fmt::Debug for $id<T> { + #[allow(clippy::missing_inline_in_public_items)] + fn fmt(&self, f: &mut crate::fmt::Formatter<'_>) + -> crate::fmt::Result { + write!( + f, + "{}<{}>(", + stringify!($id), + crate::intrinsics::type_name::<T>() + )?; + for i in 0..$elem_count { + if i > 0 { + write!(f, ", ")?; + } + self.extract(i).fmt(f)?; + } + write!(f, ")") + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _fmt_debug>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn debug() { + use arrayvec::{ArrayString,ArrayVec}; + type TinyString = ArrayString<[u8; 512]>; + + use crate::fmt::Write; + let v = $id::<i32>::default(); + let mut s = TinyString::new(); + write!(&mut s, "{:?}", v).unwrap(); + + let mut beg = TinyString::new(); + write!(&mut beg, "{}<i32>(", stringify!($id)).unwrap(); + assert!( + s.starts_with(beg.as_str()), + "s = {} (should start with = {})", s, beg + ); + assert!(s.ends_with(")")); + let s: ArrayVec<[TinyString; 64]> + = s.replace(beg.as_str(), "") + .replace(")", "").split(",") + .map(|v| TinyString::from(v.trim()).unwrap()) + .collect(); + assert_eq!(s.len(), $id::<i32>::lanes()); + for (index, ss) in s.into_iter().enumerate() { + let mut e = TinyString::new(); + write!(&mut e, "{:?}", v.extract(index)).unwrap(); + assert_eq!(ss, e); + } + } + } + } + } + + impl<T> Default for $id<T> { + #[inline] + fn default() -> Self { + // FIXME: ptrs do not implement default + Self::null() + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _default>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn default() { + let a = $id::<i32>::default(); + for i in 0..$id::<i32>::lanes() { + assert_eq!( + a.extract(i), unsafe { crate::mem::zeroed() } + ); + } + } + } + } + } + + impl<T> $id<T> { + /// Lane-wise equality comparison. + #[inline] + pub fn eq(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_eq; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_eq(a.0, b.0)) + } + } + + /// Lane-wise inequality comparison. + #[inline] + pub fn ne(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_ne; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_ne(a.0, b.0)) + } + } + + /// Lane-wise less-than comparison. + #[inline] + pub fn lt(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_lt; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_lt(a.0, b.0)) + } + } + + /// Lane-wise less-than-or-equals comparison. + #[inline] + pub fn le(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_le; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_le(a.0, b.0)) + } + } + + /// Lane-wise greater-than comparison. + #[inline] + pub fn gt(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_gt; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_gt(a.0, b.0)) + } + } + + /// Lane-wise greater-than-or-equals comparison. 
+ #[inline] + pub fn ge(self, other: Self) -> $mask_ty { + unsafe { + use crate::llvm::simd_ge; + let a: $usize_ty = crate::mem::transmute(self); + let b: $usize_ty = crate::mem::transmute(other); + Simd(simd_ge(a.0, b.0)) + } + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _cmp_vertical>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn cmp() { + let a = $id::<i32>::null(); + let b = $id::<i32>::splat(unsafe { + crate::mem::transmute(1_isize) + }); + + let r = a.lt(b); + let e = $mask_ty::splat(true); + assert!(r == e); + let r = a.le(b); + assert!(r == e); + + let e = $mask_ty::splat(false); + let r = a.gt(b); + assert!(r == e); + let r = a.ge(b); + assert!(r == e); + let r = a.eq(b); + assert!(r == e); + + let mut a = a; + let mut b = b; + let mut e = e; + for i in 0..$id::<i32>::lanes() { + if i % 2 == 0 { + a = a.replace( + i, + unsafe { crate::mem::transmute(0_isize) } + ); + b = b.replace( + i, + unsafe { crate::mem::transmute(1_isize) } + ); + e = e.replace(i, true); + } else { + a = a.replace( + i, + unsafe { crate::mem::transmute(1_isize) } + ); + b = b.replace( + i, + unsafe { crate::mem::transmute(0_isize) } + ); + e = e.replace(i, false); + } + } + let r = a.lt(b); + assert!(r == e); + } + } + } + } + + #[allow(clippy::partialeq_ne_impl)] + impl<T> crate::cmp::PartialEq<$id<T>> for $id<T> { + #[inline] + fn eq(&self, other: &Self) -> bool { + $id::<T>::eq(*self, *other).all() + } + #[inline] + fn ne(&self, other: &Self) -> bool { + $id::<T>::ne(*self, *other).any() + } + } + + // FIXME: https://github.com/rust-lang-nursery/rust-clippy/issues/2892 + #[allow(clippy::partialeq_ne_impl)] + impl<T> crate::cmp::PartialEq<LexicographicallyOrdered<$id<T>>> + for LexicographicallyOrdered<$id<T>> + { + #[inline] + fn eq(&self, other: &Self) -> bool { + self.0 == other.0 + } + #[inline] + fn ne(&self, other: &Self) -> bool { + self.0 != other.0 + } + } + + test_if!{ 
+ $test_tt: + paste::item! { + pub mod [<$id _cmp_PartialEq>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn partial_eq() { + let a = $id::<i32>::null(); + let b = $id::<i32>::splat(unsafe { + crate::mem::transmute(1_isize) + }); + + assert!(a != b); + assert!(!(a == b)); + assert!(a == a); + assert!(!(a != a)); + + if $id::<i32>::lanes() > 1 { + let a = $id::<i32>::null().replace(0, unsafe { + crate::mem::transmute(1_isize) + }); + let b = $id::<i32>::splat(unsafe { + crate::mem::transmute(1_isize) + }); + + assert!(a != b); + assert!(!(a == b)); + assert!(a == a); + assert!(!(a != a)); + } + } + } + } + } + + impl<T> crate::cmp::Eq for $id<T> {} + impl<T> crate::cmp::Eq for LexicographicallyOrdered<$id<T>> {} + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _cmp_eq>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn eq() { + fn foo<E: crate::cmp::Eq>(_: E) {} + let a = $id::<i32>::null(); + foo(a); + } + } + } + } + + impl<T> From<[$elem_ty; $elem_count]> for $id<T> { + #[inline] + fn from(array: [$elem_ty; $elem_count]) -> Self { + unsafe { + // FIXME: unnecessary zeroing; better than UB. + let mut u: Self = crate::mem::zeroed(); + crate::ptr::copy_nonoverlapping( + &array as *const [$elem_ty; $elem_count] as *const u8, + &mut u as *mut Self as *mut u8, + crate::mem::size_of::<Self>() + ); + u + } + } + } + impl<T> Into<[$elem_ty; $elem_count]> for $id<T> { + #[inline] + fn into(self) -> [$elem_ty; $elem_count] { + unsafe { + // FIXME: unnecessary zeroing; better than UB. + let mut u: [$elem_ty; $elem_count] = crate::mem::zeroed(); + crate::ptr::copy_nonoverlapping( + &self as *const $id<T> as *const u8, + &mut u as *mut [$elem_ty; $elem_count] as *mut u8, + crate::mem::size_of::<Self>() + ); + u + } + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _from>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn array() { + let values = [1_i32; $elem_count]; + + let mut vec: $id<i32> = Default::default(); + let mut array = [ + $id::<i32>::null().extract(0); $elem_count + ]; + + for i in 0..$elem_count { + let ptr = &values[i] as *const i32 as *mut i32; + vec = vec.replace(i, ptr); + array[i] = ptr; + } + + // FIXME: there is no impl of From<$id<T>> for [$elem_ty; N] + // let a0 = From::from(vec); + // assert_eq!(a0, array); + #[allow(unused_assignments)] + let mut a1 = array; + a1 = vec.into(); + assert_eq!(a1, array); + + let v0: $id<i32> = From::from(array); + assert_eq!(v0, vec); + let v1: $id<i32> = array.into(); + assert_eq!(v1, vec); + } + } + } + } + + impl<T> $id<T> { + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned + /// to an `align_of::<Self>()` boundary. + #[inline] + pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { + unsafe { + assert!(slice.len() >= $elem_count); + let target_ptr = slice.get_unchecked(0) as *const $elem_ty; + assert!( + target_ptr.align_offset(crate::mem::align_of::<Self>()) + == 0 + ); + Self::from_slice_aligned_unchecked(slice) + } + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()`. + #[inline] + pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { + unsafe { + assert!(slice.len() >= $elem_count); + Self::from_slice_unaligned_unchecked(slice) + } + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Safety + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned + /// to an `align_of::<Self>()` boundary, the behavior is undefined. 
+ #[inline] + pub unsafe fn from_slice_aligned_unchecked(slice: &[$elem_ty]) + -> Self { + #[allow(clippy::cast_ptr_alignment)] + *(slice.get_unchecked(0) as *const $elem_ty as *const Self) + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Safety + /// + /// If `slice.len() < Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn from_slice_unaligned_unchecked( + slice: &[$elem_ty], + ) -> Self { + use crate::mem::size_of; + let target_ptr = + slice.get_unchecked(0) as *const $elem_ty as *const u8; + let mut x = Self::splat(crate::ptr::null_mut() as $elem_ty); + let self_ptr = &mut x as *mut Self as *mut u8; + crate::ptr::copy_nonoverlapping( + target_ptr, + self_ptr, + size_of::<Self>(), + ); + x + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _slice_from_slice>] { + use super::*; + use crate::iter::Iterator; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_slice_unaligned() { + let (null, non_null) = ptr_vals!($id<i32>); + + let mut unaligned = [ + non_null; $id::<i32>::lanes() + 1 + ]; + unaligned[0] = null; + let vec = $id::<i32>::from_slice_unaligned( + &unaligned[1..] + ); + for (index, &b) in unaligned.iter().enumerate() { + if index == 0 { + assert_eq!(b, null); + } else { + assert_eq!(b, non_null); + assert_eq!(b, vec.extract(index - 1)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_unaligned_fail() { + let (_null, non_null) = ptr_vals!($id<i32>); + let unaligned = [non_null; $id::<i32>::lanes() + 1]; + // the slice is not large enough => panic + let _vec = $id::<i32>::from_slice_unaligned( + &unaligned[2..] 
+ ); + } + + union A { + data: [<$id<i32> as sealed::Simd>::Element; + 2 * $id::<i32>::lanes()], + _vec: $id<i32>, + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_slice_aligned() { + let (null, non_null) = ptr_vals!($id<i32>); + let mut aligned = A { + data: [null; 2 * $id::<i32>::lanes()], + }; + for i in + $id::<i32>::lanes()..(2 * $id::<i32>::lanes()) { + unsafe { + aligned.data[i] = non_null; + } + } + + let vec = unsafe { + $id::<i32>::from_slice_aligned( + &aligned.data[$id::<i32>::lanes()..] + ) + }; + for (index, &b) in unsafe { + aligned.data.iter().enumerate() + } { + if index < $id::<i32>::lanes() { + assert_eq!(b, null); + } else { + assert_eq!(b, non_null); + assert_eq!( + b, vec.extract(index - $id::<i32>::lanes()) + ); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_aligned_fail_lanes() { + let (_null, non_null) = ptr_vals!($id<i32>); + let aligned = A { + data: [non_null; 2 * $id::<i32>::lanes()], + }; + // the slice is not large enough => panic + let _vec = unsafe { + $id::<i32>::from_slice_aligned( + &aligned.data[2 * $id::<i32>::lanes()..] 
+ ) + }; + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_aligned_fail_align() { + unsafe { + let (null, _non_null) = ptr_vals!($id<i32>); + let aligned = A { + data: [null; 2 * $id::<i32>::lanes()], + }; + + // get a pointer to the front of data + let ptr = aligned.data.as_ptr(); + // offset pointer by one element + let ptr = ptr.wrapping_add(1); + + if ptr.align_offset( + crate::mem::align_of::<$id<i32>>() + ) == 0 { + // the pointer is properly aligned, so + // from_slice_aligned won't fail here (e.g. this + // can happen for i128x1). So we panic to make + // the "should_fail" test pass: + panic!("ok"); + } + + // create a slice - this is safe, because the + // elements of the slice exist, are properly + // initialized, and properly aligned: + let s = slice::from_raw_parts( + ptr, $id::<i32>::lanes() + ); + // this should always panic because the slice + // alignment does not match the alignment + // requirements for the vector type: + let _vec = $id::<i32>::from_slice_aligned(s); + } + } + } + } + } + + impl<T> $id<T> { + /// Writes the values of the vector to the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not + /// aligned to an `align_of::<Self>()` boundary. + #[inline] + pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { + unsafe { + assert!(slice.len() >= $elem_count); + let target_ptr = + slice.get_unchecked_mut(0) as *mut $elem_ty; + assert!( + target_ptr.align_offset(crate::mem::align_of::<Self>()) + == 0 + ); + self.write_to_slice_aligned_unchecked(slice); + } + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()`. 
+ #[inline] + pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { + unsafe { + assert!(slice.len() >= $elem_count); + self.write_to_slice_unaligned_unchecked(slice); + } + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Safety + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not + /// aligned to an `align_of::<Self>()` boundary, the behavior is + /// undefined. + #[inline] + pub unsafe fn write_to_slice_aligned_unchecked( + self, slice: &mut [$elem_ty], + ) { + #[allow(clippy::cast_ptr_alignment)] + *(slice.get_unchecked_mut(0) as *mut $elem_ty as *mut Self) = + self; + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Safety + /// + /// If `slice.len() < Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn write_to_slice_unaligned_unchecked( + self, slice: &mut [$elem_ty], + ) { + let target_ptr = + slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; + let self_ptr = &self as *const Self as *const u8; + crate::ptr::copy_nonoverlapping( + self_ptr, + target_ptr, + crate::mem::size_of::<Self>(), + ); + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _slice_write_to_slice>] { + use super::*; + use crate::iter::Iterator; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn write_to_slice_unaligned() { + let (null, non_null) = ptr_vals!($id<i32>); + let mut unaligned = [null; $id::<i32>::lanes() + 1]; + let vec = $id::<i32>::splat(non_null); + vec.write_to_slice_unaligned(&mut unaligned[1..]); + for (index, &b) in unaligned.iter().enumerate() { + if index == 0 { + assert_eq!(b, null); + } else { + assert_eq!(b, non_null); + assert_eq!(b, vec.extract(index - 1)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_unaligned_fail() { + let (null, non_null) = ptr_vals!($id<i32>); + let mut unaligned = [null; $id::<i32>::lanes() + 1]; + let vec = $id::<i32>::splat(non_null); + // the slice is not large enough => panic + vec.write_to_slice_unaligned(&mut unaligned[2..]); + } + + union A { + data: [<$id<i32> as sealed::Simd>::Element; + 2 * $id::<i32>::lanes()], + _vec: $id<i32>, + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn write_to_slice_aligned() { + let (null, non_null) = ptr_vals!($id<i32>); + let mut aligned = A { + data: [null; 2 * $id::<i32>::lanes()], + }; + let vec = $id::<i32>::splat(non_null); + unsafe { + vec.write_to_slice_aligned( + &mut aligned.data[$id::<i32>::lanes()..] 
+ ) + }; + for (index, &b) in + unsafe { aligned.data.iter().enumerate() } { + if index < $id::<i32>::lanes() { + assert_eq!(b, null); + } else { + assert_eq!(b, non_null); + assert_eq!( + b, vec.extract(index - $id::<i32>::lanes()) + ); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_aligned_fail_lanes() { + let (null, non_null) = ptr_vals!($id<i32>); + let mut aligned = A { + data: [null; 2 * $id::<i32>::lanes()], + }; + let vec = $id::<i32>::splat(non_null); + // the slice is not large enough => panic + unsafe { + vec.write_to_slice_aligned( + &mut aligned.data[2 * $id::<i32>::lanes()..] + ) + }; + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_aligned_fail_align() { + let (null, non_null) = ptr_vals!($id<i32>); + unsafe { + let mut aligned = A { + data: [null; 2 * $id::<i32>::lanes()], + }; + + // get a pointer to the front of data + let ptr = aligned.data.as_mut_ptr(); + // offset pointer by one element + let ptr = ptr.wrapping_add(1); + + if ptr.align_offset( + crate::mem::align_of::<$id<i32>>() + ) == 0 { + // the pointer is properly aligned, so + // write_to_slice_aligned won't fail here (e.g. + // this can happen for i128x1). 
So we panic to + // make the "should_fail" test pass: + panic!("ok"); + } + + // create a slice - this is safe, because the + // elements of the slice exist, are properly + // initialized, and properly aligned: + let s = slice::from_raw_parts_mut( + ptr, $id::<i32>::lanes() + ); + // this should always panic because the slice + // alignment does not match the alignment + // requirements for the vector type: + let vec = $id::<i32>::splat(non_null); + vec.write_to_slice_aligned(s); + } + } + } + } + } + + impl<T> crate::hash::Hash for $id<T> { + #[inline] + fn hash<H: crate::hash::Hasher>(&self, state: &mut H) { + let s: $usize_ty = unsafe { crate::mem::transmute(*self) }; + s.hash(state) + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _hash>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn hash() { + use crate::hash::{Hash, Hasher}; + #[allow(deprecated)] + use crate::hash::{SipHasher13}; + + let values = [1_i32; $elem_count]; + + let mut vec: $id<i32> = Default::default(); + let mut array = [ + $id::<i32>::null().extract(0); + $elem_count + ]; + + for i in 0..$elem_count { + let ptr = &values[i] as *const i32 as *mut i32; + vec = vec.replace(i, ptr); + array[i] = ptr; + } + + #[allow(deprecated)] + let mut a_hash = SipHasher13::new(); + let mut v_hash = a_hash.clone(); + array.hash(&mut a_hash); + vec.hash(&mut v_hash); + assert_eq!(a_hash.finish(), v_hash.finish()); + } + } + } + } + + impl<T> $id<T> { + /// Calculates the offset from a pointer. + /// + /// `count` is in units of `T`; e.g. a count of `3` represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// Undefined Behavior: + /// + /// * Both the starting and resulting pointer must be either in + /// bounds or one byte past the end of an allocated object. 
+ /// + /// * The computed offset, in bytes, cannot overflow an `isize`. + /// + /// * The offset being in bounds cannot rely on "wrapping around" + /// the address space. That is, the infinite-precision sum, in bytes + /// must fit in a `usize`. + /// + /// The compiler and standard library generally tries to ensure + /// allocations never reach a size where an offset is a concern. For + /// instance, `Vec` and `Box` ensure they never allocate more than + /// `isize::MAX` bytes, so `vec.as_ptr().offset(vec.len() as isize)` + /// is always safe. + /// + /// Most platforms fundamentally can't even construct such an + /// allocation. For instance, no known 64-bit platform can ever + /// serve a request for 263 bytes due to page-table limitations or + /// splitting the address space. However, some 32-bit and 16-bit + /// platforms may successfully serve a request for more than + /// `isize::MAX` bytes with things like Physical Address Extension. + /// As such, memory acquired directly from allocators or memory + /// mapped files may be too large to handle with this function. + /// + /// Consider using `wrapping_offset` instead if these constraints + /// are difficult to satisfy. The only advantage of this method is + /// that it enables more aggressive compiler optimizations. + #[inline] + pub unsafe fn offset(self, count: $isize_ty) -> Self { + // FIXME: should use LLVM's `add nsw nuw` + self.wrapping_offset(count) + } + + /// Calculates the offset from a pointer using wrapping arithmetic. + /// + /// `count` is in units of `T`; e.g. a count of `3` represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// The resulting pointer does not need to be in bounds, but it is + /// potentially hazardous to dereference (which requires unsafe). + /// + /// Always use `.offset(count)` instead when possible, because + /// offset allows the compiler to optimize better. 
+ #[inline] + pub fn wrapping_offset(self, count: $isize_ty) -> Self { + unsafe { + let x: $isize_ty = crate::mem::transmute(self); + // note: {+,*} currently performs a `wrapping_{add, mul}` + crate::mem::transmute( + x + (count * crate::mem::size_of::<T>() as isize) + ) + } + } + + /// Calculates the distance between two pointers. + /// + /// The returned value is in units of `T`: the distance in bytes is + /// divided by `mem::size_of::<T>()`. + /// + /// This function is the inverse of offset. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// Undefined Behavior: + /// + /// * Both the starting and other pointer must be either in bounds + /// or one byte past the end of the same allocated object. + /// + /// * The distance between the pointers, in bytes, cannot overflow + /// an `isize`. + /// + /// * The distance between the pointers, in bytes, must be an exact + /// multiple of the size of `T`. + /// + /// * The distance being in bounds cannot rely on "wrapping around" + /// the address space. + /// + /// The compiler and standard library generally try to ensure + /// allocations never reach a size where an offset is a concern. For + /// instance, `Vec` and `Box` ensure they never allocate more than + /// `isize::MAX` bytes, so `ptr_into_vec.offset_from(vec.as_ptr())` + /// is always safe. + /// + /// Most platforms fundamentally can't even construct such an + /// allocation. For instance, no known 64-bit platform can ever + /// serve a request for 2<sup>63</sup> bytes due to page-table limitations or + /// splitting the address space. However, some 32-bit and 16-bit + /// platforms may successfully serve a request for more than + /// `isize::MAX` bytes with things like Physical Address Extension. + /// As such, memory acquired directly from allocators or memory + /// mapped files may be too large to handle with this function.
+ /// + /// Consider using `wrapping_offset_from` instead if these constraints + /// are difficult to satisfy. The only advantage of this method is + /// that it enables more aggressive compiler optimizations. + #[inline] + pub unsafe fn offset_from(self, origin: Self) -> $isize_ty { + // FIXME: should use LLVM's `sub nsw nuw`. + self.wrapping_offset_from(origin) + } + + /// Calculates the distance between two pointers. + /// + /// The returned value is in units of `T`: the distance in bytes is + /// divided by `mem::size_of::<T>()`. + /// + /// If the address difference between the two pointers is not a + /// multiple of `mem::size_of::<T>()` then the result of the + /// division is rounded towards zero. + /// + /// Though this method is safe for any two pointers, note that its + /// result will be mostly useless if the two pointers aren't into + /// the same allocated object, for example if they point to two + /// different local variables. + #[inline] + pub fn wrapping_offset_from(self, origin: Self) -> $isize_ty { + let x: $isize_ty = unsafe { crate::mem::transmute(self) }; + let y: $isize_ty = unsafe { crate::mem::transmute(origin) }; + // note: {-,/} currently perform wrapping_{sub, div} + (y - x) / (crate::mem::size_of::<T>() as isize) + } + + /// Calculates the offset from a pointer (convenience for + /// `.offset(count as isize)`). + /// + /// `count` is in units of `T`; e.g. a count of 3 represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// Undefined Behavior: + /// + /// * Both the starting and resulting pointer must be either in + /// bounds or one byte past the end of an allocated object. + /// + /// * The computed offset, in bytes, cannot overflow an `isize`. + /// + /// * The offset being in bounds cannot rely on "wrapping around" + /// the address space. That is, the infinite-precision sum must fit + /// in a `usize`.
+ /// + /// The compiler and standard library generally try to ensure + /// allocations never reach a size where an offset is a concern. For + /// instance, `Vec` and `Box` ensure they never allocate more than + /// `isize::MAX` bytes, so `vec.as_ptr().add(vec.len())` is always + /// safe. + /// + /// Most platforms fundamentally can't even construct such an + /// allocation. For instance, no known 64-bit platform can ever + /// serve a request for 2<sup>63</sup> bytes due to page-table limitations or + /// splitting the address space. However, some 32-bit and 16-bit + /// platforms may successfully serve a request for more than + /// `isize::MAX` bytes with things like Physical Address Extension. + /// As such, memory acquired directly from allocators or memory + /// mapped files may be too large to handle with this function. + /// + /// Consider using `wrapping_offset` instead if these constraints + /// are difficult to satisfy. The only advantage of this method is + /// that it enables more aggressive compiler optimizations. + #[inline] + #[allow(clippy::should_implement_trait)] + pub unsafe fn add(self, count: $usize_ty) -> Self { + self.offset(count.cast()) + } + + /// Calculates the offset from a pointer (convenience for + /// `.offset((count as isize).wrapping_neg())`). + /// + /// `count` is in units of T; e.g. a `count` of 3 represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// If any of the following conditions are violated, the result is + /// Undefined Behavior: + /// + /// * Both the starting and resulting pointer must be either in + /// bounds or one byte past the end of an allocated object. + /// + /// * The computed offset cannot exceed `isize::MAX` **bytes**. + /// + /// * The offset being in bounds cannot rely on "wrapping around" + /// the address space. That is, the infinite-precision sum must fit + /// in a `usize`.
+ /// + /// The compiler and standard library generally tries to ensure + /// allocations never reach a size where an offset is a concern. For + /// instance, `Vec` and `Box` ensure they never allocate more than + /// `isize::MAX` bytes, so + /// `vec.as_ptr().add(vec.len()).sub(vec.len())` is always safe. + /// + /// Most platforms fundamentally can't even construct such an + /// allocation. For instance, no known 64-bit platform can ever + /// serve a request for 2<sup>63</sup> bytes due to page-table + /// limitations or splitting the address space. However, some 32-bit + /// and 16-bit platforms may successfully serve a request for more + /// than `isize::MAX` bytes with things like Physical Address + /// Extension. As such, memory acquired directly from allocators or + /// memory mapped files *may* be too large to handle with this + /// function. + /// + /// Consider using `wrapping_offset` instead if these constraints + /// are difficult to satisfy. The only advantage of this method is + /// that it enables more aggressive compiler optimizations. + #[inline] + #[allow(clippy::should_implement_trait)] + pub unsafe fn sub(self, count: $usize_ty) -> Self { + let x: $isize_ty = count.cast(); + // note: - is currently wrapping_neg + self.offset(-x) + } + + /// Calculates the offset from a pointer using wrapping arithmetic. + /// (convenience for `.wrapping_offset(count as isize)`) + /// + /// `count` is in units of T; e.g. a `count` of 3 represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// The resulting pointer does not need to be in bounds, but it is + /// potentially hazardous to dereference (which requires `unsafe`). + /// + /// Always use `.add(count)` instead when possible, because `add` + /// allows the compiler to optimize better. + #[inline] + pub fn wrapping_add(self, count: $usize_ty) -> Self { + self.wrapping_offset(count.cast()) + } + + /// Calculates the offset from a pointer using wrapping arithmetic. 
+ /// (convenience for `.wrapping_offset((count as + /// isize).wrapping_neg())`) + /// + /// `count` is in units of T; e.g. a `count` of 3 represents a + /// pointer offset of `3 * size_of::<T>()` bytes. + /// + /// # Safety + /// + /// The resulting pointer does not need to be in bounds, but it is + /// potentially hazardous to dereference (which requires `unsafe`). + /// + /// Always use `.sub(count)` instead when possible, because `sub` + /// allows the compiler to optimize better. + #[inline] + pub fn wrapping_sub(self, count: $usize_ty) -> Self { + let x: $isize_ty = count.cast(); + self.wrapping_offset(-1 * x) + } + } + + impl<T> $id<T> { + /// Shuffle vector elements according to `indices`. + #[inline] + pub fn shuffle1_dyn<I>(self, indices: I) -> Self + where + Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>, + { + codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) + } + } + + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _shuffle1_dyn>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn shuffle1_dyn() { + let (null, non_null) = ptr_vals!($id<i32>); + + // alternating = [non_null, null, non_null, null, ...] + let mut alternating = $id::<i32>::splat(null); + for i in 0..$id::<i32>::lanes() { + if i % 2 == 0 { + alternating = alternating.replace(i, non_null); + } + } + + type Indices = <$id<i32> + as codegen::shuffle1_dyn::Shuffle1Dyn>::Indices; + // even = [0, 0, 2, 2, 4, 4, ..] + let even = { + let mut v = Indices::splat(0); + for i in 0..$id::<i32>::lanes() { + if i % 2 == 0 { + v = v.replace(i, (i as u8).into()); + } else { + v = v.replace(i, (i as u8 - 1).into()); + } + } + v + }; + // odd = [1, 1, 3, 3, 5, 5, ...]
+ let odd = { + let mut v = Indices::splat(0); + for i in 0..$id::<i32>::lanes() { + if i % 2 != 0 { + v = v.replace(i, (i as u8).into()); + } else { + v = v.replace(i, (i as u8 + 1).into()); + } + } + v + }; + + assert_eq!( + alternating.shuffle1_dyn(even), + $id::<i32>::splat(non_null) + ); + if $id::<i32>::lanes() > 1 { + assert_eq!( + alternating.shuffle1_dyn(odd), + $id::<i32>::splat(null) + ); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops.rs b/third_party/rust/packed_simd/src/api/ops.rs new file mode 100644 index 0000000000..f71c98795d --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops.rs @@ -0,0 +1,32 @@ +//! Implementation of the `ops` traits +#[macro_use] +mod vector_mask_bitwise; +#[macro_use] +mod scalar_mask_bitwise; + +#[macro_use] +mod vector_arithmetic; +#[macro_use] +mod scalar_arithmetic; + +#[macro_use] +mod vector_bitwise; +#[macro_use] +mod scalar_bitwise; + +#[macro_use] +mod vector_shifts; +#[macro_use] +mod scalar_shifts; + +#[macro_use] +mod vector_rotates; + +#[macro_use] +mod vector_neg; + +#[macro_use] +mod vector_int_min_max; + +#[macro_use] +mod vector_float_min_max; diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs new file mode 100644 index 0000000000..da1a2037ea --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/scalar_arithmetic.rs @@ -0,0 +1,203 @@ +//! Vertical (lane-wise) vector-scalar / scalar-vector arithmetic operations. + +macro_rules! 
impl_ops_scalar_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Add<$elem_ty> for $id { + type Output = Self; + #[inline] + fn add(self, other: $elem_ty) -> Self { + self + $id::splat(other) + } + } + impl crate::ops::Add<$id> for $elem_ty { + type Output = $id; + #[inline] + fn add(self, other: $id) -> $id { + $id::splat(self) + other + } + } + + impl crate::ops::Sub<$elem_ty> for $id { + type Output = Self; + #[inline] + fn sub(self, other: $elem_ty) -> Self { + self - $id::splat(other) + } + } + impl crate::ops::Sub<$id> for $elem_ty { + type Output = $id; + #[inline] + fn sub(self, other: $id) -> $id { + $id::splat(self) - other + } + } + + impl crate::ops::Mul<$elem_ty> for $id { + type Output = Self; + #[inline] + fn mul(self, other: $elem_ty) -> Self { + self * $id::splat(other) + } + } + impl crate::ops::Mul<$id> for $elem_ty { + type Output = $id; + #[inline] + fn mul(self, other: $id) -> $id { + $id::splat(self) * other + } + } + + impl crate::ops::Div<$elem_ty> for $id { + type Output = Self; + #[inline] + fn div(self, other: $elem_ty) -> Self { + self / $id::splat(other) + } + } + impl crate::ops::Div<$id> for $elem_ty { + type Output = $id; + #[inline] + fn div(self, other: $id) -> $id { + $id::splat(self) / other + } + } + + impl crate::ops::Rem<$elem_ty> for $id { + type Output = Self; + #[inline] + fn rem(self, other: $elem_ty) -> Self { + self % $id::splat(other) + } + } + impl crate::ops::Rem<$id> for $elem_ty { + type Output = $id; + #[inline] + fn rem(self, other: $id) -> $id { + $id::splat(self) % other + } + } + + impl crate::ops::AddAssign<$elem_ty> for $id { + #[inline] + fn add_assign(&mut self, other: $elem_ty) { + *self = *self + other; + } + } + + impl crate::ops::SubAssign<$elem_ty> for $id { + #[inline] + fn sub_assign(&mut self, other: $elem_ty) { + *self = *self - other; + } + } + + impl crate::ops::MulAssign<$elem_ty> for $id { + #[inline] + fn mul_assign(&mut self, other: 
$elem_ty) { + *self = *self * other; + } + } + + impl crate::ops::DivAssign<$elem_ty> for $id { + #[inline] + fn div_assign(&mut self, other: $elem_ty) { + *self = *self / other; + } + } + + impl crate::ops::RemAssign<$elem_ty> for $id { + #[inline] + fn rem_assign(&mut self, other: $elem_ty) { + *self = *self % other; + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_scalar_arith>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_scalar_arithmetic() { + let zi = 0 as $elem_ty; + let oi = 1 as $elem_ty; + let ti = 2 as $elem_ty; + let fi = 4 as $elem_ty; + let z = $id::splat(zi); + let o = $id::splat(oi); + let t = $id::splat(ti); + let f = $id::splat(fi); + + // add + assert_eq!(zi + z, z); + assert_eq!(z + zi, z); + assert_eq!(oi + z, o); + assert_eq!(o + zi, o); + assert_eq!(ti + z, t); + assert_eq!(t + zi, t); + assert_eq!(ti + t, f); + assert_eq!(t + ti, f); + // sub + assert_eq!(zi - z, z); + assert_eq!(z - zi, z); + assert_eq!(oi - z, o); + assert_eq!(o - zi, o); + assert_eq!(ti - z, t); + assert_eq!(t - zi, t); + assert_eq!(fi - t, t); + assert_eq!(f - ti, t); + assert_eq!(f - o - o, t); + assert_eq!(f - oi - oi, t); + // mul + assert_eq!(zi * z, z); + assert_eq!(z * zi, z); + assert_eq!(zi * o, z); + assert_eq!(z * oi, z); + assert_eq!(zi * t, z); + assert_eq!(z * ti, z); + assert_eq!(oi * t, t); + assert_eq!(o * ti, t); + assert_eq!(ti * t, f); + assert_eq!(t * ti, f); + // div + assert_eq!(zi / o, z); + assert_eq!(z / oi, z); + assert_eq!(ti / o, t); + assert_eq!(t / oi, t); + assert_eq!(fi / o, f); + assert_eq!(f / oi, f); + assert_eq!(ti / t, o); + assert_eq!(t / ti, o); + assert_eq!(fi / t, t); + assert_eq!(f / ti, t); + // rem + assert_eq!(oi % o, z); + assert_eq!(o % oi, z); + assert_eq!(fi % t, z); + assert_eq!(f % ti, z); + + { + let mut v = z; + assert_eq!(v, z); + v += oi; // add_assign + assert_eq!(v, o); + v -= oi; // sub_assign + 
assert_eq!(v, z); + v = t; + v *= oi; // mul_assign + assert_eq!(v, t); + v *= ti; + assert_eq!(v, f); + v /= oi; // div_assign + assert_eq!(v, f); + v /= ti; + assert_eq!(v, t); + v %= ti; // rem_assign + assert_eq!(v, z); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs new file mode 100644 index 0000000000..88216769ae --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/scalar_bitwise.rs @@ -0,0 +1,162 @@ +//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations. + +macro_rules! impl_ops_scalar_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::ops::BitXor<$elem_ty> for $id { + type Output = Self; + #[inline] + fn bitxor(self, other: $elem_ty) -> Self { + self ^ $id::splat(other) + } + } + impl crate::ops::BitXor<$id> for $elem_ty { + type Output = $id; + #[inline] + fn bitxor(self, other: $id) -> $id { + $id::splat(self) ^ other + } + } + + impl crate::ops::BitAnd<$elem_ty> for $id { + type Output = Self; + #[inline] + fn bitand(self, other: $elem_ty) -> Self { + self & $id::splat(other) + } + } + impl crate::ops::BitAnd<$id> for $elem_ty { + type Output = $id; + #[inline] + fn bitand(self, other: $id) -> $id { + $id::splat(self) & other + } + } + + impl crate::ops::BitOr<$elem_ty> for $id { + type Output = Self; + #[inline] + fn bitor(self, other: $elem_ty) -> Self { + self | $id::splat(other) + } + } + impl crate::ops::BitOr<$id> for $elem_ty { + type Output = $id; + #[inline] + fn bitor(self, other: $id) -> $id { + $id::splat(self) | other + } + } + + impl crate::ops::BitAndAssign<$elem_ty> for $id { + #[inline] + fn bitand_assign(&mut self, other: $elem_ty) { + *self = *self & other; + } + } + impl crate::ops::BitOrAssign<$elem_ty> for $id { + #[inline] + fn bitor_assign(&mut self, other: $elem_ty) { + *self = *self | other; + } + } + impl 
crate::ops::BitXorAssign<$elem_ty> for $id { + #[inline] + fn bitxor_assign(&mut self, other: $elem_ty) { + *self = *self ^ other; + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_scalar_bitwise>] { + use super::*; + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_scalar_bitwise() { + let zi = 0 as $elem_ty; + let oi = 1 as $elem_ty; + let ti = 2 as $elem_ty; + let z = $id::splat(zi); + let o = $id::splat(oi); + let t = $id::splat(ti); + + // BitAnd: + assert_eq!(oi & o, o); + assert_eq!(o & oi, o); + assert_eq!(oi & z, z); + assert_eq!(o & zi, z); + assert_eq!(zi & o, z); + assert_eq!(z & oi, z); + assert_eq!(zi & z, z); + assert_eq!(z & zi, z); + + assert_eq!(ti & t, t); + assert_eq!(t & ti, t); + assert_eq!(ti & o, z); + assert_eq!(t & oi, z); + assert_eq!(oi & t, z); + assert_eq!(o & ti, z); + + // BitOr: + assert_eq!(oi | o, o); + assert_eq!(o | oi, o); + assert_eq!(oi | z, o); + assert_eq!(o | zi, o); + assert_eq!(zi | o, o); + assert_eq!(z | oi, o); + assert_eq!(zi | z, z); + assert_eq!(z | zi, z); + + assert_eq!(ti | t, t); + assert_eq!(t | ti, t); + assert_eq!(zi | t, t); + assert_eq!(z | ti, t); + assert_eq!(ti | z, t); + assert_eq!(t | zi, t); + + // BitXOR: + assert_eq!(oi ^ o, z); + assert_eq!(o ^ oi, z); + assert_eq!(zi ^ z, z); + assert_eq!(z ^ zi, z); + assert_eq!(zi ^ o, o); + assert_eq!(z ^ oi, o); + assert_eq!(oi ^ z, o); + assert_eq!(o ^ zi, o); + + assert_eq!(ti ^ t, z); + assert_eq!(t ^ ti, z); + assert_eq!(ti ^ z, t); + assert_eq!(t ^ zi, t); + assert_eq!(zi ^ t, t); + assert_eq!(z ^ ti, t); + + { + // AndAssign: + let mut v = o; + v &= ti; + assert_eq!(v, z); + } + { + // OrAssign: + let mut v = z; + v |= oi; + assert_eq!(v, o); + } + { + // XORAssign: + let mut v = z; + v ^= oi; + assert_eq!(v, o); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs 
b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs new file mode 100644 index 0000000000..523a85207b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/scalar_mask_bitwise.rs @@ -0,0 +1,140 @@ +//! Vertical (lane-wise) vector-scalar / scalar-vector bitwise operations. + +macro_rules! impl_ops_scalar_mask_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::ops::BitXor<bool> for $id { + type Output = Self; + #[inline] + fn bitxor(self, other: bool) -> Self { + self ^ $id::splat(other) + } + } + impl crate::ops::BitXor<$id> for bool { + type Output = $id; + #[inline] + fn bitxor(self, other: $id) -> $id { + $id::splat(self) ^ other + } + } + + impl crate::ops::BitAnd<bool> for $id { + type Output = Self; + #[inline] + fn bitand(self, other: bool) -> Self { + self & $id::splat(other) + } + } + impl crate::ops::BitAnd<$id> for bool { + type Output = $id; + #[inline] + fn bitand(self, other: $id) -> $id { + $id::splat(self) & other + } + } + + impl crate::ops::BitOr<bool> for $id { + type Output = Self; + #[inline] + fn bitor(self, other: bool) -> Self { + self | $id::splat(other) + } + } + impl crate::ops::BitOr<$id> for bool { + type Output = $id; + #[inline] + fn bitor(self, other: $id) -> $id { + $id::splat(self) | other + } + } + + impl crate::ops::BitAndAssign<bool> for $id { + #[inline] + fn bitand_assign(&mut self, other: bool) { + *self = *self & other; + } + } + impl crate::ops::BitOrAssign<bool> for $id { + #[inline] + fn bitor_assign(&mut self, other: bool) { + *self = *self | other; + } + } + impl crate::ops::BitXorAssign<bool> for $id { + #[inline] + fn bitxor_assign(&mut self, other: bool) { + *self = *self ^ other; + } + } + + test_if!{ + $test_tt: + paste::item!
{ + pub mod [<$id _ops_scalar_mask_bitwise>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_scalar_mask_bitwise() { + let ti = true; + let fi = false; + let t = $id::splat(ti); + let f = $id::splat(fi); + assert!(t != f); + assert!(!(t == f)); + + // BitAnd: + assert_eq!(ti & f, f); + assert_eq!(t & fi, f); + assert_eq!(fi & t, f); + assert_eq!(f & ti, f); + assert_eq!(ti & t, t); + assert_eq!(t & ti, t); + assert_eq!(fi & f, f); + assert_eq!(f & fi, f); + + // BitOr: + assert_eq!(ti | f, t); + assert_eq!(t | fi, t); + assert_eq!(fi | t, t); + assert_eq!(f | ti, t); + assert_eq!(ti | t, t); + assert_eq!(t | ti, t); + assert_eq!(fi | f, f); + assert_eq!(f | fi, f); + + // BitXOR: + assert_eq!(ti ^ f, t); + assert_eq!(t ^ fi, t); + assert_eq!(fi ^ t, t); + assert_eq!(f ^ ti, t); + assert_eq!(ti ^ t, f); + assert_eq!(t ^ ti, f); + assert_eq!(fi ^ f, f); + assert_eq!(f ^ fi, f); + + { + // AndAssign: + let mut v = f; + v &= ti; + assert_eq!(v, f); + } + { + // OrAssign: + let mut v = f; + v |= ti; + assert_eq!(v, t); + } + { + // XORAssign: + let mut v = f; + v ^= ti; + assert_eq!(v, t); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs new file mode 100644 index 0000000000..9c164ad56c --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/scalar_shifts.rs @@ -0,0 +1,107 @@ +//! Vertical (lane-wise) vector-scalar shifts operations. + +macro_rules! 
impl_ops_scalar_shifts { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Shl<u32> for $id { + type Output = Self; + #[inline] + fn shl(self, other: u32) -> Self { + self << $id::splat(other as $elem_ty) + } + } + impl crate::ops::Shr<u32> for $id { + type Output = Self; + #[inline] + fn shr(self, other: u32) -> Self { + self >> $id::splat(other as $elem_ty) + } + } + + impl crate::ops::ShlAssign<u32> for $id { + #[inline] + fn shl_assign(&mut self, other: u32) { + *self = *self << other; + } + } + impl crate::ops::ShrAssign<u32> for $id { + #[inline] + fn shr_assign(&mut self, other: u32) { + *self = *self >> other; + } + } + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_scalar_shifts>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), + allow(unreachable_code, + unused_variables, + unused_mut) + )] + // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 + fn ops_scalar_shifts() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + { + let zi = 0 as u32; + let oi = 1 as u32; + let ti = 2 as u32; + let maxi + = (mem::size_of::<$elem_ty>() * 8 - 1) as u32; + + // shr + assert_eq!(z >> zi, z); + assert_eq!(z >> oi, z); + assert_eq!(z >> ti, z); + assert_eq!(z >> ti, z); + + #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/13 + return; + } + + assert_eq!(o >> zi, o); + assert_eq!(t >> zi, t); + assert_eq!(f >> zi, f); + assert_eq!(f >> maxi, z); + + assert_eq!(o >> oi, z); + assert_eq!(t >> oi, o); + assert_eq!(t >> ti, z); + assert_eq!(f >> oi, t); + assert_eq!(f >> ti, o); + assert_eq!(f >> maxi, z); + + // shl + assert_eq!(z << zi, z); + assert_eq!(o << zi, o); + assert_eq!(t << zi, t); + 
assert_eq!(f << zi, f); + assert_eq!(f << maxi, z); + + assert_eq!(o << oi, t); + assert_eq!(o << ti, f); + assert_eq!(t << oi, f); + + { // shr_assign + let mut v = o; + v >>= oi; + assert_eq!(v, z); + } + { // shl_assign + let mut v = o; + v <<= oi; + assert_eq!(v, t); + } + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs new file mode 100644 index 0000000000..7057f52d03 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_arithmetic.rs @@ -0,0 +1,148 @@ +//! Vertical (lane-wise) vector-vector arithmetic operations. + +macro_rules! impl_ops_vector_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Add for $id { + type Output = Self; + #[inline] + fn add(self, other: Self) -> Self { + use crate::llvm::simd_add; + unsafe { Simd(simd_add(self.0, other.0)) } + } + } + + impl crate::ops::Sub for $id { + type Output = Self; + #[inline] + fn sub(self, other: Self) -> Self { + use crate::llvm::simd_sub; + unsafe { Simd(simd_sub(self.0, other.0)) } + } + } + + impl crate::ops::Mul for $id { + type Output = Self; + #[inline] + fn mul(self, other: Self) -> Self { + use crate::llvm::simd_mul; + unsafe { Simd(simd_mul(self.0, other.0)) } + } + } + + impl crate::ops::Div for $id { + type Output = Self; + #[inline] + fn div(self, other: Self) -> Self { + use crate::llvm::simd_div; + unsafe { Simd(simd_div(self.0, other.0)) } + } + } + + impl crate::ops::Rem for $id { + type Output = Self; + #[inline] + fn rem(self, other: Self) -> Self { + use crate::llvm::simd_rem; + unsafe { Simd(simd_rem(self.0, other.0)) } + } + } + + impl crate::ops::AddAssign for $id { + #[inline] + fn add_assign(&mut self, other: Self) { + *self = *self + other; + } + } + + impl crate::ops::SubAssign for $id { + #[inline] + fn sub_assign(&mut self, other: Self) { + *self = *self - other; + } + } + + impl crate::ops::MulAssign 
for $id { + #[inline] + fn mul_assign(&mut self, other: Self) { + *self = *self * other; + } + } + + impl crate::ops::DivAssign for $id { + #[inline] + fn div_assign(&mut self, other: Self) { + *self = *self / other; + } + } + + impl crate::ops::RemAssign for $id { + #[inline] + fn rem_assign(&mut self, other: Self) { + *self = *self % other; + } + } + + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_vector_arith>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_vector_arithmetic() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + // add + assert_eq!(z + z, z); + assert_eq!(o + z, o); + assert_eq!(t + z, t); + assert_eq!(t + t, f); + // sub + assert_eq!(z - z, z); + assert_eq!(o - z, o); + assert_eq!(t - z, t); + assert_eq!(f - t, t); + assert_eq!(f - o - o, t); + // mul + assert_eq!(z * z, z); + assert_eq!(z * o, z); + assert_eq!(z * t, z); + assert_eq!(o * t, t); + assert_eq!(t * t, f); + // div + assert_eq!(z / o, z); + assert_eq!(t / o, t); + assert_eq!(f / o, f); + assert_eq!(t / t, o); + assert_eq!(f / t, t); + // rem + assert_eq!(o % o, z); + assert_eq!(f % t, z); + + { + let mut v = z; + assert_eq!(v, z); + v += o; // add_assign + assert_eq!(v, o); + v -= o; // sub_assign + assert_eq!(v, z); + v = t; + v *= o; // mul_assign + assert_eq!(v, t); + v *= t; + assert_eq!(v, f); + v /= o; // div_assign + assert_eq!(v, f); + v /= t; + assert_eq!(v, t); + v %= t; // rem_assign + assert_eq!(v, z); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs new file mode 100644 index 0000000000..7be9603fa2 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_bitwise.rs @@ -0,0 +1,129 @@ +//! Vertical (lane-wise) vector-vector bitwise operations. 
+ +macro_rules! impl_ops_vector_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::ops::Not for $id { + type Output = Self; + #[inline] + fn not(self) -> Self { + Self::splat($true) ^ self + } + } + impl crate::ops::BitXor for $id { + type Output = Self; + #[inline] + fn bitxor(self, other: Self) -> Self { + use crate::llvm::simd_xor; + unsafe { Simd(simd_xor(self.0, other.0)) } + } + } + impl crate::ops::BitAnd for $id { + type Output = Self; + #[inline] + fn bitand(self, other: Self) -> Self { + use crate::llvm::simd_and; + unsafe { Simd(simd_and(self.0, other.0)) } + } + } + impl crate::ops::BitOr for $id { + type Output = Self; + #[inline] + fn bitor(self, other: Self) -> Self { + use crate::llvm::simd_or; + unsafe { Simd(simd_or(self.0, other.0)) } + } + } + impl crate::ops::BitAndAssign for $id { + #[inline] + fn bitand_assign(&mut self, other: Self) { + *self = *self & other; + } + } + impl crate::ops::BitOrAssign for $id { + #[inline] + fn bitor_assign(&mut self, other: Self) { + *self = *self | other; + } + } + impl crate::ops::BitXorAssign for $id { + #[inline] + fn bitxor_assign(&mut self, other: Self) { + *self = *self ^ other; + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _ops_vector_bitwise>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_vector_bitwise() { + + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let m = $id::splat(!z.extract(0)); + + // Not: + assert_eq!(!z, m); + assert_eq!(!m, z); + + // BitAnd: + assert_eq!(o & o, o); + assert_eq!(o & z, z); + assert_eq!(z & o, z); + assert_eq!(z & z, z); + + assert_eq!(t & t, t); + assert_eq!(t & o, z); + assert_eq!(o & t, z); + + // BitOr: + assert_eq!(o | o, o); + assert_eq!(o | z, o); + assert_eq!(z | o, o); + assert_eq!(z | z, z); + + assert_eq!(t | t, t); + assert_eq!(z | t, t); + assert_eq!(t | z, t); + + // BitXOR: + assert_eq!(o ^ o, z); + assert_eq!(z ^ z, z); + assert_eq!(z ^ o, o); + assert_eq!(o ^ z, o); + + assert_eq!(t ^ t, z); + assert_eq!(t ^ z, t); + assert_eq!(z ^ t, t); + + { + // AndAssign: + let mut v = o; + v &= t; + assert_eq!(v, z); + } + { + // OrAssign: + let mut v = z; + v |= o; + assert_eq!(v, o); + } + { + // XORAssign: + let mut v = z; + v ^= o; + assert_eq!(v, o); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs new file mode 100644 index 0000000000..8310667b7a --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_float_min_max.rs @@ -0,0 +1,74 @@ +//! Vertical (lane-wise) vector `min` and `max` for floating-point vectors. + +macro_rules! impl_ops_vector_float_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Minimum of two vectors. + /// + /// Returns a new vector containing the minimum value of each of + /// the input vector lanes. + #[inline] + pub fn min(self, x: Self) -> Self { + use crate::llvm::simd_fmin; + unsafe { Simd(simd_fmin(self.0, x.0)) } + } + + /// Maximum of two vectors. 
+ /// + /// Returns a new vector containing the maximum value of each of + /// the input vector lanes. + #[inline] + pub fn max(self, x: Self) -> Self { + use crate::llvm::simd_fmax; + unsafe { Simd(simd_fmax(self.0, x.0)) } + } + } + test_if!{ + $test_tt: + paste::item! { + #[cfg(not(any( + // FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/223 + all(target_arch = "mips", target_endian = "big"), + target_arch = "mips64", + )))] + pub mod [<$id _ops_vector_min_max>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn min_max() { + let n = crate::$elem_ty::NAN; + let o = $id::splat(1. as $elem_ty); + let t = $id::splat(2. as $elem_ty); + + let mut m = o; // [1., 2., 1., 2., ...] + let mut on = o; + for i in 0..$id::lanes() { + if i % 2 == 0 { + m = m.replace(i, 2. as $elem_ty); + on = on.replace(i, n); + } + } + + assert_eq!(o.min(t), o); + assert_eq!(t.min(o), o); + assert_eq!(m.min(o), o); + assert_eq!(o.min(m), o); + assert_eq!(m.min(t), m); + assert_eq!(t.min(m), m); + + assert_eq!(o.max(t), t); + assert_eq!(t.max(o), t); + assert_eq!(m.max(o), m); + assert_eq!(o.max(m), m); + assert_eq!(m.max(t), t); + assert_eq!(t.max(m), t); + + assert_eq!(on.min(o), o); + assert_eq!(o.min(on), o); + assert_eq!(on.max(o), o); + assert_eq!(o.max(on), o); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs new file mode 100644 index 0000000000..36ea98e6bf --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_int_min_max.rs @@ -0,0 +1,57 @@ +//! Vertical (lane-wise) vector `min` and `max` for integer vectors. + +macro_rules! impl_ops_vector_int_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Minimum of two vectors. + /// + /// Returns a new vector containing the minimum value of each of + /// the input vector lanes. 
+ #[inline] + pub fn min(self, x: Self) -> Self { + self.lt(x).select(self, x) + } + + /// Maximum of two vectors. + /// + /// Returns a new vector containing the maximum value of each of + /// the input vector lanes. + #[inline] + pub fn max(self, x: Self) -> Self { + self.gt(x).select(self, x) + } + } + test_if!{$test_tt: + paste::item! { + pub mod [<$id _ops_vector_min_max>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn min_max() { + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + + let mut m = o; + for i in 0..$id::lanes() { + if i % 2 == 0 { + m = m.replace(i, 2 as $elem_ty); + } + } + assert_eq!(o.min(t), o); + assert_eq!(t.min(o), o); + assert_eq!(m.min(o), o); + assert_eq!(o.min(m), o); + assert_eq!(m.min(t), m); + assert_eq!(t.min(m), m); + + assert_eq!(o.max(t), t); + assert_eq!(t.max(o), t); + assert_eq!(m.max(o), m); + assert_eq!(o.max(m), m); + assert_eq!(m.max(t), t); + assert_eq!(t.max(m), t); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs new file mode 100644 index 0000000000..295fc1ca81 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_mask_bitwise.rs @@ -0,0 +1,116 @@ +//! Vertical (lane-wise) vector-vector bitwise operations. + +macro_rules! 
impl_ops_vector_mask_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $test_tt:tt | + ($true:expr, $false:expr) + ) => { + impl crate::ops::Not for $id { + type Output = Self; + #[inline] + fn not(self) -> Self { + Self::splat($true) ^ self + } + } + impl crate::ops::BitXor for $id { + type Output = Self; + #[inline] + fn bitxor(self, other: Self) -> Self { + use crate::llvm::simd_xor; + unsafe { Simd(simd_xor(self.0, other.0)) } + } + } + impl crate::ops::BitAnd for $id { + type Output = Self; + #[inline] + fn bitand(self, other: Self) -> Self { + use crate::llvm::simd_and; + unsafe { Simd(simd_and(self.0, other.0)) } + } + } + impl crate::ops::BitOr for $id { + type Output = Self; + #[inline] + fn bitor(self, other: Self) -> Self { + use crate::llvm::simd_or; + unsafe { Simd(simd_or(self.0, other.0)) } + } + } + impl crate::ops::BitAndAssign for $id { + #[inline] + fn bitand_assign(&mut self, other: Self) { + *self = *self & other; + } + } + impl crate::ops::BitOrAssign for $id { + #[inline] + fn bitor_assign(&mut self, other: Self) { + *self = *self | other; + } + } + impl crate::ops::BitXorAssign for $id { + #[inline] + fn bitxor_assign(&mut self, other: Self) { + *self = *self ^ other; + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _ops_vector_mask_bitwise>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn ops_vector_mask_bitwise() { + let t = $id::splat(true); + let f = $id::splat(false); + assert!(t != f); + assert!(!(t == f)); + + // Not: + assert_eq!(!t, f); + assert_eq!(t, !f); + + // BitAnd: + assert_eq!(t & f, f); + assert_eq!(f & t, f); + assert_eq!(t & t, t); + assert_eq!(f & f, f); + + // BitOr: + assert_eq!(t | f, t); + assert_eq!(f | t, t); + assert_eq!(t | t, t); + assert_eq!(f | f, f); + + // BitXOR: + assert_eq!(t ^ f, t); + assert_eq!(f ^ t, t); + assert_eq!(t ^ t, f); + assert_eq!(f ^ f, f); + + { + // AndAssign: + let mut v = f; + v &= t; + assert_eq!(v, f); + } + { + // OrAssign: + let mut v = f; + v |= t; + assert_eq!(v, t); + } + { + // XORAssign: + let mut v = f; + v ^= t; + assert_eq!(v, t); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_neg.rs b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs new file mode 100644 index 0000000000..e2d91fd2fe --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_neg.rs @@ -0,0 +1,43 @@ +//! Vertical (lane-wise) vector `Neg`. + +macro_rules! impl_ops_vector_neg { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Neg for $id { + type Output = Self; + #[inline] + fn neg(self) -> Self { + Self::splat(-1 as $elem_ty) * self + } + } + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _ops_vector_neg>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn neg() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + let nz = $id::splat(-(0 as $elem_ty)); + let no = $id::splat(-(1 as $elem_ty)); + let nt = $id::splat(-(2 as $elem_ty)); + let nf = $id::splat(-(4 as $elem_ty)); + + assert_eq!(-z, nz); + assert_eq!(-o, no); + assert_eq!(-t, nt); + assert_eq!(-f, nf); + + assert_eq!(z, -nz); + assert_eq!(o, -no); + assert_eq!(t, -nt); + assert_eq!(f, -nf); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs new file mode 100644 index 0000000000..6c794ecf4b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_rotates.rs @@ -0,0 +1,90 @@ +//! Vertical (lane-wise) vector rotates operations. +#![allow(unused)] + +macro_rules! impl_ops_vector_rotates { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Shifts the bits of each lane to the left by the specified + /// amount in the corresponding lane of `n`, wrapping the + /// truncated bits to the end of the resulting integer. + /// + /// Note: this is neither the same operation as `<<` nor equivalent + /// to `slice::rotate_left`. + #[inline] + pub fn rotate_left(self, n: $id) -> $id { + const LANE_WIDTH: $elem_ty = + crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; + // Protect against undefined behavior for over-long bit shifts + let n = n % LANE_WIDTH; + (self << n) | (self >> ((LANE_WIDTH - n) % LANE_WIDTH)) + } + + /// Shifts the bits of each lane to the right by the specified + /// amount in the corresponding lane of `n`, wrapping the + /// truncated bits to the beginning of the resulting integer. 
+ /// + /// Note: this is neither the same operation as `>>` nor equivalent + /// to `slice::rotate_right`. + #[inline] + pub fn rotate_right(self, n: $id) -> $id { + const LANE_WIDTH: $elem_ty = + crate::mem::size_of::<$elem_ty>() as $elem_ty * 8; + // Protect against undefined behavior for over-long bit shifts + let n = n % LANE_WIDTH; + (self >> n) | (self << ((LANE_WIDTH - n) % LANE_WIDTH)) + } + } + + test_if!{ + $test_tt: + paste::item! { + // FIXME: + // https://github.com/rust-lang-nursery/packed_simd/issues/75 + #[cfg(not(any( + target_arch = "s390x", + target_arch = "sparc64", + )))] + pub mod [<$id _ops_vector_rotate>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn rotate_ops() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + let max = $id::splat( + (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty); + + // rotate_right + assert_eq!(z.rotate_right(z), z); + assert_eq!(z.rotate_right(o), z); + assert_eq!(z.rotate_right(t), z); + + assert_eq!(o.rotate_right(z), o); + assert_eq!(t.rotate_right(z), t); + assert_eq!(f.rotate_right(z), f); + assert_eq!(f.rotate_right(max), f << 1); + + assert_eq!(o.rotate_right(o), o << max); + assert_eq!(t.rotate_right(o), o); + assert_eq!(t.rotate_right(t), o << max); + assert_eq!(f.rotate_right(o), t); + assert_eq!(f.rotate_right(t), o); + + // rotate_left + assert_eq!(z.rotate_left(z), z); + assert_eq!(o.rotate_left(z), o); + assert_eq!(t.rotate_left(z), t); + assert_eq!(f.rotate_left(z), f); + assert_eq!(f.rotate_left(max), t); + + assert_eq!(o.rotate_left(o), t); + assert_eq!(o.rotate_left(t), f); + assert_eq!(t.rotate_left(o), f); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs new file mode 100644 index 0000000000..22e1fbc0ec --- 
/dev/null +++ b/third_party/rust/packed_simd/src/api/ops/vector_shifts.rs @@ -0,0 +1,107 @@ +//! Vertical (lane-wise) vector-vector shifts operations. + +macro_rules! impl_ops_vector_shifts { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl crate::ops::Shl<$id> for $id { + type Output = Self; + #[inline] + fn shl(self, other: Self) -> Self { + use crate::llvm::simd_shl; + unsafe { Simd(simd_shl(self.0, other.0)) } + } + } + impl crate::ops::Shr<$id> for $id { + type Output = Self; + #[inline] + fn shr(self, other: Self) -> Self { + use crate::llvm::simd_shr; + unsafe { Simd(simd_shr(self.0, other.0)) } + } + } + impl crate::ops::ShlAssign<$id> for $id { + #[inline] + fn shl_assign(&mut self, other: Self) { + *self = *self << other; + } + } + impl crate::ops::ShrAssign<$id> for $id { + #[inline] + fn shr_assign(&mut self, other: Self) { + *self = *self >> other; + } + } + test_if!{ + $test_tt: + paste::item! { + pub mod [<$id _ops_vector_shifts>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg_attr(any(target_arch = "s390x", target_arch = "sparc64"), + allow(unreachable_code, + unused_variables, + unused_mut) + )] + // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 + fn ops_vector_shifts() { + let z = $id::splat(0 as $elem_ty); + let o = $id::splat(1 as $elem_ty); + let t = $id::splat(2 as $elem_ty); + let f = $id::splat(4 as $elem_ty); + + let max =$id::splat( + (mem::size_of::<$elem_ty>() * 8 - 1) as $elem_ty + ); + + // shr + assert_eq!(z >> z, z); + assert_eq!(z >> o, z); + assert_eq!(z >> t, z); + assert_eq!(z >> t, z); + + #[cfg(any(target_arch = "s390x", target_arch = "sparc64"))] { + // FIXME: rust produces bad codegen for shifts: + // https://github.com/rust-lang-nursery/packed_simd/issues/13 + return; + } + + assert_eq!(o >> z, o); + assert_eq!(t >> z, t); + assert_eq!(f >> z, f); + assert_eq!(f >> max, z); + + assert_eq!(o >> o, z); + 
assert_eq!(t >> o, o); + assert_eq!(t >> t, z); + assert_eq!(f >> o, t); + assert_eq!(f >> t, o); + assert_eq!(f >> max, z); + + // shl + assert_eq!(z << z, z); + assert_eq!(o << z, o); + assert_eq!(t << z, t); + assert_eq!(f << z, f); + assert_eq!(f << max, z); + + assert_eq!(o << o, t); + assert_eq!(o << t, f); + assert_eq!(t << o, f); + + { + // shr_assign + let mut v = o; + v >>= o; + assert_eq!(v, z); + } + { + // shl_assign + let mut v = o; + v <<= o; + assert_eq!(v, t); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/ptr.rs b/third_party/rust/packed_simd/src/api/ptr.rs new file mode 100644 index 0000000000..d2e523a49f --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ptr.rs @@ -0,0 +1,4 @@ +//! Vector of pointers + +#[macro_use] +mod gather_scatter; diff --git a/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs new file mode 100644 index 0000000000..4304356209 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/ptr/gather_scatter.rs @@ -0,0 +1,217 @@ +//! Implements masked gather and scatters for vectors of pointers + +macro_rules! impl_ptr_read { + ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident + | $test_tt:tt) => { + impl<T> $id<T> + where + [T; $elem_count]: sealed::SimdArray, + { + /// Reads selected vector elements from memory. + /// + /// Instantiates a new vector by reading the values from `self` for + /// those lanes whose `mask` is `true`, and using the elements of + /// `value` otherwise. + /// + /// No memory is accessed for those lanes of `self` whose `mask` is + /// `false`. + /// + /// # Safety + /// + /// This method is unsafe because it dereferences raw pointers. The + /// pointers must be aligned to `mem::align_of::<T>()`. 
+ #[inline] + pub unsafe fn read<M>( + self, mask: Simd<[M; $elem_count]>, + value: Simd<[T; $elem_count]>, + ) -> Simd<[T; $elem_count]> + where + M: sealed::Mask, + [M; $elem_count]: sealed::SimdArray, + { + use crate::llvm::simd_gather; + Simd(simd_gather(value.0, self.0, mask.0)) + } + } + + test_if! { + $test_tt: + paste::item! { + mod [<$id _read>] { + use super::*; + #[test] + fn read() { + let mut v = [0_i32; $elem_count]; + for i in 0..$elem_count { + v[i] = i as i32; + } + + let mut ptr = $id::<i32>::null(); + + for i in 0..$elem_count { + ptr = ptr.replace(i, + &v[i] as *const i32 as *mut i32 + ); + } + + // all mask elements are true: + let mask = $mask_ty::splat(true); + let def = Simd::<[i32; $elem_count]>::splat(42_i32); + let r: Simd<[i32; $elem_count]> = unsafe { + ptr.read(mask, def) + }; + assert_eq!( + r, + Simd::<[i32; $elem_count]>::from_slice_unaligned( + &v + ) + ); + + let mut mask = mask; + for i in 0..$elem_count { + if i % 2 != 0 { + mask = mask.replace(i, false); + } + } + + // even mask elements are true, odd ones are false: + let r: Simd<[i32; $elem_count]> = unsafe { + ptr.read(mask, def) + }; + let mut e = v; + for i in 0..$elem_count { + if i % 2 != 0 { + e[i] = 42; + } + } + assert_eq!( + r, + Simd::<[i32; $elem_count]>::from_slice_unaligned( + &e + ) + ); + + // all mask elements are false: + let mask = $mask_ty::splat(false); + let def = Simd::<[i32; $elem_count]>::splat(42_i32); + let r: Simd<[i32; $elem_count]> = unsafe { + ptr.read(mask, def) } + ; + assert_eq!(r, def); + } + } + } + } + }; +} + +macro_rules! impl_ptr_write { + ([$elem_ty:ty; $elem_count:expr]: $id:ident, $mask_ty:ident + | $test_tt:tt) => { + impl<T> $id<T> + where + [T; $elem_count]: sealed::SimdArray, + { + /// Writes selected vector elements to memory. + /// + /// Writes the lanes of `values` for which the mask is `true` to + /// their corresponding memory addresses in `self`. 
+ /// + /// No memory is accessed for those lanes of `self` whose `mask` is + /// `false`. + /// + /// Overlapping memory addresses of `self` are written to in order + /// from the least-significant to the most-significant element. + /// + /// # Safety + /// + /// This method is unsafe because it dereferences raw pointers. The + /// pointers must be aligned to `mem::align_of::<T>()`. + #[inline] + pub unsafe fn write<M>( + self, mask: Simd<[M; $elem_count]>, + value: Simd<[T; $elem_count]>, + ) where + M: sealed::Mask, + [M; $elem_count]: sealed::SimdArray, + { + use crate::llvm::simd_scatter; + simd_scatter(value.0, self.0, mask.0) + } + } + + test_if! { + $test_tt: + paste::item! { + mod [<$id _write>] { + use super::*; + #[test] + fn write() { + // fourty_two = [42, 42, 42, ...] + let fourty_two + = Simd::<[i32; $elem_count]>::splat(42_i32); + + // This test will write to this array + let mut arr = [0_i32; $elem_count]; + for i in 0..$elem_count { + arr[i] = i as i32; + } + // arr = [0, 1, 2, ...] + + let mut ptr = $id::<i32>::null(); + for i in 0..$elem_count { + ptr = ptr.replace(i, unsafe { + arr.as_ptr().add(i) as *mut i32 + }); + } + // ptr = [&arr[0], &arr[1], ...] + + // write `fourty_two` to all elements of `v` + { + let backup = arr; + unsafe { + ptr.write($mask_ty::splat(true), fourty_two) + }; + assert_eq!(arr, [42_i32; $elem_count]); + arr = backup; // arr = [0, 1, 2, ...] + } + + // write 42 to even elements of arr: + { + // set odd elements of the mask to false + let mut mask = $mask_ty::splat(true); + for i in 0..$elem_count { + if i % 2 != 0 { + mask = mask.replace(i, false); + } + } + // mask = [true, false, true, false, ...] + + // expected result r = [42, 1, 42, 3, 42, 5, ...] + let mut r = arr; + for i in 0..$elem_count { + if i % 2 == 0 { + r[i] = 42; + } + } + + let backup = arr; + unsafe { ptr.write(mask, fourty_two) }; + assert_eq!(arr, r); + arr = backup; // arr = [0, 1, 2, 3, ...] 
+ } + + // write 42 to no elements of arr + { + let backup = arr; + unsafe { + ptr.write($mask_ty::splat(false), fourty_two) + }; + assert_eq!(arr, backup); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions.rs b/third_party/rust/packed_simd/src/api/reductions.rs new file mode 100644 index 0000000000..54d2f0cc7f --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions.rs @@ -0,0 +1,12 @@ +//! Reductions + +#[macro_use] +mod float_arithmetic; +#[macro_use] +mod integer_arithmetic; +#[macro_use] +mod bitwise; +#[macro_use] +mod mask; +#[macro_use] +mod min_max; diff --git a/third_party/rust/packed_simd/src/api/reductions/bitwise.rs b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs new file mode 100644 index 0000000000..5bad4f474b --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/bitwise.rs @@ -0,0 +1,151 @@ +//! Implements portable horizontal bitwise vector reductions. +#![allow(unused)] + +macro_rules! impl_reduction_bitwise { + ( + [$elem_ty:ident; $elem_count:expr]: + $id:ident | $ielem_ty:ident | $test_tt:tt | + ($convert:expr) | + ($true:expr, $false:expr) + ) => { + impl $id { + /// Lane-wise bitwise `and` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. + #[inline] + pub fn and(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_and; + let r: $ielem_ty = unsafe { simd_reduce_and(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x &= self.extract(i) as $elem_ty; + } + x + } + } + + /// Lane-wise bitwise `or` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. 
+ #[inline] + pub fn or(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_or; + let r: $ielem_ty = unsafe { simd_reduce_or(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x |= self.extract(i) as $elem_ty; + } + x + } + } + + /// Lane-wise bitwise `xor` of the vector elements. + /// + /// Note: if the vector has one lane, the first element of the + /// vector is returned. + #[inline] + pub fn xor(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_xor; + let r: $ielem_ty = unsafe { simd_reduce_xor(self.0) }; + $convert(r) + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on aarch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x ^= self.extract(i) as $elem_ty; + } + x + } + } + } + + test_if!{ + $test_tt: + paste::item! 
{ + pub mod [<$id _reduction_bitwise>] { + use super::*; + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn and() { + let v = $id::splat($false); + assert_eq!(v.and(), $false); + let v = $id::splat($true); + assert_eq!(v.and(), $true); + let v = $id::splat($false); + let v = v.replace(0, $true); + if $id::lanes() > 1 { + assert_eq!(v.and(), $false); + } else { + assert_eq!(v.and(), $true); + } + let v = $id::splat($true); + let v = v.replace(0, $false); + assert_eq!(v.and(), $false); + + } + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn or() { + let v = $id::splat($false); + assert_eq!(v.or(), $false); + let v = $id::splat($true); + assert_eq!(v.or(), $true); + let v = $id::splat($false); + let v = v.replace(0, $true); + assert_eq!(v.or(), $true); + let v = $id::splat($true); + let v = v.replace(0, $false); + if $id::lanes() > 1 { + assert_eq!(v.or(), $true); + } else { + assert_eq!(v.or(), $false); + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn xor() { + let v = $id::splat($false); + assert_eq!(v.xor(), $false); + let v = $id::splat($true); + if $id::lanes() > 1 { + assert_eq!(v.xor(), $false); + } else { + assert_eq!(v.xor(), $true); + } + let v = $id::splat($false); + let v = v.replace(0, $true); + assert_eq!(v.xor(), $true); + let v = $id::splat($true); + let v = v.replace(0, $false); + if $id::lanes() > 1 { + assert_eq!(v.xor(), $true); + } else { + assert_eq!(v.xor(), $false); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs new file mode 100644 index 0000000000..4a47452e50 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/float_arithmetic.rs @@ -0,0 +1,317 @@ +//! 
Implements portable horizontal float vector arithmetic reductions. + +macro_rules! impl_reduction_float_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Horizontal sum of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) + /// + /// If one of the vector element is `NaN` the reduction returns + /// `NaN`. The resulting `NaN` is not required to be equal to any + /// of the `NaN`s in the vector. + #[inline] + pub fn sum(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_add_ordered; + unsafe { simd_reduce_add_ordered(self.0, 0 as $elem_ty) } + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x += self.extract(i) as $elem_ty; + } + x + } + } + + /// Horizontal product of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) + /// + /// If one of the vector element is `NaN` the reduction returns + /// `NaN`. The resulting `NaN` is not required to be equal to any + /// of the `NaN`s in the vector. 
+ #[inline] + pub fn product(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_mul_ordered; + unsafe { simd_reduce_mul_ordered(self.0, 1 as $elem_ty) } + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x *= self.extract(i) as $elem_ty; + } + x + } + } + } + + impl crate::iter::Sum for $id { + #[inline] + fn sum<I: Iterator<Item = $id>>(iter: I) -> $id { + iter.fold($id::splat(0.), crate::ops::Add::add) + } + } + + impl crate::iter::Product for $id { + #[inline] + fn product<I: Iterator<Item = $id>>(iter: I) -> $id { + iter.fold($id::splat(1.), crate::ops::Mul::mul) + } + } + + impl<'a> crate::iter::Sum<&'a $id> for $id { + #[inline] + fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id { + iter.fold($id::splat(0.), |a, b| crate::ops::Add::add(a, *b)) + } + } + + impl<'a> crate::iter::Product<&'a $id> for $id { + #[inline] + fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id { + iter.fold($id::splat(1.), |a, b| crate::ops::Mul::mul(a, *b)) + } + } + + test_if! { + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. 
+ #[allow(clippy::float_cmp)] + pub mod [<$id _reduction_float_arith>] { + use super::*; + fn alternating(x: usize) -> $id { + let mut v = $id::splat(1 as $elem_ty); + for i in 0..$id::lanes() { + if i % x == 0 { + v = v.replace(i, 2 as $elem_ty); + } + } + v + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn sum() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.sum(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.sum(), $id::lanes() as $elem_ty); + let v = alternating(2); + assert_eq!( + v.sum(), + ($id::lanes() / 2 + $id::lanes()) as $elem_ty + ); + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn product() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.product(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.product(), 1 as $elem_ty); + let f = match $id::lanes() { + 64 => 16, + 32 => 8, + 16 => 4, + _ => 2, + }; + let v = alternating(f); + assert_eq!( + v.product(), + (2_usize.pow(($id::lanes() / f) as u32) + as $elem_ty) + ); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unreachable_code)] + #[allow(unused_mut)] + // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 + fn sum_nan() { + // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 + // https://github.com/rust-lang-nursery/packed_simd/issues/6 + return; + + let n0 = crate::$elem_ty::NAN; + let v0 = $id::splat(-3.0); + for i in 0..$id::lanes() { + let mut v = v0.replace(i, n0); + // If the vector contains a NaN the result is NaN: + assert!( + v.sum().is_nan(), + "nan at {} => {} | {:?}", + i, + v.sum(), + v + ); + for j in 0..i { + v = v.replace(j, n0); + assert!(v.sum().is_nan()); + } + } + let v = $id::splat(n0); + assert!(v.sum().is_nan(), "all nans | {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + 
#[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unreachable_code)] + #[allow(unused_mut)] + // ^^^ FIXME: https://github.com/rust-lang/rust/issues/55344 + fn product_nan() { + // FIXME: https://bugs.llvm.org/show_bug.cgi?id=36732 + // https://github.com/rust-lang-nursery/packed_simd/issues/6 + return; + + let n0 = crate::$elem_ty::NAN; + let v0 = $id::splat(-3.0); + for i in 0..$id::lanes() { + let mut v = v0.replace(i, n0); + // If the vector contains a NaN the result is NaN: + assert!( + v.product().is_nan(), + "nan at {} => {} | {:?}", + i, + v.product(), + v + ); + for j in 0..i { + v = v.replace(j, n0); + assert!(v.product().is_nan()); + } + } + let v = $id::splat(n0); + assert!(v.product().is_nan(), "all nans | {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unused, dead_code)] + fn sum_roundoff() { + // Performs a tree-reduction + fn tree_reduce_sum(a: &[$elem_ty]) -> $elem_ty { + assert!(!a.is_empty()); + if a.len() == 1 { + a[0] + } else if a.len() == 2 { + a[0] + a[1] + } else { + let mid = a.len() / 2; + let (left, right) = a.split_at(mid); + tree_reduce_sum(left) + tree_reduce_sum(right) + } + } + + let mut start = crate::$elem_ty::EPSILON; + let mut scalar_reduction = 0. as $elem_ty; + + let mut v = $id::splat(0. as $elem_ty); + for i in 0..$id::lanes() { + let c = if i % 2 == 0 { 1e3 } else { -1. }; + start *= ::core::$elem_ty::consts::PI * c; + scalar_reduction += start; + v = v.replace(i, start); + } + let simd_reduction = v.sum(); + + let mut a = [0. 
as $elem_ty; $id::lanes()]; + v.write_to_slice_unaligned(&mut a); + let tree_reduction = tree_reduce_sum(&a); + + // tolerate 1 ULP difference: + let red_bits = simd_reduction.to_bits(); + let tree_bits = tree_reduction.to_bits(); + assert!( + if red_bits > tree_bits { + red_bits - tree_bits + } else { + tree_bits - red_bits + } < 2, + "vector: {:?} | simd_reduction: {:?} | \ + tree_reduction: {} | scalar_reduction: {}", + v, + simd_reduction, + tree_reduction, + scalar_reduction + ); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[allow(unused, dead_code)] + fn product_roundoff() { + use ::core::convert::TryInto; + // Performs a tree-reduction + fn tree_reduce_product(a: &[$elem_ty]) -> $elem_ty { + assert!(!a.is_empty()); + if a.len() == 1 { + a[0] + } else if a.len() == 2 { + a[0] * a[1] + } else { + let mid = a.len() / 2; + let (left, right) = a.split_at(mid); + tree_reduce_product(left) + * tree_reduce_product(right) + } + } + + let mut start = crate::$elem_ty::EPSILON; + let mut scalar_reduction = 1. as $elem_ty; + + let mut v = $id::splat(0. as $elem_ty); + for i in 0..$id::lanes() { + let c = if i % 2 == 0 { 1e3 } else { -1. }; + start *= ::core::$elem_ty::consts::PI * c; + scalar_reduction *= start; + v = v.replace(i, start); + } + let simd_reduction = v.product(); + + let mut a = [0. as $elem_ty; $id::lanes()]; + v.write_to_slice_unaligned(&mut a); + let tree_reduction = tree_reduce_product(&a); + + // FIXME: Too imprecise, even only for product(f32x8). + // Figure out how to narrow this down. 
+ let ulp_limit = $id::lanes() / 2; + let red_bits = simd_reduction.to_bits(); + let tree_bits = tree_reduction.to_bits(); + assert!( + if red_bits > tree_bits { + red_bits - tree_bits + } else { + tree_bits - red_bits + } < ulp_limit.try_into().unwrap(), + "vector: {:?} | simd_reduction: {:?} | \ + tree_reduction: {} | scalar_reduction: {}", + v, + simd_reduction, + tree_reduction, + scalar_reduction + ); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs new file mode 100644 index 0000000000..91dffad310 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/integer_arithmetic.rs @@ -0,0 +1,197 @@ +//! Implements portable horizontal integer vector arithmetic reductions. + +macro_rules! impl_reduction_integer_arithmetic { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $ielem_ty:ident + | $test_tt:tt) => { + impl $id { + /// Horizontal wrapping sum of the vector elements. + /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 + x1) + (x2 + x3)) + ((x4 + x5) + (x6 + x7)) + /// + /// If an operation overflows it returns the mathematical result + /// modulo `2^n` where `n` is the number of times it overflows. + #[inline] + pub fn wrapping_sum(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_add_ordered; + let v: $ielem_ty = unsafe { + simd_reduce_add_ordered(self.0, 0 as $ielem_ty) + }; + v as $elem_ty + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x = x.wrapping_add(self.extract(i) as $elem_ty); + } + x + } + } + + /// Horizontal wrapping product of the vector elements. 
+ /// + /// The intrinsic performs a tree-reduction of the vector elements. + /// That is, for an 8 element vector: + /// + /// > ((x0 * x1) * (x2 * x3)) * ((x4 * x5) * (x6 * x7)) + /// + /// If an operation overflows it returns the mathematical result + /// modulo `2^n` where `n` is the number of times it overflows. + #[inline] + pub fn wrapping_product(self) -> $elem_ty { + #[cfg(not(target_arch = "aarch64"))] + { + use crate::llvm::simd_reduce_mul_ordered; + let v: $ielem_ty = unsafe { + simd_reduce_mul_ordered(self.0, 1 as $ielem_ty) + }; + v as $elem_ty + } + #[cfg(target_arch = "aarch64")] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + let mut x = self.extract(0) as $elem_ty; + for i in 1..$id::lanes() { + x = x.wrapping_mul(self.extract(i) as $elem_ty); + } + x + } + } + } + + impl crate::iter::Sum for $id { + #[inline] + fn sum<I: Iterator<Item = $id>>(iter: I) -> $id { + iter.fold($id::splat(0), crate::ops::Add::add) + } + } + + impl crate::iter::Product for $id { + #[inline] + fn product<I: Iterator<Item = $id>>(iter: I) -> $id { + iter.fold($id::splat(1), crate::ops::Mul::mul) + } + } + + impl<'a> crate::iter::Sum<&'a $id> for $id { + #[inline] + fn sum<I: Iterator<Item = &'a $id>>(iter: I) -> $id { + iter.fold($id::splat(0), |a, b| crate::ops::Add::add(a, *b)) + } + } + + impl<'a> crate::iter::Product<&'a $id> for $id { + #[inline] + fn product<I: Iterator<Item = &'a $id>>(iter: I) -> $id { + iter.fold($id::splat(1), |a, b| crate::ops::Mul::mul(a, *b)) + } + } + + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _reduction_int_arith>] { + use super::*; + + fn alternating(x: usize) -> $id { + let mut v = $id::splat(1 as $elem_ty); + for i in 0..$id::lanes() { + if i % x == 0 { + v = v.replace(i, 2 as $elem_ty); + } + } + v + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_sum() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.wrapping_sum(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.wrapping_sum(), $id::lanes() as $elem_ty); + let v = alternating(2); + if $id::lanes() > 1 { + assert_eq!( + v.wrapping_sum(), + ($id::lanes() / 2 + $id::lanes()) as $elem_ty + ); + } else { + assert_eq!( + v.wrapping_sum(), + 2 as $elem_ty + ); + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_sum_overflow() { + let start = $elem_ty::max_value() + - ($id::lanes() as $elem_ty / 2); + + let v = $id::splat(start as $elem_ty); + let vwrapping_sum = v.wrapping_sum(); + + let mut wrapping_sum = start; + for _ in 1..$id::lanes() { + wrapping_sum = wrapping_sum.wrapping_add(start); + } + assert_eq!(wrapping_sum, vwrapping_sum, "v = {:?}", v); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_product() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.wrapping_product(), 0 as $elem_ty); + let v = $id::splat(1 as $elem_ty); + assert_eq!(v.wrapping_product(), 1 as $elem_ty); + let f = match $id::lanes() { + 64 => 16, + 32 => 8, + 16 => 4, + _ => 2, + }; + let v = alternating(f); + if $id::lanes() > 1 { + assert_eq!( + v.wrapping_product(), + (2_usize.pow(($id::lanes() / f) as u32) + as $elem_ty) + ); + } else { + assert_eq!( + v.wrapping_product(), + 2 as $elem_ty + ); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn wrapping_product_overflow() { + let 
start = $elem_ty::max_value() + - ($id::lanes() as $elem_ty / 2); + + let v = $id::splat(start as $elem_ty); + let vmul = v.wrapping_product(); + + let mut mul = start; + for _ in 1..$id::lanes() { + mul = mul.wrapping_mul(start); + } + assert_eq!(mul, vmul, "v = {:?}", v); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions/mask.rs b/third_party/rust/packed_simd/src/api/reductions/mask.rs new file mode 100644 index 0000000000..0dd6a84e7e --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/mask.rs @@ -0,0 +1,89 @@ +//! Implements portable horizontal mask reductions. + +macro_rules! impl_reduction_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Are `all` vector lanes `true`? + #[inline] + pub fn all(self) -> bool { + unsafe { crate::codegen::reductions::mask::All::all(self) } + } + /// Is `any` vector lane `true`? + #[inline] + pub fn any(self) -> bool { + unsafe { crate::codegen::reductions::mask::Any::any(self) } + } + /// Are `all` vector lanes `false`? + #[inline] + pub fn none(self) -> bool { + !self.any() + } + } + + test_if! { + $test_tt: + paste::item! 
{ + pub mod [<$id _reduction>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn all() { + let a = $id::splat(true); + assert!(a.all()); + let a = $id::splat(false); + assert!(!a.all()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(!a.all()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(!a.all()); + } + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn any() { + let a = $id::splat(true); + assert!(a.any()); + let a = $id::splat(false); + assert!(!a.any()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(a.any()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(a.any()); + } + } + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn none() { + let a = $id::splat(true); + assert!(!a.none()); + let a = $id::splat(false); + assert!(a.none()); + + if $id::lanes() > 1 { + for i in 0..$id::lanes() { + let mut a = $id::splat(true); + a = a.replace(i, false); + assert!(!a.none()); + let mut a = $id::splat(false); + a = a.replace(i, true); + assert!(!a.none()); + } + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/reductions/min_max.rs b/third_party/rust/packed_simd/src/api/reductions/min_max.rs new file mode 100644 index 0000000000..c4c1400a8f --- /dev/null +++ b/third_party/rust/packed_simd/src/api/reductions/min_max.rs @@ -0,0 +1,381 @@ +//! Implements portable horizontal vector min/max reductions. + +macro_rules! impl_reduction_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident + | $ielem_ty:ident | $test_tt:tt) => { + impl $id { + /// Largest vector element value. 
+ #[inline] + pub fn max_element(self) -> $elem_ty { + #[cfg(not(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "wasm32", + )))] + { + use crate::llvm::simd_reduce_max; + let v: $ielem_ty = unsafe { simd_reduce_max(self.0) }; + v as $elem_ty + } + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "wasm32", + ))] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + // FIXME: broken on WASM32 + // https://github.com/rust-lang-nursery/packed_simd/issues/91 + let mut x = self.extract(0); + for i in 1..$id::lanes() { + x = x.max(self.extract(i)); + } + x + } + } + + /// Smallest vector element value. + #[inline] + pub fn min_element(self) -> $elem_ty { + #[cfg(not(any( + target_arch = "aarch64", + target_arch = "arm", + all(target_arch = "x86", not(target_feature = "sse2")), + target_arch = "powerpc64", + target_arch = "wasm32", + ),))] + { + use crate::llvm::simd_reduce_min; + let v: $ielem_ty = unsafe { simd_reduce_min(self.0) }; + v as $elem_ty + } + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + all(target_arch = "x86", not(target_feature = "sse2")), + target_arch = "powerpc64", + target_arch = "wasm32", + ))] + { + // FIXME: broken on AArch64 + // https://github.com/rust-lang-nursery/packed_simd/issues/15 + // FIXME: broken on i586-unknown-linux-gnu + // https://github.com/rust-lang-nursery/packed_simd/issues/22 + // FIXME: broken on WASM32 + // https://github.com/rust-lang-nursery/packed_simd/issues/91 + let mut x = self.extract(0); + for i in 1..$id::lanes() { + x = x.min(self.extract(i)); + } + x + } + } + } + test_if! {$test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. 
+ #[allow(clippy::float_cmp)] + pub mod [<$id _reduction_min_max>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + pub fn max_element() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.max_element(), 0 as $elem_ty); + if $id::lanes() > 1 { + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.max_element(), 1 as $elem_ty); + } + let v = v.replace(0, 2 as $elem_ty); + assert_eq!(v.max_element(), 2 as $elem_ty); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + pub fn min_element() { + let v = $id::splat(0 as $elem_ty); + assert_eq!(v.min_element(), 0 as $elem_ty); + if $id::lanes() > 1 { + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.min_element(), 0 as $elem_ty); + } + let v = $id::splat(1 as $elem_ty); + let v = v.replace(0, 2 as $elem_ty); + if $id::lanes() > 1 { + assert_eq!(v.min_element(), 1 as $elem_ty); + } else { + assert_eq!(v.min_element(), 2 as $elem_ty); + } + if $id::lanes() > 1 { + let v = $id::splat(2 as $elem_ty); + let v = v.replace(1, 1 as $elem_ty); + assert_eq!(v.min_element(), 1 as $elem_ty); + } + } + } + } + } + }; +} + +macro_rules! test_reduction_float_min_max { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if!{ + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. + #[allow(clippy::float_cmp)] + pub mod [<$id _reduction_min_max_nan>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn min_element_test() { + let n = crate::$elem_ty::NAN; + + assert_eq!(n.min(-3.), -3.); + assert_eq!((-3. 
as $elem_ty).min(n), -3.); + + let v0 = $id::splat(-3.); + + let target_with_broken_last_lane_nan = !cfg!(any( + target_arch = "arm", target_arch = "aarch64", + all(target_arch = "x86", + not(target_feature = "sse2") + ), + target_arch = "powerpc64", + target_arch = "wasm32", + )); + + // The vector is initialized to `-3.`s: [-3, -3, -3, -3] + for i in 0..$id::lanes() { + // We replace the i-th element of the vector with + // `NaN`: [-3, -3, -3, NaN] + let mut v = v0.replace(i, n); + + // If the NaN is in the last place, the LLVM + // implementation of these methods is broken on some + // targets: + if i == $id::lanes() - 1 && + target_with_broken_last_lane_nan { + // FIXME: + // https://github.com/rust-lang-nursery/packed_simd/issues/5 + // + // If there is a NaN, the result should always + // the smallest element, but currently when the + // last element is NaN the current + // implementation incorrectly returns NaN. + // + // The targets mentioned above use different + // codegen that produces the correct result. + // + // These asserts detect if this behavior changes + assert!(v.min_element().is_nan(), + // FIXME: ^^^ should be -3. + "[A]: nan at {} => {} | {:?}", + i, v.min_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result + // is still always `-3.` unless all elements of + // the vector are `NaN`s: + // + // This is also broken: + for j in 0..i { + v = v.replace(j, n); + assert!(v.min_element().is_nan(), + // FIXME: ^^^ should be -3. + "[B]: nan at {} => {} | {:?}", + i, v.min_element(), v); + } + + // We are done here, since we were in the last + // lane which is the last iteration of the loop. + break + } + + // We are not in the last lane, and there is only + // one `NaN` in the vector. 
+ + // If the vector has one lane, the result is `NaN`: + if $id::lanes() == 1 { + assert!(v.min_element().is_nan(), + "[C]: all nans | v={:?} | min={} | \ + is_nan: {}", + v, v.min_element(), + v.min_element().is_nan() + ); + + // And we are done, since the vector only has + // one lane anyways. + break; + } + + // The vector has more than one lane, since there is + // only one `NaN` in the vector, the result is + // always `-3`. + assert_eq!(v.min_element(), -3., + "[D]: nan at {} => {} | {:?}", + i, v.min_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result is + // still always `-3.` unless all elements of the + // vector are `NaN`s: + for j in 0..i { + v = v.replace(j, n); + + if i == $id::lanes() - 1 && j == i - 1 { + // All elements of the vector are `NaN`s, + // therefore the result is NaN as well. + // + // Note: the #lanes of the vector is > 1, so + // "i - 1" does not overflow. + assert!(v.min_element().is_nan(), + "[E]: all nans | v={:?} | min={} | \ + is_nan: {}", + v, v.min_element(), + v.min_element().is_nan()); + } else { + // There are non-`NaN` elements in the + // vector, therefore the result is `-3.`: + assert_eq!(v.min_element(), -3., + "[F]: nan at {} => {} | {:?}", + i, v.min_element(), v); + } + } + } + + // If the vector contains all NaNs the result is NaN: + assert!($id::splat(n).min_element().is_nan(), + "all nans | v={:?} | min={} | is_nan: {}", + $id::splat(n), $id::splat(n).min_element(), + $id::splat(n).min_element().is_nan()); + } + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn max_element_test() { + let n = crate::$elem_ty::NAN; + + assert_eq!(n.max(-3.), -3.); + assert_eq!((-3. 
as $elem_ty).max(n), -3.); + + let v0 = $id::splat(-3.); + + let target_with_broken_last_lane_nan = !cfg!(any( + target_arch = "arm", target_arch = "aarch64", + target_arch = "powerpc64", target_arch = "wasm32", + )); + + // The vector is initialized to `-3.`s: [-3, -3, -3, -3] + for i in 0..$id::lanes() { + // We replace the i-th element of the vector with + // `NaN`: [-3, -3, -3, NaN] + let mut v = v0.replace(i, n); + + // If the NaN is in the last place, the LLVM + // implementation of these methods is broken on some + // targets: + if i == $id::lanes() - 1 && + target_with_broken_last_lane_nan { + // FIXME: + // https://github.com/rust-lang-nursery/packed_simd/issues/5 + // + // If there is a NaN, the result should + // always the largest element, but currently + // when the last element is NaN the current + // implementation incorrectly returns NaN. + // + // The targets mentioned above use different + // codegen that produces the correct result. + // + // These asserts detect if this behavior + // changes + assert!(v.max_element().is_nan(), + // FIXME: ^^^ should be -3. + "[A]: nan at {} => {} | {:?}", + i, v.max_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result + // is still always `-3.` unless all elements of + // the vector are `NaN`s: + // + // This is also broken: + for j in 0..i { + v = v.replace(j, n); + assert!(v.max_element().is_nan(), + // FIXME: ^^^ should be -3. + "[B]: nan at {} => {} | {:?}", + i, v.max_element(), v); + } + + // We are done here, since we were in the last + // lane which is the last iteration of the loop. + break + } + + // We are not in the last lane, and there is only + // one `NaN` in the vector. 
+ + // If the vector has one lane, the result is `NaN`: + if $id::lanes() == 1 { + assert!(v.max_element().is_nan(), + "[C]: all nans | v={:?} | min={} | \ + is_nan: {}", + v, v.max_element(), + v.max_element().is_nan()); + + // And we are done, since the vector only has + // one lane anyways. + break; + } + + // The vector has more than one lane, since there is + // only one `NaN` in the vector, the result is + // always `-3`. + assert_eq!(v.max_element(), -3., + "[D]: nan at {} => {} | {:?}", + i, v.max_element(), v); + + // If we replace all the elements in the vector + // up-to the `i-th` lane with `NaN`s, the result is + // still always `-3.` unless all elements of the + // vector are `NaN`s: + for j in 0..i { + v = v.replace(j, n); + + if i == $id::lanes() - 1 && j == i - 1 { + // All elements of the vector are `NaN`s, + // therefore the result is NaN as well. + // + // Note: the #lanes of the vector is > 1, so + // "i - 1" does not overflow. + assert!(v.max_element().is_nan(), + "[E]: all nans | v={:?} | max={} | \ + is_nan: {}", + v, v.max_element(), + v.max_element().is_nan()); + } else { + // There are non-`NaN` elements in the + // vector, therefore the result is `-3.`: + assert_eq!(v.max_element(), -3., + "[F]: nan at {} => {} | {:?}", + i, v.max_element(), v); + } + } + } + + // If the vector contains all NaNs the result is NaN: + assert!($id::splat(n).max_element().is_nan(), + "all nans | v={:?} | max={} | is_nan: {}", + $id::splat(n), $id::splat(n).max_element(), + $id::splat(n).max_element().is_nan()); + } + } + } + } + } +} diff --git a/third_party/rust/packed_simd/src/api/select.rs b/third_party/rust/packed_simd/src/api/select.rs new file mode 100644 index 0000000000..24525df56c --- /dev/null +++ b/third_party/rust/packed_simd/src/api/select.rs @@ -0,0 +1,75 @@ +//! Implements mask's `select`. + +/// Implements mask select method +macro_rules! 
impl_select { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Selects elements of `a` and `b` using mask. + /// + /// The lanes of the result for which the mask is `true` contain + /// the values of `a`. The remaining lanes contain the values of + /// `b`. + #[inline] + pub fn select<T>(self, a: Simd<T>, b: Simd<T>) -> Simd<T> + where + T: sealed::SimdArray< + NT = <[$elem_ty; $elem_count] as sealed::SimdArray>::NT, + >, + { + use crate::llvm::simd_select; + Simd(unsafe { simd_select(self.0, a.0, b.0) }) + } + } + + test_select!(bool, $id, $id, (false, true) | $test_tt); + }; +} + +macro_rules! test_select { + ( + $elem_ty:ident, + $mask_ty:ident, + $vec_ty:ident,($small:expr, $large:expr) | + $test_tt:tt + ) => { + test_if! { + $test_tt: + paste::item! { + pub mod [<$vec_ty _select>] { + use super::*; + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn select() { + let o = $small as $elem_ty; + let t = $large as $elem_ty; + + let a = $vec_ty::splat(o); + let b = $vec_ty::splat(t); + let m = a.lt(b); + assert_eq!(m.select(a, b), a); + + let m = b.lt(a); + assert_eq!(m.select(b, a), a); + + let mut c = a; + let mut d = b; + let mut m_e = $mask_ty::splat(false); + for i in 0..$vec_ty::lanes() { + if i % 2 == 0 { + let c_tmp = c.extract(i); + c = c.replace(i, d.extract(i)); + d = d.replace(i, c_tmp); + } else { + m_e = m_e.replace(i, true); + } + } + + let m = c.lt(d); + assert_eq!(m_e, m); + assert_eq!(m.select(c, d), a); + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/shuffle.rs b/third_party/rust/packed_simd/src/api/shuffle.rs new file mode 100644 index 0000000000..13a7fae5fc --- /dev/null +++ b/third_party/rust/packed_simd/src/api/shuffle.rs @@ -0,0 +1,190 @@ +//! Implements portable vector shuffles with immediate indices. 
+ +// FIXME: comprehensive tests +// https://github.com/rust-lang-nursery/packed_simd/issues/20 + +/// Shuffles vector elements. +/// +/// This macro returns a new vector that contains a shuffle of the elements in +/// one (`shuffle!(vec, [indices...])`) or two (`shuffle!(vec0, vec1, +/// [indices...])`) input vectors. +/// +/// The type of `vec0` and `vec1` must be equal, and the element type of the +/// resulting vector is the element type of the input vector. +/// +/// The number of `indices` must be a power-of-two in range `[0, 64)`, since +/// currently, the largest vector supported by the library has 64 lanes. The +/// length of the resulting vector equals the number of indices provided. +/// +/// The indices must be in range `[0, M * N)` where `M` is the number of input +/// vectors (`1` or `2`) and `N` is the number of lanes of the input vectors. +/// The indices `i` in range `[0, N)` refer to the `i`-th element of `vec0`, +/// while the indices in range `[N, 2*N)` refer to the `i - N`-th element of +/// `vec1`. +/// +/// # Examples +/// +/// Shuffling elements of two vectors: +/// +/// ``` +/// # #[macro_use] +/// # extern crate packed_simd; +/// # use packed_simd::*; +/// # fn main() { +/// // Shuffle allows reordering the elements: +/// let x = i32x4::new(1, 2, 3, 4); +/// let y = i32x4::new(5, 6, 7, 8); +/// let r = shuffle!(x, y, [4, 0, 5, 1]); +/// assert_eq!(r, i32x4::new(5, 1, 6, 2)); +/// +/// // The resulting vector can als be smaller than the input: +/// let r = shuffle!(x, y, [1, 6]); +/// assert_eq!(r, i32x2::new(2, 7)); +/// +/// // Or larger: +/// let r = shuffle!(x, y, [1, 3, 4, 2, 1, 7, 2, 2]); +/// assert_eq!(r, i32x8::new(2, 4, 5, 3, 2, 8, 3, 3)); +/// // At most 2 * the number of lanes in the input vector. 
+/// # } +/// ``` +/// +/// Shuffling elements of one vector: +/// +/// ``` +/// # #[macro_use] +/// # extern crate packed_simd; +/// # use packed_simd::*; +/// # fn main() { +/// // Shuffle allows reordering the elements of a vector: +/// let x = i32x4::new(1, 2, 3, 4); +/// let r = shuffle!(x, [2, 1, 3, 0]); +/// assert_eq!(r, i32x4::new(3, 2, 4, 1)); +/// +/// // The resulting vector can be smaller than the input: +/// let r = shuffle!(x, [1, 3]); +/// assert_eq!(r, i32x2::new(2, 4)); +/// +/// // Equal: +/// let r = shuffle!(x, [1, 3, 2, 0]); +/// assert_eq!(r, i32x4::new(2, 4, 3, 1)); +/// +/// // Or larger: +/// let r = shuffle!(x, [1, 3, 2, 2, 1, 3, 2, 2]); +/// assert_eq!(r, i32x8::new(2, 4, 3, 3, 2, 4, 3, 3)); +/// // At most 2 * the number of lanes in the input vector. +/// # } +/// ``` +#[macro_export] +macro_rules! shuffle { + ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector2( + $vec0.0, + $vec1.0, + [$l0, $l1], + )) + } + }}; + ($vec0:expr, $vec1:expr, [$l0:expr, $l1:expr, $l2:expr, $l3:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector4( + $vec0.0, + $vec1.0, + [$l0, $l1, $l2, $l3], + )) + } + }}; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector8( + $vec0.0, + $vec1.0, + [$l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7], + )) + } + }}; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, + $l12:expr, $l13:expr, $l14:expr, $l15:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector16( + $vec0.0, + $vec1.0, + [ + $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, + $l11, $l12, $l13, $l14, $l15, + ], + )) + } + }}; + ($vec0:expr, $vec1:expr, + [$l0:expr, 
$l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, + $l12:expr, $l13:expr, $l14:expr, $l15:expr, + $l16:expr, $l17:expr, $l18:expr, $l19:expr, + $l20:expr, $l21:expr, $l22:expr, $l23:expr, + $l24:expr, $l25:expr, $l26:expr, $l27:expr, + $l28:expr, $l29:expr, $l30:expr, $l31:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector32( + $vec0.0, + $vec1.0, + [ + $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, + $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, + $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, + $l29, $l30, $l31, + ], + )) + } + }}; + ($vec0:expr, $vec1:expr, + [$l0:expr, $l1:expr, $l2:expr, $l3:expr, + $l4:expr, $l5:expr, $l6:expr, $l7:expr, + $l8:expr, $l9:expr, $l10:expr, $l11:expr, + $l12:expr, $l13:expr, $l14:expr, $l15:expr, + $l16:expr, $l17:expr, $l18:expr, $l19:expr, + $l20:expr, $l21:expr, $l22:expr, $l23:expr, + $l24:expr, $l25:expr, $l26:expr, $l27:expr, + $l28:expr, $l29:expr, $l30:expr, $l31:expr, + $l32:expr, $l33:expr, $l34:expr, $l35:expr, + $l36:expr, $l37:expr, $l38:expr, $l39:expr, + $l40:expr, $l41:expr, $l42:expr, $l43:expr, + $l44:expr, $l45:expr, $l46:expr, $l47:expr, + $l48:expr, $l49:expr, $l50:expr, $l51:expr, + $l52:expr, $l53:expr, $l54:expr, $l55:expr, + $l56:expr, $l57:expr, $l58:expr, $l59:expr, + $l60:expr, $l61:expr, $l62:expr, $l63:expr]) => {{ + #[allow(unused_unsafe)] + unsafe { + $crate::Simd($crate::__shuffle_vector64( + $vec0.0, + $vec1.0, + [ + $l0, $l1, $l2, $l3, $l4, $l5, $l6, $l7, $l8, $l9, $l10, + $l11, $l12, $l13, $l14, $l15, $l16, $l17, $l18, $l19, + $l20, $l21, $l22, $l23, $l24, $l25, $l26, $l27, $l28, + $l29, $l30, $l31, $l32, $l33, $l34, $l35, $l36, $l37, + $l38, $l39, $l40, $l41, $l42, $l43, $l44, $l45, $l46, + $l47, $l48, $l49, $l50, $l51, $l52, $l53, $l54, $l55, + $l56, $l57, $l58, $l59, $l60, $l61, $l62, $l63, + ], + )) + } + }}; + ($vec:expr, [$($l:expr),*]) => { + match $vec { + v 
=> shuffle!(v, v, [$($l),*]) + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs new file mode 100644 index 0000000000..64536be6cb --- /dev/null +++ b/third_party/rust/packed_simd/src/api/shuffle1_dyn.rs @@ -0,0 +1,159 @@ +//! Shuffle vector elements according to a dynamic vector of indices. + +macro_rules! impl_shuffle1_dyn { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Shuffle vector elements according to `indices`. + #[inline] + pub fn shuffle1_dyn<I>(self, indices: I) -> Self + where + Self: codegen::shuffle1_dyn::Shuffle1Dyn<Indices = I>, + { + codegen::shuffle1_dyn::Shuffle1Dyn::shuffle1_dyn(self, indices) + } + } + }; +} + +macro_rules! test_shuffle1_dyn { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _shuffle1_dyn>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn shuffle1_dyn() { + let increasing = { + let mut v = $id::splat(0 as $elem_ty); + for i in 0..$id::lanes() { + v = v.replace(i, i as $elem_ty); + } + v + }; + let decreasing = { + let mut v = $id::splat(0 as $elem_ty); + for i in 0..$id::lanes() { + v = v.replace( + i, + ($id::lanes() - 1 - i) as $elem_ty + ); + } + v + }; + + type Indices = < + $id as codegen::shuffle1_dyn::Shuffle1Dyn + >::Indices; + let increasing_ids: Indices = increasing.cast(); + let decreasing_ids: Indices = decreasing.cast(); + + assert_eq!( + increasing.shuffle1_dyn(increasing_ids), + increasing, + "(i,i)=>i" + ); + assert_eq!( + decreasing.shuffle1_dyn(increasing_ids), + decreasing, + "(d,i)=>d" + ); + assert_eq!( + increasing.shuffle1_dyn(decreasing_ids), + decreasing, + "(i,d)=>d" + ); + assert_eq!( + decreasing.shuffle1_dyn(decreasing_ids), + increasing, + "(d,d)=>i" + ); + + for i in 0..$id::lanes() { + let v_ids: Indices + = $id::splat(i 
as $elem_ty).cast(); + assert_eq!(increasing.shuffle1_dyn(v_ids), + $id::splat(increasing.extract(i)) + ); + assert_eq!(decreasing.shuffle1_dyn(v_ids), + $id::splat(decreasing.extract(i)) + ); + assert_eq!( + $id::splat(i as $elem_ty) + .shuffle1_dyn(increasing_ids), + $id::splat(i as $elem_ty) + ); + assert_eq!( + $id::splat(i as $elem_ty) + .shuffle1_dyn(decreasing_ids), + $id::splat(i as $elem_ty) + ); + } + } + } + } + } + }; +} + +macro_rules! test_shuffle1_dyn_mask { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + test_if! { + $test_tt: + paste::item! { + pub mod [<$id _shuffle1_dyn>] { + use super::*; + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn shuffle1_dyn() { + // alternating = [true, false, true, false, ...] + let mut alternating = $id::splat(false); + for i in 0..$id::lanes() { + if i % 2 == 0 { + alternating = alternating.replace(i, true); + } + } + + type Indices = < + $id as codegen::shuffle1_dyn::Shuffle1Dyn + >::Indices; + // even = [0, 0, 2, 2, 4, 4, ..] + let even = { + let mut v = Indices::splat(0); + for i in 0..$id::lanes() { + if i % 2 == 0 { + v = v.replace(i, (i as u8).into()); + } else { + v = v.replace(i, (i as u8 - 1).into()); + } + } + v + }; + // odd = [1, 1, 3, 3, 5, 5, ...] + let odd = { + let mut v = Indices::splat(0); + for i in 0..$id::lanes() { + if i % 2 != 0 { + v = v.replace(i, (i as u8).into()); + } else { + v = v.replace(i, (i as u8 + 1).into()); + } + } + v + }; + + assert_eq!( + alternating.shuffle1_dyn(even), + $id::splat(true) + ); + if $id::lanes() > 1 { + assert_eq!( + alternating.shuffle1_dyn(odd), + $id::splat(false) + ); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/slice.rs b/third_party/rust/packed_simd/src/api/slice.rs new file mode 100644 index 0000000000..526b848b5c --- /dev/null +++ b/third_party/rust/packed_simd/src/api/slice.rs @@ -0,0 +1,7 @@ +//! 
Slice from/to methods + +#[macro_use] +mod from_slice; + +#[macro_use] +mod write_to_slice; diff --git a/third_party/rust/packed_simd/src/api/slice/from_slice.rs b/third_party/rust/packed_simd/src/api/slice/from_slice.rs new file mode 100644 index 0000000000..25082d1e68 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/slice/from_slice.rs @@ -0,0 +1,218 @@ +//! Implements methods to read a vector type from a slice. + +macro_rules! impl_slice_from_slice { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned + /// to an `align_of::<Self>()` boundary. + #[inline] + pub fn from_slice_aligned(slice: &[$elem_ty]) -> Self { + unsafe { + assert!(slice.len() >= $elem_count); + let target_ptr = slice.get_unchecked(0) as *const $elem_ty; + assert_eq!( + target_ptr + .align_offset(crate::mem::align_of::<Self>()), + 0 + ); + Self::from_slice_aligned_unchecked(slice) + } + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()`. + #[inline] + pub fn from_slice_unaligned(slice: &[$elem_ty]) -> Self { + unsafe { + assert!(slice.len() >= $elem_count); + Self::from_slice_unaligned_unchecked(slice) + } + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Safety + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not aligned + /// to an `align_of::<Self>()` boundary, the behavior is undefined. 
+ #[inline] + pub unsafe fn from_slice_aligned_unchecked( + slice: &[$elem_ty], + ) -> Self { + debug_assert!(slice.len() >= $elem_count); + let target_ptr = slice.get_unchecked(0) as *const $elem_ty; + debug_assert_eq!( + target_ptr.align_offset(crate::mem::align_of::<Self>()), + 0 + ); + + #[allow(clippy::cast_ptr_alignment)] + *(target_ptr as *const Self) + } + + /// Instantiates a new vector with the values of the `slice`. + /// + /// # Safety + /// + /// If `slice.len() < Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn from_slice_unaligned_unchecked( + slice: &[$elem_ty], + ) -> Self { + use crate::mem::size_of; + debug_assert!(slice.len() >= $elem_count); + let target_ptr = + slice.get_unchecked(0) as *const $elem_ty as *const u8; + let mut x = Self::splat(0 as $elem_ty); + let self_ptr = &mut x as *mut Self as *mut u8; + crate::ptr::copy_nonoverlapping( + target_ptr, + self_ptr, + size_of::<Self>(), + ); + x + } + } + + test_if! { + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. 
+ #[allow(clippy::float_cmp)] + pub mod [<$id _slice_from_slice>] { + use super::*; + use crate::iter::Iterator; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_slice_unaligned() { + let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; + unaligned[0] = 0 as $elem_ty; + let vec = $id::from_slice_unaligned(&unaligned[1..]); + for (index, &b) in unaligned.iter().enumerate() { + if index == 0 { + assert_eq!(b, 0 as $elem_ty); + } else { + assert_eq!(b, 42 as $elem_ty); + assert_eq!(b, vec.extract(index - 1)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_unaligned_fail() { + let mut unaligned = [42 as $elem_ty; $id::lanes() + 1]; + unaligned[0] = 0 as $elem_ty; + // the slice is not large enough => panic + let _vec = $id::from_slice_unaligned(&unaligned[2..]); + } + + union A { + data: [$elem_ty; 2 * $id::lanes()], + _vec: $id, + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_slice_aligned() { + let mut aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + for i in $id::lanes()..(2 * $id::lanes()) { + unsafe { + aligned.data[i] = 42 as $elem_ty; + } + } + + let vec = unsafe { + $id::from_slice_aligned( + &aligned.data[$id::lanes()..] 
+ ) + }; + for (index, &b) in + unsafe { aligned.data.iter().enumerate() } { + if index < $id::lanes() { + assert_eq!(b, 0 as $elem_ty); + } else { + assert_eq!(b, 42 as $elem_ty); + assert_eq!( + b, vec.extract(index - $id::lanes()) + ); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_aligned_fail_lanes() { + let aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + let _vec = unsafe { + $id::from_slice_aligned( + &aligned.data[2 * $id::lanes()..] + ) + }; + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn from_slice_aligned_fail_align() { + unsafe { + let aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + + // get a pointer to the front of data + let ptr: *const $elem_ty = aligned.data.as_ptr() + as *const $elem_ty; + // offset pointer by one element + let ptr = ptr.wrapping_add(1); + + if ptr.align_offset( + crate::mem::align_of::<$id>() + ) == 0 { + // the pointer is properly aligned, so + // from_slice_aligned won't fail here (e.g. this + // can happen for i128x1). 
So we panic to make + // the "should_fail" test pass: + panic!("ok"); + } + + // create a slice - this is safe, because the + // elements of the slice exist, are properly + // initialized, and properly aligned: + let s: &[$elem_ty] = slice::from_raw_parts( + ptr, $id::lanes() + ); + // this should always panic because the slice + // alignment does not match the alignment + // requirements for the vector type: + let _vec = $id::from_slice_aligned(s); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs new file mode 100644 index 0000000000..b634d98b99 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/slice/write_to_slice.rs @@ -0,0 +1,213 @@ +//! Implements methods to write a vector type to a slice. + +macro_rules! impl_slice_write_to_slice { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Writes the values of the vector to the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not + /// aligned to an `align_of::<Self>()` boundary. + #[inline] + pub fn write_to_slice_aligned(self, slice: &mut [$elem_ty]) { + unsafe { + assert!(slice.len() >= $elem_count); + let target_ptr = + slice.get_unchecked_mut(0) as *mut $elem_ty; + assert_eq!( + target_ptr + .align_offset(crate::mem::align_of::<Self>()), + 0 + ); + self.write_to_slice_aligned_unchecked(slice); + } + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Panics + /// + /// If `slice.len() < Self::lanes()`. + #[inline] + pub fn write_to_slice_unaligned(self, slice: &mut [$elem_ty]) { + unsafe { + assert!(slice.len() >= $elem_count); + self.write_to_slice_unaligned_unchecked(slice); + } + } + + /// Writes the values of the vector to the `slice`. 
+ /// + /// # Safety + /// + /// If `slice.len() < Self::lanes()` or `&slice[0]` is not + /// aligned to an `align_of::<Self>()` boundary, the behavior is + /// undefined. + #[inline] + pub unsafe fn write_to_slice_aligned_unchecked( + self, slice: &mut [$elem_ty], + ) { + debug_assert!(slice.len() >= $elem_count); + let target_ptr = slice.get_unchecked_mut(0) as *mut $elem_ty; + debug_assert_eq!( + target_ptr.align_offset(crate::mem::align_of::<Self>()), + 0 + ); + + #[allow(clippy::cast_ptr_alignment)] + #[allow(clippy::cast_ptr_alignment)] + #[allow(clippy::cast_ptr_alignment)] + #[allow(clippy::cast_ptr_alignment)] + *(target_ptr as *mut Self) = self; + } + + /// Writes the values of the vector to the `slice`. + /// + /// # Safety + /// + /// If `slice.len() < Self::lanes()` the behavior is undefined. + #[inline] + pub unsafe fn write_to_slice_unaligned_unchecked( + self, slice: &mut [$elem_ty], + ) { + debug_assert!(slice.len() >= $elem_count); + let target_ptr = + slice.get_unchecked_mut(0) as *mut $elem_ty as *mut u8; + let self_ptr = &self as *const Self as *const u8; + crate::ptr::copy_nonoverlapping( + self_ptr, + target_ptr, + crate::mem::size_of::<Self>(), + ); + } + } + + test_if! { + $test_tt: + paste::item! { + // Comparisons use integer casts within mantissa^1 range. 
+ #[allow(clippy::float_cmp)] + pub mod [<$id _slice_write_to_slice>] { + use super::*; + use crate::iter::Iterator; + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn write_to_slice_unaligned() { + let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; + let vec = $id::splat(42 as $elem_ty); + vec.write_to_slice_unaligned(&mut unaligned[1..]); + for (index, &b) in unaligned.iter().enumerate() { + if index == 0 { + assert_eq!(b, 0 as $elem_ty); + } else { + assert_eq!(b, 42 as $elem_ty); + assert_eq!(b, vec.extract(index - 1)); + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_unaligned_fail() { + let mut unaligned = [0 as $elem_ty; $id::lanes() + 1]; + let vec = $id::splat(42 as $elem_ty); + vec.write_to_slice_unaligned(&mut unaligned[2..]); + } + + union A { + data: [$elem_ty; 2 * $id::lanes()], + _vec: $id, + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] + #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn write_to_slice_aligned() { + let mut aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + let vec = $id::splat(42 as $elem_ty); + unsafe { + vec.write_to_slice_aligned( + &mut aligned.data[$id::lanes()..] 
+ ); + for (idx, &b) in aligned.data.iter().enumerate() { + if idx < $id::lanes() { + assert_eq!(b, 0 as $elem_ty); + } else { + assert_eq!(b, 42 as $elem_ty); + assert_eq!( + b, vec.extract(idx - $id::lanes()) + ); + } + } + } + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_aligned_fail_lanes() { + let mut aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + let vec = $id::splat(42 as $elem_ty); + unsafe { + vec.write_to_slice_aligned( + &mut aligned.data[2 * $id::lanes()..] + ) + }; + } + + // FIXME: wasm-bindgen-test does not support #[should_panic] + // #[cfg_attr(not(target_arch = "wasm32"), test)] + // #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + #[cfg(not(target_arch = "wasm32"))] + #[test] + #[should_panic] + fn write_to_slice_aligned_fail_align() { + unsafe { + let mut aligned = A { + data: [0 as $elem_ty; 2 * $id::lanes()], + }; + + // get a pointer to the front of data + let ptr: *mut $elem_ty + = aligned.data.as_mut_ptr() as *mut $elem_ty; + // offset pointer by one element + let ptr = ptr.wrapping_add(1); + + if ptr.align_offset(crate::mem::align_of::<$id>()) + == 0 { + // the pointer is properly aligned, so + // write_to_slice_aligned won't fail here (e.g. + // this can happen for i128x1). 
So we panic to + // make the "should_fail" test pass: + panic!("ok"); + } + + // create a slice - this is safe, because the + // elements of the slice exist, are properly + // initialized, and properly aligned: + let s: &mut [$elem_ty] + = slice::from_raw_parts_mut(ptr, $id::lanes()); + // this should always panic because the slice + // alignment does not match the alignment + // requirements for the vector type: + let vec = $id::splat(42 as $elem_ty); + vec.write_to_slice_aligned(s); + } + } + } + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/api/swap_bytes.rs b/third_party/rust/packed_simd/src/api/swap_bytes.rs new file mode 100644 index 0000000000..53bba25bd3 --- /dev/null +++ b/third_party/rust/packed_simd/src/api/swap_bytes.rs @@ -0,0 +1,192 @@ +//! Horizontal swap bytes + +macro_rules! impl_swap_bytes { + ([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => { + impl $id { + /// Reverses the byte order of the vector. + #[inline] + pub fn swap_bytes(self) -> Self { + super::codegen::swap_bytes::SwapBytes::swap_bytes(self) + } + + /// Converts self to little endian from the target's endianness. + /// + /// On little endian this is a no-op. On big endian the bytes are + /// swapped. + #[inline] + pub fn to_le(self) -> Self { + #[cfg(target_endian = "little")] + { + self + } + #[cfg(not(target_endian = "little"))] + { + self.swap_bytes() + } + } + + /// Converts self to big endian from the target's endianness. + /// + /// On big endian this is a no-op. On little endian the bytes are + /// swapped. + #[inline] + pub fn to_be(self) -> Self { + #[cfg(target_endian = "big")] + { + self + } + #[cfg(not(target_endian = "big"))] + { + self.swap_bytes() + } + } + + /// Converts a vector from little endian to the target's endianness. + /// + /// On little endian this is a no-op. On big endian the bytes are + /// swapped. 
+ #[inline] + pub fn from_le(x: Self) -> Self { + #[cfg(target_endian = "little")] + { + x + } + #[cfg(not(target_endian = "little"))] + { + x.swap_bytes() + } + } + + /// Converts a vector from big endian to the target's endianness. + /// + /// On big endian this is a no-op. On little endian the bytes are + /// swapped. + #[inline] + pub fn from_be(x: Self) -> Self { + #[cfg(target_endian = "big")] + { + x + } + #[cfg(not(target_endian = "big"))] + { + x.swap_bytes() + } + } + } + + test_if! { + $test_tt: + paste::item_with_macros! { + pub mod [<$id _swap_bytes>] { + use super::*; + + const BYTES: [u8; 64] = [ + 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15, + 16, 17, 18, 19, 20, 21, 22, 23, + 24, 25, 26, 27, 28, 29, 30, 31, + 32, 33, 34, 35, 36, 37, 38, 39, + 40, 41, 42, 43, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, + 56, 57, 58, 59, 60, 61, 62, 63, + ]; + + macro_rules! swap { + ($func: ident) => {{ + // catch possible future >512 vectors + assert!(mem::size_of::<$id>() <= 64); + + let mut actual = BYTES; + let elems: &mut [$elem_ty] = unsafe { + slice::from_raw_parts_mut( + actual.as_mut_ptr() as *mut $elem_ty, + $id::lanes(), + ) + }; + + let vec = $id::from_slice_unaligned(elems); + $id::$func(vec).write_to_slice_unaligned(elems); + + actual + }}; + } + + macro_rules! test_swap { + ($func: ident) => {{ + let actual = swap!($func); + let expected = + BYTES.iter().rev() + .skip(64 - crate::mem::size_of::<$id>()); + assert!(actual.iter().zip(expected) + .all(|(x, y)| x == y)); + }}; + } + + macro_rules! 
test_no_swap { + ($func: ident) => {{ + let actual = swap!($func); + let expected = BYTES.iter() + .take(mem::size_of::<$id>()); + + assert!(actual.iter().zip(expected) + .all(|(x, y)| x == y)); + }}; + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn swap_bytes() { + test_swap!(swap_bytes); + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn to_le() { + #[cfg(target_endian = "little")] + { + test_no_swap!(to_le); + } + #[cfg(not(target_endian = "little"))] + { + test_swap!(to_le); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn to_be() { + #[cfg(target_endian = "big")] + { + test_no_swap!(to_be); + } + #[cfg(not(target_endian = "big"))] + { + test_swap!(to_be); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_le() { + #[cfg(target_endian = "little")] + { + test_no_swap!(from_le); + } + #[cfg(not(target_endian = "little"))] + { + test_swap!(from_le); + } + } + + #[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)] + fn from_be() { + #[cfg(target_endian = "big")] + { + test_no_swap!(from_be); + } + #[cfg(not(target_endian = "big"))] + { + test_swap!(from_be); + } + } + } + } + } + }; +} |