diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-07 19:33:14 +0000 |
commit | 36d22d82aa202bb199967e9512281e9a53db42c9 (patch) | |
tree | 105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/packed_simd/src/codegen/reductions | |
parent | Initial commit. (diff) | |
download | firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip |
Adding upstream version 115.7.0esr.upstream/115.7.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/packed_simd/src/codegen/reductions')
10 files changed, 900 insertions, 0 deletions
diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs new file mode 100644 index 0000000000..a78bcc5632 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask.rs @@ -0,0 +1,69 @@ +//! Code generation workaround for `all()` mask horizontal reduction. +//! +//! Works around [LLVM bug 36702]. +//! +//! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702 +#![allow(unused_macros)] + +use crate::*; + +pub(crate) trait All: crate::marker::Sized { + unsafe fn all(self) -> bool; +} + +pub(crate) trait Any: crate::marker::Sized { + unsafe fn any(self) -> bool; +} + +#[macro_use] +mod fallback_impl; + +cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + #[macro_use] + mod x86; + } else if #[cfg(all(target_arch = "arm", target_feature = "v7", + target_feature = "neon", + any(feature = "core_arch", libcore_neon)))] { + #[macro_use] + mod arm; + } else if #[cfg(all(target_arch = "aarch64", target_feature = "neon"))] { + #[macro_use] + mod aarch64; + } else { + #[macro_use] + mod fallback; + } +} + +impl_mask_reductions!(m8x2); +impl_mask_reductions!(m8x4); +impl_mask_reductions!(m8x8); +impl_mask_reductions!(m8x16); +impl_mask_reductions!(m8x32); +impl_mask_reductions!(m8x64); + +impl_mask_reductions!(m16x2); +impl_mask_reductions!(m16x4); +impl_mask_reductions!(m16x8); +impl_mask_reductions!(m16x16); +impl_mask_reductions!(m16x32); + +impl_mask_reductions!(m32x2); +impl_mask_reductions!(m32x4); +impl_mask_reductions!(m32x8); +impl_mask_reductions!(m32x16); + +// FIXME: 64-bit single element vector +// impl_mask_reductions!(m64x1); +impl_mask_reductions!(m64x2); +impl_mask_reductions!(m64x4); +impl_mask_reductions!(m64x8); + +impl_mask_reductions!(m128x1); +impl_mask_reductions!(m128x2); +impl_mask_reductions!(m128x4); + +impl_mask_reductions!(msizex2); +impl_mask_reductions!(msizex4); +impl_mask_reductions!(msizex8); diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs new file mode 100644 index 0000000000..b2db52c891 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/aarch64.rs @@ -0,0 +1,81 @@ +//! Mask reductions implementation for `aarch64` targets + +/// 128-bit wide vectors +macro_rules! aarch64_128_neon_impl { + ($id:ident, $vmin:ident, $vmax:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn all(self) -> bool { + use crate::arch::aarch64::$vmin; + $vmin(crate::mem::transmute(self)) != 0 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn any(self) -> bool { + use crate::arch::aarch64::$vmax; + $vmax(crate::mem::transmute(self)) != 0 + } + } + }; +} + +/// 64-bit wide vectors +macro_rules! aarch64_64_neon_impl { + ($id:ident, $vec128:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn all(self) -> bool { + // Duplicates the 64-bit vector into a 128-bit one and + // calls all on that. + union U { + halves: ($id, $id), + vec: $vec128, + } + U { halves: (self, self) }.vec.all() + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "neon")] + unsafe fn any(self) -> bool { + union U { + halves: ($id, $id), + vec: $vec128, + } + U { halves: (self, self) }.vec.any() + } + } + }; +} + +/// Mask reduction implementation for `aarch64` targets +macro_rules! impl_mask_reductions { + // 64-bit wide masks + (m8x8) => { + aarch64_64_neon_impl!(m8x8, m8x16); + }; + (m16x4) => { + aarch64_64_neon_impl!(m16x4, m16x8); + }; + (m32x2) => { + aarch64_64_neon_impl!(m32x2, m32x4); + }; + // 128-bit wide masks + (m8x16) => { + aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); + }; + (m16x8) => { + aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); + }; + (m32x4) => { + aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); + }; + // Fallback to LLVM's default code-generation: + ($id:ident) => { + fallback_impl!($id); + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs new file mode 100644 index 0000000000..41c3cbc58a --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/arm.rs @@ -0,0 +1,56 @@ +//! Mask reductions implementation for `arm` targets + +/// Implementation for ARM + v7 + NEON for 64-bit or 128-bit wide vectors with +/// more than two elements. +macro_rules! arm_128_v7_neon_impl { + ($id:ident, $half:ident, $vpmin:ident, $vpmax:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "v7,neon")] + unsafe fn all(self) -> bool { + use crate::arch::arm::$vpmin; + use crate::mem::transmute; + union U { + halves: ($half, $half), + vec: $id, + } + let halves = U { vec: self }.halves; + let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1))); + h.all() + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "v7,neon")] + unsafe fn any(self) -> bool { + use crate::arch::arm::$vpmax; + use crate::mem::transmute; + union U { + halves: ($half, $half), + vec: $id, + } + let halves = U { vec: self }.halves; + let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1))); + h.any() + } + } + }; +} + +/// Mask reduction implementation for `arm` targets +macro_rules! impl_mask_reductions { + // 128-bit wide masks + (m8x16) => { + arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); + }; + (m16x8) => { + arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); + }; + (m32x4) => { + arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); + }; + // Fallback to LLVM's default code-generation: + ($id:ident) => { + fallback_impl!($id); + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs new file mode 100644 index 0000000000..4c377a6878 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback.rs @@ -0,0 +1,8 @@ +//! Default mask reduction implementations. + +/// Default mask reduction implementation +macro_rules! impl_mask_reductions { + ($id:ident) => { + fallback_impl!($id); + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs new file mode 100644 index 0000000000..0d246e2fda --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/fallback_impl.rs @@ -0,0 +1,237 @@ +//! Default implementation of a mask reduction for any target. + +macro_rules! fallback_to_other_impl { + ($id:ident, $other:ident) => { + impl All for $id { + #[inline] + unsafe fn all(self) -> bool { + let m: $other = crate::mem::transmute(self); + m.all() + } + } + impl Any for $id { + #[inline] + unsafe fn any(self) -> bool { + let m: $other = crate::mem::transmute(self); + m.any() + } + } + }; +} + +/// Fallback implementation. +macro_rules! fallback_impl { + // 16-bit wide masks: + (m8x2) => { + impl All for m8x2 { + #[inline] + unsafe fn all(self) -> bool { + let i: u16 = crate::mem::transmute(self); + i == u16::max_value() + } + } + impl Any for m8x2 { + #[inline] + unsafe fn any(self) -> bool { + let i: u16 = crate::mem::transmute(self); + i != 0 + } + } + }; + // 32-bit wide masks + (m8x4) => { + impl All for m8x4 { + #[inline] + unsafe fn all(self) -> bool { + let i: u32 = crate::mem::transmute(self); + i == u32::max_value() + } + } + impl Any for m8x4 { + #[inline] + unsafe fn any(self) -> bool { + let i: u32 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x2) => { + fallback_to_other_impl!(m16x2, m8x4); + }; + // 64-bit wide masks: + (m8x8) => { + impl All for m8x8 { + #[inline] + unsafe fn all(self) -> bool { + let i: u64 = crate::mem::transmute(self); + i == u64::max_value() + } + } + impl Any for m8x8 { + #[inline] + unsafe fn any(self) -> bool { + let i: u64 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x4) => { + fallback_to_other_impl!(m16x4, m8x8); + }; + (m32x2) => { + fallback_to_other_impl!(m32x2, m16x4); + }; + // FIXME: 64x1 maxk + // 128-bit wide masks: + (m8x16) => { + impl All for m8x16 { + #[inline] + unsafe fn all(self) -> bool { + let i: u128 = crate::mem::transmute(self); + i == u128::max_value() + } + } + impl Any for m8x16 { + #[inline] + unsafe fn any(self) -> bool { + let i: u128 = crate::mem::transmute(self); + i != 0 + } + } + }; + (m16x8) => { + fallback_to_other_impl!(m16x8, m8x16); + }; + (m32x4) => { + fallback_to_other_impl!(m32x4, m16x8); + }; + (m64x2) => { + fallback_to_other_impl!(m64x2, m32x4); + }; + (m128x1) => { + fallback_to_other_impl!(m128x1, m64x2); + }; + // 256-bit wide masks + (m8x32) => { + impl All for m8x32 { + #[inline] + unsafe fn all(self) -> bool { + let i: [u128; 2] = crate::mem::transmute(self); + let o: [u128; 2] = [u128::max_value(); 2]; + i == o + } + } + impl Any for m8x32 { + #[inline] + unsafe fn any(self) -> bool { + let i: [u128; 2] = crate::mem::transmute(self); + let o: [u128; 2] = [0; 2]; + i != o + } + } + }; + (m16x16) => { + fallback_to_other_impl!(m16x16, m8x32); + }; + (m32x8) => { + fallback_to_other_impl!(m32x8, m16x16); + }; + (m64x4) => { + fallback_to_other_impl!(m64x4, m32x8); + }; + (m128x2) => { + fallback_to_other_impl!(m128x2, m64x4); + }; + // 512-bit wide masks + (m8x64) => { + impl All for m8x64 { + #[inline] + unsafe fn all(self) -> bool { + let i: [u128; 4] = crate::mem::transmute(self); + let o: [u128; 4] = [u128::max_value(); 4]; + i == o + } + } + impl Any for m8x64 { + #[inline] + unsafe fn any(self) -> bool { + let i: [u128; 4] = crate::mem::transmute(self); + let o: [u128; 4] = [0; 4]; + i != o + } + } + }; + (m16x32) => { + fallback_to_other_impl!(m16x32, m8x64); + }; + (m32x16) => { + fallback_to_other_impl!(m32x16, m16x32); + }; + (m64x8) => { + fallback_to_other_impl!(m64x8, m32x16); + }; + (m128x4) => { + fallback_to_other_impl!(m128x4, m64x8); + }; + // Masks with pointer-sized elements64 + (msizex2) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex2, m64x2); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex2, m32x2); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex4) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex4, m64x4); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex4, m32x4); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex8) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex8, m64x8); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex8, m32x8); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; +} + +macro_rules! recurse_half { + ($vid:ident, $vid_h:ident) => { + impl All for $vid { + #[inline] + unsafe fn all(self) -> bool { + union U { + halves: ($vid_h, $vid_h), + vec: $vid, + } + let halves = U { vec: self }.halves; + halves.0.all() && halves.1.all() + } + } + impl Any for $vid { + #[inline] + unsafe fn any(self) -> bool { + union U { + halves: ($vid_h, $vid_h), + vec: $vid, + } + let halves = U { vec: self }.halves; + halves.0.any() || halves.1.any() + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs new file mode 100644 index 0000000000..4bf5098065 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86.rs @@ -0,0 +1,216 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets + +#[cfg(target_feature = "sse")] +#[macro_use] +mod sse; + +#[cfg(target_feature = "sse2")] +#[macro_use] +mod sse2; + +#[cfg(target_feature = "avx")] +#[macro_use] +mod avx; + +#[cfg(target_feature = "avx2")] +#[macro_use] +mod avx2; + +/// x86 64-bit m8x8 implementation +macro_rules! x86_m8x8_impl { + ($id:ident) => { + fallback_impl!($id); + }; +} + +/// x86 128-bit m8x16 implementation +macro_rules! x86_m8x16_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(target_feature = "sse2")] { + x86_m8x16_sse2_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 128-bit m32x4 implementation +macro_rules! x86_m32x4_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(target_feature = "sse")] { + x86_m32x4_sse_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 128-bit m64x2 implementation +macro_rules! x86_m64x2_impl { + ($id:ident) => { + cfg_if! { + if #[cfg(target_feature = "sse2")] { + x86_m64x2_sse2_impl!($id); + } else if #[cfg(target_feature = "sse")] { + x86_m32x4_sse_impl!($id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m8x32 implementation +macro_rules! x86_m8x32_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx2")] { + x86_m8x32_avx2_impl!($id); + } else if #[cfg(target_feature = "avx")] { + x86_m8x32_avx_impl!($id); + } else if #[cfg(target_feature = "sse2")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m32x8 implementation +macro_rules! x86_m32x8_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx")] { + x86_m32x8_avx_impl!($id); + } else if #[cfg(target_feature = "sse")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// x86 256-bit m64x4 implementation +macro_rules! x86_m64x4_impl { + ($id:ident, $half_id:ident) => { + cfg_if! { + if #[cfg(target_feature = "avx")] { + x86_m64x4_avx_impl!($id); + } else if #[cfg(target_feature = "sse")] { + recurse_half!($id, $half_id); + } else { + fallback_impl!($id); + } + } + }; +} + +/// Fallback implementation. +macro_rules! x86_intr_impl { + ($id:ident) => { + impl All for $id { + #[inline] + unsafe fn all(self) -> bool { + use crate::llvm::simd_reduce_all; + simd_reduce_all(self.0) + } + } + impl Any for $id { + #[inline] + unsafe fn any(self) -> bool { + use crate::llvm::simd_reduce_any; + simd_reduce_any(self.0) + } + } + }; +} + +/// Mask reduction implementation for `x86` and `x86_64` targets +macro_rules! impl_mask_reductions { + // 64-bit wide masks + (m8x8) => { + x86_m8x8_impl!(m8x8); + }; + (m16x4) => { + x86_m8x8_impl!(m16x4); + }; + (m32x2) => { + x86_m8x8_impl!(m32x2); + }; + // 128-bit wide masks + (m8x16) => { + x86_m8x16_impl!(m8x16); + }; + (m16x8) => { + x86_m8x16_impl!(m16x8); + }; + (m32x4) => { + x86_m32x4_impl!(m32x4); + }; + (m64x2) => { + x86_m64x2_impl!(m64x2); + }; + (m128x1) => { + x86_intr_impl!(m128x1); + }; + // 256-bit wide masks: + (m8x32) => { + x86_m8x32_impl!(m8x32, m8x16); + }; + (m16x16) => { + x86_m8x32_impl!(m16x16, m16x8); + }; + (m32x8) => { + x86_m32x8_impl!(m32x8, m32x4); + }; + (m64x4) => { + x86_m64x4_impl!(m64x4, m64x2); + }; + (m128x2) => { + x86_intr_impl!(m128x2); + }; + (msizex2) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex2, m64x2); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex2, m32x2); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex4) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex4, m64x4); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex4, m32x4); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + (msizex8) => { + cfg_if! { + if #[cfg(target_pointer_width = "64")] { + fallback_to_other_impl!(msizex8, m64x8); + } else if #[cfg(target_pointer_width = "32")] { + fallback_to_other_impl!(msizex8, m32x8); + } else { + compile_error!("unsupported target_pointer_width"); + } + } + }; + + // Fallback to LLVM's default code-generation: + ($id:ident) => { + fallback_impl!($id); + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs new file mode 100644 index 0000000000..61f352d228 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx.rs @@ -0,0 +1,95 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX` + +/// `x86`/`x86_64` 256-bit `AVX` implementation +/// FIXME: it might be faster here to do two `_mm_movmask_epi8` +#[cfg(target_feature = "avx")] +macro_rules! x86_m8x32_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "avx")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_testc_si256; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_testc_si256; + _mm256_testc_si256(crate::mem::transmute(self), crate::mem::transmute($id::splat(true))) != 0 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "avx")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_testz_si256; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_testz_si256; + _mm256_testz_si256(crate::mem::transmute(self), crate::mem::transmute(self)) == 0 + } + } + }; +} + +/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation +macro_rules! x86_m32x8_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_ps; + // _mm256_movemask_ps(a) creates a 8bit mask containing the + // most significant bit of each lane of `a`. If all bits are + // set, then all 8 lanes of the mask are true. + _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_ps; + + _mm256_movemask_ps(crate::mem::transmute(self)) != 0 + } + } + }; +} + +/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation +macro_rules! x86_m64x4_avx_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_pd; + // _mm256_movemask_pd(a) creates a 4bit mask containing the + // most significant bit of each lane of `a`. If all bits are + // set, then all 4 lanes of the mask are true. + _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_pd; + + _mm256_movemask_pd(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs new file mode 100644 index 0000000000..d37d023420 --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/avx2.rs @@ -0,0 +1,35 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX2`. +#![allow(unused)] + +/// x86/x86_64 256-bit m8x32 AVX2 implementation +macro_rules! x86_m8x32_avx2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_epi8; + // _mm256_movemask_epi8(a) creates a 32bit mask containing the + // most significant bit of each byte of `a`. If all + // bits are set, then all 32 lanes of the mask are + // true. + _mm256_movemask_epi8(crate::mem::transmute(self)) == -1_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm256_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm256_movemask_epi8; + + _mm256_movemask_epi8(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs new file mode 100644 index 0000000000..e0c9aee92b --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse.rs @@ -0,0 +1,35 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE`. +#![allow(unused)] + +/// `x86`/`x86_64` 128-bit `m32x4` `SSE` implementation +macro_rules! x86_m32x4_sse_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_ps; + // _mm_movemask_ps(a) creates a 4bit mask containing the + // most significant bit of each lane of `a`. If all + // bits are set, then all 4 lanes of the mask are + // true. + _mm_movemask_ps(crate::mem::transmute(self)) == 0b_1111_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_ps; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_ps; + + _mm_movemask_ps(crate::mem::transmute(self)) != 0 + } + } + }; +} diff --git a/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs new file mode 100644 index 0000000000..bbb52fa47e --- /dev/null +++ b/third_party/rust/packed_simd/src/codegen/reductions/mask/x86/sse2.rs @@ -0,0 +1,68 @@ +//! Mask reductions implementation for `x86` and `x86_64` targets with `SSE2`. +#![allow(unused)] + +/// `x86`/`x86_64` 128-bit m64x2 `SSE2` implementation +macro_rules! x86_m64x2_sse2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pd; + // _mm_movemask_pd(a) creates a 2bit mask containing the + // most significant bit of each lane of `a`. If all + // bits are set, then all 2 lanes of the mask are + // true. + _mm_movemask_pd(crate::mem::transmute(self)) == 0b_11_i32 + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_pd; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_pd; + + _mm_movemask_pd(crate::mem::transmute(self)) != 0 + } + } + }; +} + +/// `x86`/`x86_64` 128-bit m8x16 `SSE2` implementation +macro_rules! x86_m8x16_sse2_impl { + ($id:ident) => { + impl All for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn all(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_epi8; + // _mm_movemask_epi8(a) creates a 16bit mask containing the + // most significant bit of each byte of `a`. If all + // bits are set, then all 16 lanes of the mask are + // true. + _mm_movemask_epi8(crate::mem::transmute(self)) == i32::from(u16::max_value()) + } + } + impl Any for $id { + #[inline] + #[target_feature(enable = "sse2")] + unsafe fn any(self) -> bool { + #[cfg(target_arch = "x86")] + use crate::arch::x86::_mm_movemask_epi8; + #[cfg(target_arch = "x86_64")] + use crate::arch::x86_64::_mm_movemask_epi8; + + _mm_movemask_epi8(crate::mem::transmute(self)) != 0 + } + } + }; +} |