author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:19:13 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-17 12:19:13 +0000
commit     218caa410aa38c29984be31a5229b9fa717560ee (patch)
tree       c54bd55eeb6e4c508940a30e94c0032fbd45d677 /vendor/packed_simd_2/src/codegen/reductions
parent     Releasing progress-linux version 1.67.1+dfsg1-1~progress7.99u1. (diff)
download   rustc-218caa410aa38c29984be31a5229b9fa717560ee.tar.xz
           rustc-218caa410aa38c29984be31a5229b9fa717560ee.zip

Merging upstream version 1.68.2+dfsg1.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'vendor/packed_simd_2/src/codegen/reductions')
-rw-r--r--  vendor/packed_simd_2/src/codegen/reductions/mask.rs          |  6
-rw-r--r--  vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs  | 38
-rw-r--r--  vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs      | 26
-rw-r--r--  vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs |  4
-rw-r--r--  vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs      | 70
-rw-r--r--  vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs  | 10
-rw-r--r--  vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs  |  3
-rw-r--r--  vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs |  6
8 files changed, 98 insertions(+), 65 deletions(-)
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask.rs b/vendor/packed_simd_2/src/codegen/reductions/mask.rs
index 97260c6d4..a78bcc563 100644
--- a/vendor/packed_simd_2/src/codegen/reductions/mask.rs
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask.rs
@@ -1,17 +1,17 @@
//! Code generation workaround for `all()` mask horizontal reduction.
//!
-//! Works arround [LLVM bug 36702].
+//! Works around [LLVM bug 36702].
//!
//! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702
#![allow(unused_macros)]
use crate::*;
-crate trait All: crate::marker::Sized {
+pub(crate) trait All: crate::marker::Sized {
unsafe fn all(self) -> bool;
}
-crate trait Any: crate::marker::Sized {
+pub(crate) trait Any: crate::marker::Sized {
unsafe fn any(self) -> bool;
}
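
The substantive change in this hunk is the visibility spelling: the nightly-only `crate` visibility shorthand was removed from the language, so the vendored crate switches to the stable `pub(crate)` form. A minimal sketch of how a crate-internal caller might consume these traits (the helper name is illustrative, not part of the crate):

    // Illustrative generic helper over the crate-internal All trait.
    pub(crate) fn reduce_all<M: All>(mask: M) -> bool {
        // The unsafe contract is inherited from the trait method.
        unsafe { mask.all() }
    }
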
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs
index e9586eace..b2db52c89 100644
--- a/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs
@@ -19,7 +19,7 @@ macro_rules! aarch64_128_neon_impl {
$vmax(crate::mem::transmute(self)) != 0
}
}
- }
+ };
}
/// 64-bit wide vectors
@@ -35,9 +35,7 @@ macro_rules! aarch64_64_neon_impl {
halves: ($id, $id),
vec: $vec128,
}
- U {
- halves: (self, self),
- }.vec.all()
+ U { halves: (self, self) }.vec.all()
}
}
impl Any for $id {
@@ -48,9 +46,7 @@ macro_rules! aarch64_64_neon_impl {
halves: ($id, $id),
vec: $vec128,
}
- U {
- halves: (self, self),
- }.vec.any()
+ U { halves: (self, self) }.vec.any()
}
}
};
@@ -59,13 +55,27 @@ macro_rules! aarch64_64_neon_impl {
/// Mask reduction implementation for `aarch64` targets
macro_rules! impl_mask_reductions {
// 64-bit wide masks
- (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); };
- (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); };
- (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); };
+ (m8x8) => {
+ aarch64_64_neon_impl!(m8x8, m8x16);
+ };
+ (m16x4) => {
+ aarch64_64_neon_impl!(m16x4, m16x8);
+ };
+ (m32x2) => {
+ aarch64_64_neon_impl!(m32x2, m32x4);
+ };
// 128-bit wide masks
- (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); };
- (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); };
- (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); };
+ (m8x16) => {
+ aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8);
+ };
+ (m16x8) => {
+ aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16);
+ };
+ (m32x4) => {
+ aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32);
+ };
// Fallback to LLVM's default code-generation:
- ($id:ident) => { fallback_impl!($id); };
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
}
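
The 128-bit arms reduce a mask with a single horizontal min/max: every mask lane is all-ones or all-zeros, so `vminvq_*` returns non-zero iff all lanes are true and `vmaxvq_*` returns non-zero iff any lane is; the 64-bit arms splat the half-width vector into both halves of a 128-bit one and reuse the same reduction. A standalone sketch of the 128-bit case against `core::arch::aarch64` (function names are illustrative):

    #[cfg(target_arch = "aarch64")]
    unsafe fn m8x16_all(mask: core::arch::aarch64::uint8x16_t) -> bool {
        // Horizontal minimum over all 16 lanes: non-zero only when every
        // lane (0x00 or 0xff in a mask) is 0xff.
        core::arch::aarch64::vminvq_u8(mask) != 0
    }

    #[cfg(target_arch = "aarch64")]
    unsafe fn m8x16_any(mask: core::arch::aarch64::uint8x16_t) -> bool {
        // Horizontal maximum: non-zero when at least one lane is set.
        core::arch::aarch64::vmaxvq_u8(mask) != 0
    }
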
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs
index 1987af7a9..41c3cbc58 100644
--- a/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs
@@ -15,10 +15,7 @@ macro_rules! arm_128_v7_neon_impl {
vec: $id,
}
let halves = U { vec: self }.halves;
- let h: $half = transmute($vpmin(
- transmute(halves.0),
- transmute(halves.1),
- ));
+ let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1)));
h.all()
}
}
@@ -33,10 +30,7 @@ macro_rules! arm_128_v7_neon_impl {
vec: $id,
}
let halves = U { vec: self }.halves;
- let h: $half = transmute($vpmax(
- transmute(halves.0),
- transmute(halves.1),
- ));
+ let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1)));
h.any()
}
}
@@ -46,9 +40,17 @@ macro_rules! arm_128_v7_neon_impl {
/// Mask reduction implementation for `arm` targets
macro_rules! impl_mask_reductions {
// 128-bit wide masks
- (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); };
- (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); };
- (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); };
+ (m8x16) => {
+ arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8);
+ };
+ (m16x8) => {
+ arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16);
+ };
+ (m32x4) => {
+ arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32);
+ };
// Fallback to LLVM's default code-generation:
- ($id:ident) => { fallback_impl!($id); };
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
}
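
32-bit ARM NEON has no single-instruction horizontal reduction, so these arms fold the 128-bit mask once with a pairwise min/max and finish on the resulting 64-bit half. A sketch of the folding step, written against `core::arch::aarch64` where the identical intrinsics are stable (the `core::arch::arm` NEON API is still unstable):

    #[cfg(target_arch = "aarch64")]
    unsafe fn m8x16_all(v: core::arch::aarch64::uint8x16_t) -> bool {
        use core::arch::aarch64::*;
        // vpmin_u8 takes minima of adjacent lane pairs, so its 8 output
        // lanes cover all 16 input lanes; one horizontal min finishes.
        let h: uint8x8_t = vpmin_u8(vget_low_u8(v), vget_high_u8(v));
        vminv_u8(h) != 0
    }
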
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs
index 25e5c813a..4c377a687 100644
--- a/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs
@@ -2,5 +2,7 @@
/// Default mask reduction implementation
macro_rules! impl_mask_reductions {
- ($id:ident) => { fallback_impl!($id); };
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
}
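
`fallback_impl!`, defined elsewhere in the crate, leaves the reduction to LLVM's generic vector code-generation. As a rough scalar picture of what that computes (hypothetical helpers, not crate code):

    fn all_lanes(lanes: &[bool]) -> bool {
        lanes.iter().all(|&l| l)
    }

    fn any_lane(lanes: &[bool]) -> bool {
        lanes.iter().any(|&l| l)
    }
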
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs
index bcfb1a6e1..4bf509806 100644
--- a/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs
@@ -114,17 +114,17 @@ macro_rules! x86_m64x4_impl {
/// Fallback implementation.
macro_rules! x86_intr_impl {
($id:ident) => {
- impl All for $id {
- #[inline]
- unsafe fn all(self) -> bool {
- use crate::llvm::simd_reduce_all;
- simd_reduce_all(self.0)
+ impl All for $id {
+ #[inline]
+ unsafe fn all(self) -> bool {
+ use crate::llvm::simd_reduce_all;
+ simd_reduce_all(self.0)
+ }
}
- }
impl Any for $id {
#[inline]
unsafe fn any(self) -> bool {
- use crate::llvm::simd_reduce_any;
+ use crate::llvm::simd_reduce_any;
simd_reduce_any(self.0)
}
}
@@ -134,21 +134,47 @@ macro_rules! x86_intr_impl {
/// Mask reduction implementation for `x86` and `x86_64` targets
macro_rules! impl_mask_reductions {
// 64-bit wide masks
- (m8x8) => { x86_m8x8_impl!(m8x8); };
- (m16x4) => { x86_m8x8_impl!(m16x4); };
- (m32x2) => { x86_m8x8_impl!(m32x2); };
+ (m8x8) => {
+ x86_m8x8_impl!(m8x8);
+ };
+ (m16x4) => {
+ x86_m8x8_impl!(m16x4);
+ };
+ (m32x2) => {
+ x86_m8x8_impl!(m32x2);
+ };
// 128-bit wide masks
- (m8x16) => { x86_m8x16_impl!(m8x16); };
- (m16x8) => { x86_m8x16_impl!(m16x8); };
- (m32x4) => { x86_m32x4_impl!(m32x4); };
- (m64x2) => { x86_m64x2_impl!(m64x2); };
- (m128x1) => { x86_intr_impl!(m128x1); };
+ (m8x16) => {
+ x86_m8x16_impl!(m8x16);
+ };
+ (m16x8) => {
+ x86_m8x16_impl!(m16x8);
+ };
+ (m32x4) => {
+ x86_m32x4_impl!(m32x4);
+ };
+ (m64x2) => {
+ x86_m64x2_impl!(m64x2);
+ };
+ (m128x1) => {
+ x86_intr_impl!(m128x1);
+ };
// 256-bit wide masks:
- (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); };
- (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); };
- (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); };
- (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); };
- (m128x2) => { x86_intr_impl!(m128x2); };
+ (m8x32) => {
+ x86_m8x32_impl!(m8x32, m8x16);
+ };
+ (m16x16) => {
+ x86_m8x32_impl!(m16x16, m16x8);
+ };
+ (m32x8) => {
+ x86_m32x8_impl!(m32x8, m32x4);
+ };
+ (m64x4) => {
+ x86_m64x4_impl!(m64x4, m64x2);
+ };
+ (m128x2) => {
+ x86_intr_impl!(m128x2);
+ };
(msizex2) => {
cfg_if! {
if #[cfg(target_pointer_width = "64")] {
@@ -184,5 +210,7 @@ macro_rules! impl_mask_reductions {
};
// Fallback to LLVM's default code-generation:
- ($id:ident) => { fallback_impl!($id); };
+ ($id:ident) => {
+ fallback_impl!($id);
+ };
}
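
`x86_intr_impl!` covers the 128-bit-lane masks via the crate-internal `simd_reduce_all`/`simd_reduce_any` LLVM bindings. A rough stable stand-in for the same check on x86_64, using the SSE2 byte movemask (illustrative, not the crate's code):

    #[cfg(target_arch = "x86_64")]
    unsafe fn m128x1_all(v: core::arch::x86_64::__m128i) -> bool {
        use core::arch::x86_64::_mm_movemask_epi8;
        // One bit per byte's sign bit; an all-true mask is all ones, so
        // all 16 bits must be set.
        _mm_movemask_epi8(v) == 0xFFFF
    }
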
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs
index d18736fb0..61f352d22 100644
--- a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs
@@ -13,10 +13,7 @@ macro_rules! x86_m8x32_avx_impl {
use crate::arch::x86::_mm256_testc_si256;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86_64::_mm256_testc_si256;
- _mm256_testc_si256(
- crate::mem::transmute(self),
- crate::mem::transmute($id::splat(true)),
- ) != 0
+ _mm256_testc_si256(crate::mem::transmute(self), crate::mem::transmute($id::splat(true))) != 0
}
}
impl Any for $id {
@@ -27,10 +24,7 @@ macro_rules! x86_m8x32_avx_impl {
use crate::arch::x86::_mm256_testz_si256;
#[cfg(target_arch = "x86_64")]
use crate::arch::x86_64::_mm256_testz_si256;
- _mm256_testz_si256(
- crate::mem::transmute(self),
- crate::mem::transmute(self),
- ) == 0
+ _mm256_testz_si256(crate::mem::transmute(self), crate::mem::transmute(self)) == 0
}
}
};
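
`_mm256_testc_si256(a, b)` sets CF iff `!a & b` is all zeros, so testing `self` against an all-ones vector verifies that no mask bit is unset; `_mm256_testz_si256(a, a)` sets ZF iff `a` is all zeros, so a zero return means at least one bit is set. A minimal sketch of the `any` side (function name illustrative):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "avx")]
    unsafe fn m8x32_any(v: core::arch::x86_64::__m256i) -> bool {
        use core::arch::x86_64::_mm256_testz_si256;
        // Returns 1 (ZF set) only when v & v has no bits set.
        _mm256_testz_si256(v, v) == 0
    }
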
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs
index eb1ef7fac..e0c9aee92 100644
--- a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs
@@ -16,8 +16,7 @@ macro_rules! x86_m32x4_sse_impl {
// most significant bit of each lane of `a`. If all
// bits are set, then all 4 lanes of the mask are
// true.
- _mm_movemask_ps(crate::mem::transmute(self))
- == 0b_1111_i32
+ _mm_movemask_ps(crate::mem::transmute(self)) == 0b_1111_i32
}
}
impl Any for $id {
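
The check itself is unchanged by the reflow: `_mm_movemask_ps` packs the four 32-bit lanes' sign bits into the low bits of an integer, and an all-true mask must produce `0b1111`. A standalone sketch (function name illustrative):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "sse")]
    unsafe fn m32x4_all(v: core::arch::x86_64::__m128) -> bool {
        use core::arch::x86_64::_mm_movemask_ps;
        // One sign bit per 32-bit lane; all four must be set.
        _mm_movemask_ps(v) == 0b1111
    }
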
diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs
index a99c606f5..bbb52fa47 100644
--- a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs
+++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs
@@ -16,8 +16,7 @@ macro_rules! x86_m64x2_sse2_impl {
// most significant bit of each lane of `a`. If all
// bits are set, then all 2 lanes of the mask are
// true.
- _mm_movemask_pd(crate::mem::transmute(self))
- == 0b_11_i32
+ _mm_movemask_pd(crate::mem::transmute(self)) == 0b_11_i32
}
}
impl Any for $id {
@@ -50,8 +49,7 @@ macro_rules! x86_m8x16_sse2_impl {
// most significant bit of each byte of `a`. If all
// bits are set, then all 16 lanes of the mask are
// true.
- _mm_movemask_epi8(crate::mem::transmute(self))
- == i32::from(u16::max_value())
+ _mm_movemask_epi8(crate::mem::transmute(self)) == i32::from(u16::max_value())
}
}
impl Any for $id {
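
The same movemask pattern applies at the other widths: `_mm_movemask_pd` yields one bit per 64-bit lane (all-true is `0b11`) and `_mm_movemask_epi8` one bit per byte (all-true is `0xFFFF`, i.e. `i32::from(u16::max_value())` above). Sketch of the two-lane case (function name illustrative):

    #[cfg(target_arch = "x86_64")]
    #[target_feature(enable = "sse2")]
    unsafe fn m64x2_all(v: core::arch::x86_64::__m128d) -> bool {
        use core::arch::x86_64::_mm_movemask_pd;
        // One sign bit per 64-bit lane.
        _mm_movemask_pd(v) == 0b11
    }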