From 218caa410aa38c29984be31a5229b9fa717560ee Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:19:13 +0200 Subject: Merging upstream version 1.68.2+dfsg1. Signed-off-by: Daniel Baumann --- .../packed_simd_2/src/codegen/reductions/mask.rs | 6 +- .../src/codegen/reductions/mask/aarch64.rs | 38 +++++++----- .../src/codegen/reductions/mask/arm.rs | 26 ++++---- .../src/codegen/reductions/mask/fallback.rs | 4 +- .../src/codegen/reductions/mask/x86.rs | 70 +++++++++++++++------- .../src/codegen/reductions/mask/x86/avx.rs | 10 +--- .../src/codegen/reductions/mask/x86/sse.rs | 3 +- .../src/codegen/reductions/mask/x86/sse2.rs | 6 +- 8 files changed, 98 insertions(+), 65 deletions(-) (limited to 'vendor/packed_simd_2/src/codegen/reductions') diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask.rs b/vendor/packed_simd_2/src/codegen/reductions/mask.rs index 97260c6d4..a78bcc563 100644 --- a/vendor/packed_simd_2/src/codegen/reductions/mask.rs +++ b/vendor/packed_simd_2/src/codegen/reductions/mask.rs @@ -1,17 +1,17 @@ //! Code generation workaround for `all()` mask horizontal reduction. //! -//! Works arround [LLVM bug 36702]. +//! Works around [LLVM bug 36702]. //! //! [LLVM bug 36702]: https://bugs.llvm.org/show_bug.cgi?id=36702 #![allow(unused_macros)] use crate::*; -crate trait All: crate::marker::Sized { +pub(crate) trait All: crate::marker::Sized { unsafe fn all(self) -> bool; } -crate trait Any: crate::marker::Sized { +pub(crate) trait Any: crate::marker::Sized { unsafe fn any(self) -> bool; } diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs index e9586eace..b2db52c89 100644 --- a/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/aarch64.rs @@ -19,7 +19,7 @@ macro_rules! aarch64_128_neon_impl { $vmax(crate::mem::transmute(self)) != 0 } } - } + }; } /// 64-bit wide vectors @@ -35,9 +35,7 @@ macro_rules! aarch64_64_neon_impl { halves: ($id, $id), vec: $vec128, } - U { - halves: (self, self), - }.vec.all() + U { halves: (self, self) }.vec.all() } } impl Any for $id { @@ -48,9 +46,7 @@ macro_rules! aarch64_64_neon_impl { halves: ($id, $id), vec: $vec128, } - U { - halves: (self, self), - }.vec.any() + U { halves: (self, self) }.vec.any() } } }; @@ -59,13 +55,27 @@ macro_rules! aarch64_64_neon_impl { /// Mask reduction implementation for `aarch64` targets macro_rules! impl_mask_reductions { // 64-bit wide masks - (m8x8) => { aarch64_64_neon_impl!(m8x8, m8x16); }; - (m16x4) => { aarch64_64_neon_impl!(m16x4, m16x8); }; - (m32x2) => { aarch64_64_neon_impl!(m32x2, m32x4); }; + (m8x8) => { + aarch64_64_neon_impl!(m8x8, m8x16); + }; + (m16x4) => { + aarch64_64_neon_impl!(m16x4, m16x8); + }; + (m32x2) => { + aarch64_64_neon_impl!(m32x2, m32x4); + }; // 128-bit wide masks - (m8x16) => { aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); }; - (m16x8) => { aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); }; - (m32x4) => { aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); }; + (m8x16) => { + aarch64_128_neon_impl!(m8x16, vminvq_u8, vmaxvq_u8); + }; + (m16x8) => { + aarch64_128_neon_impl!(m16x8, vminvq_u16, vmaxvq_u16); + }; + (m32x4) => { + aarch64_128_neon_impl!(m32x4, vminvq_u32, vmaxvq_u32); + }; // Fallback to LLVM's default code-generation: - ($id:ident) => { fallback_impl!($id); }; + ($id:ident) => { + fallback_impl!($id); + }; } diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs index 1987af7a9..41c3cbc58 100644 --- a/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/arm.rs @@ -15,10 +15,7 @@ macro_rules! arm_128_v7_neon_impl { vec: $id, } let halves = U { vec: self }.halves; - let h: $half = transmute($vpmin( - transmute(halves.0), - transmute(halves.1), - )); + let h: $half = transmute($vpmin(transmute(halves.0), transmute(halves.1))); h.all() } } @@ -33,10 +30,7 @@ macro_rules! arm_128_v7_neon_impl { vec: $id, } let halves = U { vec: self }.halves; - let h: $half = transmute($vpmax( - transmute(halves.0), - transmute(halves.1), - )); + let h: $half = transmute($vpmax(transmute(halves.0), transmute(halves.1))); h.any() } } @@ -46,9 +40,17 @@ macro_rules! arm_128_v7_neon_impl { /// Mask reduction implementation for `arm` targets macro_rules! impl_mask_reductions { // 128-bit wide masks - (m8x16) => { arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); }; - (m16x8) => { arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); }; - (m32x4) => { arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); }; + (m8x16) => { + arm_128_v7_neon_impl!(m8x16, m8x8, vpmin_u8, vpmax_u8); + }; + (m16x8) => { + arm_128_v7_neon_impl!(m16x8, m16x4, vpmin_u16, vpmax_u16); + }; + (m32x4) => { + arm_128_v7_neon_impl!(m32x4, m32x2, vpmin_u32, vpmax_u32); + }; // Fallback to LLVM's default code-generation: - ($id:ident) => { fallback_impl!($id); }; + ($id:ident) => { + fallback_impl!($id); + }; } diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs index 25e5c813a..4c377a687 100644 --- a/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/fallback.rs @@ -2,5 +2,7 @@ /// Default mask reduction implementation macro_rules! impl_mask_reductions { - ($id:ident) => { fallback_impl!($id); }; + ($id:ident) => { + fallback_impl!($id); + }; } diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs index bcfb1a6e1..4bf509806 100644 --- a/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86.rs @@ -114,17 +114,17 @@ macro_rules! x86_m64x4_impl { /// Fallback implementation. macro_rules! x86_intr_impl { ($id:ident) => { - impl All for $id { - #[inline] - unsafe fn all(self) -> bool { - use crate::llvm::simd_reduce_all; - simd_reduce_all(self.0) + impl All for $id { + #[inline] + unsafe fn all(self) -> bool { + use crate::llvm::simd_reduce_all; + simd_reduce_all(self.0) + } } - } impl Any for $id { #[inline] unsafe fn any(self) -> bool { - use crate::llvm::simd_reduce_any; + use crate::llvm::simd_reduce_any; simd_reduce_any(self.0) } } @@ -134,21 +134,47 @@ macro_rules! x86_intr_impl { /// Mask reduction implementation for `x86` and `x86_64` targets macro_rules! impl_mask_reductions { // 64-bit wide masks - (m8x8) => { x86_m8x8_impl!(m8x8); }; - (m16x4) => { x86_m8x8_impl!(m16x4); }; - (m32x2) => { x86_m8x8_impl!(m32x2); }; + (m8x8) => { + x86_m8x8_impl!(m8x8); + }; + (m16x4) => { + x86_m8x8_impl!(m16x4); + }; + (m32x2) => { + x86_m8x8_impl!(m32x2); + }; // 128-bit wide masks - (m8x16) => { x86_m8x16_impl!(m8x16); }; - (m16x8) => { x86_m8x16_impl!(m16x8); }; - (m32x4) => { x86_m32x4_impl!(m32x4); }; - (m64x2) => { x86_m64x2_impl!(m64x2); }; - (m128x1) => { x86_intr_impl!(m128x1); }; + (m8x16) => { + x86_m8x16_impl!(m8x16); + }; + (m16x8) => { + x86_m8x16_impl!(m16x8); + }; + (m32x4) => { + x86_m32x4_impl!(m32x4); + }; + (m64x2) => { + x86_m64x2_impl!(m64x2); + }; + (m128x1) => { + x86_intr_impl!(m128x1); + }; // 256-bit wide masks: - (m8x32) => { x86_m8x32_impl!(m8x32, m8x16); }; - (m16x16) => { x86_m8x32_impl!(m16x16, m16x8); }; - (m32x8) => { x86_m32x8_impl!(m32x8, m32x4); }; - (m64x4) => { x86_m64x4_impl!(m64x4, m64x2); }; - (m128x2) => { x86_intr_impl!(m128x2); }; + (m8x32) => { + x86_m8x32_impl!(m8x32, m8x16); + }; + (m16x16) => { + x86_m8x32_impl!(m16x16, m16x8); + }; + (m32x8) => { + x86_m32x8_impl!(m32x8, m32x4); + }; + (m64x4) => { + x86_m64x4_impl!(m64x4, m64x2); + }; + (m128x2) => { + x86_intr_impl!(m128x2); + }; (msizex2) => { cfg_if! { if #[cfg(target_pointer_width = "64")] { @@ -184,5 +210,7 @@ macro_rules! impl_mask_reductions { }; // Fallback to LLVM's default code-generation: - ($id:ident) => { fallback_impl!($id); }; + ($id:ident) => { + fallback_impl!($id); + }; } diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs index d18736fb0..61f352d22 100644 --- a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs @@ -13,10 +13,7 @@ macro_rules! x86_m8x32_avx_impl { use crate::arch::x86::_mm256_testc_si256; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_testc_si256; - _mm256_testc_si256( - crate::mem::transmute(self), - crate::mem::transmute($id::splat(true)), - ) != 0 + _mm256_testc_si256(crate::mem::transmute(self), crate::mem::transmute($id::splat(true))) != 0 } } impl Any for $id { @@ -27,10 +24,7 @@ macro_rules! x86_m8x32_avx_impl { use crate::arch::x86::_mm256_testz_si256; #[cfg(target_arch = "x86_64")] use crate::arch::x86_64::_mm256_testz_si256; - _mm256_testz_si256( - crate::mem::transmute(self), - crate::mem::transmute(self), - ) == 0 + _mm256_testz_si256(crate::mem::transmute(self), crate::mem::transmute(self)) == 0 } } }; diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs index eb1ef7fac..e0c9aee92 100644 --- a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse.rs @@ -16,8 +16,7 @@ macro_rules! x86_m32x4_sse_impl { // most significant bit of each lane of `a`. If all // bits are set, then all 4 lanes of the mask are // true. - _mm_movemask_ps(crate::mem::transmute(self)) - == 0b_1111_i32 + _mm_movemask_ps(crate::mem::transmute(self)) == 0b_1111_i32 } } impl Any for $id { diff --git a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs index a99c606f5..bbb52fa47 100644 --- a/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs +++ b/vendor/packed_simd_2/src/codegen/reductions/mask/x86/sse2.rs @@ -16,8 +16,7 @@ macro_rules! x86_m64x2_sse2_impl { // most significant bit of each lane of `a`. If all // bits are set, then all 2 lanes of the mask are // true. - _mm_movemask_pd(crate::mem::transmute(self)) - == 0b_11_i32 + _mm_movemask_pd(crate::mem::transmute(self)) == 0b_11_i32 } } impl Any for $id { @@ -50,8 +49,7 @@ macro_rules! x86_m8x16_sse2_impl { // most significant bit of each byte of `a`. If all // bits are set, then all 16 lanes of the mask are // true. - _mm_movemask_epi8(crate::mem::transmute(self)) - == i32::from(u16::max_value()) + _mm_movemask_epi8(crate::mem::transmute(self)) == i32::from(u16::max_value()) } } impl Any for $id { -- cgit v1.2.3