summaryrefslogtreecommitdiffstats
path: root/vendor/packed_simd_2/src/codegen/reductions/mask/x86/avx.rs
blob: d18736fb0399c315c0b2d8eaf4e872a76963e777 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
//! Mask reductions implementation for `x86` and `x86_64` targets with `AVX`

/// `x86`/`x86_64` 256-bit `AVX` implementation
///
/// Generates the `All` and `Any` mask reductions for `$id` on top of the
/// `_mm256_testc_si256` / `_mm256_testz_si256` intrinsics. `$id` is
/// transmuted to `__m256i`, so it must be exactly 256 bits wide.
///
/// FIXME: it might be faster here to do two `_mm_movemask_epi8`
#[cfg(target_feature = "avx")]
macro_rules! x86_m8x32_avx_impl {
    ($id:ident) => {
        impl All for $id {
            /// Returns `true` if every bit of the mask is set.
            ///
            /// # Safety
            ///
            /// The executing CPU must support `AVX`.
            #[inline]
            #[target_feature(enable = "avx")]
            unsafe fn all(self) -> bool {
                #[cfg(target_arch = "x86")]
                use crate::arch::x86::{__m256i, _mm256_testc_si256};
                #[cfg(target_arch = "x86_64")]
                use crate::arch::x86_64::{__m256i, _mm256_testc_si256};

                let bits: __m256i = crate::mem::transmute(self);
                let ones: __m256i = crate::mem::transmute($id::splat(true));
                // `testc` returns 1 only when `!bits & ones` is all zeros,
                // i.e. when no bit of `bits` is cleared.
                _mm256_testc_si256(bits, ones) != 0
            }
        }
        impl Any for $id {
            /// Returns `true` if at least one bit of the mask is set.
            ///
            /// # Safety
            ///
            /// The executing CPU must support `AVX`.
            #[inline]
            #[target_feature(enable = "avx")]
            unsafe fn any(self) -> bool {
                #[cfg(target_arch = "x86")]
                use crate::arch::x86::{__m256i, _mm256_testz_si256};
                #[cfg(target_arch = "x86_64")]
                use crate::arch::x86_64::{__m256i, _mm256_testz_si256};

                let bits: __m256i = crate::mem::transmute(self);
                // `testz` returns 1 only when `bits & bits` is all zeros, so
                // a zero result means some bit is set.
                _mm256_testz_si256(bits, bits) == 0
            }
        }
    };
}

/// `x86`/`x86_64` 256-bit m32x8 `AVX` implementation
///
/// Generates the `All` and `Any` mask reductions for `$id` on top of
/// `_mm256_movemask_ps`. `$id` is transmuted to the intrinsic's 256-bit
/// argument type, so it must be exactly 256 bits wide.
macro_rules! x86_m32x8_avx_impl {
    ($id:ident) => {
        impl All for $id {
            /// Returns `true` if the most significant bit of all 8 lanes is
            /// set.
            ///
            /// # Safety
            ///
            /// The executing CPU must support `AVX`.
            #[inline]
            // `_mm256_movemask_ps` is an `AVX` intrinsic: the enclosing
            // function has to enable `avx` (not just `sse`) for it to be
            // inlined here.
            #[target_feature(enable = "avx")]
            unsafe fn all(self) -> bool {
                #[cfg(target_arch = "x86")]
                use crate::arch::x86::_mm256_movemask_ps;
                #[cfg(target_arch = "x86_64")]
                use crate::arch::x86_64::_mm256_movemask_ps;
                // _mm256_movemask_ps(a) creates a 8bit mask containing the
                // most significant bit of each lane of `a`. If all bits are
                // set, then all 8 lanes of the mask are true.
                _mm256_movemask_ps(crate::mem::transmute(self)) == 0b_1111_1111_i32
            }
        }
        impl Any for $id {
            /// Returns `true` if the most significant bit of at least one
            /// lane is set.
            ///
            /// # Safety
            ///
            /// The executing CPU must support `AVX`.
            #[inline]
            // See `all`: the intrinsic below requires `avx`.
            #[target_feature(enable = "avx")]
            unsafe fn any(self) -> bool {
                #[cfg(target_arch = "x86")]
                use crate::arch::x86::_mm256_movemask_ps;
                #[cfg(target_arch = "x86_64")]
                use crate::arch::x86_64::_mm256_movemask_ps;

                // A non-zero movemask means some lane has its most
                // significant bit set.
                _mm256_movemask_ps(crate::mem::transmute(self)) != 0
            }
        }
    };
}

/// `x86`/`x86_64` 256-bit m64x4 `AVX` implementation
///
/// Generates the `All` and `Any` mask reductions for `$id` on top of
/// `_mm256_movemask_pd`. `$id` is transmuted to the intrinsic's 256-bit
/// argument type, so it must be exactly 256 bits wide.
macro_rules! x86_m64x4_avx_impl {
    ($id:ident) => {
        impl All for $id {
            /// Returns `true` if the most significant bit of all 4 lanes is
            /// set.
            ///
            /// # Safety
            ///
            /// The executing CPU must support `AVX`.
            #[inline]
            // `_mm256_movemask_pd` is an `AVX` intrinsic: the enclosing
            // function has to enable `avx` (not just `sse`) for it to be
            // inlined here.
            #[target_feature(enable = "avx")]
            unsafe fn all(self) -> bool {
                #[cfg(target_arch = "x86")]
                use crate::arch::x86::_mm256_movemask_pd;
                #[cfg(target_arch = "x86_64")]
                use crate::arch::x86_64::_mm256_movemask_pd;
                // _mm256_movemask_pd(a) creates a 4bit mask containing the
                // most significant bit of each lane of `a`. If all bits are
                // set, then all 4 lanes of the mask are true.
                _mm256_movemask_pd(crate::mem::transmute(self)) == 0b_1111_i32
            }
        }
        impl Any for $id {
            /// Returns `true` if the most significant bit of at least one
            /// lane is set.
            ///
            /// # Safety
            ///
            /// The executing CPU must support `AVX`.
            #[inline]
            // See `all`: the intrinsic below requires `avx`.
            #[target_feature(enable = "avx")]
            unsafe fn any(self) -> bool {
                #[cfg(target_arch = "x86")]
                use crate::arch::x86::_mm256_movemask_pd;
                #[cfg(target_arch = "x86_64")]
                use crate::arch::x86_64::_mm256_movemask_pd;

                // A non-zero movemask means some lane has its most
                // significant bit set.
                _mm256_movemask_pd(crate::mem::transmute(self)) != 0
            }
        }
    };
}