diff options
Diffstat (limited to 'vendor/cpufeatures/src/x86.rs')
-rw-r--r-- | vendor/cpufeatures/src/x86.rs | 122 |
1 files changed, 77 insertions, 45 deletions
diff --git a/vendor/cpufeatures/src/x86.rs b/vendor/cpufeatures/src/x86.rs index a60fa0d30..2199f2779 100644 --- a/vendor/cpufeatures/src/x86.rs +++ b/vendor/cpufeatures/src/x86.rs @@ -3,22 +3,23 @@ //! Portable, `no_std`-friendly implementation that relies on the x86 `CPUID` //! instruction for feature detection. -// Evaluate the given `$body` expression any of the supplied target features -// are not enabled. Otherwise returns true. -// -// The `$body` expression is not evaluated on SGX targets, and returns false -// on these targets unless *all* supplied target features are enabled. +/// Evaluate the given `$body` expression any of the supplied target features +/// are not enabled. Otherwise returns true. +/// +/// The `$body` expression is not evaluated on SGX targets, and returns false +/// on these targets unless *all* supplied target features are enabled. #[macro_export] #[doc(hidden)] macro_rules! __unless_target_features { ($($tf:tt),+ => $body:expr ) => {{ #[cfg(not(all($(target_feature=$tf,)*)))] { - #[cfg(not(target_env = "sgx"))] + #[cfg(not(any(target_env = "sgx", target_os = "", target_os = "uefi")))] $body - // CPUID is not available on SGX targets - #[cfg(target_env = "sgx")] + // CPUID is not available on SGX. Freestanding and UEFI targets + // do not support SIMD features with default compilation flags. + #[cfg(any(target_env = "sgx", target_os = "", target_os = "uefi"))] false } @@ -27,7 +28,7 @@ macro_rules! __unless_target_features { }}; } -// Use CPUID to detect the presence of all supplied target features. +/// Use CPUID to detect the presence of all supplied target features. #[macro_export] #[doc(hidden)] macro_rules! __detect_target_features { @@ -60,54 +61,85 @@ macro_rules! __detect_target_features { }}; } +/// Check that OS supports required SIMD registers +#[macro_export] +#[doc(hidden)] +macro_rules! __xgetbv { + ($cr:expr, $mask:expr) => {{ + #[cfg(target_arch = "x86")] + use core::arch::x86 as arch; + #[cfg(target_arch = "x86_64")] + use core::arch::x86_64 as arch; + + // Check bits 26 and 27 + let xmask = 0b11 << 26; + let xsave = $cr[0].ecx & xmask == xmask; + if xsave { + let xcr0 = unsafe { arch::_xgetbv(arch::_XCR_XFEATURE_ENABLED_MASK) }; + (xcr0 & $mask) == $mask + } else { + false + } + }}; +} + macro_rules! __expand_check_macro { - ($(($name:tt $(, $i:expr, $reg:ident, $offset:expr)*)),* $(,)?) => { + ($(($name:tt, $reg_cap:tt $(, $i:expr, $reg:ident, $offset:expr)*)),* $(,)?) => { #[macro_export] #[doc(hidden)] macro_rules! check { $( - ($cr:expr, $name) => { - true + ($cr:expr, $name) => {{ + // Register bits are listed here: + // https://wiki.osdev.org/CPU_Registers_x86#Extended_Control_Registers + let reg_cap = match $reg_cap { + // Bit 1 + "xmm" => $crate::__xgetbv!($cr, 0b10), + // Bits 1 and 2 + "ymm" => $crate::__xgetbv!($cr, 0b110), + // Bits 1, 2, 5, 6, and 7 + "zmm" => $crate::__xgetbv!($cr, 0b1110_0110), + _ => true, + }; + reg_cap $( & ($cr[$i].$reg & (1 << $offset) != 0) )* - }; + }}; )* } }; } -// Note that according to the [Intel manual][0] AVX2 and FMA require -// that we check availability of AVX before using them. -// -// [0]: https://www.intel.com/content/dam/develop/external/us/en/documents/36945 __expand_check_macro! { - ("mmx", 0, edx, 23), - ("sse", 0, edx, 25), - ("sse2", 0, edx, 26), - ("sse3", 0, ecx, 0), - ("pclmulqdq", 0, ecx, 1), - ("ssse3", 0, ecx, 9), - ("fma", 0, ecx, 28, 0, ecx, 12), - ("sse4.1", 0, ecx, 19), - ("sse4.2", 0, ecx, 20), - ("popcnt", 0, ecx, 23), - ("aes", 0, ecx, 25), - ("avx", 0, ecx, 28), - ("rdrand", 0, ecx, 30), - ("sgx", 1, ebx, 2), - ("bmi1", 1, ebx, 3), - ("avx2", 0, ecx, 28, 1, ebx, 5), - ("bmi2", 1, ebx, 8), - ("avx512f", 1, ebx, 16), - ("avx512dq", 1, ebx, 17), - ("rdseed", 1, ebx, 18), - ("adx", 1, ebx, 19), - ("avx512ifma", 1, ebx, 21), - ("avx512pf", 1, ebx, 26), - ("avx512er", 1, ebx, 27), - ("avx512cd", 1, ebx, 28), - ("sha", 1, ebx, 29), - ("avx512bw", 1, ebx, 30), - ("avx512vl", 1, ebx, 31), + ("sse3", "xmm", 0, ecx, 0), + ("pclmulqdq", "xmm", 0, ecx, 1), + ("ssse3", "xmm", 0, ecx, 9), + ("fma", "xmm", 0, ecx, 12, 0, ecx, 28), + ("sse4.1", "xmm", 0, ecx, 19), + ("sse4.2", "xmm", 0, ecx, 20), + ("popcnt", "", 0, ecx, 23), + ("aes", "xmm", 0, ecx, 25), + ("avx", "xmm", 0, ecx, 28), + ("rdrand", "", 0, ecx, 30), + + ("mmx", "", 0, edx, 23), + ("sse", "xmm", 0, edx, 25), + ("sse2", "xmm", 0, edx, 26), + + ("sgx", "", 1, ebx, 2), + ("bmi1", "", 1, ebx, 3), + ("bmi2", "", 1, ebx, 8), + ("avx2", "ymm", 1, ebx, 5, 0, ecx, 28), + ("avx512f", "zmm", 1, ebx, 16), + ("avx512dq", "zmm", 1, ebx, 17), + ("rdseed", "", 1, ebx, 18), + ("adx", "", 1, ebx, 19), + ("avx512ifma", "zmm", 1, ebx, 21), + ("avx512pf", "zmm", 1, ebx, 26), + ("avx512er", "zmm", 1, ebx, 27), + ("avx512cd", "zmm", 1, ebx, 28), + ("sha", "xmm", 1, ebx, 29), + ("avx512bw", "zmm", 1, ebx, 30), + ("avx512vl", "zmm", 1, ebx, 31), } |