From 1376c5a617be5c25655d0d7cb63e3beaa5a6e026 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 17 Apr 2024 14:20:39 +0200 Subject: Merging upstream version 1.70.0+dfsg1. Signed-off-by: Daniel Baumann --- library/stdarch/crates/std_detect/README.md | 18 +++- .../crates/std_detect/src/detect/arch/x86.rs | 6 ++ .../stdarch/crates/std_detect/src/detect/mod.rs | 6 ++ .../crates/std_detect/src/detect/os/aarch64.rs | 112 +++++++++++++-------- .../std_detect/src/detect/os/freebsd/aarch64.rs | 18 ---- .../crates/std_detect/src/detect/os/freebsd/arm.rs | 22 +++- .../std_detect/src/detect/os/linux/aarch64.rs | 47 ++++++++- .../std_detect/src/detect/os/linux/auxvec.rs | 22 +++- .../std_detect/src/detect/os/openbsd/aarch64.rs | 55 ++++++++++ .../std_detect/src/detect/os/windows/aarch64.rs | 36 +++++-- .../stdarch/crates/std_detect/src/detect/os/x86.rs | 3 + library/stdarch/crates/std_detect/src/lib.rs | 3 + .../crates/std_detect/tests/cpu-detection.rs | 59 ++++++++++- .../crates/std_detect/tests/x86-specific.rs | 4 + 14 files changed, 325 insertions(+), 86 deletions(-) create mode 100644 library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs (limited to 'library/stdarch/crates/std_detect') diff --git a/library/stdarch/crates/std_detect/README.md b/library/stdarch/crates/std_detect/README.md index bea7d941a..71f474d65 100644 --- a/library/stdarch/crates/std_detect/README.md +++ b/library/stdarch/crates/std_detect/README.md @@ -30,6 +30,8 @@ run-time feature detection. When this feature is disabled, `std_detect` assumes that [`getauxval`] is linked to the binary. If that is not the case the behavior is undefined. + Note: This feature is ignored on `*-linux-gnu*` targets, since all `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html)) have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html), and we can safely assume [`getauxval`] is linked to the binary. + * `std_detect_file_io` (enabled by default, requires `std`): Enable to perform run-time feature detection using file APIs (e.g. `/proc/cpuinfo`, etc.) if other more performant methods fail. This feature requires `libstd` as a dependency, preventing the @@ -44,17 +46,25 @@ crate from working on applications in which `std` is not available. the operating system. `std_detect` assumes that the binary is an user-space application. If you need raw support for querying `cpuid`, consider using the [`cupid`](https://crates.io/crates/cupid) crate. - -* Linux: - * `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`: `std_detect` + +* Linux/Android: + * `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`, `riscv{32,64}`: `std_detect` supports these on Linux by querying ELF auxiliary vectors (using `getauxval` - when available), and if that fails, by querying `/proc/cpuinfo`. + when available), and if that fails, by querying `/proc/cpuinfo`. * `arm64`: partial support for doing run-time feature detection by directly querying `mrs` is implemented for Linux >= 4.11, but not enabled by default. * FreeBSD: + * `arm32`, `powerpc64`: `std_detect` supports these on FreeBSD by querying ELF + auxiliary vectors using `sysctl`. * `arm64`: run-time feature detection is implemented by directly querying `mrs`. +* OpenBSD: + * `arm64`: run-time feature detection is implemented by querying `sysctl`. + +* Windows: + * `arm64`: run-time feature detection is implemented by querying `IsProcessorFeaturePresent`. + # License This project is licensed under either of diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs index d0bf92d3e..828ac5c38 100644 --- a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -91,6 +91,8 @@ features! { /// * `"cmpxchg16b"` /// * `"adx"` /// * `"rtm"` + /// * `"movbe"` + /// * `"ermsb"` /// /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide #[stable(feature = "simd_x86", since = "1.27.0")] @@ -197,4 +199,8 @@ features! { /// ADX, Intel ADX (Multi-Precision Add-Carry Instruction Extensions) @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rtm: "rtm"; /// RTM, Intel (Restricted Transactional Memory) + @FEATURE: #[stable(feature = "movbe_target_feature", since = "1.67.0")] movbe: "movbe"; + /// MOVBE (Move Data After Swapping Bytes) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ermsb: "ermsb"; + /// ERMSB, Enhanced REP MOVSB and STOSB } diff --git a/library/stdarch/crates/std_detect/src/detect/mod.rs b/library/stdarch/crates/std_detect/src/detect/mod.rs index 9a135c90a..db7018232 100644 --- a/library/stdarch/crates/std_detect/src/detect/mod.rs +++ b/library/stdarch/crates/std_detect/src/detect/mod.rs @@ -56,6 +56,12 @@ cfg_if! { mod aarch64; #[path = "os/freebsd/mod.rs"] mod os; + } else if #[cfg(all(target_os = "openbsd", target_arch = "aarch64", feature = "libc"))] { + #[allow(dead_code)] // we don't use code that calls the mrs instruction. + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/openbsd/aarch64.rs"] + mod os; } else if #[cfg(all(target_os = "windows", target_arch = "aarch64"))] { #[path = "os/windows/aarch64.rs"] mod os; diff --git a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs index e0e62ee33..5790f0168 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs @@ -15,6 +15,7 @@ //! //! - [Zircon implementation](https://fuchsia.googlesource.com/zircon/+/master/kernel/arch/arm64/feature.cpp) //! - [Linux documentation](https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt) +//! - [ARM documentation](https://developer.arm.com/documentation/ddi0601/2022-12/AArch64-Registers?lang=en) use crate::detect::{cache, Feature}; use core::arch::asm; @@ -23,40 +24,71 @@ use core::arch::asm; /// /// This will cause SIGILL if the current OS is not trapping the mrs instruction. pub(crate) fn detect_features() -> cache::Initializer { - let mut value = cache::Initializer::default(); + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + let aa64isar0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR0_EL1", + out(reg) aa64isar0, + options(pure, nomem, preserves_flags, nostack) + ); + } - { - let mut enable_feature = |f, enable| { - if enable { - value.set(f as u32); - } - }; + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + let aa64isar1: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR1_EL1", + out(reg) aa64isar1, + options(pure, nomem, preserves_flags, nostack) + ); + } - // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 - let aa64isar0: u64; - unsafe { - asm!( - "mrs {}, ID_AA64ISAR0_EL1", - out(reg) aa64isar0, - options(pure, nomem, preserves_flags, nostack) - ); - } + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + let aa64mmfr2: u64; + unsafe { + asm!( + "mrs {}, ID_AA64MMFR2_EL1", + out(reg) aa64mmfr2, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + let aa64pfr0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64PFR0_EL1", + out(reg) aa64pfr0, + options(pure, nomem, preserves_flags, nostack) + ); + } - enable_feature(Feature::pmull, bits_shift(aa64isar0, 7, 4) >= 2); - enable_feature(Feature::tme, bits_shift(aa64isar0, 27, 24) == 1); - enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 1); - enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, Some(aa64pfr0)) +} + +pub(crate) fn parse_system_registers( + aa64isar0: u64, + aa64isar1: u64, + aa64mmfr2: u64, + aa64pfr0: Option, +) -> cache::Initializer { + let mut value = cache::Initializer::default(); - // ID_AA64PFR0_EL1 - Processor Feature Register 0 - let aa64pfr0: u64; - unsafe { - asm!( - "mrs {}, ID_AA64PFR0_EL1", - out(reg) aa64pfr0, - options(pure, nomem, preserves_flags, nostack) - ); + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); } + }; + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + enable_feature(Feature::pmull, bits_shift(aa64isar0, 7, 4) >= 2); + enable_feature(Feature::tme, bits_shift(aa64isar0, 27, 24) == 1); + enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 2); + enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + if let Some(aa64pfr0) = aa64pfr0 { let fp = bits_shift(aa64pfr0, 19, 16) < 0xF; let fphp = bits_shift(aa64pfr0, 19, 16) >= 1; let asimd = bits_shift(aa64pfr0, 23, 20) < 0xF; @@ -77,23 +109,17 @@ pub(crate) fn detect_features() -> cache::Initializer { asimd && bits_shift(aa64isar0, 47, 44) >= 1, ); enable_feature(Feature::sve, asimd && bits_shift(aa64pfr0, 35, 32) >= 1); + } - // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 - let aa64isar1: u64; - unsafe { - asm!( - "mrs {}, ID_AA64ISAR1_EL1", - out(reg) aa64isar1, - options(pure, nomem, preserves_flags, nostack) - ); - } + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + // Check for either APA or API field + enable_feature(Feature::paca, bits_shift(aa64isar1, 11, 4) >= 1); + enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); + // Check for either GPA or GPI field + enable_feature(Feature::pacg, bits_shift(aa64isar1, 31, 24) >= 1); - // Check for either APA or API field - enable_feature(Feature::paca, bits_shift(aa64isar1, 11, 4) >= 1); - enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); - // Check for either GPA or GPI field - enable_feature(Feature::pacg, bits_shift(aa64isar1, 31, 24) >= 1); - } + // ID_AA64MMFR2_EL1 - AArch64 Memory Model Feature Register 2 + enable_feature(Feature::lse2, bits_shift(aa64mmfr2, 35, 32) >= 1); value } diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs index 7d972b373..ccc48f536 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs @@ -1,21 +1,3 @@ //! Run-time feature detection for Aarch64 on FreeBSD. pub(crate) use super::super::aarch64::detect_features; - -#[cfg(test)] -mod tests { - #[test] - fn dump() { - println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); - println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); - println!("fp: {:?}", is_aarch64_feature_detected!("fp")); - println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); - println!("sve: {:?}", is_aarch64_feature_detected!("sve")); - println!("crc: {:?}", is_aarch64_feature_detected!("crc")); - println!("lse: {:?}", is_aarch64_feature_detected!("lse")); - println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); - println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); - println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); - println!("tme: {:?}", is_aarch64_feature_detected!("tme")); - } -} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs index 4c9d763b4..97ede1d26 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs @@ -3,6 +3,15 @@ use super::auxvec; use crate::detect::{cache, Feature}; +// Defined in machine/elf.h. +// https://github.com/freebsd/freebsd-src/blob/deb63adf945d446ed91a9d84124c71f15ae571d1/sys/arm/include/elf.h +const HWCAP_NEON: usize = 0x00001000; +const HWCAP2_AES: usize = 0x00000001; +const HWCAP2_PMULL: usize = 0x00000002; +const HWCAP2_SHA1: usize = 0x00000004; +const HWCAP2_SHA2: usize = 0x00000008; +const HWCAP2_CRC32: usize = 0x00000010; + /// Try to read the features from the auxiliary vector pub(crate) fn detect_features() -> cache::Initializer { let mut value = cache::Initializer::default(); @@ -13,8 +22,17 @@ pub(crate) fn detect_features() -> cache::Initializer { }; if let Ok(auxv) = auxvec::auxv() { - enable_feature(&mut value, Feature::neon, auxv.hwcap & 0x00001000 != 0); - enable_feature(&mut value, Feature::pmull, auxv.hwcap2 & 0x00000002 != 0); + enable_feature(&mut value, Feature::neon, auxv.hwcap & HWCAP_NEON != 0); + let pmull = auxv.hwcap2 & HWCAP2_PMULL != 0; + enable_feature(&mut value, Feature::pmull, pmull); + enable_feature(&mut value, Feature::crc, auxv.hwcap2 & HWCAP2_CRC32 != 0); + let aes = auxv.hwcap2 & HWCAP2_AES != 0; + enable_feature(&mut value, Feature::aes, aes); + // SHA2 requires SHA1 & SHA2 features + let sha1 = auxv.hwcap2 & HWCAP2_SHA1 != 0; + let sha2 = auxv.hwcap2 & HWCAP2_SHA2 != 0; + enable_feature(&mut value, Feature::sha2, sha1 && sha2); + enable_feature(&mut value, Feature::crypto, aes && pmull && sha1 && sha2); return value; } value diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs index a75185d43..caaa39f14 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -6,14 +6,34 @@ use crate::detect::{bit, cache, Feature}; /// Try to read the features from the auxiliary vector, and if that fails, try /// to read them from /proc/cpuinfo. pub(crate) fn detect_features() -> cache::Initializer { + #[cfg(target_os = "android")] + let is_exynos9810 = { + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // https://reviews.llvm.org/D114523 + let mut arch = [0_u8; libc::PROP_VALUE_MAX as usize]; + let len = unsafe { + libc::__system_property_get( + b"ro.arch\0".as_ptr() as *const libc::c_char, + arch.as_mut_ptr() as *mut libc::c_char, + ) + }; + // On Exynos, ro.arch is not available on Android 12+, but it is fine + // because Android 9+ includes the fix. + len > 0 && arch.starts_with(b"exynos9810") + }; + #[cfg(not(target_os = "android"))] + let is_exynos9810 = false; + if let Ok(auxv) = auxvec::auxv() { let hwcap: AtHwcap = auxv.into(); - return hwcap.cache(); + return hwcap.cache(is_exynos9810); } #[cfg(feature = "std_detect_file_io")] if let Ok(c) = super::cpuinfo::CpuInfo::new() { let hwcap: AtHwcap = c.into(); - return hwcap.cache(); + return hwcap.cache(is_exynos9810); } cache::Initializer::default() } @@ -207,9 +227,9 @@ impl From for AtHwcap { impl AtHwcap { /// Initializes the cache from the feature -bits. /// - /// The feature dependencies here come directly from LLVM's feature definintions: + /// The feature dependencies here come directly from LLVM's feature definitions: /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td - fn cache(self) -> cache::Initializer { + fn cache(self, is_exynos9810: bool) -> cache::Initializer { let mut value = cache::Initializer::default(); { let mut enable_feature = |f, enable| { @@ -218,6 +238,25 @@ impl AtHwcap { } }; + // Samsung Exynos 9810 has a bug that big and little cores have different + // ISAs. And on older Android (pre-9), the kernel incorrectly reports + // that features available only on some cores are available on all cores. + // So, only check features that are known to be available on exynos-m3: + // $ rustc --print cfg --target aarch64-linux-android -C target-cpu=exynos-m3 | grep target_feature + // See also https://github.com/rust-lang/stdarch/pull/1378#discussion_r1103748342. + if is_exynos9810 { + enable_feature(Feature::fp, self.fp); + enable_feature(Feature::crc, self.crc32); + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // Cryptographic extensions require ASIMD + enable_feature(Feature::aes, self.aes && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + return value; + } + enable_feature(Feature::fp, self.fp); // Half-float support requires float support enable_feature(Feature::fp16, self.fp && self.fphp); diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs index d9e7b28ea..11d9c103e 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -52,7 +52,12 @@ pub(crate) struct AuxVec { /// Note that run-time feature detection is not invoked for features that can /// be detected at compile-time. Also note that if this function returns an /// error, cpuinfo still can (and will) be used to try to perform run-time -/// feature detecton on some platforms. +/// feature detection on some platforms. +/// +/// Note: The `std_detect_dlsym_getauxval` cargo feature is ignored on `*-linux-gnu*` targets, +/// since [all `*-linux-gnu*` targets ([since Rust 1.64](https://blog.rust-lang.org/2022/08/01/Increasing-glibc-kernel-requirements.html)) +/// have glibc requirements higher than [glibc 2.16 that added `getauxval`](https://sourceware.org/legacy-ml/libc-announce/2012/msg00000.html), +/// and we can safely assume [`getauxval`] is linked to the binary. /// /// For more information about when `getauxval` is available check the great /// [`auxv` crate documentation][auxv_docs]. @@ -60,7 +65,10 @@ pub(crate) struct AuxVec { /// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h /// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/ pub(crate) fn auxv() -> Result { - #[cfg(feature = "std_detect_dlsym_getauxval")] + #[cfg(all( + feature = "std_detect_dlsym_getauxval", + not(all(target_os = "linux", target_env = "gnu")) + ))] { // Try to call a dynamically-linked getauxval function. if let Ok(hwcap) = getauxval(AT_HWCAP) { @@ -101,7 +109,10 @@ pub(crate) fn auxv() -> Result { } } - #[cfg(not(feature = "std_detect_dlsym_getauxval"))] + #[cfg(any( + not(feature = "std_detect_dlsym_getauxval"), + all(target_os = "linux", target_env = "gnu") + ))] { // Targets with only AT_HWCAP: #[cfg(any( @@ -154,7 +165,10 @@ pub(crate) fn auxv() -> Result { /// Tries to read the `key` from the auxiliary vector by calling the /// dynamically-linked `getauxval` function. If the function is not linked, /// this function return `Err`. -#[cfg(feature = "std_detect_dlsym_getauxval")] +#[cfg(all( + feature = "std_detect_dlsym_getauxval", + not(all(target_os = "linux", target_env = "gnu")) +))] fn getauxval(key: usize) -> Result { use libc; pub type F = unsafe extern "C" fn(usize) -> usize; diff --git a/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs new file mode 100644 index 000000000..cfe4ad10a --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/openbsd/aarch64.rs @@ -0,0 +1,55 @@ +//! Run-time feature detection for Aarch64 on OpenBSD. +//! +//! OpenBSD doesn't trap the mrs instruction, but exposes the system registers through sysctl. +//! https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 +//! https://github.com/golang/go/commit/cd54ef1f61945459486e9eea2f016d99ef1da925 + +use crate::detect::cache; +use core::{mem::MaybeUninit, ptr}; + +// Defined in machine/cpu.h. +// https://github.com/openbsd/src/blob/72ccc03bd11da614f31f7ff76e3f6fce99bc1c79/sys/arch/arm64/include/cpu.h#L25-L40 +const CPU_ID_AA64ISAR0: libc::c_int = 2; +const CPU_ID_AA64ISAR1: libc::c_int = 3; +const CPU_ID_AA64MMFR2: libc::c_int = 7; +const CPU_ID_AA64PFR0: libc::c_int = 8; + +/// Try to read the features from the system registers. +pub(crate) fn detect_features() -> cache::Initializer { + // ID_AA64ISAR0_EL1 and ID_AA64ISAR1_EL1 are supported on OpenBSD 7.1+. + // https://github.com/openbsd/src/commit/d335af936b9d7dd9cf655cae1ce19560c45de6c8 + // Others are supported on OpenBSD 7.3+. + // https://github.com/openbsd/src/commit/c7654cd65262d532212f65123ee3905ba200365c + // sysctl returns an unsupported error if operation is not supported, + // so we can safely use this function on older versions of OpenBSD. + let aa64isar0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR0]).unwrap_or(0); + let aa64isar1 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64ISAR1]).unwrap_or(0); + let aa64mmfr2 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64MMFR2]).unwrap_or(0); + // Do not use unwrap_or(0) because in fp and asimd fields, 0 indicates that + // the feature is available. + let aa64pfr0 = sysctl64(&[libc::CTL_MACHDEP, CPU_ID_AA64PFR0]); + + super::aarch64::parse_system_registers(aa64isar0, aa64isar1, aa64mmfr2, aa64pfr0) +} + +#[inline] +fn sysctl64(mib: &[libc::c_int]) -> Option { + const OUT_LEN: libc::size_t = core::mem::size_of::(); + let mut out = MaybeUninit::::uninit(); + let mut out_len = OUT_LEN; + let res = unsafe { + libc::sysctl( + mib.as_ptr(), + mib.len() as libc::c_uint, + out.as_mut_ptr() as *mut libc::c_void, + &mut out_len, + ptr::null_mut(), + 0, + ) + }; + if res == -1 || out_len != OUT_LEN { + return None; + } + // SAFETY: we've checked that sysctl was successful and `out` was filled. + Some(unsafe { out.assume_init() }) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs index 051ad6d1b..faded671c 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs @@ -11,9 +11,14 @@ pub(crate) fn detect_features() -> cache::Initializer { // The following Microsoft documents isn't updated for aarch64. // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent // These are defined in winnt.h of Windows SDK + const PF_ARM_VFP_32_REGISTERS_AVAILABLE: u32 = 18; const PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: u32 = 19; const PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE: u32 = 30; const PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE: u32 = 31; + const PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE: u32 = 34; + const PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE: u32 = 43; + const PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE: u32 = 44; + const PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE: u32 = 45; extern "system" { pub fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL; @@ -27,10 +32,14 @@ pub(crate) fn detect_features() -> cache::Initializer { } }; - // Some features such Feature::fp may be supported on current CPU, + // Some features may be supported on current CPU, // but no way to detect it by OS API. // Also, we require unsafe block for the extern "system" calls. unsafe { + enable_feature( + Feature::fp, + IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != FALSE, + ); enable_feature( Feature::asimd, IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != FALSE, @@ -39,20 +48,29 @@ pub(crate) fn detect_features() -> cache::Initializer { Feature::crc, IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != FALSE, ); - // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and - // pmull support enable_feature( - Feature::aes, - IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE, + Feature::lse, + IsProcessorFeaturePresent(PF_ARM_V81_ATOMIC_INSTRUCTIONS_AVAILABLE) != FALSE, ); enable_feature( - Feature::pmull, - IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE, + Feature::dotprod, + IsProcessorFeaturePresent(PF_ARM_V82_DP_INSTRUCTIONS_AVAILABLE) != FALSE, ); enable_feature( - Feature::sha2, - IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE, + Feature::jsconv, + IsProcessorFeaturePresent(PF_ARM_V83_JSCVT_INSTRUCTIONS_AVAILABLE) != FALSE, ); + enable_feature( + Feature::rcpc, + IsProcessorFeaturePresent(PF_ARM_V83_LRCPC_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and + // pmull support + let crypto = + IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE; + enable_feature(Feature::aes, crypto); + enable_feature(Feature::pmull, crypto); + enable_feature(Feature::sha2, crypto); } } value diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs index 08f48cd17..d8afc1aca 100644 --- a/library/stdarch/crates/std_detect/src/detect/os/x86.rs +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -111,6 +111,7 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(proc_info_ecx, 13, Feature::cmpxchg16b); enable(proc_info_ecx, 19, Feature::sse4_1); enable(proc_info_ecx, 20, Feature::sse4_2); + enable(proc_info_ecx, 22, Feature::movbe); enable(proc_info_ecx, 23, Feature::popcnt); enable(proc_info_ecx, 25, Feature::aes); enable(proc_info_ecx, 29, Feature::f16c); @@ -128,6 +129,8 @@ pub(crate) fn detect_features() -> cache::Initializer { enable(extended_features_ebx, 3, Feature::bmi1); enable(extended_features_ebx, 8, Feature::bmi2); + enable(extended_features_ebx, 9, Feature::ermsb); + // `XSAVE` and `AVX` support: let cpu_xsave = bit::test(proc_info_ecx as usize, 26); if cpu_xsave { diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs index c0e0de0dd..c0819218c 100644 --- a/library/stdarch/crates/std_detect/src/lib.rs +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -19,6 +19,9 @@ #![deny(clippy::missing_inline_in_public_items)] #![cfg_attr(test, allow(unused_imports))] #![no_std] +// FIXME(Nilstrieb): Remove this once the compiler in stdarch CI has the internal_features lint. +#![allow(unknown_lints)] +#![allow(internal_features)] #[cfg(test)] #[macro_use] diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs index 02ad77a63..eb3a3e409 100644 --- a/library/stdarch/crates/std_detect/tests/cpu-detection.rs +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -20,8 +20,11 @@ fn all() { } #[test] -#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))] -fn arm_linux() { +#[cfg(all( + target_arch = "arm", + any(target_os = "linux", target_os = "android", target_os = "freebsd"), +))] +fn arm_linux_or_freebsd() { println!("neon: {}", is_arm_feature_detected!("neon")); println!("pmull: {}", is_arm_feature_detected!("pmull")); println!("crc: {}", is_arm_feature_detected!("crc")); @@ -83,6 +86,45 @@ fn aarch64_linux() { println!("sm4: {}", is_aarch64_feature_detected!("sm4")); } +#[test] +#[cfg(all(target_arch = "aarch64", target_os = "windows"))] +fn aarch64_windows() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("jsconv: {:?}", is_aarch64_feature_detected!("jsconv")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); +} + +#[test] +#[cfg(all( + target_arch = "aarch64", + any(target_os = "freebsd", target_os = "openbsd") +))] +fn aarch64_bsd() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("sve: {:?}", is_aarch64_feature_detected!("sve")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("lse2: {:?}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("tme: {:?}", is_aarch64_feature_detected!("tme")); + println!("paca: {:?}", is_aarch64_feature_detected!("paca")); + println!("pacg: {:?}", is_aarch64_feature_detected!("pacg")); + println!("aes: {:?}", is_aarch64_feature_detected!("aes")); + println!("sha2: {:?}", is_aarch64_feature_detected!("sha2")); +} + #[test] #[cfg(all(target_arch = "powerpc", target_os = "linux"))] fn powerpc_linux() { @@ -152,6 +194,7 @@ fn x86_all() { println!("abm: {:?}", is_x86_feature_detected!("abm")); println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); println!("tbm: {:?}", is_x86_feature_detected!("tbm")); + println!("movbe: {:?}", is_x86_feature_detected!("movbe")); println!("popcnt: {:?}", is_x86_feature_detected!("popcnt")); println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); println!("xsave: {:?}", is_x86_feature_detected!("xsave")); @@ -159,3 +202,15 @@ fn x86_all() { println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); } + +#[test] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +#[allow(deprecated)] +fn x86_deprecated() { + println!("avx512gfni {:?}", is_x86_feature_detected!("avx512gfni")); + println!("avx512vaes {:?}", is_x86_feature_detected!("avx512vaes")); + println!( + "avx512vpclmulqdq {:?}", + is_x86_feature_detected!("avx512vpclmulqdq") + ); +} diff --git a/library/stdarch/crates/std_detect/tests/x86-specific.rs b/library/stdarch/crates/std_detect/tests/x86-specific.rs index e481620c7..38512c758 100644 --- a/library/stdarch/crates/std_detect/tests/x86-specific.rs +++ b/library/stdarch/crates/std_detect/tests/x86-specific.rs @@ -20,6 +20,7 @@ fn dump() { println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); println!("sha: {:?}", is_x86_feature_detected!("sha")); + println!("f16c: {:?}", is_x86_feature_detected!("f16c")); println!("avx: {:?}", is_x86_feature_detected!("avx")); println!("avx2: {:?}", is_x86_feature_detected!("avx2")); println!("avx512f {:?}", is_x86_feature_detected!("avx512f")); @@ -64,6 +65,7 @@ fn dump() { println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); println!("adx: {:?}", is_x86_feature_detected!("adx")); println!("rtm: {:?}", is_x86_feature_detected!("rtm")); + println!("movbe: {:?}", is_x86_feature_detected!("movbe")); } #[cfg(feature = "std_detect_env_override")] @@ -108,6 +110,7 @@ fn compare_with_cupid() { assert_eq!(is_x86_feature_detected!("sse4.2"), information.sse4_2()); assert_eq!(is_x86_feature_detected!("sse4a"), information.sse4a()); assert_eq!(is_x86_feature_detected!("sha"), information.sha()); + assert_eq!(is_x86_feature_detected!("f16c"), information.f16c()); assert_eq!(is_x86_feature_detected!("avx"), information.avx()); assert_eq!(is_x86_feature_detected!("avx2"), information.avx2()); assert_eq!(is_x86_feature_detected!("avx512f"), information.avx512f()); @@ -152,4 +155,5 @@ fn compare_with_cupid() { ); assert_eq!(is_x86_feature_detected!("adx"), information.adx(),); assert_eq!(is_x86_feature_detected!("rtm"), information.rtm(),); + assert_eq!(is_x86_feature_detected!("movbe"), information.movbe(),); } -- cgit v1.2.3