diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-17 12:02:58 +0000 |
commit | 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch) | |
tree | 173a775858bd501c378080a10dca74132f05bc50 /library/stdarch/crates/std_detect | |
parent | Initial commit. (diff) | |
download | rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip |
Adding upstream version 1.64.0+dfsg1.upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/stdarch/crates/std_detect')
41 files changed, 3801 insertions, 0 deletions
diff --git a/library/stdarch/crates/std_detect/Cargo.toml b/library/stdarch/crates/std_detect/Cargo.toml new file mode 100644 index 000000000..1ca0d9c5d --- /dev/null +++ b/library/stdarch/crates/std_detect/Cargo.toml @@ -0,0 +1,45 @@ +[package] +name = "std_detect" +version = "0.1.5" +authors = [ + "Alex Crichton <alex@alexcrichton.com>", + "Andrew Gallant <jamslam@gmail.com>", + "Gonzalo Brito Gadeschi <gonzalobg88@gmail.com>", +] +description = "`std::detect` - Rust's standard library run-time CPU feature detection." +homepage = "https://github.com/rust-lang/stdarch" +repository = "https://github.com/rust-lang/stdarch" +readme = "README.md" +keywords = ["std", "run-time", "feature", "detection"] +categories = ["hardware-support"] +license = "MIT OR Apache-2.0" +edition = "2018" + +[badges] +is-it-maintained-issue-resolution = { repository = "rust-lang/stdarch" } +is-it-maintained-open-issues = { repository = "rust-lang/stdarch" } +maintenance = { status = "experimental" } + +[dependencies] +libc = { version = "0.2", optional = true, default-features = false } +cfg-if = "0.1.10" + +# When built as part of libstd +core = { version = "1.0.0", optional = true, package = "rustc-std-workspace-core" } +compiler_builtins = { version = "0.1.2", optional = true } +alloc = { version = "1.0.0", optional = true, package = "rustc-std-workspace-alloc" } + +[dev-dependencies] +auxv = "0.3.3" +cupid = "0.6.0" + +[features] +default = [ "std_detect_dlsym_getauxval", "std_detect_file_io" ] +std_detect_file_io = [ "libc" ] +std_detect_dlsym_getauxval = [ "libc" ] +std_detect_env_override = [ "libc" ] +rustc-dep-of-std = [ + "core", + "compiler_builtins", + "alloc", +] diff --git a/library/stdarch/crates/std_detect/LICENSE-APACHE b/library/stdarch/crates/std_detect/LICENSE-APACHE new file mode 100644 index 000000000..16fe87b06 --- /dev/null +++ b/library/stdarch/crates/std_detect/LICENSE-APACHE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + 
http://www.apache.org/licenses/ + +TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + +1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + +2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + +3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + +4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the 
following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + +5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + +6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + +7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + +8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + +9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ +END OF TERMS AND CONDITIONS + +APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + +Copyright [yyyy] [name of copyright owner] + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/library/stdarch/crates/std_detect/LICENSE-MIT b/library/stdarch/crates/std_detect/LICENSE-MIT new file mode 100644 index 000000000..52d82415d --- /dev/null +++ b/library/stdarch/crates/std_detect/LICENSE-MIT @@ -0,0 +1,25 @@ +Copyright (c) 2017 The Rust Project Developers + +Permission is hereby granted, free of charge, to any +person obtaining a copy of this software and associated +documentation files (the "Software"), to deal in the +Software without restriction, including without +limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of +the Software, and to permit persons to whom the Software +is furnished to do so, subject to the following +conditions: + +The above copyright notice and this permission notice +shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF +ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A +PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT +SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR +IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE. diff --git a/library/stdarch/crates/std_detect/README.md b/library/stdarch/crates/std_detect/README.md new file mode 100644 index 000000000..bea7d941a --- /dev/null +++ b/library/stdarch/crates/std_detect/README.md @@ -0,0 +1,73 @@ +`std::detect` - Rust's standard library run-time CPU feature detection +======= + +The private `std::detect` module implements run-time feature detection in Rust's +standard library. This allows detecting whether the CPU the binary runs on +supports certain features, like SIMD instructions. + +# Usage + +`std::detect` APIs are available as part of `libstd`. Prefer using it via the +standard library rather than through this crate. 
Unstable features of `std::detect` are +available on nightly Rust behind the `feature(stdsimd)` feature-gate. + +If you need run-time feature detection in `#[no_std]` environments, Rust `core` +library cannot help you. By design, Rust `core` is platform independent, but +performing run-time feature detection requires a certain level of cooperation +from the platform. + +You can then manually include `std_detect` as a dependency to get +run-time feature detection support similar to the one offered by Rust's standard +library. We intend to make `std_detect` more flexible and configurable in this +regard to better serve the needs of `#[no_std]` targets. + +# Features + +* `std_detect_dlsym_getauxval` (enabled by default, requires `libc`): Enable to +use `libc::dlsym` to query whether [`getauxval`] is linked into the binary. When +this is not the case, this feature allows other fallback methods to perform +run-time feature detection. When this feature is disabled, `std_detect` assumes +that [`getauxval`] is linked to the binary. If that is not the case the behavior +is undefined. + +* `std_detect_file_io` (enabled by default, requires `std`): Enable to perform run-time feature +detection using file APIs (e.g. `/proc/cpuinfo`, etc.) if other more performant +methods fail. This feature requires `libstd` as a dependency, preventing the +crate from working on applications in which `std` is not available. + +[`getauxval`]: http://man7.org/linux/man-pages/man3/getauxval.3.html + +# Platform support + +* All `x86`/`x86_64` targets are supported on all platforms by querying the + `cpuid` instruction directly for the features supported by the hardware and + the operating system. `std_detect` assumes that the binary is a user-space + application. If you need raw support for querying `cpuid`, consider using the + [`cupid`](https://crates.io/crates/cupid) crate. 
+ +* Linux: + * `arm{32, 64}`, `mips{32,64}{,el}`, `powerpc{32,64}{,le}`: `std_detect` + supports these on Linux by querying ELF auxiliary vectors (using `getauxval` + when available), and if that fails, by querying `/proc/cpuinfo`. + * `arm64`: partial support for doing run-time feature detection by directly + querying `mrs` is implemented for Linux >= 4.11, but not enabled by default. + +* FreeBSD: + * `arm64`: run-time feature detection is implemented by directly querying `mrs`. + +# License + +This project is licensed under either of + + * Apache License, Version 2.0, ([LICENSE-APACHE](LICENSE-APACHE) or + http://www.apache.org/licenses/LICENSE-2.0) + * MIT license ([LICENSE-MIT](LICENSE-MIT) or + http://opensource.org/licenses/MIT) + +at your option. + +# Contribution + +Unless you explicitly state otherwise, any contribution intentionally submitted +for inclusion in `std_detect` by you, as defined in the Apache-2.0 license, +shall be dual licensed as above, without any additional terms or conditions. diff --git a/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs new file mode 100644 index 000000000..f32f961ae --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/aarch64.rs @@ -0,0 +1,152 @@ +//! Aarch64 run-time features. + +features! { + @TARGET: aarch64; + @CFG: target_arch = "aarch64"; + @MACRO_NAME: is_aarch64_feature_detected; + @MACRO_ATTRS: + /// This macro tests, at runtime, whether an `aarch64` feature is enabled on aarch64 platforms. + /// Currently most features are only supported on linux-based platforms. + /// + /// This macro takes one argument which is a string literal of the feature being tested for. + /// The feature names are mostly taken from their FEAT_* definitions in the [ARM Architecture + /// Reference Manual][docs]. 
+ /// + /// ## Supported arguments + /// + /// * `"asimd"` or `"neon"` - FEAT_AdvSIMD + /// * `"pmull"` - FEAT_PMULL + /// * `"fp"` - FEAT_FP + /// * `"fp16"` - FEAT_FP16 + /// * `"sve"` - FEAT_SVE + /// * `"crc"` - FEAT_CRC32 + /// * `"lse"` - FEAT_LSE + /// * `"lse2"` - FEAT_LSE2 + /// * `"rdm"` - FEAT_RDM + /// * `"rcpc"` - FEAT_LRCPC + /// * `"rcpc2"` - FEAT_LRCPC2 + /// * `"dotprod"` - FEAT_DotProd + /// * `"tme"` - FEAT_TME + /// * `"fhm"` - FEAT_FHM + /// * `"dit"` - FEAT_DIT + /// * `"flagm"` - FEAT_FLAGM + /// * `"ssbs"` - FEAT_SSBS + /// * `"sb"` - FEAT_SB + /// * `"paca"` - FEAT_PAuth (address authentication) + /// * `"pacg"` - FEAT_PAuth (generic authentication) + /// * `"dpb"` - FEAT_DPB + /// * `"dpb2"` - FEAT_DPB2 + /// * `"sve2"` - FEAT_SVE2 + /// * `"sve2-aes"` - FEAT_SVE_AES + /// * `"sve2-sm4"` - FEAT_SVE_SM4 + /// * `"sve2-sha3"` - FEAT_SVE_SHA3 + /// * `"sve2-bitperm"` - FEAT_SVE_BitPerm + /// * `"frintts"` - FEAT_FRINTTS + /// * `"i8mm"` - FEAT_I8MM + /// * `"f32mm"` - FEAT_F32MM + /// * `"f64mm"` - FEAT_F64MM + /// * `"bf16"` - FEAT_BF16 + /// * `"rand"` - FEAT_RNG + /// * `"bti"` - FEAT_BTI + /// * `"mte"` - FEAT_MTE + /// * `"jsconv"` - FEAT_JSCVT + /// * `"fcma"` - FEAT_FCMA + /// * `"aes"` - FEAT_AES + /// * `"sha2"` - FEAT_SHA1 & FEAT_SHA256 + /// * `"sha3"` - FEAT_SHA512 & FEAT_SHA3 + /// * `"sm4"` - FEAT_SM3 & FEAT_SM4 + /// + /// [docs]: https://developer.arm.com/documentation/ddi0487/latest + #[stable(feature = "simd_aarch64", since = "1.60.0")] + @BIND_FEATURE_NAME: "asimd"; "neon"; + @NO_RUNTIME_DETECTION: "ras"; + @NO_RUNTIME_DETECTION: "v8.1a"; + @NO_RUNTIME_DETECTION: "v8.2a"; + @NO_RUNTIME_DETECTION: "v8.3a"; + @NO_RUNTIME_DETECTION: "v8.4a"; + @NO_RUNTIME_DETECTION: "v8.5a"; + @NO_RUNTIME_DETECTION: "v8.6a"; + @NO_RUNTIME_DETECTION: "v8.7a"; + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] asimd: "neon"; + /// FEAT_AdvSIMD (Advanced SIMD/NEON) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] 
pmull: "pmull"; + /// FEAT_PMULL (Polynomial Multiply) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp: "fp"; + /// FEAT_FP (Floating point support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fp16: "fp16"; + /// FEAT_FP16 (Half-float support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve: "sve"; + /// FEAT_SVE (Scalable Vector Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] crc: "crc"; + /// FEAT_CRC32 (Cyclic Redundancy Check) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse: "lse"; + /// FEAT_LSE (Large System Extension - atomics) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] lse2: "lse2"; + /// FEAT_LSE2 (unaligned and register-pair atomics) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rdm: "rdm"; + /// FEAT_RDM (Rounding Doubling Multiply - ASIMDRDM) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc: "rcpc"; + /// FEAT_LRCPC (Release consistent Processor consistent) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rcpc2: "rcpc2"; + /// FEAT_LRCPC2 (RCPC with immediate offsets) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dotprod: "dotprod"; + /// FEAT_DotProd (Vector Dot-Product - ASIMDDP) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] tme: "tme"; + /// FEAT_TME (Transactional Memory Extensions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fhm: "fhm"; + /// FEAT_FHM (fp16 multiplication instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dit: "dit"; + /// FEAT_DIT (Data Independent Timing instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] flagm: "flagm"; + /// FEAT_FLAGM (flag manipulation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] ssbs: "ssbs"; + /// FEAT_SSBS (speculative store bypass safe) + @FEATURE: 
#[stable(feature = "simd_aarch64", since = "1.60.0")] sb: "sb"; + /// FEAT_SB (speculation barrier) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] paca: "paca"; + /// FEAT_PAuth (address authentication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] pacg: "pacg"; + /// FEAT_PAuth (generic authentication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb: "dpb"; + /// FEAT_DPB (aka dcpop - data cache clean to point of persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] dpb2: "dpb2"; + /// FEAT_DPB2 (aka dcpodp - data cache clean to point of deep persistence) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2: "sve2"; + /// FEAT_SVE2 (Scalable Vector Extension 2) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_aes: "sve2-aes"; + /// FEAT_SVE_AES (SVE2 AES crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sm4: "sve2-sm4"; + /// FEAT_SVE_SM4 (SVE2 SM4 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_sha3: "sve2-sha3"; + /// FEAT_SVE_SHA3 (SVE2 SHA3 crypto) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sve2_bitperm: "sve2-bitperm"; + /// FEAT_SVE_BitPerm (SVE2 bit permutation instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] frintts: "frintts"; + /// FEAT_FRINTTS (float to integer rounding instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] i8mm: "i8mm"; + /// FEAT_I8MM (integer matrix multiplication, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f32mm: "f32mm"; + /// FEAT_F32MM (single-precision matrix multiplication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] f64mm: "f64mm"; + /// FEAT_F64MM (double-precision matrix multiplication) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bf16: "bf16"; + /// FEAT_BF16 (BFloat16 type, plus 
MM instructions, plus ASIMD support) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] rand: "rand"; + /// FEAT_RNG (Random Number Generator) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] bti: "bti"; + /// FEAT_BTI (Branch Target Identification) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] mte: "mte"; + /// FEAT_MTE (Memory Tagging Extension) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] jsconv: "jsconv"; + /// FEAT_JSCVT (JavaScript float conversion instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] fcma: "fcma"; + /// FEAT_FCMA (float complex number operations) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] aes: "aes"; + /// FEAT_AES (AES instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha2: "sha2"; + /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sha3: "sha3"; + /// FEAT_SHA512 & FEAT_SHA3 (SHA2-512 & SHA3 instructions) + @FEATURE: #[stable(feature = "simd_aarch64", since = "1.60.0")] sm4: "sm4"; + /// FEAT_SM3 & FEAT_SM4 (SM3 & SM4 instructions) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/arm.rs b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs new file mode 100644 index 000000000..897dc314c --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/arm.rs @@ -0,0 +1,28 @@ +//! Run-time feature detection on ARM Aarch32. + +features! { + @TARGET: arm; + @CFG: target_arch = "arm"; + @MACRO_NAME: is_arm_feature_detected; + @MACRO_ATTRS: + /// Checks if `arm` feature is enabled. 
+ #[unstable(feature = "stdsimd", issue = "27731")] + @NO_RUNTIME_DETECTION: "v7"; + @NO_RUNTIME_DETECTION: "vfp2"; + @NO_RUNTIME_DETECTION: "vfp3"; + @NO_RUNTIME_DETECTION: "vfp4"; + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] neon: "neon"; + /// ARM Advanced SIMD (NEON) - Aarch32 + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] pmull: "pmull"; + /// Polynomial Multiply + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] crc: "crc"; + /// CRC32 (Cyclic Redundancy Check) + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] crypto: "crypto"; + /// Crypto: AES + PMULL + SHA1 + SHA256. Prefer using the individual features where possible. + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] aes: "aes"; + /// FEAT_AES (AES instructions) + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] sha2: "sha2"; + /// FEAT_SHA1 & FEAT_SHA256 (SHA1 & SHA2-256 instructions) + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] i8mm: "i8mm"; + /// FEAT_I8MM +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs new file mode 100644 index 000000000..ae27d0093 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips.rs @@ -0,0 +1,12 @@ +//! Run-time feature detection on MIPS. + +features! { + @TARGET: mips; + @CFG: target_arch = "mips"; + @MACRO_NAME: is_mips_feature_detected; + @MACRO_ATTRS: + /// Checks if `mips` feature is enabled. + #[unstable(feature = "stdsimd", issue = "27731")] + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] msa: "msa"; + /// MIPS SIMD Architecture (MSA) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs new file mode 100644 index 000000000..7182ec2da --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mips64.rs @@ -0,0 +1,12 @@ +//! Run-time feature detection on MIPS64. 
+ +features! { + @TARGET: mips64; + @CFG: target_arch = "mips64"; + @MACRO_NAME: is_mips64_feature_detected; + @MACRO_ATTRS: + /// Checks if `mips64` feature is enabled. + #[unstable(feature = "stdsimd", issue = "27731")] + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] msa: "msa"; + /// MIPS SIMD Architecture (MSA) +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/mod.rs b/library/stdarch/crates/std_detect/src/detect/arch/mod.rs new file mode 100644 index 000000000..81a1f23e8 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/mod.rs @@ -0,0 +1,56 @@ +#![allow(dead_code)] + +use cfg_if::cfg_if; + +// Export the macros for all supported architectures. +#[macro_use] +mod x86; +#[macro_use] +mod arm; +#[macro_use] +mod aarch64; +#[macro_use] +mod riscv; +#[macro_use] +mod powerpc; +#[macro_use] +mod powerpc64; +#[macro_use] +mod mips; +#[macro_use] +mod mips64; + +cfg_if! { + if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + pub use x86::*; + } else if #[cfg(target_arch = "arm")] { + pub use arm::*; + } else if #[cfg(target_arch = "aarch64")] { + pub use aarch64::*; + } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] { + pub use riscv::*; + } else if #[cfg(target_arch = "powerpc")] { + pub use powerpc::*; + } else if #[cfg(target_arch = "powerpc64")] { + pub use powerpc64::*; + } else if #[cfg(target_arch = "mips")] { + pub use mips::*; + } else if #[cfg(target_arch = "mips64")] { + pub use mips64::*; + } else { + // Unimplemented architecture: + #[doc(hidden)] + pub(crate) enum Feature { + Null + } + #[doc(hidden)] + pub mod __is_feature_detected {} + + impl Feature { + #[doc(hidden)] + pub(crate) fn from_str(_s: &str) -> Result<Feature, ()> { Err(()) } + #[doc(hidden)] + pub(crate) fn to_str(self) -> &'static str { "" } + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs new file mode 
100644 index 000000000..d135cd95d --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc.rs @@ -0,0 +1,16 @@ +//! Run-time feature detection on PowerPC. + +features! { + @TARGET: powerpc; + @CFG: target_arch = "powerpc"; + @MACRO_NAME: is_powerpc_feature_detected; + @MACRO_ATTRS: + /// Checks if `powerpc` feature is enabled. + #[unstable(feature = "stdsimd", issue = "27731")] + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] altivec: "altivec"; + /// Altivec + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] vsx: "vsx"; + /// VSX + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] power8: "power8"; + /// Power8 +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs new file mode 100644 index 000000000..773afd6ce --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/powerpc64.rs @@ -0,0 +1,16 @@ +//! Run-time feature detection on PowerPC64. + +features! { + @TARGET: powerpc64; + @CFG: target_arch = "powerpc64"; + @MACRO_NAME: is_powerpc64_feature_detected; + @MACRO_ATTRS: + /// Checks if `powerpc64` feature is enabled. + #[unstable(feature = "stdsimd", issue = "27731")] + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] altivec: "altivec"; + /// Altivec + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] vsx: "vsx"; + /// VSX + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] power8: "power8"; + /// Power8 +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs b/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs new file mode 100644 index 000000000..5ea36e7c1 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/riscv.rs @@ -0,0 +1,206 @@ +//! Run-time feature detection on RISC-V. + +features! 
{ + @TARGET: riscv; + @CFG: any(target_arch = "riscv32", target_arch = "riscv64"); + @MACRO_NAME: is_riscv_feature_detected; + @MACRO_ATTRS: + /// A macro to test at *runtime* whether instruction sets are available on + /// RISC-V platforms. + /// + /// RISC-V standard defined the base sets and the extension sets. + /// The base sets are RV32I, RV64I, RV32E or RV128I. Any RISC-V platform + /// must support one base set and/or multiple extension sets. + /// + /// Any RISC-V standard instruction sets can be in state of either ratified, + /// frozen or draft. The version and status of current standard instruction + /// sets can be checked out from preface section of the [ISA manual]. + /// + /// Platform may define and support their own custom instruction sets with + /// ISA prefix X. These sets are highly platform specific and should be + /// detected with their own platform support crates. + /// + /// # Unprivileged Specification + /// + /// The supported ratified RISC-V instruction sets are as follows: + /// + /// * RV32I: `"rv32i"` + /// * Zifencei: `"zifencei"` + /// * Zihintpause: `"zihintpause"` + /// * RV64I: `"rv64i"` + /// * M: `"m"` + /// * A: `"a"` + /// * Zicsr: `"zicsr"` + /// * Zicntr: `"zicntr"` + /// * Zihpm: `"zihpm"` + /// * F: `"f"` + /// * D: `"d"` + /// * Q: `"q"` + /// * C: `"c"` + /// + /// There's also bases and extensions marked as standard instruction set, + /// but they are in frozen or draft state. These instruction sets are also + /// reserved by this macro and can be detected in the future platforms. 
+ /// + /// Frozen RISC-V instruction sets: + /// + /// * Zfinx: `"zfinx"` + /// * Zdinx: `"zdinx"` + /// * Zhinx: `"zhinx"` + /// * Zhinxmin: `"zhinxmin"` + /// * Ztso: `"ztso"` + /// + /// Draft RISC-V instruction sets: + /// + /// * RV32E: `"rv32e"` + /// * RV128I: `"rv128i"` + /// * Zfh: `"zfh"` + /// * Zfhmin: `"zfhmin"` + /// * B: `"b"` + /// * J: `"j"` + /// * P: `"p"` + /// * V: `"v"` + /// * Zam: `"zam"` + /// + /// Defined by Privileged Specification: + /// + /// * Supervisor: `"s"` + /// * Svnapot: `"svnapot"` + /// * Svpbmt: `"svpbmt"` + /// * Svinval: `"svinval"` + /// * Hypervisor: `"h"` + /// + /// # RISC-V Bit-Manipulation ISA-extensions + /// + /// This document defined the following extensions: + /// + /// * Zba: `"zba"` + /// * Zbb: `"zbb"` + /// * Zbc: `"zbc"` + /// * Zbs: `"zbs"` + /// + /// # RISC-V Cryptography Extensions + /// + /// These extensions are defined in Volume I, Scalar & Entropy Source + /// Instructions: + /// + /// * Zbkb: `"zbkb"` + /// * Zbkc: `"zbkc"` + /// * Zbkx: `"zbkx"` + /// * Zknd: `"zknd"` + /// * Zkne: `"zkne"` + /// * Zknh: `"zknh"` + /// * Zksed: `"zksed"` + /// * Zksh: `"zksh"` + /// * Zkr: `"zkr"` + /// * Zkn: `"zkn"` + /// * Zks: `"zks"` + /// * Zk: `"zk"` + /// * Zkt: `"zkt"` + /// + /// [ISA manual]: https://github.com/riscv/riscv-isa-manual/ + #[unstable(feature = "stdsimd", issue = "27731")] + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] rv32i: "rv32i"; + /// RV32I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zifencei: "zifencei"; + /// "Zifencei" Instruction-Fetch Fence + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zihintpause: "zihintpause"; + /// "Zihintpause" Pause Hint + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] rv64i: "rv64i"; + /// RV64I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] m: "m"; + /// "M" Standard Extension for Integer Multiplication and Division + 
@FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] a: "a"; + /// "A" Standard Extension for Atomic Instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zicsr: "zicsr"; + /// "Zicsr", Control and Status Register (CSR) Instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zicntr: "zicntr"; + /// "Zicntr", Standard Extension for Base Counters and Timers + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zihpm: "zihpm"; + /// "Zihpm", Standard Extension for Hardware Performance Counters + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] f: "f"; + /// "F" Standard Extension for Single-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] d: "d"; + /// "D" Standard Extension for Double-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] q: "q"; + /// "Q" Standard Extension for Quad-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] c: "c"; + /// "C" Standard Extension for Compressed Instructions + + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zfinx: "zfinx"; + /// "Zfinx" Standard Extension for Single-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zdinx: "zdinx"; + /// "Zdinx" Standard Extension for Double-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zhinx: "zhinx"; + /// "Zhinx" Standard Extension for Half-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zhinxmin: "zhinxmin"; + /// "Zhinxmin" Standard Extension for Minimal Half-Precision Floating-Point in Integer Registers + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] ztso: "ztso"; + /// "Ztso" Standard Extension for Total Store Ordering + + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] rv32e: "rv32e"; + /// RV32E Base 
Integer Instruction Set + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] rv128i: "rv128i"; + /// RV128I Base Integer Instruction Set + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zfh: "zfh"; + /// "Zfh" Standard Extension for 16-Bit Half-Precision Floating-Point + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zfhmin: "zfhmin"; + /// "Zfhmin" Standard Extension for Minimal Half-Precision Floating-Point Support + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] b: "b"; + /// "B" Standard Extension for Bit Manipulation + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] j: "j"; + /// "J" Standard Extension for Dynamically Translated Languages + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] p: "p"; + /// "P" Standard Extension for Packed-SIMD Instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] v: "v"; + /// "V" Standard Extension for Vector Operations + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zam: "zam"; + /// "Zam" Standard Extension for Misaligned Atomics + + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] s: "s"; + /// Supervisor-Level ISA + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] svnapot: "svnapot"; + /// "Svnapot" Standard Extension for NAPOT Translation Contiguity + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] svpbmt: "svpbmt"; + /// "Svpbmt" Standard Extension for Page-Based Memory Types + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] svinval: "svinval"; + /// "Svinval" Standard Extension for Fine-Grained Address-Translation Cache Invalidation + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] h: "h"; + /// Hypervisor Extension + + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zba: "zba"; + /// "Zba" Standard Extension for Address Generation Instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zbb: "zbb"; + /// "Zbb" Standard Extension 
for Basic Bit-Manipulation + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zbc: "zbc"; + /// "Zbc" Standard Extension for Carry-less Multiplication + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zbs: "zbs"; + /// "Zbs" Standard Extension for Single-Bit instructions + + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zbkb: "zbkb"; + /// "Zbkb" Standard Extension for Bitmanip instructions for Cryptography + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zbkc: "zbkc"; + /// "Zbkc" Standard Extension for Carry-less multiply instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zbkx: "zbkx"; + /// "Zbkx" Standard Extension for Crossbar permutation instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zknd: "zknd"; + /// "Zknd" Standard Extension for NIST Suite: AES Decryption + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zkne: "zkne"; + /// "Zkne" Standard Extension for NIST Suite: AES Encryption + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zknh: "zknh"; + /// "Zknh" Standard Extension for NIST Suite: Hash Function Instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zksed: "zksed"; + /// "Zksed" Standard Extension for ShangMi Suite: SM4 Block Cipher Instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zksh: "zksh"; + /// "Zksh" Standard Extension for ShangMi Suite: SM3 Hash Function Instructions + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zkr: "zkr"; + /// "Zkr" Standard Extension for Entropy Source Extension + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zkn: "zkn"; + /// "Zkn" Standard Extension for NIST Algorithm Suite + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zks: "zks"; + /// "Zks" Standard Extension for ShangMi Algorithm Suite + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zk: "zk"; + /// "Zk" Standard Extension for Standard 
scalar cryptography extension + @FEATURE: #[unstable(feature = "stdsimd", issue = "27731")] zkt: "zkt"; + /// "Zkt" Standard Extension for Data Independent Execution Latency +} diff --git a/library/stdarch/crates/std_detect/src/detect/arch/x86.rs b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs new file mode 100644 index 000000000..893e1a887 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/arch/x86.rs @@ -0,0 +1,197 @@ +//! This module implements minimal run-time feature detection for x86. +//! +//! The features are detected using the `detect_features` function below. +//! This function uses the CPUID instruction to read the feature flags from the +//! CPU and encodes them in a `usize` where each bit position represents +//! whether a feature is available (bit is set) or unavailable (bit is cleared). +//! +//! The enum `Feature` is used to map bit positions to feature names, and the +//! `__crate::detect::check_for!` macro is used to map string literals (e.g., +//! "avx") to these bit positions (e.g., `Feature::avx`). +//! +//! The run-time feature detection is performed by the +//! `__crate::detect::check_for(Feature) -> bool` function. On its first call, +//! this function queries the CPU for the available features and stores them +//! in a global `AtomicUsize` variable. The query is performed by just checking +//! whether the feature bit in this global variable is set or cleared. + +features! { + @TARGET: x86; + @CFG: any(target_arch = "x86", target_arch = "x86_64"); + @MACRO_NAME: is_x86_feature_detected; + @MACRO_ATTRS: + /// A macro to test at *runtime* whether a CPU feature is available on + /// x86/x86-64 platforms. + /// + /// This macro is provided in the standard library and will detect at runtime + /// whether the specified CPU feature is detected. This does **not** resolve at + /// compile time unless the specified feature is already enabled for the entire + /// crate.
Runtime detection currently relies mostly on the `cpuid` instruction. + /// + /// This macro only takes one argument which is a string literal of the feature + /// being tested for. The feature names supported are the lowercase versions of + /// the ones defined by Intel in [their documentation][docs]. + /// + /// ## Supported arguments + /// + /// This macro supports the same names that `#[target_feature]` supports. Unlike + /// `#[target_feature]`, however, this macro does not support names separated + /// with a comma. Instead testing for multiple features must be done through + /// separate macro invocations for now. + /// + /// Supported arguments are: + /// + /// * `"aes"` + /// * `"pclmulqdq"` + /// * `"rdrand"` + /// * `"rdseed"` + /// * `"tsc"` + /// * `"mmx"` + /// * `"sse"` + /// * `"sse2"` + /// * `"sse3"` + /// * `"ssse3"` + /// * `"sse4.1"` + /// * `"sse4.2"` + /// * `"sse4a"` + /// * `"sha"` + /// * `"avx"` + /// * `"avx2"` + /// * `"avx512f"` + /// * `"avx512cd"` + /// * `"avx512er"` + /// * `"avx512pf"` + /// * `"avx512bw"` + /// * `"avx512dq"` + /// * `"avx512vl"` + /// * `"avx512ifma"` + /// * `"avx512vbmi"` + /// * `"avx512vpopcntdq"` + /// * `"avx512vbmi2"` + /// * `"avx512gfni"` + /// * `"avx512vaes"` + /// * `"avx512vpclmulqdq"` + /// * `"avx512vnni"` + /// * `"avx512bitalg"` + /// * `"avx512bf16"` + /// * `"avx512vp2intersect"` + /// * `"f16c"` + /// * `"fma"` + /// * `"bmi1"` + /// * `"bmi2"` + /// * `"abm"` + /// * `"lzcnt"` + /// * `"tbm"` + /// * `"popcnt"` + /// * `"fxsr"` + /// * `"xsave"` + /// * `"xsaveopt"` + /// * `"xsaves"` + /// * `"xsavec"` + /// * `"cmpxchg16b"` + /// * `"adx"` + /// * `"rtm"` + /// + /// [docs]: https://software.intel.com/sites/landingpage/IntrinsicsGuide + #[stable(feature = "simd_x86", since = "1.27.0")] + @BIND_FEATURE_NAME: "abm"; "lzcnt"; // abm is a synonym for lzcnt + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] aes: "aes"; + /// AES (Advanced Encryption Standard New Instructions AES-NI) 
+ @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] pclmulqdq: "pclmulqdq"; + /// CLMUL (Carry-less Multiplication) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rdrand: "rdrand"; + /// RDRAND + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rdseed: "rdseed"; + /// RDSEED + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] tsc: "tsc"; + /// TSC (Time Stamp Counter) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] mmx: "mmx"; + /// MMX (MultiMedia eXtensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse: "sse"; + /// SSE (Streaming SIMD Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse2: "sse2"; + /// SSE2 (Streaming SIMD Extensions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse3: "sse3"; + /// SSE3 (Streaming SIMD Extensions 3) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] ssse3: "ssse3"; + /// SSSE3 (Supplemental Streaming SIMD Extensions 3) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4_1: "sse4.1"; + /// SSE4.1 (Streaming SIMD Extensions 4.1) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4_2: "sse4.2"; + /// SSE4.2 (Streaming SIMD Extensions 4.2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sse4a: "sse4a"; + /// SSE4a (Streaming SIMD Extensions 4a) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] sha: "sha"; + /// SHA + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx: "avx"; + /// AVX (Advanced Vector Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx2: "avx2"; + /// AVX2 (Advanced Vector Extensions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512f: "avx512f" ; + /// AVX-512 F (Foundation) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512cd: "avx512cd" ; + /// AVX-512 CD (Conflict Detection Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] 
avx512er: "avx512er"; + /// AVX-512 ER (Exponential and Reciprocal Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512pf: "avx512pf"; + /// AVX-512 PF (Prefetch Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bw: "avx512bw"; + /// AVX-512 BW (Byte and Word Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512dq: "avx512dq"; + /// AVX-512 DQ (Doubleword and Quadword) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vl: "avx512vl"; + /// AVX-512 VL (Vector Length Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512ifma: "avx512ifma"; + /// AVX-512 IFMA (Integer Fused Multiply Add) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi: "avx512vbmi"; + /// AVX-512 VBMI (Vector Byte Manipulation Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpopcntdq: "avx512vpopcntdq"; + /// AVX-512 VPOPCNTDQ (Vector Population Count Doubleword and + /// Quadword) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vbmi2: "avx512vbmi2"; + /// AVX-512 VBMI2 (Additional byte, word, dword and qword capabilities) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512gfni: "avx512gfni"; + /// AVX-512 GFNI (Galois Field New Instruction) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vaes: "avx512vaes"; + /// AVX-512 VAES (Vector AES instruction) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vpclmulqdq: "avx512vpclmulqdq"; + /// AVX-512 VPCLMULQDQ (Vector PCLMULQDQ instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vnni: "avx512vnni"; + /// AVX-512 VNNI (Vector Neural Network Instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512bitalg: "avx512bitalg"; + /// AVX-512 BITALG (Support for VPOPCNT\[B,W\] and VPSHUFBITQMB) + @FEATURE: #[stable(feature = "simd_x86", since =
"1.27.0")] avx512bf16: "avx512bf16"; + /// AVX-512 BF16 (BFLOAT16 instructions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] avx512vp2intersect: "avx512vp2intersect"; + /// AVX-512 VP2INTERSECT + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] f16c: "f16c"; + /// F16C (Conversions between IEEE-754 `binary16` and `binary32` formats) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fma: "fma"; + /// FMA (Fused Multiply Add) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] bmi1: "bmi1" ; + /// BMI1 (Bit Manipulation Instructions 1) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] bmi2: "bmi2" ; + /// BMI2 (Bit Manipulation Instructions 2) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] lzcnt: "lzcnt"; + /// ABM (Advanced Bit Manipulation) / LZCNT (Leading Zero Count) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] tbm: "tbm"; + /// TBM (Trailing Bit Manipulation) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] popcnt: "popcnt"; + /// POPCNT (Population Count) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] fxsr: "fxsr"; + /// FXSR (Floating-point context fast save and restore) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsave: "xsave"; + /// XSAVE (Save Processor Extended States) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsaveopt: "xsaveopt"; + /// XSAVEOPT (Save Processor Extended States Optimized) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsaves: "xsaves"; + /// XSAVES (Save Processor Extended States Supervisor) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] xsavec: "xsavec"; + /// XSAVEC (Save Processor Extended States Compacted) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] cmpxchg16b: "cmpxchg16b"; + /// CMPXCHG16B (16-byte compare-and-swap instruction) + @FEATURE: #[stable(feature = "simd_x86_adx", since = "1.33.0")] adx: "adx"; + /// ADX, Intel ADX
(Multi-Precision Add-Carry Instruction Extensions) + @FEATURE: #[stable(feature = "simd_x86", since = "1.27.0")] rtm: "rtm"; + /// RTM, Intel (Restricted Transactional Memory) +} diff --git a/library/stdarch/crates/std_detect/src/detect/bit.rs b/library/stdarch/crates/std_detect/src/detect/bit.rs new file mode 100644 index 000000000..6f06c5523 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/bit.rs @@ -0,0 +1,9 @@ +//! Bit manipulation utilities. + +/// Tests the `bit` of `x`. +#[allow(dead_code)] +#[inline] +pub(crate) fn test(x: usize, bit: u32) -> bool { + debug_assert!(bit < usize::BITS, "bit index out-of-bounds"); + x & (1 << bit) != 0 +} diff --git a/library/stdarch/crates/std_detect/src/detect/cache.rs b/library/stdarch/crates/std_detect/src/detect/cache.rs new file mode 100644 index 000000000..d01a5ea24 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/cache.rs @@ -0,0 +1,194 @@ +//! Caches run-time feature detection so that it only needs to be computed +//! once. + +#![allow(dead_code)] // not used on all platforms + +use core::sync::atomic::Ordering; + +use core::sync::atomic::AtomicUsize; + +/// Sets the `bit` of `x`. +#[inline] +const fn set_bit(x: u64, bit: u32) -> u64 { + x | 1 << bit +} + +/// Tests the `bit` of `x`. +#[inline] +const fn test_bit(x: u64, bit: u32) -> bool { + x & (1 << bit) != 0 +} + +/// Unsets the `bit` of `x`. +#[inline] +const fn unset_bit(x: u64, bit: u32) -> u64 { + x & !(1 << bit) +} + +/// Maximum number of features that can be cached. +const CACHE_CAPACITY: u32 = 62; + +/// This type is used to initialize the cache +#[derive(Copy, Clone)] +pub(crate) struct Initializer(u64); + +#[allow(clippy::use_self)] +impl Default for Initializer { + fn default() -> Self { + Initializer(0) + } +} + +// NOTE: the `debug_assert!` would catch that we do not add more Features than +// the one fitting our cache. +impl Initializer { + /// Tests the `bit` of the cache.
+ #[inline] + pub(crate) fn test(self, bit: u32) -> bool { + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + test_bit(self.0, bit) + } + + /// Sets the `bit` of the cache. + #[inline] + pub(crate) fn set(&mut self, bit: u32) { + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + let v = self.0; + self.0 = set_bit(v, bit); + } + + /// Unsets the `bit` of the cache. + #[inline] + pub(crate) fn unset(&mut self, bit: u32) { + debug_assert!( + bit < CACHE_CAPACITY, + "too many features, time to increase the cache size!" + ); + let v = self.0; + self.0 = unset_bit(v, bit); + } +} + +/// This global variable is a cache of the features supported by the CPU. +// Note: on x64, we only use the first slot +static CACHE: [Cache; 2] = [Cache::uninitialized(), Cache::uninitialized()]; + +/// Feature cache with capacity for `size_of::<usize>() * 8 - 1` features. +/// +/// Note: 0 is used to represent an uninitialized cache, and (at least) the most +/// significant bit is set on any cache which has been initialized. +/// +/// Note: we use `Relaxed` atomic operations, because we are only interested in +/// the effects of operations on a single memory location. That is, we only need +/// "modification order", and not the full-blown "happens before". +struct Cache(AtomicUsize); + +impl Cache { + const CAPACITY: u32 = (core::mem::size_of::<usize>() * 8 - 1) as u32; + const MASK: usize = (1 << Cache::CAPACITY) - 1; + const INITIALIZED_BIT: usize = 1usize << Cache::CAPACITY; + + /// Creates an uninitialized cache. + #[allow(clippy::declare_interior_mutable_const)] + const fn uninitialized() -> Self { + Cache(AtomicUsize::new(0)) + } + + /// Is the `bit` in the cache set? Returns `None` if the cache has not been initialized.
+ #[inline] + pub(crate) fn test(&self, bit: u32) -> Option<bool> { + let cached = self.0.load(Ordering::Relaxed); + if cached == 0 { + None + } else { + Some(test_bit(cached as u64, bit)) + } + } + + /// Initializes the cache. + #[inline] + fn initialize(&self, value: usize) -> usize { + debug_assert_eq!((value & !Cache::MASK), 0); + self.0 + .store(value | Cache::INITIALIZED_BIT, Ordering::Relaxed); + value + } +} + +cfg_if::cfg_if! { + if #[cfg(feature = "std_detect_env_override")] { + #[inline] + fn initialize(mut value: Initializer) -> Initializer { + let env = unsafe { + libc::getenv(b"RUST_STD_DETECT_UNSTABLE\0".as_ptr() as *const libc::c_char) + }; + if !env.is_null() { + let len = unsafe { libc::strlen(env) }; + let env = unsafe { core::slice::from_raw_parts(env as *const u8, len) }; + if let Ok(disable) = core::str::from_utf8(env) { + for v in disable.split(" ") { + let _ = super::Feature::from_str(v).map(|v| value.unset(v as u32)); + } + } + } + do_initialize(value); + value + } + } else { + #[inline] + fn initialize(value: Initializer) -> Initializer { + do_initialize(value); + value + } + } +} + +#[inline] +fn do_initialize(value: Initializer) { + CACHE[0].initialize((value.0) as usize & Cache::MASK); + CACHE[1].initialize((value.0 >> Cache::CAPACITY) as usize & Cache::MASK); +} + +// We only have to detect features once, and it's fairly costly, so hint to LLVM +// that it should assume that cache hits are more common than misses (which is +// the point of caching). It's possibly unfortunate that this function needs to +// reach across modules like this to call `os::detect_features`, but it produces +// the best code out of several attempted variants. +// +// The `Initializer` that the cache was initialized with is returned, so that +// the caller can call `test()` on it without having to load the value from the +// cache again. 
+#[cold] +fn detect_and_initialize() -> Initializer { + initialize(super::os::detect_features()) +} + +/// Tests the `bit` of the storage. If the storage has not been initialized, +/// initializes it with the result of `os::detect_features()`. +/// +/// On its first invocation, it detects the CPU features and caches them in the +/// `CACHE` global variable (two `AtomicUsize` slots). +/// +/// It uses the `Feature` variant to index into this variable as a bitset. If +/// the bit is set, the feature is enabled, and otherwise it is disabled. +/// +/// If the feature `std_detect_env_override` is enabled, it looks for the env +/// variable `RUST_STD_DETECT_UNSTABLE` and uses its content to disable +/// features that would otherwise have been detected. +#[inline] +pub(crate) fn test(bit: u32) -> bool { + let (relative_bit, idx) = if bit < Cache::CAPACITY { + (bit, 0) + } else { + (bit - Cache::CAPACITY, 1) + }; + CACHE[idx] + .test(relative_bit) + .unwrap_or_else(|| detect_and_initialize().test(bit)) +} diff --git a/library/stdarch/crates/std_detect/src/detect/macros.rs b/library/stdarch/crates/std_detect/src/detect/macros.rs new file mode 100644 index 000000000..7548c9780 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/macros.rs @@ -0,0 +1,153 @@ +#[allow(unused)] +macro_rules! features { + ( + @TARGET: $target:ident; + @CFG: $cfg:meta; + @MACRO_NAME: $macro_name:ident; + @MACRO_ATTRS: $(#[$macro_attrs:meta])* + $(@BIND_FEATURE_NAME: $bind_feature:tt; $feature_impl:tt; )* + $(@NO_RUNTIME_DETECTION: $nort_feature:tt; )* + $(@FEATURE: #[$stability_attr:meta] $feature:ident: $feature_lit:tt; $(#[$feature_comment:meta])*)* + ) => { + #[macro_export] + $(#[$macro_attrs])* + #[allow_internal_unstable(stdsimd_internal, stdsimd)] + #[cfg($cfg)] + #[doc(cfg($cfg))] + macro_rules!
$macro_name { + $( + ($feature_lit) => { + cfg!(target_feature = $feature_lit) || + $crate::detect::__is_feature_detected::$feature() + }; + )* + $( + ($bind_feature) => { $macro_name!($feature_impl) }; + )* + $( + ($nort_feature) => { + compile_error!( + concat!( + stringify!($nort_feature), + " feature cannot be detected at run-time" + ) + ) + }; + )* + ($t:tt,) => { + $macro_name!($t); + }; + ($t:tt) => { + compile_error!( + concat!( + concat!("unknown ", stringify!($target)), + concat!(" target feature: ", $t) + ) + ) + }; + } + + $(#[$macro_attrs])* + #[macro_export] + #[cfg(not($cfg))] + #[doc(cfg($cfg))] + macro_rules! $macro_name { + $( + ($feature_lit) => { + compile_error!( + concat!( + r#"This macro cannot be used on the current target. + You can prevent it from being used in other architectures by + guarding it behind a cfg("#, + stringify!($cfg), + ")." + ) + ) + }; + )* + $( + ($bind_feature) => { $macro_name!($feature_impl) }; + )* + $( + ($nort_feature) => { + compile_error!( + concat!( + stringify!($nort_feature), + " feature cannot be detected at run-time" + ) + ) + }; + )* + ($t:tt,) => { + $macro_name!($t); + }; + ($t:tt) => { + compile_error!( + concat!( + concat!("unknown ", stringify!($target)), + concat!(" target feature: ", $t) + ) + ) + }; + } + + /// Each variant denotes a position in a bitset for a particular feature. + /// + /// PLEASE: do not use this, it is an implementation detail subject + /// to change. 
+ #[doc(hidden)] + #[allow(non_camel_case_types)] + #[derive(Copy, Clone)] + #[repr(u8)] + #[unstable(feature = "stdsimd_internal", issue = "none")] + #[cfg($cfg)] + pub(crate) enum Feature { + $( + $(#[$feature_comment])* + $feature, + )* + + // Do not add variants after last: + _last + } + + #[cfg($cfg)] + impl Feature { + pub(crate) fn to_str(self) -> &'static str { + match self { + $(Feature::$feature => $feature_lit,)* + Feature::_last => unreachable!(), + } + } + #[cfg(feature = "std_detect_env_override")] + pub(crate) fn from_str(s: &str) -> Result<Feature, ()> { + match s { + $($feature_lit => Ok(Feature::$feature),)* + _ => Err(()) + } + } + } + + /// Each function performs run-time feature detection for a single + /// feature. This allows us to use stability attributes on a per feature + /// basis. + /// + /// PLEASE: do not use this, it is an implementation detail subject + /// to change. + #[doc(hidden)] + #[cfg($cfg)] + pub mod __is_feature_detected { + $( + + /// PLEASE: do not use this, it is an implementation detail + /// subject to change. + #[inline] + #[doc(hidden)] + #[$stability_attr] + pub fn $feature() -> bool { + $crate::detect::check_for($crate::detect::Feature::$feature) + } + )* + } + }; +} diff --git a/library/stdarch/crates/std_detect/src/detect/mod.rs b/library/stdarch/crates/std_detect/src/detect/mod.rs new file mode 100644 index 000000000..2bca84ca1 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/mod.rs @@ -0,0 +1,104 @@ +//! This module implements run-time feature detection. +//! +//! The `is_{arch}_feature_detected!("feature-name")` macros take the name of a +//! feature as a string-literal, and return a boolean indicating whether the +//! feature is enabled at run-time or not. +//! +//! These macros do two things: +//! * map the string-literal into an integer stored as a `Feature` enum, +//! * call a `os::check_for(x: Feature)` function that returns `true` if the +//! feature is enabled. +//! +//!
The `Feature` enums are also implemented in the `arch/{target_arch}.rs` +//! modules. +//! +//! The `check_for` functions are, in general, Operating System dependent. Most +//! architectures do not allow user-space programs to query the feature bits +//! due to security concerns (x86 is the big exception). These functions are +//! implemented in the `os/{target_os}.rs` modules. + +use cfg_if::cfg_if; + +#[macro_use] +mod macros; + +mod arch; + +// This module needs to be public because the `is_{arch}_feature_detected!` +// macros expand calls to items within it in user crates. +#[doc(hidden)] +pub use self::arch::__is_feature_detected; + +pub(crate) use self::arch::Feature; + +mod bit; +mod cache; + +cfg_if! { + if #[cfg(miri)] { + // When running under miri all target-features that are not enabled at + // compile-time are reported as disabled at run-time. + // + // For features for which `cfg(target_feature)` returns true, + // this run-time detection logic is never called. + #[path = "os/other.rs"] + mod os; + } else if #[cfg(any(target_arch = "x86", target_arch = "x86_64"))] { + // On x86/x86_64 no OS specific functionality is required. + #[path = "os/x86.rs"] + mod os; + } else if #[cfg(all(target_os = "linux", feature = "libc"))] { + #[path = "os/linux/mod.rs"] + mod os; + } else if #[cfg(all(target_os = "freebsd", feature = "libc"))] { + #[cfg(target_arch = "aarch64")] + #[path = "os/aarch64.rs"] + mod aarch64; + #[path = "os/freebsd/mod.rs"] + mod os; + } else if #[cfg(all(target_os = "windows", target_arch = "aarch64"))] { + #[path = "os/windows/aarch64.rs"] + mod os; + } else { + #[path = "os/other.rs"] + mod os; + } +} + +/// Performs run-time feature detection. 
+#[inline] +#[allow(dead_code)] +fn check_for(x: Feature) -> bool { + cache::test(x as u32) +} + +/// Returns an `Iterator<Item=(&'static str, bool)>` where +/// `Item.0` is the feature name, and `Item.1` is a `bool` which +/// is `true` if the feature is supported by the host and `false` otherwise. +#[unstable(feature = "stdsimd", issue = "27731")] +pub fn features() -> impl Iterator<Item = (&'static str, bool)> { + cfg_if! { + if #[cfg(any( + target_arch = "x86", + target_arch = "x86_64", + target_arch = "arm", + target_arch = "aarch64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "powerpc", + target_arch = "powerpc64", + target_arch = "mips", + target_arch = "mips64", + ))] { + (0_u8..Feature::_last as u8).map(|discriminant: u8| { + #[allow(bindings_with_variant_name)] // RISC-V has Feature::f + let f: Feature = unsafe { core::mem::transmute(discriminant) }; + let name: &'static str = f.to_str(); + let enabled: bool = check_for(f); + (name, enabled) + }) + } else { + None.into_iter() + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs new file mode 100644 index 000000000..e0e62ee33 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/aarch64.rs @@ -0,0 +1,104 @@ +//! Run-time feature detection for Aarch64 on any OS that emulates the mrs instruction. +//! +//! On FreeBSD >= 12.0, Linux >= 4.11 and other operating systems, it is possible to use +//! privileged system registers from userspace to check CPU feature support. +//! +//! AArch64 system registers ID_AA64ISAR0_EL1, ID_AA64PFR0_EL1, ID_AA64ISAR1_EL1 +//! have bits dedicated to features like AdvSIMD, CRC32, AES, atomics (LSE), etc. +//! Each part of the register indicates the level of support for a certain feature, e.g. +//! when ID_AA64ISAR0_EL1\[7:4\] is >= 1, AES is supported; when it's >= 2, PMULL is supported. +//! +//! 
For proper support of [SoCs where different cores have different capabilities](https://medium.com/@jadr2ddude/a-big-little-problem-a-tale-of-big-little-gone-wrong-e7778ce744bb), +//! the OS has to always report only the features supported by all cores, like [FreeBSD does](https://reviews.freebsd.org/D17137#393947). +//! +//! References: +//! +//! - [Zircon implementation](https://fuchsia.googlesource.com/zircon/+/master/kernel/arch/arm64/feature.cpp) +//! - [Linux documentation](https://www.kernel.org/doc/Documentation/arm64/cpu-feature-registers.txt) + +use crate::detect::{cache, Feature}; +use core::arch::asm; + +/// Try to read the features from the system registers. +/// +/// This will cause SIGILL if the current OS is not trapping the mrs instruction. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // ID_AA64ISAR0_EL1 - Instruction Set Attribute Register 0 + let aa64isar0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR0_EL1", + out(reg) aa64isar0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + enable_feature(Feature::pmull, bits_shift(aa64isar0, 7, 4) >= 2); + enable_feature(Feature::tme, bits_shift(aa64isar0, 27, 24) == 1); + enable_feature(Feature::lse, bits_shift(aa64isar0, 23, 20) >= 1); + enable_feature(Feature::crc, bits_shift(aa64isar0, 19, 16) >= 1); + + // ID_AA64PFR0_EL1 - Processor Feature Register 0 + let aa64pfr0: u64; + unsafe { + asm!( + "mrs {}, ID_AA64PFR0_EL1", + out(reg) aa64pfr0, + options(pure, nomem, preserves_flags, nostack) + ); + } + + let fp = bits_shift(aa64pfr0, 19, 16) < 0xF; + let fphp = bits_shift(aa64pfr0, 19, 16) >= 1; + let asimd = bits_shift(aa64pfr0, 23, 20) < 0xF; + let asimdhp = bits_shift(aa64pfr0, 23, 20) >= 1; + enable_feature(Feature::fp, fp); + enable_feature(Feature::fp16, fphp); + // SIMD support requires float support - if half-floats are + // 
supported, it also requires half-float support: + enable_feature(Feature::asimd, fp && asimd && (!fphp | asimdhp)); + // SIMD extensions require SIMD support: + enable_feature(Feature::aes, asimd && bits_shift(aa64isar0, 7, 4) >= 1); + let sha1 = bits_shift(aa64isar0, 11, 8) >= 1; + let sha2 = bits_shift(aa64isar0, 15, 12) >= 1; + enable_feature(Feature::sha2, asimd && sha1 && sha2); + enable_feature(Feature::rdm, asimd && bits_shift(aa64isar0, 31, 28) >= 1); + enable_feature( + Feature::dotprod, + asimd && bits_shift(aa64isar0, 47, 44) >= 1, + ); + enable_feature(Feature::sve, asimd && bits_shift(aa64pfr0, 35, 32) >= 1); + + // ID_AA64ISAR1_EL1 - Instruction Set Attribute Register 1 + let aa64isar1: u64; + unsafe { + asm!( + "mrs {}, ID_AA64ISAR1_EL1", + out(reg) aa64isar1, + options(pure, nomem, preserves_flags, nostack) + ); + } + + // Check for either APA or API field + enable_feature(Feature::paca, bits_shift(aa64isar1, 11, 4) >= 1); + enable_feature(Feature::rcpc, bits_shift(aa64isar1, 23, 20) >= 1); + // Check for either GPA or GPI field + enable_feature(Feature::pacg, bits_shift(aa64isar1, 31, 24) >= 1); + } + + value +} + +#[inline] +fn bits_shift(x: u64, high: usize, low: usize) -> u64 { + (x >> low) & ((1 << (high - low + 1)) - 1) +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs new file mode 100644 index 000000000..7d972b373 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/aarch64.rs @@ -0,0 +1,21 @@ +//! Run-time feature detection for Aarch64 on FreeBSD. 
+ +pub(crate) use super::super::aarch64::detect_features; + +#[cfg(test)] +mod tests { + #[test] + fn dump() { + println!("asimd: {:?}", is_aarch64_feature_detected!("asimd")); + println!("pmull: {:?}", is_aarch64_feature_detected!("pmull")); + println!("fp: {:?}", is_aarch64_feature_detected!("fp")); + println!("fp16: {:?}", is_aarch64_feature_detected!("fp16")); + println!("sve: {:?}", is_aarch64_feature_detected!("sve")); + println!("crc: {:?}", is_aarch64_feature_detected!("crc")); + println!("lse: {:?}", is_aarch64_feature_detected!("lse")); + println!("rdm: {:?}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {:?}", is_aarch64_feature_detected!("rcpc")); + println!("dotprod: {:?}", is_aarch64_feature_detected!("dotprod")); + println!("tme: {:?}", is_aarch64_feature_detected!("tme")); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs new file mode 100644 index 000000000..4c9d763b4 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/arm.rs @@ -0,0 +1,21 @@ +//! 
Run-time feature detection for ARM on FreeBSD + +use super::auxvec; +use crate::detect::{cache, Feature}; + +/// Try to read the features from the auxiliary vector +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, auxv.hwcap & 0x00001000 != 0); + enable_feature(&mut value, Feature::pmull, auxv.hwcap2 & 0x00000002 != 0); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs new file mode 100644 index 000000000..29fcc8cb0 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/auxvec.rs @@ -0,0 +1,102 @@ +//! Parses ELF auxiliary vectors. +#![cfg_attr( + any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc64", + target_arch = "riscv64" + ), + allow(dead_code) +)] + +/// Key to access the CPU Hardware capabilities bitfield. +pub(crate) const AT_HWCAP: usize = 25; +/// Key to access the CPU Hardware capabilities 2 bitfield. +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For FreeBSD, they are +/// defined in [sys/elf_common.h][elf_common_h]. 
The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. +/// +/// Note that run-time feature detection is not invoked for features that can +/// be detected at compile-time. +/// +/// [elf_common.h]: https://svnweb.freebsd.org/base/release/12.0.0/sys/sys/elf_common.h?revision=341707 +pub(crate) fn auxv() -> Result<AuxVec, ()> { + if let Ok(hwcap) = archauxv(AT_HWCAP) { + if let Ok(hwcap2) = archauxv(AT_HWCAP2) { + if hwcap != 0 && hwcap2 != 0 { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + } + Err(()) +} + +/// Tries to read the `key` from the auxiliary vector. +fn archauxv(key: usize) -> Result<usize, ()> { + use core::mem; + + #[derive(Copy, Clone)] + #[repr(C)] + pub struct Elf_Auxinfo { + pub a_type: usize, + pub a_un: unnamed, + } + #[derive(Copy, Clone)] + #[repr(C)] + pub union unnamed { + pub a_val: libc::c_long, + pub a_ptr: *mut libc::c_void, + pub a_fcn: Option<unsafe extern "C" fn() -> ()>, + } + + let mut auxv: [Elf_Auxinfo; 27] = [Elf_Auxinfo { + a_type: 0, + a_un: unnamed { a_val: 0 }, + }; 27]; + + let mut len: libc::c_uint = mem::size_of_val(&auxv) as libc::c_uint; + + unsafe { + let mut mib = [ + libc::CTL_KERN, + libc::KERN_PROC, + libc::KERN_PROC_AUXV, + libc::getpid(), + ]; + + let ret = libc::sysctl( + mib.as_mut_ptr(), + mib.len() as u32, + &mut auxv as *mut _ as *mut _, + &mut len as *mut _ as *mut _, + 0 as *mut libc::c_void, + 0, + ); + + if ret != -1 { + for i in 0..auxv.len() { + if auxv[i].a_type == key { + return Ok(auxv[i].a_un.a_val as usize); + } + } + } + } + return Ok(0); +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs new file mode 100644 index 000000000..ade7fb626 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/mod.rs @@ -0,0 +1,22 @@ +//! Run-time feature detection on FreeBSD + +mod auxvec; + +cfg_if::cfg_if! 
{ + if #[cfg(target_arch = "aarch64")] { + mod aarch64; + pub(crate) use self::aarch64::detect_features; + } else if #[cfg(target_arch = "arm")] { + mod arm; + pub(crate) use self::arm::detect_features; + } else if #[cfg(target_arch = "powerpc64")] { + mod powerpc; + pub(crate) use self::powerpc::detect_features; + } else { + use crate::detect::cache; + /// Performs run-time feature detection. + pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs new file mode 100644 index 000000000..6bfab631a --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/freebsd/powerpc.rs @@ -0,0 +1,21 @@ +//! Run-time feature detection for PowerPC on FreeBSD. + +use super::auxvec; +use crate::detect::{cache, Feature}; + +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs new file mode 100644 index 000000000..b6a2e5218 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/aarch64.rs @@ -0,0 +1,290 @@ +//! Run-time feature detection for Aarch64 on Linux. + +use super::auxvec; +use crate::detect::{bit, cache, Feature}; + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. 
+pub(crate) fn detect_features() -> cache::Initializer { + if let Ok(auxv) = auxvec::auxv() { + let hwcap: AtHwcap = auxv.into(); + return hwcap.cache(); + } + #[cfg(feature = "std_detect_file_io")] + if let Ok(c) = super::cpuinfo::CpuInfo::new() { + let hwcap: AtHwcap = c.into(); + return hwcap.cache(); + } + cache::Initializer::default() +} + +/// These values are part of the platform-specific [asm/hwcap.h][hwcap] . +/// +/// The names match those used for cpuinfo. +/// +/// [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h +struct AtHwcap { + fp: bool, // 0 + asimd: bool, // 1 + // evtstrm: bool, // 2 No LLVM support + aes: bool, // 3 + pmull: bool, // 4 + sha1: bool, // 5 + sha2: bool, // 6 + crc32: bool, // 7 + atomics: bool, // 8 + fphp: bool, // 9 + asimdhp: bool, // 10 + // cpuid: bool, // 11 No LLVM support + asimdrdm: bool, // 12 + jscvt: bool, // 13 + fcma: bool, // 14 + lrcpc: bool, // 15 + dcpop: bool, // 16 + sha3: bool, // 17 + sm3: bool, // 18 + sm4: bool, // 19 + asimddp: bool, // 20 + sha512: bool, // 21 + sve: bool, // 22 + fhm: bool, // 23 + dit: bool, // 24 + uscat: bool, // 25 + ilrcpc: bool, // 26 + flagm: bool, // 27 + ssbs: bool, // 28 + sb: bool, // 29 + paca: bool, // 30 + pacg: bool, // 31 + dcpodp: bool, // 32 + sve2: bool, // 33 + sveaes: bool, // 34 + // svepmull: bool, // 35 No LLVM support + svebitperm: bool, // 36 + svesha3: bool, // 37 + svesm4: bool, // 38 + // flagm2: bool, // 39 No LLVM support + frint: bool, // 40 + // svei8mm: bool, // 41 See i8mm feature + svef32mm: bool, // 42 + svef64mm: bool, // 43 + // svebf16: bool, // 44 See bf16 feature + i8mm: bool, // 45 + bf16: bool, // 46 + // dgh: bool, // 47 No LLVM support + rng: bool, // 48 + bti: bool, // 49 + mte: bool, // 50 +} + +impl From<auxvec::AuxVec> for AtHwcap { + /// Reads AtHwcap from the auxiliary vector. 
+ fn from(auxv: auxvec::AuxVec) -> Self { + AtHwcap { + fp: bit::test(auxv.hwcap, 0), + asimd: bit::test(auxv.hwcap, 1), + // evtstrm: bit::test(auxv.hwcap, 2), + aes: bit::test(auxv.hwcap, 3), + pmull: bit::test(auxv.hwcap, 4), + sha1: bit::test(auxv.hwcap, 5), + sha2: bit::test(auxv.hwcap, 6), + crc32: bit::test(auxv.hwcap, 7), + atomics: bit::test(auxv.hwcap, 8), + fphp: bit::test(auxv.hwcap, 9), + asimdhp: bit::test(auxv.hwcap, 10), + // cpuid: bit::test(auxv.hwcap, 11), + asimdrdm: bit::test(auxv.hwcap, 12), + jscvt: bit::test(auxv.hwcap, 13), + fcma: bit::test(auxv.hwcap, 14), + lrcpc: bit::test(auxv.hwcap, 15), + dcpop: bit::test(auxv.hwcap, 16), + sha3: bit::test(auxv.hwcap, 17), + sm3: bit::test(auxv.hwcap, 18), + sm4: bit::test(auxv.hwcap, 19), + asimddp: bit::test(auxv.hwcap, 20), + sha512: bit::test(auxv.hwcap, 21), + sve: bit::test(auxv.hwcap, 22), + fhm: bit::test(auxv.hwcap, 23), + dit: bit::test(auxv.hwcap, 24), + uscat: bit::test(auxv.hwcap, 25), + ilrcpc: bit::test(auxv.hwcap, 26), + flagm: bit::test(auxv.hwcap, 27), + ssbs: bit::test(auxv.hwcap, 28), + sb: bit::test(auxv.hwcap, 29), + paca: bit::test(auxv.hwcap, 30), + pacg: bit::test(auxv.hwcap, 31), + dcpodp: bit::test(auxv.hwcap, 32), + sve2: bit::test(auxv.hwcap, 33), + sveaes: bit::test(auxv.hwcap, 34), + // svepmull: bit::test(auxv.hwcap, 35), + svebitperm: bit::test(auxv.hwcap, 36), + svesha3: bit::test(auxv.hwcap, 37), + svesm4: bit::test(auxv.hwcap, 38), + // flagm2: bit::test(auxv.hwcap, 39), + frint: bit::test(auxv.hwcap, 40), + // svei8mm: bit::test(auxv.hwcap, 41), + svef32mm: bit::test(auxv.hwcap, 42), + svef64mm: bit::test(auxv.hwcap, 43), + // svebf16: bit::test(auxv.hwcap, 44), + i8mm: bit::test(auxv.hwcap, 45), + bf16: bit::test(auxv.hwcap, 46), + // dgh: bit::test(auxv.hwcap, 47), + rng: bit::test(auxv.hwcap, 48), + bti: bit::test(auxv.hwcap, 49), + mte: bit::test(auxv.hwcap, 50), + } + } +} + +#[cfg(feature = "std_detect_file_io")] +impl From<super::cpuinfo::CpuInfo> for 
AtHwcap { + /// Reads AtHwcap from /proc/cpuinfo . + fn from(c: super::cpuinfo::CpuInfo) -> Self { + let f = &c.field("Features"); + AtHwcap { + // 64-bit names. FIXME: In 32-bit compatibility mode /proc/cpuinfo will + // map some of the 64-bit names to some 32-bit feature names. This does not + // cover that yet. + fp: f.has("fp"), + asimd: f.has("asimd"), + // evtstrm: f.has("evtstrm"), + aes: f.has("aes"), + pmull: f.has("pmull"), + sha1: f.has("sha1"), + sha2: f.has("sha2"), + crc32: f.has("crc32"), + atomics: f.has("atomics"), + fphp: f.has("fphp"), + asimdhp: f.has("asimdhp"), + // cpuid: f.has("cpuid"), + asimdrdm: f.has("asimdrdm"), + jscvt: f.has("jscvt"), + fcma: f.has("fcma"), + lrcpc: f.has("lrcpc"), + dcpop: f.has("dcpop"), + sha3: f.has("sha3"), + sm3: f.has("sm3"), + sm4: f.has("sm4"), + asimddp: f.has("asimddp"), + sha512: f.has("sha512"), + sve: f.has("sve"), + fhm: f.has("asimdfhm"), + dit: f.has("dit"), + uscat: f.has("uscat"), + ilrcpc: f.has("ilrcpc"), + flagm: f.has("flagm"), + ssbs: f.has("ssbs"), + sb: f.has("sb"), + paca: f.has("paca"), + pacg: f.has("pacg"), + dcpodp: f.has("dcpodp"), + sve2: f.has("sve2"), + sveaes: f.has("sveaes"), + // svepmull: f.has("svepmull"), + svebitperm: f.has("svebitperm"), + svesha3: f.has("svesha3"), + svesm4: f.has("svesm4"), + // flagm2: f.has("flagm2"), + frint: f.has("frint"), + // svei8mm: f.has("svei8mm"), + svef32mm: f.has("svef32mm"), + svef64mm: f.has("svef64mm"), + // svebf16: f.has("svebf16"), + i8mm: f.has("i8mm"), + bf16: f.has("bf16"), + // dgh: f.has("dgh"), + rng: f.has("rng"), + bti: f.has("bti"), + mte: f.has("mte"), + } + } +} + +impl AtHwcap { + /// Initializes the cache from the feature -bits. 
+ /// + /// The feature dependencies here come directly from LLVM's feature definintions: + /// https://github.com/llvm/llvm-project/blob/main/llvm/lib/Target/AArch64/AArch64.td + fn cache(self) -> cache::Initializer { + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + enable_feature(Feature::fp, self.fp); + // Half-float support requires float support + enable_feature(Feature::fp16, self.fp && self.fphp); + // FHM (fp16fml in LLVM) requires half float support + enable_feature(Feature::fhm, self.fphp && self.fhm); + enable_feature(Feature::pmull, self.pmull); + enable_feature(Feature::crc, self.crc32); + enable_feature(Feature::lse, self.atomics); + enable_feature(Feature::lse2, self.uscat); + enable_feature(Feature::rcpc, self.lrcpc); + // RCPC2 (rcpc-immo in LLVM) requires RCPC support + enable_feature(Feature::rcpc2, self.ilrcpc && self.lrcpc); + enable_feature(Feature::dit, self.dit); + enable_feature(Feature::flagm, self.flagm); + enable_feature(Feature::ssbs, self.ssbs); + enable_feature(Feature::sb, self.sb); + enable_feature(Feature::paca, self.paca); + enable_feature(Feature::pacg, self.pacg); + enable_feature(Feature::dpb, self.dcpop); + enable_feature(Feature::dpb2, self.dcpodp); + enable_feature(Feature::rand, self.rng); + enable_feature(Feature::bti, self.bti); + enable_feature(Feature::mte, self.mte); + // jsconv requires float support + enable_feature(Feature::jsconv, self.jscvt && self.fp); + enable_feature(Feature::rdm, self.asimdrdm); + enable_feature(Feature::dotprod, self.asimddp); + enable_feature(Feature::frintts, self.frint); + + // FEAT_I8MM & FEAT_BF16 also include optional SVE components which linux exposes + // separately. We ignore that distinction here. 
+ enable_feature(Feature::i8mm, self.i8mm); + enable_feature(Feature::bf16, self.bf16); + + // ASIMD support requires float support - if half-floats are + // supported, it also requires half-float support: + let asimd = self.fp && self.asimd && (!self.fphp | self.asimdhp); + enable_feature(Feature::asimd, asimd); + // ASIMD extensions require ASIMD support: + enable_feature(Feature::fcma, self.fcma && asimd); + enable_feature(Feature::sve, self.sve && asimd); + + // SVE extensions require SVE & ASIMD + enable_feature(Feature::f32mm, self.svef32mm && self.sve && asimd); + enable_feature(Feature::f64mm, self.svef64mm && self.sve && asimd); + + // Cryptographic extensions require ASIMD + enable_feature(Feature::aes, self.aes && asimd); + enable_feature(Feature::sha2, self.sha1 && self.sha2 && asimd); + // SHA512/SHA3 require SHA1 & SHA256 + enable_feature( + Feature::sha3, + self.sha512 && self.sha3 && self.sha1 && self.sha2 && asimd, + ); + enable_feature(Feature::sm4, self.sm3 && self.sm4 && asimd); + + // SVE2 requires SVE + let sve2 = self.sve2 && self.sve && asimd; + enable_feature(Feature::sve2, sve2); + // SVE2 extensions require SVE2 and crypto features + enable_feature(Feature::sve2_aes, self.sveaes && sve2 && self.aes); + enable_feature( + Feature::sve2_sm4, + self.svesm4 && sve2 && self.sm3 && self.sm4, + ); + enable_feature( + Feature::sve2_sha3, + self.svesha3 && sve2 && self.sha512 && self.sha3 && self.sha1 && self.sha2, + ); + enable_feature(Feature::sve2_bitperm, self.svebitperm && self.sve2); + } + value + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs new file mode 100644 index 000000000..7383e487f --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/arm.rs @@ -0,0 +1,79 @@ +//! Run-time feature detection for ARM on Linux. 
+ +use super::auxvec; +use crate::detect::{bit, cache, Feature}; + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::neon, bit::test(auxv.hwcap, 12)); + enable_feature(&mut value, Feature::pmull, bit::test(auxv.hwcap2, 1)); + enable_feature(&mut value, Feature::crc, bit::test(auxv.hwcap2, 4)); + enable_feature( + &mut value, + Feature::crypto, + bit::test(auxv.hwcap2, 0) + && bit::test(auxv.hwcap2, 1) + && bit::test(auxv.hwcap2, 2) + && bit::test(auxv.hwcap2, 3), + ); + enable_feature(&mut value, Feature::aes, bit::test(auxv.hwcap2, 0)); + // SHA2 requires SHA1 & SHA2 features + enable_feature( + &mut value, + Feature::sha2, + bit::test(auxv.hwcap2, 2) && bit::test(auxv.hwcap2, 3), + ); + return value; + } + + #[cfg(feature = "std_detect_file_io")] + if let Ok(c) = super::cpuinfo::CpuInfo::new() { + enable_feature( + &mut value, + Feature::neon, + c.field("Features").has("neon") && !has_broken_neon(&c), + ); + enable_feature(&mut value, Feature::pmull, c.field("Features").has("pmull")); + enable_feature(&mut value, Feature::crc, c.field("Features").has("crc32")); + enable_feature( + &mut value, + Feature::crypto, + c.field("Features").has("aes") + && c.field("Features").has("pmull") + && c.field("Features").has("sha1") + && c.field("Features").has("sha2"), + ); + enable_feature(&mut value, Feature::aes, c.field("Features").has("aes")); + enable_feature( + &mut value, + Feature::sha2, + c.field("Features").has("sha1") && 
c.field("Features").has("sha2"), + ); + return value; + } + value +} + +/// Is the CPU known to have a broken NEON unit? +/// +/// See https://crbug.com/341598. +#[cfg(feature = "std_detect_file_io")] +fn has_broken_neon(cpuinfo: &super::cpuinfo::CpuInfo) -> bool { + cpuinfo.field("CPU implementer") == "0x51" + && cpuinfo.field("CPU architecture") == "7" + && cpuinfo.field("CPU variant") == "0x1" + && cpuinfo.field("CPU part") == "0x04d" + && cpuinfo.field("CPU revision") == "0" +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs new file mode 100644 index 000000000..e6447d0cd --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/auxvec.rs @@ -0,0 +1,366 @@ +//! Parses ELF auxiliary vectors. +#![allow(dead_code)] + +pub(crate) const AT_NULL: usize = 0; + +/// Key to access the CPU Hardware capabilities bitfield. +pub(crate) const AT_HWCAP: usize = 16; +/// Key to access the CPU Hardware capabilities 2 bitfield. +#[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64" +))] +pub(crate) const AT_HWCAP2: usize = 26; + +/// Cache HWCAP bitfields of the ELF Auxiliary Vector. +/// +/// If an entry cannot be read all the bits in the bitfield are set to zero. +/// This should be interpreted as all the features being disabled. +#[derive(Debug, Copy, Clone)] +pub(crate) struct AuxVec { + pub hwcap: usize, + #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64" + ))] + pub hwcap2: usize, +} + +/// ELF Auxiliary Vector +/// +/// The auxiliary vector is a memory region in a running ELF program's stack +/// composed of (key: usize, value: usize) pairs. +/// +/// The keys used in the aux vector are platform dependent. For Linux, they are +/// defined in [linux/auxvec.h][auxvec_h]. The hardware capabilities of a given +/// CPU can be queried with the `AT_HWCAP` and `AT_HWCAP2` keys. 
///
/// There is no perfect way of reading the auxiliary vector.
///
/// - If the `std_detect_dlsym_getauxval` cargo feature is enabled, this will use
/// `getauxval` if it is linked to the binary, and otherwise proceed to a fallback implementation.
/// When `std_detect_dlsym_getauxval` is disabled, this will assume that `getauxval` is
/// linked to the binary - if that is not the case the behavior is undefined.
/// - Otherwise, if the `std_detect_file_io` cargo feature is enabled, it will
/// try to read `/proc/self/auxv`.
/// - If that fails, this function returns an error.
///
/// Note that run-time feature detection is not invoked for features that can
/// be detected at compile-time. Also note that if this function returns an
/// error, cpuinfo still can (and will) be used to try to perform run-time
/// feature detection on some platforms.
///
/// For more information about when `getauxval` is available check the great
/// [`auxv` crate documentation][auxv_docs].
///
/// [auxvec_h]: https://github.com/torvalds/linux/blob/master/include/uapi/linux/auxvec.h
/// [auxv_docs]: https://docs.rs/auxv/0.3.3/auxv/
pub(crate) fn auxv() -> Result<AuxVec, ()> {
    #[cfg(feature = "std_detect_dlsym_getauxval")]
    {
        // Try to call a dynamically-linked getauxval function.
        if let Ok(hwcap) = getauxval(AT_HWCAP) {
            // Targets with only AT_HWCAP:
            #[cfg(any(
                target_arch = "aarch64",
                target_arch = "riscv32",
                target_arch = "riscv64",
                target_arch = "mips",
                target_arch = "mips64"
            ))]
            {
                // A zero bitfield is not accepted here; execution falls
                // through to the code below instead.
                if hwcap != 0 {
                    return Ok(AuxVec { hwcap });
                }
            }

            // Targets with AT_HWCAP and AT_HWCAP2:
            #[cfg(any(
                target_arch = "arm",
                target_arch = "powerpc",
                target_arch = "powerpc64"
            ))]
            {
                if let Ok(hwcap2) = getauxval(AT_HWCAP2) {
                    // Both bitfields must be non-zero to be accepted.
                    if hwcap != 0 && hwcap2 != 0 {
                        return Ok(AuxVec { hwcap, hwcap2 });
                    }
                }
            }
            // NOTE(review): presumably keeps `hwcap` "used" on targets where
            // neither cfg block above is compiled - confirm.
            drop(hwcap);
        }
    }

    #[cfg(not(feature = "std_detect_dlsym_getauxval"))]
    {
        // Targets with only AT_HWCAP:
        #[cfg(any(
            target_arch = "aarch64",
            target_arch = "riscv32",
            target_arch = "riscv64",
            target_arch = "mips",
            target_arch = "mips64"
        ))]
        {
            let hwcap = unsafe { libc::getauxval(AT_HWCAP as libc::c_ulong) as usize };
            if hwcap != 0 {
                return Ok(AuxVec { hwcap });
            }
        }

        // Targets with AT_HWCAP and AT_HWCAP2:
        #[cfg(any(
            target_arch = "arm",
            target_arch = "powerpc",
            target_arch = "powerpc64"
        ))]
        {
            let hwcap = unsafe { libc::getauxval(AT_HWCAP as libc::c_ulong) as usize };
            let hwcap2 = unsafe { libc::getauxval(AT_HWCAP2 as libc::c_ulong) as usize };
            if hwcap != 0 && hwcap2 != 0 {
                return Ok(AuxVec { hwcap, hwcap2 });
            }
        }
    }

    // Exactly one of the two cfg-gated blocks below is compiled in, and it is
    // the tail expression of the function.
    #[cfg(feature = "std_detect_file_io")]
    {
        // If calling getauxval fails, try to read the auxiliary vector from
        // its file:
        auxv_from_file("/proc/self/auxv")
    }
    #[cfg(not(feature = "std_detect_file_io"))]
    {
        Err(())
    }
}

/// Tries to read the `key` from the auxiliary vector by calling the
/// dynamically-linked `getauxval` function. If the function is not linked,
/// this function returns `Err`.
+#[cfg(feature = "std_detect_dlsym_getauxval")] +fn getauxval(key: usize) -> Result<usize, ()> { + use libc; + pub type F = unsafe extern "C" fn(usize) -> usize; + unsafe { + let ptr = libc::dlsym(libc::RTLD_DEFAULT, "getauxval\0".as_ptr() as *const _); + if ptr.is_null() { + return Err(()); + } + + let ffi_getauxval: F = core::mem::transmute(ptr); + Ok(ffi_getauxval(key)) + } +} + +/// Tries to read the auxiliary vector from the `file`. If this fails, this +/// function returns `Err`. +#[cfg(feature = "std_detect_file_io")] +fn auxv_from_file(file: &str) -> Result<AuxVec, ()> { + let file = super::read_file(file)?; + + // See <https://github.com/torvalds/linux/blob/v3.19/include/uapi/linux/auxvec.h>. + // + // The auxiliary vector contains at most 32 (key,value) fields: from + // `AT_EXECFN = 31` to `AT_NULL = 0`. That is, a buffer of + // 2*32 `usize` elements is enough to read the whole vector. + let mut buf = [0_usize; 64]; + let len = core::mem::size_of_val(&buf).max(file.len()); + unsafe { + core::ptr::copy_nonoverlapping(file.as_ptr(), buf.as_mut_ptr() as *mut u8, len); + } + + auxv_from_buf(&buf) +} + +/// Tries to interpret the `buffer` as an auxiliary vector. If that fails, this +/// function returns `Err`. 
+#[cfg(feature = "std_detect_file_io")] +fn auxv_from_buf(buf: &[usize; 64]) -> Result<AuxVec, ()> { + // Targets with only AT_HWCAP: + #[cfg(any( + target_arch = "aarch64", + target_arch = "riscv32", + target_arch = "riscv64", + target_arch = "mips", + target_arch = "mips64", + ))] + { + for el in buf.chunks(2) { + match el[0] { + AT_NULL => break, + AT_HWCAP => return Ok(AuxVec { hwcap: el[1] }), + _ => (), + } + } + } + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64" + ))] + { + let mut hwcap = None; + let mut hwcap2 = None; + for el in buf.chunks(2) { + match el[0] { + AT_NULL => break, + AT_HWCAP => hwcap = Some(el[1]), + AT_HWCAP2 => hwcap2 = Some(el[1]), + _ => (), + } + } + + if let (Some(hwcap), Some(hwcap2)) = (hwcap, hwcap2) { + return Ok(AuxVec { hwcap, hwcap2 }); + } + } + drop(buf); + Err(()) +} + +#[cfg(test)] +mod tests { + extern crate auxv as auxv_crate; + use super::*; + + // Reads the Auxiliary Vector key from /proc/self/auxv + // using the auxv crate. + #[cfg(feature = "std_detect_file_io")] + fn auxv_crate_getprocfs(key: usize) -> Option<usize> { + use self::auxv_crate::procfs::search_procfs_auxv; + use self::auxv_crate::AuxvType; + let k = key as AuxvType; + match search_procfs_auxv(&[k]) { + Ok(v) => Some(v[&k] as usize), + Err(_) => None, + } + } + + // Reads the Auxiliary Vector key from getauxval() + // using the auxv crate. + #[cfg(not(any(target_arch = "mips", target_arch = "mips64")))] + fn auxv_crate_getauxval(key: usize) -> Option<usize> { + use self::auxv_crate::getauxval::Getauxval; + use self::auxv_crate::AuxvType; + let q = auxv_crate::getauxval::NativeGetauxval {}; + match q.getauxval(key as AuxvType) { + Ok(v) => Some(v as usize), + Err(_) => None, + } + } + + // FIXME: on mips/mips64 getauxval returns 0, and /proc/self/auxv + // does not always contain the AT_HWCAP key under qemu. 
+ #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64" + ))] + #[test] + fn auxv_crate() { + let v = auxv(); + if let Some(hwcap) = auxv_crate_getauxval(AT_HWCAP) { + let rt_hwcap = v.expect("failed to find hwcap key").hwcap; + assert_eq!(rt_hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64" + ))] + { + if let Some(hwcap2) = auxv_crate_getauxval(AT_HWCAP2) { + let rt_hwcap2 = v.expect("failed to find hwcap2 key").hwcap2; + assert_eq!(rt_hwcap2, hwcap2); + } + } + } + + #[test] + fn auxv_dump() { + if let Ok(auxvec) = auxv() { + println!("{:?}", auxvec); + } else { + println!("both getauxval() and reading /proc/self/auxv failed!"); + } + } + + #[cfg(feature = "std_detect_file_io")] + cfg_if::cfg_if! { + if #[cfg(target_arch = "arm")] { + #[test] + fn linux_rpi3() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-rpi3.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 4174038); + assert_eq!(v.hwcap2, 16); + } + + #[test] + #[should_panic] + fn linux_macos_vb() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + // this file is incomplete (contains hwcap but not hwcap2), we + // want to fall back to /proc/cpuinfo in this case, so + // reading should fail. 
assert_eq!(v.hwcap, 126614527); + // assert_eq!(v.hwcap2, 0); + let _ = v; + } + } else if #[cfg(target_arch = "aarch64")] { + #[test] + fn linux_x64() { + let file = concat!(env!("CARGO_MANIFEST_DIR"), "/src/detect/test_data/linux-x64-i7-6850k.auxv"); + println!("file: {}", file); + let v = auxv_from_file(file).unwrap(); + assert_eq!(v.hwcap, 3219913727); + } + } + } + + #[test] + #[cfg(feature = "std_detect_file_io")] + fn auxv_dump_procfs() { + if let Ok(auxvec) = auxv_from_file("/proc/self/auxv") { + println!("{:?}", auxvec); + } else { + println!("reading /proc/self/auxv failed!"); + } + } + + #[cfg(any( + target_arch = "aarch64", + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64" + ))] + #[test] + #[cfg(feature = "std_detect_file_io")] + fn auxv_crate_procfs() { + let v = auxv(); + if let Some(hwcap) = auxv_crate_getprocfs(AT_HWCAP) { + assert_eq!(v.unwrap().hwcap, hwcap); + } + + // Targets with AT_HWCAP and AT_HWCAP2: + #[cfg(any( + target_arch = "arm", + target_arch = "powerpc", + target_arch = "powerpc64" + ))] + { + if let Some(hwcap2) = auxv_crate_getprocfs(AT_HWCAP2) { + assert_eq!(v.unwrap().hwcap2, hwcap2); + } + } + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs new file mode 100644 index 000000000..48a5c9728 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/cpuinfo.rs @@ -0,0 +1,331 @@ +//! Parses /proc/cpuinfo +#![cfg_attr(not(target_arch = "arm"), allow(dead_code))] + +use alloc::string::String; + +/// cpuinfo +pub(crate) struct CpuInfo { + raw: String, +} + +impl CpuInfo { + /// Reads /proc/cpuinfo into CpuInfo. + pub(crate) fn new() -> Result<Self, ()> { + let raw = super::read_file("/proc/cpuinfo")?; + Ok(Self { + raw: String::from_utf8(raw).map_err(|_| ())?, + }) + } + /// Returns the value of the cpuinfo `field`. 
+ pub(crate) fn field(&self, field: &str) -> CpuInfoField<'_> { + for l in self.raw.lines() { + if l.trim().starts_with(field) { + return CpuInfoField::new(l.split(": ").nth(1)); + } + } + CpuInfoField(None) + } + + /// Returns the `raw` contents of `/proc/cpuinfo` + #[cfg(test)] + fn raw(&self) -> &String { + &self.raw + } + + #[cfg(test)] + fn from_str(other: &str) -> Result<Self, ()> { + Ok(Self { + raw: String::from(other), + }) + } +} + +/// Field of cpuinfo +#[derive(Debug)] +pub(crate) struct CpuInfoField<'a>(Option<&'a str>); + +impl<'a> PartialEq<&'a str> for CpuInfoField<'a> { + fn eq(&self, other: &&'a str) -> bool { + match self.0 { + None => other.is_empty(), + Some(f) => f == other.trim(), + } + } +} + +impl<'a> CpuInfoField<'a> { + pub(crate) fn new<'b>(v: Option<&'b str>) -> CpuInfoField<'b> { + match v { + None => CpuInfoField::<'b>(None), + Some(f) => CpuInfoField::<'b>(Some(f.trim())), + } + } + /// Does the field exist? + #[cfg(test)] + pub(crate) fn exists(&self) -> bool { + self.0.is_some() + } + /// Does the field contain `other`? 
+ pub(crate) fn has(&self, other: &str) -> bool { + match self.0 { + None => other.is_empty(), + Some(f) => { + let other = other.trim(); + for v in f.split(' ') { + if v == other { + return true; + } + } + false + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn raw_dump() { + let cpuinfo = CpuInfo::new().unwrap(); + if cpuinfo.field("vendor_id") == "GenuineIntel" { + assert!(cpuinfo.field("flags").exists()); + assert!(!cpuinfo.field("vendor33_id").exists()); + assert!(cpuinfo.field("flags").has("sse")); + assert!(!cpuinfo.field("flags").has("avx314")); + } + println!("{}", cpuinfo.raw()); + } + + const CORE_DUO_T6500: &str = r"processor : 0 +vendor_id : GenuineIntel +cpu family : 6 +model : 23 +model name : Intel(R) Core(TM)2 Duo CPU T6500 @ 2.10GHz +stepping : 10 +microcode : 0xa0b +cpu MHz : 1600.000 +cache size : 2048 KB +physical id : 0 +siblings : 2 +core id : 0 +cpu cores : 2 +apicid : 0 +initial apicid : 0 +fdiv_bug : no +hlt_bug : no +f00f_bug : no +coma_bug : no +fpu : yes +fpu_exception : yes +cpuid level : 13 +wp : yes +flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc arch_perfmon pebs bts aperfmperf pni dtes64 monitor ds_cpl est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm dtherm +bogomips : 4190.43 +clflush size : 64 +cache_alignment : 64 +address sizes : 36 bits physical, 48 bits virtual +power management: +"; + + #[test] + fn core_duo_t6500() { + let cpuinfo = CpuInfo::from_str(CORE_DUO_T6500).unwrap(); + assert_eq!(cpuinfo.field("vendor_id"), "GenuineIntel"); + assert_eq!(cpuinfo.field("cpu family"), "6"); + assert_eq!(cpuinfo.field("model"), "23"); + assert_eq!( + cpuinfo.field("model name"), + "Intel(R) Core(TM)2 Duo CPU T6500 @ 2.10GHz" + ); + assert_eq!( + cpuinfo.field("flags"), + "fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe nx lm constant_tsc 
arch_perfmon pebs bts aperfmperf pni dtes64 monitor ds_cpl est tm2 ssse3 cx16 xtpr pdcm sse4_1 xsave lahf_lm dtherm" + ); + assert!(cpuinfo.field("flags").has("fpu")); + assert!(cpuinfo.field("flags").has("dtherm")); + assert!(cpuinfo.field("flags").has("sse2")); + assert!(!cpuinfo.field("flags").has("avx")); + } + + const ARM_CORTEX_A53: &str = r"Processor : AArch64 Processor rev 3 (aarch64) + processor : 0 + processor : 1 + processor : 2 + processor : 3 + processor : 4 + processor : 5 + processor : 6 + processor : 7 + Features : fp asimd evtstrm aes pmull sha1 sha2 crc32 + CPU implementer : 0x41 + CPU architecture: AArch64 + CPU variant : 0x0 + CPU part : 0xd03 + CPU revision : 3 + + Hardware : HiKey Development Board + "; + + #[test] + fn arm_cortex_a53() { + let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A53).unwrap(); + assert_eq!( + cpuinfo.field("Processor"), + "AArch64 Processor rev 3 (aarch64)" + ); + assert_eq!( + cpuinfo.field("Features"), + "fp asimd evtstrm aes pmull sha1 sha2 crc32" + ); + assert!(cpuinfo.field("Features").has("pmull")); + assert!(!cpuinfo.field("Features").has("neon")); + assert!(cpuinfo.field("Features").has("asimd")); + } + + const ARM_CORTEX_A57: &str = r"Processor : Cortex A57 Processor rev 1 (aarch64) +processor : 0 +processor : 1 +processor : 2 +processor : 3 +Features : fp asimd aes pmull sha1 sha2 crc32 wp half thumb fastmult vfp edsp neon vfpv3 tlsi vfpv4 idiva idivt +CPU implementer : 0x41 +CPU architecture: 8 +CPU variant : 0x1 +CPU part : 0xd07 +CPU revision : 1"; + + #[test] + fn arm_cortex_a57() { + let cpuinfo = CpuInfo::from_str(ARM_CORTEX_A57).unwrap(); + assert_eq!( + cpuinfo.field("Processor"), + "Cortex A57 Processor rev 1 (aarch64)" + ); + assert_eq!( + cpuinfo.field("Features"), + "fp asimd aes pmull sha1 sha2 crc32 wp half thumb fastmult vfp edsp neon vfpv3 tlsi vfpv4 idiva idivt" + ); + assert!(cpuinfo.field("Features").has("pmull")); + assert!(cpuinfo.field("Features").has("neon")); + 
assert!(cpuinfo.field("Features").has("asimd")); + } + + const RISCV_RV64GC: &str = r"processor : 0 +hart : 3 +isa : rv64imafdc +mmu : sv39 +uarch : sifive,u74-mc + +processor : 1 +hart : 1 +isa : rv64imafdc +mmu : sv39 +uarch : sifive,u74-mc + +processor : 2 +hart : 2 +isa : rv64imafdc +mmu : sv39 +uarch : sifive,u74-mc + +processor : 3 +hart : 4 +isa : rv64imafdc +mmu : sv39 +uarch : sifive,u74-mc"; + + #[test] + fn riscv_rv64gc() { + let cpuinfo = CpuInfo::from_str(RISCV_RV64GC).unwrap(); + assert_eq!(cpuinfo.field("isa"), "rv64imafdc"); + assert_eq!(cpuinfo.field("mmu"), "sv39"); + assert_eq!(cpuinfo.field("uarch"), "sifive,u74-mc"); + } + + const POWER8E_POWERKVM: &str = r"processor : 0 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 1 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 2 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +processor : 3 +cpu : POWER8E (raw), altivec supported +clock : 3425.000000MHz +revision : 2.1 (pvr 004b 0201) + +timebase : 512000000 +platform : pSeries +model : IBM pSeries (emulated by qemu) +machine : CHRP IBM pSeries (emulated by qemu)"; + + #[test] + fn power8_powerkvm() { + let cpuinfo = CpuInfo::from_str(POWER8E_POWERKVM).unwrap(); + assert_eq!(cpuinfo.field("cpu"), "POWER8E (raw), altivec supported"); + + assert!(cpuinfo.field("cpu").has("altivec")); + } + + const POWER5P: &str = r"processor : 0 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 1 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 2 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 3 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 4 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 
5 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 6 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +processor : 7 +cpu : POWER5+ (gs) +clock : 1900.098000MHz +revision : 2.1 (pvr 003b 0201) + +timebase : 237331000 +platform : pSeries +machine : CHRP IBM,9133-55A"; + + #[test] + fn power5p() { + let cpuinfo = CpuInfo::from_str(POWER5P).unwrap(); + assert_eq!(cpuinfo.field("cpu"), "POWER5+ (gs)"); + + assert!(!cpuinfo.field("cpu").has("altivec")); + } +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs new file mode 100644 index 000000000..9c030f41a --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mips.rs @@ -0,0 +1,25 @@ +//! Run-time feature detection for MIPS on Linux. + +use super::auxvec; +use crate::detect::{bit, cache, Feature}; + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from `/proc/cpuinfo`. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/hwcap.h][hwcap] + // + // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/arm64/include/uapi/asm/hwcap.h + if let Ok(auxv) = auxvec::auxv() { + enable_feature(&mut value, Feature::msa, bit::test(auxv.hwcap, 1)); + return value; + } + // TODO: fall back via `cpuinfo`. + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs new file mode 100644 index 000000000..a49a72783 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/mod.rs @@ -0,0 +1,64 @@ +//! Run-time feature detection on Linux +//! 
+#[cfg(feature = "std_detect_file_io")]
+use alloc::vec::Vec;
+
+mod auxvec;
+
+#[cfg(feature = "std_detect_file_io")]
+mod cpuinfo;
+
+/// Reads the entire file at `path` into a byte vector using raw `libc` calls.
+///
+/// Returns `Err(())` if the file cannot be opened or if any `read` call
+/// reports an error; the file descriptor is closed on every path that
+/// successfully opened it.
+#[cfg(feature = "std_detect_file_io")]
+fn read_file(path: &str) -> Result<Vec<u8>, ()> {
+    // `libc::open` is handed a raw `c_char` pointer, so append the NUL
+    // terminator that a Rust `&str` does not carry.
+    let mut path = Vec::from(path.as_bytes());
+    path.push(0);
+
+    unsafe {
+        let file = libc::open(path.as_ptr() as *const libc::c_char, libc::O_RDONLY);
+        if file == -1 {
+            return Err(());
+        }
+
+        let mut data = Vec::new();
+        loop {
+            // Guarantee at least 4096 bytes of spare capacity, then let
+            // `read` fill the uninitialized tail of the vector directly.
+            data.reserve(4096);
+            let spare = data.spare_capacity_mut();
+            match libc::read(file, spare.as_mut_ptr() as *mut _, spare.len()) {
+                // Read error: release the descriptor before bailing out.
+                -1 => {
+                    libc::close(file);
+                    return Err(());
+                }
+                // End of file.
+                0 => break,
+                // `read` wrote `n` bytes into the spare capacity; extend the
+                // vector's length to cover exactly those bytes.
+                n => data.set_len(data.len() + n as usize),
+            }
+        }
+
+        libc::close(file);
+        Ok(data)
+    }
+}
+
+// Select the architecture-specific detector and re-export its
+// `detect_features`; architectures without one get a detector that reports
+// no features.
+cfg_if::cfg_if! {
+    if #[cfg(target_arch = "aarch64")] {
+        mod aarch64;
+        pub(crate) use self::aarch64::detect_features;
+    } else if #[cfg(target_arch = "arm")] {
+        mod arm;
+        pub(crate) use self::arm::detect_features;
+    } else if #[cfg(any(target_arch = "riscv32", target_arch = "riscv64"))] {
+        mod riscv;
+        pub(crate) use self::riscv::detect_features;
+    } else if #[cfg(any(target_arch = "mips", target_arch = "mips64"))] {
+        mod mips;
+        pub(crate) use self::mips::detect_features;
+    } else if #[cfg(any(target_arch = "powerpc", target_arch = "powerpc64"))] {
+        mod powerpc;
+        pub(crate) use self::powerpc::detect_features;
+    } else {
+        use crate::detect::cache;
+        /// Performs run-time feature detection.
+        pub(crate) fn detect_features() -> cache::Initializer {
+            cache::Initializer::default()
+        }
+    }
+}
diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs new file mode 100644 index 000000000..c3308e815 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/powerpc.rs @@ -0,0 +1,36 @@ +//! Run-time feature detection for PowerPC on Linux.
+ +use super::auxvec; +use crate::detect::{cache, Feature}; + +/// Try to read the features from the auxiliary vector, and if that fails, try +/// to read them from /proc/cpuinfo. +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + let enable_feature = |value: &mut cache::Initializer, f, enable| { + if enable { + value.set(f as u32); + } + }; + + // The values are part of the platform-specific [asm/cputable.h][cputable] + // + // [cputable]: https://github.com/torvalds/linux/blob/master/arch/powerpc/include/uapi/asm/cputable.h + if let Ok(auxv) = auxvec::auxv() { + // note: the PowerPC values are the mask to do the test (instead of the + // index of the bit to test like in ARM and Aarch64) + enable_feature(&mut value, Feature::altivec, auxv.hwcap & 0x10000000 != 0); + enable_feature(&mut value, Feature::vsx, auxv.hwcap & 0x00000080 != 0); + enable_feature(&mut value, Feature::power8, auxv.hwcap2 & 0x80000000 != 0); + return value; + } + + // PowerPC's /proc/cpuinfo lacks a proper Feature field, + // but `altivec` support is indicated in the `cpu` field. + #[cfg(feature = "std_detect_file_io")] + if let Ok(c) = super::cpuinfo::CpuInfo::new() { + enable_feature(&mut value, Feature::altivec, c.field("cpu").has("altivec")); + return value; + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs new file mode 100644 index 000000000..1ec06959a --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/linux/riscv.rs @@ -0,0 +1,73 @@ +//! Run-time feature detection for RISC-V on Linux.
+
+use super::auxvec;
+use crate::detect::{bit, cache, Feature};
+
+/// Reads the list of supported features from the auxiliary vector.
+///
+/// Each single-letter extension is looked up in `auxv.hwcap` at the bit index
+/// given by the letter's distance from `'a'` (`a` -> bit 0, `c` -> bit 2, ...).
+///
+/// If the auxiliary vector cannot be read, no feature is reported instead of
+/// panicking. This matches how the MIPS and PowerPC detectors treat the same
+/// failure, and under-reporting features is the safe direction for callers.
+pub(crate) fn detect_features() -> cache::Initializer {
+    let mut value = cache::Initializer::default();
+    let enable_feature = |value: &mut cache::Initializer, feature, enable| {
+        if enable {
+            value.set(feature as u32);
+        }
+    };
+    let enable_features = |value: &mut cache::Initializer, feature_slice: &[Feature], enable| {
+        if enable {
+            for feature in feature_slice {
+                value.set(*feature as u32);
+            }
+        }
+    };
+
+    // The values are part of the platform-specific [asm/hwcap.h][hwcap]
+    //
+    // [hwcap]: https://github.com/torvalds/linux/blob/master/arch/riscv/include/asm/hwcap.h
+    let auxv = match auxvec::auxv() {
+        Ok(auxv) => auxv,
+        // Reading the auxiliary vector is not expected to fail on RISC-V, but
+        // feature detection must not bring the process down if it does.
+        Err(_) => return value,
+    };
+    // Tests the hwcap bit assigned to the single-letter extension `letter`
+    // (callers pass lowercase ASCII letters only).
+    let has = |letter: u8| bit::test(auxv.hwcap, (letter - b'a').into());
+
+    enable_feature(&mut value, Feature::a, has(b'a'));
+    enable_feature(&mut value, Feature::c, has(b'c'));
+    // The `d` bit also turns on `f` and `zicsr`.
+    enable_features(
+        &mut value,
+        &[Feature::d, Feature::f, Feature::zicsr],
+        has(b'd'),
+    );
+    // The `f` bit also turns on `zicsr`.
+    enable_features(&mut value, &[Feature::f, Feature::zicsr], has(b'f'));
+
+    // The `i` bit maps to the base ISA matching the target's pointer width.
+    let has_i = has(b'i');
+    // If future RV128I is supported, implement with `enable_feature` here
+    #[cfg(target_pointer_width = "64")]
+    enable_feature(&mut value, Feature::rv64i, has_i);
+    #[cfg(target_pointer_width = "32")]
+    enable_feature(&mut value, Feature::rv32i, has_i);
+    #[cfg(target_pointer_width = "32")]
+    enable_feature(&mut value, Feature::rv32e, has(b'e'));
+
+    enable_feature(&mut value, Feature::h, has(b'h'));
+    enable_feature(&mut value, Feature::m, has(b'm'));
+    // FIXME: The auxiliary vector does not show supervisor feature support, but
+    // detecting it may be useful when Rust is used to write Linux kernel modules.
+    // There should be ways other than the auxiliary vector to detect supervisor
+    // features.
+
+    value
+}
diff --git a/library/stdarch/crates/std_detect/src/detect/os/other.rs b/library/stdarch/crates/std_detect/src/detect/os/other.rs new file mode 100644 index 000000000..091fafc4e --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/other.rs @@ -0,0 +1,8 @@ +//! Other operating systems + +use crate::detect::cache; + +#[allow(dead_code)] +pub(crate) fn detect_features() -> cache::Initializer { + cache::Initializer::default() +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs b/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs new file mode 100644 index 000000000..051ad6d1b --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/windows/aarch64.rs @@ -0,0 +1,59 @@ +//! Run-time feature detection for Aarch64 on Windows. + +use crate::detect::{cache, Feature}; + +/// Try to read the features using IsProcessorFeaturePresent. +pub(crate) fn detect_features() -> cache::Initializer { + type DWORD = u32; + type BOOL = i32; + + const FALSE: BOOL = 0; + // The following Microsoft documents isn't updated for aarch64. + // https://docs.microsoft.com/en-us/windows/win32/api/processthreadsapi/nf-processthreadsapi-isprocessorfeaturepresent + // These are defined in winnt.h of Windows SDK + const PF_ARM_NEON_INSTRUCTIONS_AVAILABLE: u32 = 19; + const PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE: u32 = 30; + const PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE: u32 = 31; + + extern "system" { + pub fn IsProcessorFeaturePresent(ProcessorFeature: DWORD) -> BOOL; + } + + let mut value = cache::Initializer::default(); + { + let mut enable_feature = |f, enable| { + if enable { + value.set(f as u32); + } + }; + + // Some features such Feature::fp may be supported on current CPU, + // but no way to detect it by OS API. + // Also, we require unsafe block for the extern "system" calls. 
+ unsafe { + enable_feature( + Feature::asimd, + IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::crc, + IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + // PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE means aes, sha1, sha2 and + // pmull support + enable_feature( + Feature::aes, + IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::pmull, + IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + enable_feature( + Feature::sha2, + IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != FALSE, + ); + } + } + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/os/x86.rs b/library/stdarch/crates/std_detect/src/detect/os/x86.rs new file mode 100644 index 000000000..ea5f595ec --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/os/x86.rs @@ -0,0 +1,273 @@ +//! x86 run-time feature detection is OS independent. + +#[cfg(target_arch = "x86")] +use core::arch::x86::*; +#[cfg(target_arch = "x86_64")] +use core::arch::x86_64::*; + +use core::mem; + +use crate::detect::{bit, cache, Feature}; + +/// Run-time feature detection on x86 works by using the CPUID instruction. +/// +/// The [CPUID Wikipedia page][wiki_cpuid] contains +/// all the information about which flags to set to query which values, and in +/// which registers these are reported. +/// +/// The definitive references are: +/// - [Intel 64 and IA-32 Architectures Software Developer's Manual Volume 2: +/// Instruction Set Reference, A-Z][intel64_ref]. +/// - [AMD64 Architecture Programmer's Manual, Volume 3: General-Purpose and +/// System Instructions][amd64_ref]. 
+/// +/// [wiki_cpuid]: https://en.wikipedia.org/wiki/CPUID +/// [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf +/// [amd64_ref]: http://support.amd.com/TechDocs/24594.pdf +#[allow(clippy::similar_names)] +pub(crate) fn detect_features() -> cache::Initializer { + let mut value = cache::Initializer::default(); + + // If the x86 CPU does not support the CPUID instruction then it is too + // old to support any of the currently-detectable features. + if !has_cpuid() { + return value; + } + + // Calling `__cpuid`/`__cpuid_count` from here on is safe because the CPU + // has `cpuid` support. + + // 0. EAX = 0: Basic Information: + // - EAX returns the "Highest Function Parameter", that is, the maximum + // leaf value for subsequent calls of `cpuinfo` in range [0, + // 0x8000_0000]. - The vendor ID is stored in 12 u8 ascii chars, + // returned in EBX, EDX, and ECX (in that order): + let (max_basic_leaf, vendor_id) = unsafe { + let CpuidResult { + eax: max_basic_leaf, + ebx, + ecx, + edx, + } = __cpuid(0); + let vendor_id: [[u8; 4]; 3] = [ + mem::transmute(ebx), + mem::transmute(edx), + mem::transmute(ecx), + ]; + let vendor_id: [u8; 12] = mem::transmute(vendor_id); + (max_basic_leaf, vendor_id) + }; + + if max_basic_leaf < 1 { + // Earlier Intel 486, CPUID not implemented + return value; + } + + // EAX = 1, ECX = 0: Queries "Processor Info and Feature Bits"; + // Contains information about most x86 features. + let CpuidResult { + ecx: proc_info_ecx, + edx: proc_info_edx, + .. + } = unsafe { __cpuid(0x0000_0001_u32) }; + + // EAX = 7, ECX = 0: Queries "Extended Features"; + // Contains information about bmi,bmi2, and avx2 support. + let (extended_features_ebx, extended_features_ecx) = if max_basic_leaf >= 7 { + let CpuidResult { ebx, ecx, .. 
} = unsafe { __cpuid(0x0000_0007_u32) }; + (ebx, ecx) + } else { + (0, 0) // CPUID does not support "Extended Features" + }; + + // EAX = 0x8000_0000, ECX = 0: Get Highest Extended Function Supported + // - EAX returns the max leaf value for extended information, that is, + // `cpuid` calls in range [0x8000_0000; u32::MAX]: + let CpuidResult { + eax: extended_max_basic_leaf, + .. + } = unsafe { __cpuid(0x8000_0000_u32) }; + + // EAX = 0x8000_0001, ECX=0: Queries "Extended Processor Info and Feature + // Bits" + let extended_proc_info_ecx = if extended_max_basic_leaf >= 1 { + let CpuidResult { ecx, .. } = unsafe { __cpuid(0x8000_0001_u32) }; + ecx + } else { + 0 + }; + + { + // borrows value till the end of this scope: + let mut enable = |r, rb, f| { + if bit::test(r as usize, rb) { + value.set(f as u32); + } + }; + + enable(proc_info_ecx, 0, Feature::sse3); + enable(proc_info_ecx, 1, Feature::pclmulqdq); + enable(proc_info_ecx, 9, Feature::ssse3); + enable(proc_info_ecx, 13, Feature::cmpxchg16b); + enable(proc_info_ecx, 19, Feature::sse4_1); + enable(proc_info_ecx, 20, Feature::sse4_2); + enable(proc_info_ecx, 23, Feature::popcnt); + enable(proc_info_ecx, 25, Feature::aes); + enable(proc_info_ecx, 29, Feature::f16c); + enable(proc_info_ecx, 30, Feature::rdrand); + enable(extended_features_ebx, 18, Feature::rdseed); + enable(extended_features_ebx, 19, Feature::adx); + enable(extended_features_ebx, 11, Feature::rtm); + enable(proc_info_edx, 4, Feature::tsc); + enable(proc_info_edx, 23, Feature::mmx); + enable(proc_info_edx, 24, Feature::fxsr); + enable(proc_info_edx, 25, Feature::sse); + enable(proc_info_edx, 26, Feature::sse2); + enable(extended_features_ebx, 29, Feature::sha); + + enable(extended_features_ebx, 3, Feature::bmi1); + enable(extended_features_ebx, 8, Feature::bmi2); + + // `XSAVE` and `AVX` support: + let cpu_xsave = bit::test(proc_info_ecx as usize, 26); + if cpu_xsave { + // 0. Here the CPU supports `XSAVE`. + + // 1. 
Detect `OSXSAVE`, that is, whether the OS is AVX enabled and + // supports saving the state of the AVX/AVX2 vector registers on + // context-switches, see: + // + // - [intel: is avx enabled?][is_avx_enabled], + // - [mozilla: sse.cpp][mozilla_sse_cpp]. + // + // [is_avx_enabled]: https://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled + // [mozilla_sse_cpp]: https://hg.mozilla.org/mozilla-central/file/64bab5cbb9b6/mozglue/build/SSE.cpp#l190 + let cpu_osxsave = bit::test(proc_info_ecx as usize, 27); + + if cpu_osxsave { + // 2. The OS must have signaled the CPU that it supports saving and + // restoring the: + // + // * SSE -> `XCR0.SSE[1]` + // * AVX -> `XCR0.AVX[2]` + // * AVX-512 -> `XCR0.AVX-512[7:5]`. + // + // by setting the corresponding bits of `XCR0` to `1`. + // + // This is safe because the CPU supports `xsave` + // and the OS has set `osxsave`. + let xcr0 = unsafe { _xgetbv(0) }; + // Test `XCR0.SSE[1]` and `XCR0.AVX[2]` with the mask `0b110 == 6`: + let os_avx_support = xcr0 & 6 == 6; + // Test `XCR0.AVX-512[7:5]` with the mask `0b1110_0000 == 224`: + let os_avx512_support = xcr0 & 224 == 224; + + // Only if the OS and the CPU support saving/restoring the AVX + // registers we enable `xsave` support: + if os_avx_support { + // See "13.3 ENABLING THE XSAVE FEATURE SET AND XSAVE-ENABLED + // FEATURES" in the "Intel® 64 and IA-32 Architectures Software + // Developer’s Manual, Volume 1: Basic Architecture": + // + // "Software enables the XSAVE feature set by setting + // CR4.OSXSAVE[bit 18] to 1 (e.g., with the MOV to CR4 + // instruction). 
If this bit is 0, execution of any of XGETBV, + // XRSTOR, XRSTORS, XSAVE, XSAVEC, XSAVEOPT, XSAVES, and XSETBV + // causes an invalid-opcode exception (#UD)" + // + enable(proc_info_ecx, 26, Feature::xsave); + + // For `xsaveopt`, `xsavec`, and `xsaves` we need to query: + // Processor Extended State Enumeration Sub-leaf (EAX = 0DH, + // ECX = 1): + if max_basic_leaf >= 0xd { + let CpuidResult { + eax: proc_extended_state1_eax, + .. + } = unsafe { __cpuid_count(0xd_u32, 1) }; + enable(proc_extended_state1_eax, 0, Feature::xsaveopt); + enable(proc_extended_state1_eax, 1, Feature::xsavec); + enable(proc_extended_state1_eax, 3, Feature::xsaves); + } + + // FMA (uses 256-bit wide registers): + enable(proc_info_ecx, 12, Feature::fma); + + // And AVX/AVX2: + enable(proc_info_ecx, 28, Feature::avx); + enable(extended_features_ebx, 5, Feature::avx2); + + // For AVX-512 the OS also needs to support saving/restoring + // the extended state, only then we enable AVX-512 support: + if os_avx512_support { + enable(extended_features_ebx, 16, Feature::avx512f); + enable(extended_features_ebx, 17, Feature::avx512dq); + enable(extended_features_ebx, 21, Feature::avx512ifma); + enable(extended_features_ebx, 26, Feature::avx512pf); + enable(extended_features_ebx, 27, Feature::avx512er); + enable(extended_features_ebx, 28, Feature::avx512cd); + enable(extended_features_ebx, 30, Feature::avx512bw); + enable(extended_features_ebx, 31, Feature::avx512vl); + enable(extended_features_ecx, 1, Feature::avx512vbmi); + enable(extended_features_ecx, 5, Feature::avx512bf16); + enable(extended_features_ecx, 6, Feature::avx512vbmi2); + enable(extended_features_ecx, 8, Feature::avx512gfni); + enable(extended_features_ecx, 8, Feature::avx512vp2intersect); + enable(extended_features_ecx, 9, Feature::avx512vaes); + enable(extended_features_ecx, 10, Feature::avx512vpclmulqdq); + enable(extended_features_ecx, 11, Feature::avx512vnni); + enable(extended_features_ecx, 12, Feature::avx512bitalg); + 
enable(extended_features_ecx, 14, Feature::avx512vpopcntdq); + } + } + } + } + + // This detects ABM on AMD CPUs and LZCNT on Intel CPUs. + // On intel CPUs with popcnt, lzcnt implements the + // "missing part" of ABM, so we map both to the same + // internal feature. + // + // The `is_x86_feature_detected!("lzcnt")` macro then + // internally maps to Feature::abm. + enable(extended_proc_info_ecx, 5, Feature::lzcnt); + + // As Hygon Dhyana originates from AMD technology and shares most of the architecture with + // AMD's family 17h, but with different CPU Vendor ID("HygonGenuine")/Family series + // number(Family 18h). + // + // For CPUID feature bits, Hygon Dhyana(family 18h) share the same definition with AMD + // family 17h. + // + // Related AMD CPUID specification is https://www.amd.com/system/files/TechDocs/25481.pdf. + // Related Hygon kernel patch can be found on + // http://lkml.kernel.org/r/5ce86123a7b9dad925ac583d88d2f921040e859b.1538583282.git.puwen@hygon.cn + if vendor_id == *b"AuthenticAMD" || vendor_id == *b"HygonGenuine" { + // These features are available on AMD arch CPUs: + enable(extended_proc_info_ecx, 6, Feature::sse4a); + enable(extended_proc_info_ecx, 21, Feature::tbm); + } + } + + // Unfortunately, some Skylake chips erroneously report support for BMI1 and + // BMI2 without actual support. These chips don't support AVX, and it seems + // that all Intel chips with non-erroneous support BMI do (I didn't check + // other vendors), so we can disable these flags for chips that don't also + // report support for AVX. + // + // It's possible this will pessimize future chips that do support BMI and + // not AVX, but this seems minor compared to a hard crash you get when + // executing an unsupported instruction (to put it another way, it's safe + // for us to under-report CPU features, but not to over-report them). 
Still, + // to limit any impact this may have in the future, we only do this for + // Intel chips, as it's a bug only present in their chips. + // + // This bug is documented as `SKL052` in the errata section of this document: + // http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/desktop-6th-gen-core-family-spec-update.pdf + if vendor_id == *b"GenuineIntel" && !value.test(Feature::avx as u32) { + value.unset(Feature::bmi1 as u32); + value.unset(Feature::bmi2 as u32); + } + + value +} diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv Binary files differnew file mode 100644 index 000000000..0538e661f --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/linux-rpi3.auxv diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv Binary files differnew file mode 100644 index 000000000..6afe1b3b4 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/linux-x64-i7-6850k.auxv diff --git a/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv Binary files differnew file mode 100644 index 000000000..75abc02d1 --- /dev/null +++ b/library/stdarch/crates/std_detect/src/detect/test_data/macos-virtualbox-linux-x86-4850HQ.auxv diff --git a/library/stdarch/crates/std_detect/src/lib.rs b/library/stdarch/crates/std_detect/src/lib.rs new file mode 100644 index 000000000..c0e0de0dd --- /dev/null +++ b/library/stdarch/crates/std_detect/src/lib.rs @@ -0,0 +1,34 @@ +//! Run-time feature detection for the Rust standard library. +//! +//! To detect whether a feature is enabled in the system running the binary +//! use one of the appropriate macro for the target: +//! 
+//! * `x86` and `x86_64`: [`is_x86_feature_detected`] +//! * `arm`: [`is_arm_feature_detected`] +//! * `aarch64`: [`is_aarch64_feature_detected`] +//! * `riscv`: [`is_riscv_feature_detected`] +//! * `mips`: [`is_mips_feature_detected`] +//! * `mips64`: [`is_mips64_feature_detected`] +//! * `powerpc`: [`is_powerpc_feature_detected`] +//! * `powerpc64`: [`is_powerpc64_feature_detected`] + +#![unstable(feature = "stdsimd", issue = "27731")] +#![feature(staged_api, stdsimd, doc_cfg, allow_internal_unstable)] +#![deny(rust_2018_idioms)] +#![allow(clippy::shadow_reuse)] +#![deny(clippy::missing_inline_in_public_items)] +#![cfg_attr(test, allow(unused_imports))] +#![no_std] + +#[cfg(test)] +#[macro_use] +extern crate std; + +// rust-lang/rust#83888: removing `extern crate` gives an error that `vec_spare> +#[cfg_attr(feature = "std_detect_file_io", allow(unused_extern_crates))] +#[cfg(feature = "std_detect_file_io")] +extern crate alloc; + +#[doc(hidden)] +#[unstable(feature = "stdsimd", issue = "27731")] +pub mod detect; diff --git a/library/stdarch/crates/std_detect/tests/cpu-detection.rs b/library/stdarch/crates/std_detect/tests/cpu-detection.rs new file mode 100644 index 000000000..ca8bf28f4 --- /dev/null +++ b/library/stdarch/crates/std_detect/tests/cpu-detection.rs @@ -0,0 +1,164 @@ +#![feature(stdsimd)] +#![allow(clippy::unwrap_used, clippy::use_debug, clippy::print_stdout)] +#![cfg(any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64" +))] + +#[macro_use] +extern crate std_detect; + +#[test] +fn all() { + for (f, e) in std_detect::detect::features() { + println!("{}: {}", f, e); + } +} + +#[test] +#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))] +fn arm_linux() { + println!("neon: {}", is_arm_feature_detected!("neon")); + println!("pmull: {}", is_arm_feature_detected!("pmull")); + println!("crc: {}", 
is_arm_feature_detected!("crc")); + println!("crypto: {}", is_arm_feature_detected!("crypto")); + println!("aes: {}", is_arm_feature_detected!("aes")); + println!("sha2: {}", is_arm_feature_detected!("sha2")); +} + +#[test] +#[cfg(all( + target_arch = "aarch64", + any(target_os = "linux", target_os = "android") +))] +fn aarch64_linux() { + println!("asimd: {}", is_aarch64_feature_detected!("asimd")); + println!("neon: {}", is_aarch64_feature_detected!("neon")); + println!("pmull: {}", is_aarch64_feature_detected!("pmull")); + println!("fp: {}", is_aarch64_feature_detected!("fp")); + println!("fp16: {}", is_aarch64_feature_detected!("fp16")); + println!("sve: {}", is_aarch64_feature_detected!("sve")); + println!("crc: {}", is_aarch64_feature_detected!("crc")); + println!("lse: {}", is_aarch64_feature_detected!("lse")); + println!("lse2: {}", is_aarch64_feature_detected!("lse2")); + println!("rdm: {}", is_aarch64_feature_detected!("rdm")); + println!("rcpc: {}", is_aarch64_feature_detected!("rcpc")); + println!("rcpc2: {}", is_aarch64_feature_detected!("rcpc2")); + println!("dotprod: {}", is_aarch64_feature_detected!("dotprod")); + println!("tme: {}", is_aarch64_feature_detected!("tme")); + println!("fhm: {}", is_aarch64_feature_detected!("fhm")); + println!("dit: {}", is_aarch64_feature_detected!("dit")); + println!("flagm: {}", is_aarch64_feature_detected!("flagm")); + println!("ssbs: {}", is_aarch64_feature_detected!("ssbs")); + println!("sb: {}", is_aarch64_feature_detected!("sb")); + println!("paca: {}", is_aarch64_feature_detected!("paca")); + println!("pacg: {}", is_aarch64_feature_detected!("pacg")); + println!("dpb: {}", is_aarch64_feature_detected!("dpb")); + println!("dpb2: {}", is_aarch64_feature_detected!("dpb2")); + println!("sve2: {}", is_aarch64_feature_detected!("sve2")); + println!("sve2-aes: {}", is_aarch64_feature_detected!("sve2-aes")); + println!("sve2-sm4: {}", is_aarch64_feature_detected!("sve2-sm4")); + println!("sve2-sha3: {}", 
is_aarch64_feature_detected!("sve2-sha3")); + println!( + "sve2-bitperm: {}", + is_aarch64_feature_detected!("sve2-bitperm") + ); + println!("frintts: {}", is_aarch64_feature_detected!("frintts")); + println!("i8mm: {}", is_aarch64_feature_detected!("i8mm")); + println!("f32mm: {}", is_aarch64_feature_detected!("f32mm")); + println!("f64mm: {}", is_aarch64_feature_detected!("f64mm")); + println!("bf16: {}", is_aarch64_feature_detected!("bf16")); + println!("rand: {}", is_aarch64_feature_detected!("rand")); + println!("bti: {}", is_aarch64_feature_detected!("bti")); + println!("mte: {}", is_aarch64_feature_detected!("mte")); + println!("jsconv: {}", is_aarch64_feature_detected!("jsconv")); + println!("fcma: {}", is_aarch64_feature_detected!("fcma")); + println!("aes: {}", is_aarch64_feature_detected!("aes")); + println!("sha2: {}", is_aarch64_feature_detected!("sha2")); + println!("sha3: {}", is_aarch64_feature_detected!("sha3")); + println!("sm4: {}", is_aarch64_feature_detected!("sm4")); +} + +#[test] +#[cfg(all(target_arch = "powerpc", target_os = "linux"))] +fn powerpc_linux() { + println!("altivec: {}", is_powerpc_feature_detected!("altivec")); + println!("vsx: {}", is_powerpc_feature_detected!("vsx")); + println!("power8: {}", is_powerpc_feature_detected!("power8")); +} + +#[test] +#[cfg(all(target_arch = "powerpc64", target_os = "linux"))] +fn powerpc64_linux() { + println!("altivec: {}", is_powerpc64_feature_detected!("altivec")); + println!("vsx: {}", is_powerpc64_feature_detected!("vsx")); + println!("power8: {}", is_powerpc64_feature_detected!("power8")); +} + +#[test] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn x86_all() { + println!("aes: {:?}", is_x86_feature_detected!("aes")); + println!("pcmulqdq: {:?}", is_x86_feature_detected!("pclmulqdq")); + println!("rdrand: {:?}", is_x86_feature_detected!("rdrand")); + println!("rdseed: {:?}", is_x86_feature_detected!("rdseed")); + println!("tsc: {:?}", is_x86_feature_detected!("tsc")); + 
println!("mmx: {:?}", is_x86_feature_detected!("mmx")); + println!("sse: {:?}", is_x86_feature_detected!("sse")); + println!("sse2: {:?}", is_x86_feature_detected!("sse2")); + println!("sse3: {:?}", is_x86_feature_detected!("sse3")); + println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); + println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1")); + println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); + println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); + println!("sha: {:?}", is_x86_feature_detected!("sha")); + println!("avx: {:?}", is_x86_feature_detected!("avx")); + println!("avx2: {:?}", is_x86_feature_detected!("avx2")); + println!("avx512f: {:?}", is_x86_feature_detected!("avx512f")); + println!("avx512cd: {:?}", is_x86_feature_detected!("avx512cd")); + println!("avx512er: {:?}", is_x86_feature_detected!("avx512er")); + println!("avx512pf: {:?}", is_x86_feature_detected!("avx512pf")); + println!("avx512bw: {:?}", is_x86_feature_detected!("avx512bw")); + println!("avx512dq: {:?}", is_x86_feature_detected!("avx512dq")); + println!("avx512vl: {:?}", is_x86_feature_detected!("avx512vl")); + println!("avx512ifma: {:?}", is_x86_feature_detected!("avx512ifma")); + println!("avx512vbmi: {:?}", is_x86_feature_detected!("avx512vbmi")); + println!( + "avx512vpopcntdq: {:?}", + is_x86_feature_detected!("avx512vpopcntdq") + ); + println!("avx512vbmi2 {:?}", is_x86_feature_detected!("avx512vbmi2")); + println!("avx512gfni {:?}", is_x86_feature_detected!("avx512gfni")); + println!("avx512vaes {:?}", is_x86_feature_detected!("avx512vaes")); + println!( + "avx512vpclmulqdq {:?}", + is_x86_feature_detected!("avx512vpclmulqdq") + ); + println!("avx512vnni {:?}", is_x86_feature_detected!("avx512vnni")); + println!( + "avx512bitalg {:?}", + is_x86_feature_detected!("avx512bitalg") + ); + println!("avx512bf16 {:?}", is_x86_feature_detected!("avx512bf16")); + println!( + "avx512vp2intersect {:?}", + is_x86_feature_detected!("avx512vp2intersect") + ); + 
println!("f16c: {:?}", is_x86_feature_detected!("f16c")); + println!("fma: {:?}", is_x86_feature_detected!("fma")); + println!("bmi1: {:?}", is_x86_feature_detected!("bmi1")); + println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); + println!("abm: {:?}", is_x86_feature_detected!("abm")); + println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); + println!("tbm: {:?}", is_x86_feature_detected!("tbm")); + println!("popcnt: {:?}", is_x86_feature_detected!("popcnt")); + println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); + println!("xsave: {:?}", is_x86_feature_detected!("xsave")); + println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt")); + println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); + println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); +} diff --git a/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs new file mode 100644 index 000000000..cd597af73 --- /dev/null +++ b/library/stdarch/crates/std_detect/tests/macro_trailing_commas.rs @@ -0,0 +1,51 @@ +#![feature(stdsimd)] +#![allow(clippy::unwrap_used, clippy::use_debug, clippy::print_stdout)] + +#[cfg(any( + target_arch = "arm", + target_arch = "aarch64", + target_arch = "x86", + target_arch = "x86_64", + target_arch = "powerpc", + target_arch = "powerpc64" +))] +#[macro_use] +extern crate std_detect; + +#[test] +#[cfg(all(target_arch = "arm", any(target_os = "linux", target_os = "android")))] +fn arm_linux() { + let _ = is_arm_feature_detected!("neon"); + let _ = is_arm_feature_detected!("neon",); +} + +#[test] +#[cfg(all( + target_arch = "aarch64", + any(target_os = "linux", target_os = "android") +))] +fn aarch64_linux() { + let _ = is_aarch64_feature_detected!("fp"); + let _ = is_aarch64_feature_detected!("fp",); +} + +#[test] +#[cfg(all(target_arch = "powerpc", target_os = "linux"))] +fn powerpc_linux() { + let _ = is_powerpc_feature_detected!("altivec"); + let _ = 
is_powerpc_feature_detected!("altivec",); +} + +#[test] +#[cfg(all(target_arch = "powerpc64", target_os = "linux"))] +fn powerpc64_linux() { + let _ = is_powerpc64_feature_detected!("altivec"); + let _ = is_powerpc64_feature_detected!("altivec",); +} + +#[test] +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +fn x86_all() { + let _ = is_x86_feature_detected!("sse"); + let _ = is_x86_feature_detected!("sse",); +} diff --git a/library/stdarch/crates/std_detect/tests/x86-specific.rs b/library/stdarch/crates/std_detect/tests/x86-specific.rs new file mode 100644 index 000000000..59e9a62fd --- /dev/null +++ b/library/stdarch/crates/std_detect/tests/x86-specific.rs @@ -0,0 +1,158 @@ +#![feature(stdsimd)] +#![cfg(any(target_arch = "x86", target_arch = "x86_64"))] + +extern crate cupid; +#[macro_use] +extern crate std_detect; + +#[test] +fn dump() { + println!("aes: {:?}", is_x86_feature_detected!("aes")); + println!("pclmulqdq: {:?}", is_x86_feature_detected!("pclmulqdq")); + println!("rdrand: {:?}", is_x86_feature_detected!("rdrand")); + println!("rdseed: {:?}", is_x86_feature_detected!("rdseed")); + println!("tsc: {:?}", is_x86_feature_detected!("tsc")); + println!("sse: {:?}", is_x86_feature_detected!("sse")); + println!("sse2: {:?}", is_x86_feature_detected!("sse2")); + println!("sse3: {:?}", is_x86_feature_detected!("sse3")); + println!("ssse3: {:?}", is_x86_feature_detected!("ssse3")); + println!("sse4.1: {:?}", is_x86_feature_detected!("sse4.1")); + println!("sse4.2: {:?}", is_x86_feature_detected!("sse4.2")); + println!("sse4a: {:?}", is_x86_feature_detected!("sse4a")); + println!("sha: {:?}", is_x86_feature_detected!("sha")); + println!("avx: {:?}", is_x86_feature_detected!("avx")); + println!("avx2: {:?}", is_x86_feature_detected!("avx2")); + println!("avx512f {:?}", is_x86_feature_detected!("avx512f")); + println!("avx512cd {:?}", is_x86_feature_detected!("avx512cd")); + println!("avx512er {:?}", is_x86_feature_detected!("avx512er")); + 
println!("avx512pf {:?}", is_x86_feature_detected!("avx512pf")); + println!("avx512bw {:?}", is_x86_feature_detected!("avx512bw")); + println!("avx512dq {:?}", is_x86_feature_detected!("avx512dq")); + println!("avx512vl {:?}", is_x86_feature_detected!("avx512vl")); + println!("avx512_ifma {:?}", is_x86_feature_detected!("avx512ifma")); + println!("avx512vbmi {:?}", is_x86_feature_detected!("avx512vbmi")); + println!( + "avx512_vpopcntdq {:?}", + is_x86_feature_detected!("avx512vpopcntdq") + ); + println!("avx512vbmi2 {:?}", is_x86_feature_detected!("avx512vbmi2")); + println!("avx512gfni {:?}", is_x86_feature_detected!("avx512gfni")); + println!("avx512vaes {:?}", is_x86_feature_detected!("avx512vaes")); + println!( + "avx512vpclmulqdq {:?}", + is_x86_feature_detected!("avx512vpclmulqdq") + ); + println!("avx512vnni {:?}", is_x86_feature_detected!("avx512vnni")); + println!( + "avx512bitalg {:?}", + is_x86_feature_detected!("avx512bitalg") + ); + println!("avx512bf16 {:?}", is_x86_feature_detected!("avx512bf16")); + println!( + "avx512vp2intersect {:?}", + is_x86_feature_detected!("avx512vp2intersect") + ); + println!("fma: {:?}", is_x86_feature_detected!("fma")); + println!("abm: {:?}", is_x86_feature_detected!("abm")); + println!("bmi: {:?}", is_x86_feature_detected!("bmi1")); + println!("bmi2: {:?}", is_x86_feature_detected!("bmi2")); + println!("tbm: {:?}", is_x86_feature_detected!("tbm")); + println!("popcnt: {:?}", is_x86_feature_detected!("popcnt")); + println!("lzcnt: {:?}", is_x86_feature_detected!("lzcnt")); + println!("fxsr: {:?}", is_x86_feature_detected!("fxsr")); + println!("xsave: {:?}", is_x86_feature_detected!("xsave")); + println!("xsaveopt: {:?}", is_x86_feature_detected!("xsaveopt")); + println!("xsaves: {:?}", is_x86_feature_detected!("xsaves")); + println!("xsavec: {:?}", is_x86_feature_detected!("xsavec")); + println!("cmpxchg16b: {:?}", is_x86_feature_detected!("cmpxchg16b")); + println!("adx: {:?}", is_x86_feature_detected!("adx")); + 
println!("rtm: {:?}", is_x86_feature_detected!("rtm")); +} + +#[cfg(feature = "std_detect_env_override")] +#[test] +fn env_override_no_avx() { + if let Ok(disable) = std::env::var("RUST_STD_DETECT_UNSTABLE") { + let information = cupid::master().unwrap(); + for d in disable.split(" ") { + match d { + "avx" => { + if information.avx() { + assert_ne!(is_x86_feature_detected!("avx"), information.avx()) + } + } + "avx2" => { + if information.avx2() { + assert_ne!(is_x86_feature_detected!("avx2"), information.avx2()) + } + } + _ => {} + } + } + } +} + +#[test] +fn compare_with_cupid() { + let information = cupid::master().unwrap(); + assert_eq!(is_x86_feature_detected!("aes"), information.aesni()); + assert_eq!( + is_x86_feature_detected!("pclmulqdq"), + information.pclmulqdq() + ); + assert_eq!(is_x86_feature_detected!("rdrand"), information.rdrand()); + assert_eq!(is_x86_feature_detected!("rdseed"), information.rdseed()); + assert_eq!(is_x86_feature_detected!("tsc"), information.tsc()); + assert_eq!(is_x86_feature_detected!("sse"), information.sse()); + assert_eq!(is_x86_feature_detected!("sse2"), information.sse2()); + assert_eq!(is_x86_feature_detected!("sse3"), information.sse3()); + assert_eq!(is_x86_feature_detected!("ssse3"), information.ssse3()); + assert_eq!(is_x86_feature_detected!("sse4.1"), information.sse4_1()); + assert_eq!(is_x86_feature_detected!("sse4.2"), information.sse4_2()); + assert_eq!(is_x86_feature_detected!("sse4a"), information.sse4a()); + assert_eq!(is_x86_feature_detected!("sha"), information.sha()); + assert_eq!(is_x86_feature_detected!("avx"), information.avx()); + assert_eq!(is_x86_feature_detected!("avx2"), information.avx2()); + assert_eq!(is_x86_feature_detected!("avx512f"), information.avx512f()); + assert_eq!(is_x86_feature_detected!("avx512cd"), information.avx512cd()); + assert_eq!(is_x86_feature_detected!("avx512er"), information.avx512er()); + assert_eq!(is_x86_feature_detected!("avx512pf"), information.avx512pf()); + 
assert_eq!(is_x86_feature_detected!("avx512bw"), information.avx512bw()); + assert_eq!(is_x86_feature_detected!("avx512dq"), information.avx512dq()); + assert_eq!(is_x86_feature_detected!("avx512vl"), information.avx512vl()); + assert_eq!( + is_x86_feature_detected!("avx512ifma"), + information.avx512_ifma() + ); + assert_eq!( + is_x86_feature_detected!("avx512vbmi"), + information.avx512_vbmi() + ); + assert_eq!( + is_x86_feature_detected!("avx512vpopcntdq"), + information.avx512_vpopcntdq() + ); + assert_eq!(is_x86_feature_detected!("fma"), information.fma()); + assert_eq!(is_x86_feature_detected!("bmi1"), information.bmi1()); + assert_eq!(is_x86_feature_detected!("bmi2"), information.bmi2()); + assert_eq!(is_x86_feature_detected!("popcnt"), information.popcnt()); + assert_eq!(is_x86_feature_detected!("abm"), information.lzcnt()); + assert_eq!(is_x86_feature_detected!("tbm"), information.tbm()); + assert_eq!(is_x86_feature_detected!("lzcnt"), information.lzcnt()); + assert_eq!(is_x86_feature_detected!("xsave"), information.xsave()); + assert_eq!(is_x86_feature_detected!("xsaveopt"), information.xsaveopt()); + assert_eq!( + is_x86_feature_detected!("xsavec"), + information.xsavec_and_xrstor() + ); + assert_eq!( + is_x86_feature_detected!("xsaves"), + information.xsaves_xrstors_and_ia32_xss() + ); + assert_eq!( + is_x86_feature_detected!("cmpxchg16b"), + information.cmpxchg16b(), + ); + assert_eq!(is_x86_feature_detected!("adx"), information.adx(),); + assert_eq!(is_x86_feature_detected!("rtm"), information.rtm(),); +} |