summaryrefslogtreecommitdiffstats
path: root/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs
diff options
context:
space:
mode:
Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86_64/bmi2.rs')
-rw-r--r--library/stdarch/crates/core_arch/src/x86_64/bmi2.rs139
1 files changed, 139 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs
new file mode 100644
index 000000000..356d95a3d
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86_64/bmi2.rs
@@ -0,0 +1,139 @@
+//! Bit Manipulation Instruction (BMI) Set 2.0.
+//!
+//! The reference is [Intel 64 and IA-32 Architectures Software Developer's
+//! Manual Volume 2: Instruction Set Reference, A-Z][intel64_ref].
+//!
+//! [Wikipedia][wikipedia_bmi] provides a quick overview of the instructions
+//! available.
+//!
+//! [intel64_ref]: http://www.intel.de/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-instruction-set-reference-manual-325383.pdf
+//! [wikipedia_bmi]:
+//! https://en.wikipedia.org/wiki/Bit_Manipulation_Instruction_Sets#ABM_.28Advanced_Bit_Manipulation.29
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+/// Unsigned multiply without affecting flags.
+///
+/// Unsigned multiplication of `a` with `b`: returns the low 64 bits of the
+/// 128-bit product and stores the high 64 bits in `hi`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mulx_u64)
+#[inline]
+#[cfg_attr(test, assert_instr(mul))]
+#[target_feature(enable = "bmi2")]
+#[cfg(not(target_arch = "x86"))] // 64-bit only: excluded on 32-bit x86
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mulx_u64(a: u64, b: u64, hi: &mut u64) -> u64 {
+ let result: u128 = (a as u128) * (b as u128);
+ *hi = (result >> 64) as u64;
+ result as u64
+}
+
+/// Zeroes the bits of `a` at bit positions greater than or equal to `index`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_bzhi_u64)
+#[inline]
+#[target_feature(enable = "bmi2")]
+#[cfg_attr(test, assert_instr(bzhi))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _bzhi_u64(a: u64, index: u32) -> u64 {
+ x86_bmi2_bzhi_64(a, index as u64) // widen `index` to the binding's u64 operand
+}
+
+/// Scatters the contiguous low-order bits of `a` into the result at the bit
+/// positions specified by `mask`.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pdep_u64)
+#[inline]
+#[target_feature(enable = "bmi2")]
+#[cfg_attr(test, assert_instr(pdep))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _pdep_u64(a: u64, mask: u64) -> u64 {
+ x86_bmi2_pdep_64(a, mask)
+}
+
+/// Gathers the bits of `a` specified by `mask` into the contiguous
+/// low-order bit positions of the result.
+///
+/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_pext_u64)
+#[inline]
+#[target_feature(enable = "bmi2")]
+#[cfg_attr(test, assert_instr(pext))]
+#[cfg(not(target_arch = "x86"))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _pext_u64(a: u64, mask: u64) -> u64 {
+ x86_bmi2_pext_64(a, mask)
+}
+
+extern "C" { // bindings whose `link_name`s are LLVM's 64-bit bzhi/pdep/pext intrinsics
+ #[link_name = "llvm.x86.bmi.bzhi.64"]
+ fn x86_bmi2_bzhi_64(x: u64, y: u64) -> u64;
+ #[link_name = "llvm.x86.bmi.pdep.64"]
+ fn x86_bmi2_pdep_64(x: u64, y: u64) -> u64;
+ #[link_name = "llvm.x86.bmi.pext.64"]
+ fn x86_bmi2_pext_64(x: u64, y: u64) -> u64;
+}
+
+#[cfg(test)]
+mod tests {
+ use stdarch_test::simd_test;
+
+ use crate::core_arch::x86_64::*;
+
+ #[simd_test(enable = "bmi2")]
+ unsafe fn test_pext_u64() {
+ let n = 0b1011_1110_1001_0011u64; // value to extract bits from
+
+ let m0 = 0b0110_0011_1000_0101u64; // first mask
+ let s0 = 0b0000_0000_0011_0101u64; // expected result for `m0`
+
+ let m1 = 0b1110_1011_1110_1111u64; // second mask
+ let s1 = 0b0001_0111_0100_0011u64; // expected result for `m1`
+
+ assert_eq!(_pext_u64(n, m0), s0);
+ assert_eq!(_pext_u64(n, m1), s1);
+ }
+
+ #[simd_test(enable = "bmi2")]
+ unsafe fn test_pdep_u64() {
+ let n = 0b1011_1110_1001_0011u64; // value whose low bits are deposited
+
+ let m0 = 0b0110_0011_1000_0101u64; // first mask
+ let s0 = 0b0000_0010_0000_0101u64; // expected result for `m0`
+
+ let m1 = 0b1110_1011_1110_1111u64; // second mask
+ let s1 = 0b1110_1001_0010_0011u64; // expected result for `m1`
+
+ assert_eq!(_pdep_u64(n, m0), s0);
+ assert_eq!(_pdep_u64(n, m1), s1);
+ }
+
+ #[simd_test(enable = "bmi2")]
+ unsafe fn test_bzhi_u64() {
+ let n = 0b1111_0010u64;
+ let s = 0b0001_0010u64; // `n` with bits 5 and above cleared
+ assert_eq!(_bzhi_u64(n, 5), s);
+ }
+
+ #[simd_test(enable = "bmi2")]
+ #[rustfmt::skip]
+ unsafe fn test_mulx_u64() {
+ let a: u64 = 9_223_372_036_854_775_800;
+ let b: u64 = 100;
+ let mut hi = 0;
+ let lo = _mulx_u64(a, b, &mut hi);
+ /*
+a * b = 922337203685477580000 =
+0b00110001_1111111111111111_1111111111111111_1111111111111111_1111110011100000
+ ^~hi~~~~ ^~lo~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+ assert_eq!(
+ lo,
+ 0b11111111_11111111_11111111_11111111_11111111_11111111_11111100_11100000u64
+ );
+ assert_eq!(hi, 0b00110001u64);
+ }
+}