summary refs log tree commit diff stats
path: root/library/stdarch/crates/core_arch/src/x86/sse4a.rs
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-17 12:02:58 +0000
commit 698f8c2f01ea549d77d7dc3338a12e04c11057b9 (patch)
tree 173a775858bd501c378080a10dca74132f05bc50 /library/stdarch/crates/core_arch/src/x86/sse4a.rs
parent Initial commit. (diff)
download rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.tar.xz
rustc-698f8c2f01ea549d77d7dc3338a12e04c11057b9.zip
Adding upstream version 1.64.0+dfsg1. upstream/1.64.0+dfsg1
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'library/stdarch/crates/core_arch/src/x86/sse4a.rs')
-rw-r--r-- library/stdarch/crates/core_arch/src/x86/sse4a.rs 164
1 files changed, 164 insertions, 0 deletions
diff --git a/library/stdarch/crates/core_arch/src/x86/sse4a.rs b/library/stdarch/crates/core_arch/src/x86/sse4a.rs
new file mode 100644
index 000000000..976c907cb
--- /dev/null
+++ b/library/stdarch/crates/core_arch/src/x86/sse4a.rs
@@ -0,0 +1,164 @@
+//! `i686`'s Streaming SIMD Extensions 4a (`SSE4a`)
+
+use crate::{
+ core_arch::{simd::*, x86::*},
+ mem::transmute,
+};
+
+#[cfg(test)]
+use stdarch_test::assert_instr;
+
+#[allow(improper_ctypes)] // NOTE(review): presumably needed because SIMD vector types appear in these signatures; confirm
+extern "C" { // Bindings to the LLVM SSE4a intrinsics named by each `link_name`.
+ #[link_name = "llvm.x86.sse4a.extrq"]
+ fn extrq(x: i64x2, y: i8x16) -> i64x2; // called by `_mm_extract_si64`
+ #[link_name = "llvm.x86.sse4a.insertq"]
+ fn insertq(x: i64x2, y: i64x2) -> i64x2; // called by `_mm_insert_si64`
+ #[link_name = "llvm.x86.sse4a.movnt.sd"]
+ fn movntsd(x: *mut f64, y: __m128d); // called by `_mm_stream_sd`
+ #[link_name = "llvm.x86.sse4a.movnt.ss"]
+ fn movntss(x: *mut f32, y: __m128); // called by `_mm_stream_ss`
+}
+
+// FIXME(blocked on #248): _mm_extracti_si64(x, len, idx) // EXTRQ
+// FIXME(blocked on #248): _mm_inserti_si64(x, y, len, idx) // INSERTQ
+
+/// Extracts a bit field from the lower 64 bits of `x`, as described by `y`.
+///
+/// The control operand `y` is decoded as follows:
+///
+/// - bits `[5:0]` give the `length` of the field to extract,
+/// - bits `[13:8]` give the `index` at which the field starts.
+///
+/// Every other bit of `y` is ignored.
+///
+/// A `length` of zero is interpreted as `64`; in particular, when both
+/// `length` and `index` are zero the whole lower 64 bits of `x` are extracted.
+///
+/// The result is undefined if `length == 0 && index > 0` or if
+/// `length + index > 64`.
+#[inline]
+#[target_feature(enable = "sse4a")]
+#[cfg_attr(test, assert_instr(extrq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_extract_si64(x: __m128i, y: __m128i) -> __m128i {
+    // Reinterpret both operands in the lane layout the intrinsic expects.
+    let source = x.as_i64x2();
+    let control = y.as_i8x16();
+    let extracted: i64x2 = extrq(source, control);
+    transmute(extracted)
+}
+
+/// Inserts the low `length` bits of `y` into `x`, starting at bit `index`.
+///
+/// Both parameters of the operation are read from `y`:
+///
+/// - bits `[69:64]` hold the `length`,
+/// - bits `[77:72]` hold the `index`.
+///
+/// A `length` of zero is interpreted as `64`. The result is undefined if
+/// `index + length > 64` or if `index > 0 && length == 0`.
+#[inline]
+#[target_feature(enable = "sse4a")]
+#[cfg_attr(test, assert_instr(insertq))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_insert_si64(x: __m128i, y: __m128i) -> __m128i {
+    // Both operands are handed to the intrinsic as two 64-bit lanes.
+    let destination = x.as_i64x2();
+    let field_and_control = y.as_i64x2();
+    let merged: i64x2 = insertq(destination, field_and_control);
+    transmute(merged)
+}
+
+/// Stores the lowest `f64` element of `a` (`a.0`) to `p` with a non-temporal
+/// store.
+///
+/// The 64-bit value is written to memory without polluting the caches.
+#[inline]
+#[target_feature(enable = "sse4a")]
+#[cfg_attr(test, assert_instr(movntsd))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_stream_sd(p: *mut f64, a: __m128d) {
+    // Thin wrapper: the intrinsic performs the store and yields `()`.
+    movntsd(p, a)
+}
+
+/// Stores the lowest `f32` element of `a` (`a.0`) to `p` with a non-temporal
+/// store.
+///
+/// The 32-bit value is written to memory without polluting the caches.
+#[inline]
+#[target_feature(enable = "sse4a")]
+#[cfg_attr(test, assert_instr(movntss))]
+#[stable(feature = "simd_x86", since = "1.27.0")]
+pub unsafe fn _mm_stream_ss(p: *mut f32, a: __m128) {
+    // Thin wrapper: the intrinsic performs the store and yields `()`.
+    movntss(p, a)
+}
+
+#[cfg(test)]
+mod tests {
+    use crate::core_arch::x86::*;
+    use stdarch_test::simd_test;
+
+    #[simd_test(enable = "sse4a")]
+    unsafe fn test_mm_extract_si64() {
+        // Source: the four bits at [11:8] are `0110`; all other bits are zero.
+        let source = _mm_setr_epi64x(0b0110_0000_0000_i64, 0);
+        // Control word: index = 8 in bits [13:8], length = 4 in bits [5:0].
+        let control = _mm_setr_epi64x(0b001000___00___000100_i64, 0);
+
+        let actual = _mm_extract_si64(source, control);
+
+        // The extracted field lands in the lowest bits of the result.
+        let expected = _mm_setr_epi64x(0b0110_i64, 0);
+        assert_eq_m128i(actual, expected);
+    }
+
+    #[simd_test(enable = "sse4a")]
+    unsafe fn test_mm_insert_si64() {
+        // Destination: the four bits at [11:8] (`1010`) are the ones replaced.
+        let destination = _mm_setr_epi64x(0b1010_1010_1010i64, 0);
+        // Low half of the second operand: the bits to insert.
+        let field = 0b0110_i64;
+        // High half of the second operand: index = 8 in its bits [13:8],
+        // length = 4 in its bits [5:0].
+        let control = 0b001000___00___000100_i64;
+        let operand = _mm_setr_epi64x(field, control);
+
+        let actual = _mm_insert_si64(destination, operand);
+
+        // `1010` at [11:8] has been replaced with `0110`.
+        let expected = _mm_setr_epi64x(0b0110_1010_1010i64, 0);
+        assert_eq_m128i(actual, expected);
+    }
+
+    /// 16-byte aligned backing storage for the `f64` streaming-store test.
+    #[repr(align(16))]
+    struct MemoryF64 {
+        data: [f64; 2],
+    }
+
+    #[simd_test(enable = "sse4a")]
+    unsafe fn test_mm_stream_sd() {
+        let mut mem = MemoryF64 {
+            data: [1.0_f64, 2.0],
+        };
+        let value = _mm_setr_pd(3.0, 4.0);
+
+        _mm_stream_sd(mem.data.as_mut_ptr(), value);
+
+        // Only the first slot is overwritten, with the lowest element of `value`.
+        assert_eq!(mem.data[0], 3.0);
+        assert_eq!(mem.data[1], 2.0);
+    }
+
+    /// 16-byte aligned backing storage for the `f32` streaming-store test.
+    #[repr(align(16))]
+    struct MemoryF32 {
+        data: [f32; 4],
+    }
+
+    #[simd_test(enable = "sse4a")]
+    unsafe fn test_mm_stream_ss() {
+        let mut mem = MemoryF32 {
+            data: [1.0_f32, 2.0, 3.0, 4.0],
+        };
+        let value = _mm_setr_ps(5.0, 6.0, 7.0, 8.0);
+
+        _mm_stream_ss(mem.data.as_mut_ptr(), value);
+
+        // Only the first slot is overwritten, with the lowest element of `value`.
+        assert_eq!(mem.data[0], 5.0);
+        assert_eq!(mem.data[1], 2.0);
+        assert_eq!(mem.data[2], 3.0);
+        assert_eq!(mem.data[3], 4.0);
+    }
+}