summaryrefslogtreecommitdiffstats
path: root/vendor/sha2/src/sha256/x86.rs
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/sha2/src/sha256/x86.rs')
-rw-r--r--vendor/sha2/src/sha256/x86.rs112
1 files changed, 112 insertions, 0 deletions
diff --git a/vendor/sha2/src/sha256/x86.rs b/vendor/sha2/src/sha256/x86.rs
new file mode 100644
index 000000000..46019388d
--- /dev/null
+++ b/vendor/sha2/src/sha256/x86.rs
@@ -0,0 +1,112 @@
+//! SHA-256 `x86`/`x86_64` backend
+
+#![allow(clippy::many_single_char_names)]
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64::*;
+
+unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i {
+ let t1 = _mm_sha256msg1_epu32(v0, v1);
+ let t2 = _mm_alignr_epi8(v3, v2, 4);
+ let t3 = _mm_add_epi32(t1, t2);
+ _mm_sha256msg2_epu32(t3, v3)
+}
+
+macro_rules! rounds4 {
+ ($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{
+ let k = crate::consts::K32X4[$i];
+ let kv = _mm_set_epi32(k[0] as i32, k[1] as i32, k[2] as i32, k[3] as i32);
+ let t1 = _mm_add_epi32($rest, kv);
+ $cdgh = _mm_sha256rnds2_epu32($cdgh, $abef, t1);
+ let t2 = _mm_shuffle_epi32(t1, 0x0E);
+ $abef = _mm_sha256rnds2_epu32($abef, $cdgh, t2);
+ }};
+}
+
+macro_rules! schedule_rounds4 {
+ (
+ $abef:ident, $cdgh:ident,
+ $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
+ $i: expr
+ ) => {{
+ $w4 = schedule($w0, $w1, $w2, $w3);
+ rounds4!($abef, $cdgh, $w4, $i);
+ }};
+}
+
+// we use unaligned loads with `__m128i` pointers
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
+unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
+ #[allow(non_snake_case)]
+ let MASK: __m128i = _mm_set_epi64x(
+ 0x0C0D_0E0F_0809_0A0Bu64 as i64,
+ 0x0405_0607_0001_0203u64 as i64,
+ );
+
+ let state_ptr = state.as_ptr() as *const __m128i;
+ let dcba = _mm_loadu_si128(state_ptr.add(0));
+ let efgh = _mm_loadu_si128(state_ptr.add(1));
+
+ let cdab = _mm_shuffle_epi32(dcba, 0xB1);
+ let efgh = _mm_shuffle_epi32(efgh, 0x1B);
+ let mut abef = _mm_alignr_epi8(cdab, efgh, 8);
+ let mut cdgh = _mm_blend_epi16(efgh, cdab, 0xF0);
+
+ for block in blocks {
+ let abef_save = abef;
+ let cdgh_save = cdgh;
+
+ let data_ptr = block.as_ptr() as *const __m128i;
+ let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(0)), MASK);
+ let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(1)), MASK);
+ let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(2)), MASK);
+ let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(3)), MASK);
+ let mut w4;
+
+ rounds4!(abef, cdgh, w0, 0);
+ rounds4!(abef, cdgh, w1, 1);
+ rounds4!(abef, cdgh, w2, 2);
+ rounds4!(abef, cdgh, w3, 3);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
+ schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
+ schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
+ schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
+ schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
+ schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
+ schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15);
+
+ abef = _mm_add_epi32(abef, abef_save);
+ cdgh = _mm_add_epi32(cdgh, cdgh_save);
+ }
+
+ let feba = _mm_shuffle_epi32(abef, 0x1B);
+ let dchg = _mm_shuffle_epi32(cdgh, 0xB1);
+ let dcba = _mm_blend_epi16(feba, dchg, 0xF0);
+ let hgef = _mm_alignr_epi8(dchg, feba, 8);
+
+ let state_ptr_mut = state.as_mut_ptr() as *mut __m128i;
+ _mm_storeu_si128(state_ptr_mut.add(0), dcba);
+ _mm_storeu_si128(state_ptr_mut.add(1), hgef);
+}
+
+cpufeatures::new!(shani_cpuid, "sha", "sse2", "ssse3", "sse4.1");
+
+pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
+ // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
+ // after stabilization
+ if shani_cpuid::get() {
+ unsafe {
+ digest_blocks(state, blocks);
+ }
+ } else {
+ super::soft::compress(state, blocks);
+ }
+}