path: root/third_party/rust/sha2/src/sha256
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-07 19:33:14 +0000
commit     36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree       105e8c98ddea1c1e4784a60a5a6410fa416be2de /third_party/rust/sha2/src/sha256
parent     Initial commit. (diff)
download   firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.tar.xz
           firefox-esr-36d22d82aa202bb199967e9512281e9a53db42c9.zip
Adding upstream version 115.7.0esr. (upstream/115.7.0esr)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'third_party/rust/sha2/src/sha256')
-rw-r--r--  third_party/rust/sha2/src/sha256/aarch64.rs |  15
-rw-r--r--  third_party/rust/sha2/src/sha256/soft.rs    | 218
-rw-r--r--  third_party/rust/sha2/src/sha256/x86.rs     | 112
3 files changed, 345 insertions, 0 deletions
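
For orientation (not part of the commit itself): the three backends below are exercised through the sha2 crate's public Digest API rather than called directly. A minimal usage sketch, assuming the standard sha2/digest interface:

    use sha2::{Digest, Sha256};

    fn main() {
        // Sha256 transparently dispatches to one of the backends added below
        // (x86 SHA-NI, aarch64 crypto extensions, or the portable soft fallback).
        let mut hasher = Sha256::new();
        hasher.update(b"hello world");
        let digest = hasher.finalize();
        // Print the 32-byte digest as lowercase hex.
        for byte in digest.iter() {
            print!("{:02x}", byte);
        }
        println!();
    }
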
diff --git a/third_party/rust/sha2/src/sha256/aarch64.rs b/third_party/rust/sha2/src/sha256/aarch64.rs
new file mode 100644
index 0000000000..7eaa2de73c
--- /dev/null
+++ b/third_party/rust/sha2/src/sha256/aarch64.rs
@@ -0,0 +1,15 @@
+//! SHA-256 `aarch64` backend.
+
+// TODO: stdarch intrinsics: RustCrypto/hashes#257
+
+cpufeatures::new!(sha2_hwcap, "sha2");
+
+pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
+ // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
+ // after stabilization
+ if sha2_hwcap::get() {
+ sha2_asm::compress256(state, blocks);
+ } else {
+ super::soft::compress(state, blocks);
+ }
+}
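
The cpufeatures::new! macro above generates a small module (here sha2_hwcap) whose get() probes the CPU/OS once and caches the answer, so the branch in compress is cheap on every subsequent call. A rough, simplified sketch of that detect-once-and-cache pattern, with hypothetical names and a stubbed-out probe (the real macro expansion differs):

    use core::sync::atomic::{AtomicU8, Ordering};

    const UNKNOWN: u8 = 0;
    const ABSENT: u8 = 1;
    const PRESENT: u8 = 2;

    static SHA2_HWCAP: AtomicU8 = AtomicU8::new(UNKNOWN);

    /// Platform-specific probe (e.g. reading the aarch64 "sha2" hwcap on Linux);
    /// stubbed out for this sketch.
    fn detect_sha2() -> bool {
        false
    }

    /// Detect once, cache the result, and answer cheaply afterwards.
    fn sha2_available() -> bool {
        match SHA2_HWCAP.load(Ordering::Relaxed) {
            UNKNOWN => {
                let present = detect_sha2();
                SHA2_HWCAP.store(if present { PRESENT } else { ABSENT }, Ordering::Relaxed);
                present
            }
            cached => cached == PRESENT,
        }
    }
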
diff --git a/third_party/rust/sha2/src/sha256/soft.rs b/third_party/rust/sha2/src/sha256/soft.rs
new file mode 100644
index 0000000000..34826a7e88
--- /dev/null
+++ b/third_party/rust/sha2/src/sha256/soft.rs
@@ -0,0 +1,218 @@
+#![allow(clippy::many_single_char_names)]
+use crate::consts::BLOCK_LEN;
+use core::convert::TryInto;
+
+#[inline(always)]
+fn shl(v: [u32; 4], o: u32) -> [u32; 4] {
+ [v[0] >> o, v[1] >> o, v[2] >> o, v[3] >> o]
+}
+
+#[inline(always)]
+fn shr(v: [u32; 4], o: u32) -> [u32; 4] {
+ [v[0] << o, v[1] << o, v[2] << o, v[3] << o]
+}
+
+#[inline(always)]
+fn or(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
+ [a[0] | b[0], a[1] | b[1], a[2] | b[2], a[3] | b[3]]
+}
+
+#[inline(always)]
+fn xor(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
+ [a[0] ^ b[0], a[1] ^ b[1], a[2] ^ b[2], a[3] ^ b[3]]
+}
+
+#[inline(always)]
+fn add(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
+ [
+ a[0].wrapping_add(b[0]),
+ a[1].wrapping_add(b[1]),
+ a[2].wrapping_add(b[2]),
+ a[3].wrapping_add(b[3]),
+ ]
+}
+
+fn sha256load(v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
+ [v3[3], v2[0], v2[1], v2[2]]
+}
+
+fn sha256swap(v0: [u32; 4]) -> [u32; 4] {
+ [v0[2], v0[3], v0[0], v0[1]]
+}
+
+fn sha256msg1(v0: [u32; 4], v1: [u32; 4]) -> [u32; 4] {
+ // sigma 0 on vectors
+ #[inline]
+ fn sigma0x4(x: [u32; 4]) -> [u32; 4] {
+ let t1 = or(shl(x, 7), shr(x, 25));
+ let t2 = or(shl(x, 18), shr(x, 14));
+ let t3 = shl(x, 3);
+ xor(xor(t1, t2), t3)
+ }
+
+ add(v0, sigma0x4(sha256load(v0, v1)))
+}
+
+fn sha256msg2(v4: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
+ macro_rules! sigma1 {
+ ($a:expr) => {
+ $a.rotate_right(17) ^ $a.rotate_right(19) ^ ($a >> 10)
+ };
+ }
+
+ let [x3, x2, x1, x0] = v4;
+ let [w15, w14, _, _] = v3;
+
+ let w16 = x0.wrapping_add(sigma1!(w14));
+ let w17 = x1.wrapping_add(sigma1!(w15));
+ let w18 = x2.wrapping_add(sigma1!(w16));
+ let w19 = x3.wrapping_add(sigma1!(w17));
+
+ [w19, w18, w17, w16]
+}
+
+fn sha256_digest_round_x2(cdgh: [u32; 4], abef: [u32; 4], wk: [u32; 4]) -> [u32; 4] {
+ macro_rules! big_sigma0 {
+ ($a:expr) => {
+ ($a.rotate_right(2) ^ $a.rotate_right(13) ^ $a.rotate_right(22))
+ };
+ }
+ macro_rules! big_sigma1 {
+ ($a:expr) => {
+ ($a.rotate_right(6) ^ $a.rotate_right(11) ^ $a.rotate_right(25))
+ };
+ }
+ macro_rules! bool3ary_202 {
+ ($a:expr, $b:expr, $c:expr) => {
+ $c ^ ($a & ($b ^ $c))
+ };
+ } // Choose, MD5F, SHA1C
+ macro_rules! bool3ary_232 {
+ ($a:expr, $b:expr, $c:expr) => {
+ ($a & $b) ^ ($a & $c) ^ ($b & $c)
+ };
+ } // Majority, SHA1M
+
+ let [_, _, wk1, wk0] = wk;
+ let [a0, b0, e0, f0] = abef;
+ let [c0, d0, g0, h0] = cdgh;
+
+ // a round
+ let x0 = big_sigma1!(e0)
+ .wrapping_add(bool3ary_202!(e0, f0, g0))
+ .wrapping_add(wk0)
+ .wrapping_add(h0);
+ let y0 = big_sigma0!(a0).wrapping_add(bool3ary_232!(a0, b0, c0));
+ let (a1, b1, c1, d1, e1, f1, g1, h1) = (
+ x0.wrapping_add(y0),
+ a0,
+ b0,
+ c0,
+ x0.wrapping_add(d0),
+ e0,
+ f0,
+ g0,
+ );
+
+ // a round
+ let x1 = big_sigma1!(e1)
+ .wrapping_add(bool3ary_202!(e1, f1, g1))
+ .wrapping_add(wk1)
+ .wrapping_add(h1);
+ let y1 = big_sigma0!(a1).wrapping_add(bool3ary_232!(a1, b1, c1));
+ let (a2, b2, _, _, e2, f2, _, _) = (
+ x1.wrapping_add(y1),
+ a1,
+ b1,
+ c1,
+ x1.wrapping_add(d1),
+ e1,
+ f1,
+ g1,
+ );
+
+ [a2, b2, e2, f2]
+}
+
+fn schedule(v0: [u32; 4], v1: [u32; 4], v2: [u32; 4], v3: [u32; 4]) -> [u32; 4] {
+ let t1 = sha256msg1(v0, v1);
+ let t2 = sha256load(v2, v3);
+ let t3 = add(t1, t2);
+ sha256msg2(t3, v3)
+}
+
+macro_rules! rounds4 {
+ ($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{
+ let t1 = add($rest, crate::consts::K32X4[$i]);
+ $cdgh = sha256_digest_round_x2($cdgh, $abef, t1);
+ let t2 = sha256swap(t1);
+ $abef = sha256_digest_round_x2($abef, $cdgh, t2);
+ }};
+}
+
+macro_rules! schedule_rounds4 {
+ (
+ $abef:ident, $cdgh:ident,
+ $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
+ $i: expr
+ ) => {{
+ $w4 = schedule($w0, $w1, $w2, $w3);
+ rounds4!($abef, $cdgh, $w4, $i);
+ }};
+}
+
+/// Process a block with the SHA-256 algorithm.
+fn sha256_digest_block_u32(state: &mut [u32; 8], block: &[u32; 16]) {
+ let mut abef = [state[0], state[1], state[4], state[5]];
+ let mut cdgh = [state[2], state[3], state[6], state[7]];
+
+ // Rounds 0..64
+ let mut w0 = [block[3], block[2], block[1], block[0]];
+ let mut w1 = [block[7], block[6], block[5], block[4]];
+ let mut w2 = [block[11], block[10], block[9], block[8]];
+ let mut w3 = [block[15], block[14], block[13], block[12]];
+ let mut w4;
+
+ rounds4!(abef, cdgh, w0, 0);
+ rounds4!(abef, cdgh, w1, 1);
+ rounds4!(abef, cdgh, w2, 2);
+ rounds4!(abef, cdgh, w3, 3);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
+ schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
+ schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
+ schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
+ schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
+ schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
+ schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15);
+
+ let [a, b, e, f] = abef;
+ let [c, d, g, h] = cdgh;
+
+ state[0] = state[0].wrapping_add(a);
+ state[1] = state[1].wrapping_add(b);
+ state[2] = state[2].wrapping_add(c);
+ state[3] = state[3].wrapping_add(d);
+ state[4] = state[4].wrapping_add(e);
+ state[5] = state[5].wrapping_add(f);
+ state[6] = state[6].wrapping_add(g);
+ state[7] = state[7].wrapping_add(h);
+}
+
+pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
+ let mut block_u32 = [0u32; BLOCK_LEN];
+ // since LLVM can't properly use aliasing yet it will make
+ // unnecessary state stores without this copy
+ let mut state_cpy = *state;
+ for block in blocks {
+ for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(4)) {
+ *o = u32::from_be_bytes(chunk.try_into().unwrap());
+ }
+ sha256_digest_block_u32(&mut state_cpy, &block_u32);
+ }
+ *state = state_cpy;
+}
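
The portable compress above can be checked against the FIPS 180-4 test vector for the empty message. A minimal sketch, assuming it lives somewhere the function is visible (e.g. a #[cfg(test)] module inside this crate):

    #[test]
    fn empty_message_vector() {
        // SHA-256 initial hash value (FIPS 180-4).
        let mut state: [u32; 8] = [
            0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
            0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19,
        ];
        // Padding for the empty message: a 0x80 byte, then zeros, ending in a
        // zero 64-bit bit-length field, i.e. one block with a single set bit.
        let mut block = [0u8; 64];
        block[0] = 0x80;
        compress(&mut state, &[block]);
        // Expected words of SHA-256(""), i.e. e3b0c442...7852b855.
        assert_eq!(
            state,
            [
                0xe3b0c442, 0x98fc1c14, 0x9afbf4c8, 0x996fb924,
                0x27ae41e4, 0x649b934c, 0xa495991b, 0x7852b855,
            ]
        );
    }
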
diff --git a/third_party/rust/sha2/src/sha256/x86.rs b/third_party/rust/sha2/src/sha256/x86.rs
new file mode 100644
index 0000000000..46019388d3
--- /dev/null
+++ b/third_party/rust/sha2/src/sha256/x86.rs
@@ -0,0 +1,112 @@
+//! SHA-256 `x86`/`x86_64` backend
+
+#![allow(clippy::many_single_char_names)]
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64::*;
+
+unsafe fn schedule(v0: __m128i, v1: __m128i, v2: __m128i, v3: __m128i) -> __m128i {
+ let t1 = _mm_sha256msg1_epu32(v0, v1);
+ let t2 = _mm_alignr_epi8(v3, v2, 4);
+ let t3 = _mm_add_epi32(t1, t2);
+ _mm_sha256msg2_epu32(t3, v3)
+}
+
+macro_rules! rounds4 {
+ ($abef:ident, $cdgh:ident, $rest:expr, $i:expr) => {{
+ let k = crate::consts::K32X4[$i];
+ let kv = _mm_set_epi32(k[0] as i32, k[1] as i32, k[2] as i32, k[3] as i32);
+ let t1 = _mm_add_epi32($rest, kv);
+ $cdgh = _mm_sha256rnds2_epu32($cdgh, $abef, t1);
+ let t2 = _mm_shuffle_epi32(t1, 0x0E);
+ $abef = _mm_sha256rnds2_epu32($abef, $cdgh, t2);
+ }};
+}
+
+macro_rules! schedule_rounds4 {
+ (
+ $abef:ident, $cdgh:ident,
+ $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
+ $i: expr
+ ) => {{
+ $w4 = schedule($w0, $w1, $w2, $w3);
+ rounds4!($abef, $cdgh, $w4, $i);
+ }};
+}
+
+// we use unaligned loads with `__m128i` pointers
+#[allow(clippy::cast_ptr_alignment)]
+#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
+unsafe fn digest_blocks(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
+ #[allow(non_snake_case)]
+ let MASK: __m128i = _mm_set_epi64x(
+ 0x0C0D_0E0F_0809_0A0Bu64 as i64,
+ 0x0405_0607_0001_0203u64 as i64,
+ );
+
+ let state_ptr = state.as_ptr() as *const __m128i;
+ let dcba = _mm_loadu_si128(state_ptr.add(0));
+ let efgh = _mm_loadu_si128(state_ptr.add(1));
+
+ let cdab = _mm_shuffle_epi32(dcba, 0xB1);
+ let efgh = _mm_shuffle_epi32(efgh, 0x1B);
+ let mut abef = _mm_alignr_epi8(cdab, efgh, 8);
+ let mut cdgh = _mm_blend_epi16(efgh, cdab, 0xF0);
+
+ for block in blocks {
+ let abef_save = abef;
+ let cdgh_save = cdgh;
+
+ let data_ptr = block.as_ptr() as *const __m128i;
+ let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(0)), MASK);
+ let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(1)), MASK);
+ let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(2)), MASK);
+ let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(data_ptr.add(3)), MASK);
+ let mut w4;
+
+ rounds4!(abef, cdgh, w0, 0);
+ rounds4!(abef, cdgh, w1, 1);
+ rounds4!(abef, cdgh, w2, 2);
+ rounds4!(abef, cdgh, w3, 3);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 4);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 5);
+ schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 6);
+ schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 7);
+ schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 8);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 9);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 10);
+ schedule_rounds4!(abef, cdgh, w2, w3, w4, w0, w1, 11);
+ schedule_rounds4!(abef, cdgh, w3, w4, w0, w1, w2, 12);
+ schedule_rounds4!(abef, cdgh, w4, w0, w1, w2, w3, 13);
+ schedule_rounds4!(abef, cdgh, w0, w1, w2, w3, w4, 14);
+ schedule_rounds4!(abef, cdgh, w1, w2, w3, w4, w0, 15);
+
+ abef = _mm_add_epi32(abef, abef_save);
+ cdgh = _mm_add_epi32(cdgh, cdgh_save);
+ }
+
+ let feba = _mm_shuffle_epi32(abef, 0x1B);
+ let dchg = _mm_shuffle_epi32(cdgh, 0xB1);
+ let dcba = _mm_blend_epi16(feba, dchg, 0xF0);
+ let hgef = _mm_alignr_epi8(dchg, feba, 8);
+
+ let state_ptr_mut = state.as_mut_ptr() as *mut __m128i;
+ _mm_storeu_si128(state_ptr_mut.add(0), dcba);
+ _mm_storeu_si128(state_ptr_mut.add(1), hgef);
+}
+
+cpufeatures::new!(shani_cpuid, "sha", "sse2", "ssse3", "sse4.1");
+
+pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) {
+ // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
+ // after stabilization
+ if shani_cpuid::get() {
+ unsafe {
+ digest_blocks(state, blocks);
+ }
+ } else {
+ super::soft::compress(state, blocks);
+ }
+}
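
One detail worth noting (not part of the diff): the MASK passed to _mm_shuffle_epi8 in digest_blocks reverses the bytes within each 32-bit lane, so message words loaded from the byte stream are interpreted big-endian, matching the u32::from_be_bytes conversion in the soft backend. A portable illustration with a hypothetical helper:

    /// What the MASK byte-shuffle achieves for one 16-byte chunk of the block.
    fn load_be_words(chunk: &[u8; 16]) -> [u32; 4] {
        [
            u32::from_be_bytes([chunk[0], chunk[1], chunk[2], chunk[3]]),
            u32::from_be_bytes([chunk[4], chunk[5], chunk[6], chunk[7]]),
            u32::from_be_bytes([chunk[8], chunk[9], chunk[10], chunk[11]]),
            u32::from_be_bytes([chunk[12], chunk[13], chunk[14], chunk[15]]),
        ]
    }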