Diffstat (limited to 'rust/vendor/sha1/src/compress')
-rw-r--r--   rust/vendor/sha1/src/compress/aarch64.rs          |  18
-rw-r--r--   rust/vendor/sha1/src/compress/loongarch64_asm.rs  | 255
-rw-r--r--   rust/vendor/sha1/src/compress/soft.rs             | 260
-rw-r--r--   rust/vendor/sha1/src/compress/x86.rs              | 112
4 files changed, 645 insertions, 0 deletions
diff --git a/rust/vendor/sha1/src/compress/aarch64.rs b/rust/vendor/sha1/src/compress/aarch64.rs
new file mode 100644
index 0000000..5952d1f
--- /dev/null
+++ b/rust/vendor/sha1/src/compress/aarch64.rs
@@ -0,0 +1,18 @@
+//! SHA-1 `aarch64` backend.
+
+// Per rustc target feature docs for `aarch64-unknown-linux-gnu` and
+// `aarch64-apple-darwin` platforms, the `sha2` target feature enables
+// SHA-1 as well:
+//
+// > Enable SHA1 and SHA256 support.
+cpufeatures::new!(sha1_hwcap, "sha2");
+
+pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
+    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
+    // after stabilization
+    if sha1_hwcap::get() {
+        sha1_asm::compress(state, blocks);
+    } else {
+        super::soft::compress(state, blocks);
+    }
+}
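The dispatch above is the usual cpufeatures pattern: `sha1_hwcap::get()` probes the CPU once and caches the result. For reference, here is a minimal sketch (not part of the vendored code) of the same dispatch written against the `InitToken` API that the generated `sha1_hwcap` module also exposes; the `Sha1Compressor` wrapper name is hypothetical:

    // Hypothetical wrapper: hold the InitToken so repeated calls skip the
    // "has the probe run yet?" check that sha1_hwcap::get() performs.
    pub struct Sha1Compressor {
        token: sha1_hwcap::InitToken,
    }

    impl Sha1Compressor {
        pub fn new() -> Self {
            // Probes the CPU once; the result is cached in an atomic.
            Self { token: sha1_hwcap::init() }
        }

        pub fn compress(&self, state: &mut [u32; 5], blocks: &[[u8; 64]]) {
            if self.token.get() {
                sha1_asm::compress(state, blocks); // hardware SHA1 instructions
            } else {
                super::soft::compress(state, blocks); // portable fallback
            }
        }
    }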
diff --git a/rust/vendor/sha1/src/compress/loongarch64_asm.rs b/rust/vendor/sha1/src/compress/loongarch64_asm.rs
new file mode 100644
index 0000000..facef1b
--- /dev/null
+++ b/rust/vendor/sha1/src/compress/loongarch64_asm.rs
@@ -0,0 +1,255 @@
+//! LoongArch64 assembly backend
+
+use core::arch::asm;
+
+const K: [u32; 4] = [0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6];
+
+macro_rules! c {
+    ($($l:expr)*) => {
+        concat!($($l ,)*)
+    };
+}
+
+macro_rules! round0a {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
+        c!(
+            "ld.w $t5, $a1, (" $i " * 4);"
+            "revb.2h $t5, $t5;"
+            "rotri.w $t5, $t5, 16;"
+            "add.w " $e ", " $e ", $t5;"
+            "st.w $t5, $sp, (" $i " * 4);"
+            "xor $t5, " $c "," $d ";"
+            "and $t5, $t5, " $b ";"
+            "xor $t5, $t5, " $d ";"
+            roundtail!($a, $b, $e, $i, "$a4")
+        )
+    };
+}
+
+macro_rules! scheldule {
+    ($i:literal, $e:literal) => {
+        c!(
+            "ld.w $t5, $sp, (((" $i " - 3) & 0xF) * 4);"
+            "ld.w $t6, $sp, (((" $i " - 8) & 0xF) * 4);"
+            "ld.w $t7, $sp, (((" $i " - 14) & 0xF) * 4);"
+            "ld.w $t8, $sp, (((" $i " - 16) & 0xF) * 4);"
+            "xor $t5, $t5, $t6;"
+            "xor $t5, $t5, $t7;"
+            "xor $t5, $t5, $t8;"
+            "rotri.w $t5, $t5, 31;"
+            "add.w " $e "," $e ", $t5;"
+            "st.w $t5, $sp, ((" $i " & 0xF) * 4);"
+        )
+    };
+}
+
+macro_rules! round0b {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
+        c!(
+            scheldule!($i, $e)
+            "xor $t5," $c "," $d ";"
+            "and $t5, $t5," $b ";"
+            "xor $t5, $t5," $d ";"
+            roundtail!($a, $b, $e, $i, "$a4")
+        )
+    };
+}
+
+macro_rules! round1 {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
+        c!(
+            scheldule!($i, $e)
+            "xor $t5," $b "," $c ";"
+            "xor $t5, $t5," $d ";"
+            roundtail!($a, $b, $e, $i, "$a5")
+        )
+    };
+}
+
+macro_rules! round2 {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
+        c!(
+            scheldule!($i, $e)
+            "or $t5," $c "," $d ";"
+            "and $t5, $t5, " $b ";"
+            "and $t7," $c "," $d ";"
+            "or $t5, $t5, $t7;"
+            roundtail!($a, $b, $e, $i, "$a6")
+        )
+    };
+}
+
+macro_rules! round3 {
+    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
+        c!(
+            scheldule!($i, $e)
+            "xor $t5," $b "," $c ";"
+            "xor $t5, $t5," $d ";"
+            roundtail!($a, $b, $e, $i, "$a7")
+        )
+    };
+}
+
+macro_rules! roundtail {
+    ($a:literal, $b:literal, $e:literal, $i:literal, $k:literal) => {
+        c!(
+            "rotri.w " $b "," $b ", 2;"
+            "add.w " $e "," $e ", $t5;"
+            "add.w " $e "," $e "," $k ";"
+            "rotri.w $t5," $a ", 27;"
+            "add.w " $e "," $e ", $t5;"
+        )
+    };
+}
+
+pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
+    if blocks.is_empty() {
+        return;
+    }
+
+    unsafe {
+        asm!(
+            // Allocate scratch stack space
+            "addi.d $sp, $sp, -64;",
+
+            // Load state
+            "ld.w $t0, $a0, 0",
+            "ld.w $t1, $a0, 4",
+            "ld.w $t2, $a0, 8",
+            "ld.w $t3, $a0, 12",
+            "ld.w $t4, $a0, 16",
+
+            "42:",
+
+            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 0),
+            round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 1),
+            round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 2),
+            round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 3),
+            round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 4),
+            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 5),
+            round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 6),
+            round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 7),
+            round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 8),
+            round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 9),
+            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 10),
+            round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 11),
+            round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 12),
+            round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 13),
+            round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 14),
+            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 15),
+            round0b!("$t4", "$t0", "$t1", "$t2", "$t3", 16),
+            round0b!("$t3", "$t4", "$t0", "$t1", "$t2", 17),
+            round0b!("$t2", "$t3", "$t4", "$t0", "$t1", 18),
+            round0b!("$t1", "$t2", "$t3", "$t4", "$t0", 19),
+            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 20),
+            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 21),
+            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 22),
+            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 23),
+            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 24),
+            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 25),
+            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 26),
+            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 27),
+            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 28),
+            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 29),
+            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 30),
+            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 31),
+            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 32),
+            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 33),
+            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 34),
+            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 35),
+            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 36),
+            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 37),
+            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 38),
+            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 39),
+            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 40),
+            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 41),
+            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 42),
+            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 43),
+            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 44),
+            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 45),
+            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 46),
+            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 47),
+            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 48),
+            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 49),
+            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 50),
+            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 51),
+            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 52),
+            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 53),
+            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 54),
+            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 55),
+            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 56),
+            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 57),
+            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 58),
+            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 59),
+            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 60),
+            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 61),
+            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 62),
+            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 63),
+            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 64),
+            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 65),
+            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 66),
+            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 67),
+            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 68),
+            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 69),
+            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 70),
+            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 71),
+            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 72),
+            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 73),
+            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 74),
+            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 75),
+            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 76),
+            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 77),
+            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 78),
+            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 79),
+
+            // Update state registers
+            "ld.w $t5, $a0, 0",  // a
+            "ld.w $t6, $a0, 4",  // b
+            "ld.w $t7, $a0, 8",  // c
+            "ld.w $t8, $a0, 12", // d
+            "add.w $t0, $t0, $t5",
+            "ld.w $t5, $a0, 16", // e
+            "add.w $t1, $t1, $t6",
+            "add.w $t2, $t2, $t7",
+            "add.w $t3, $t3, $t8",
+            "add.w $t4, $t4, $t5",
+
+            // Save updated state
+            "st.w $t0, $a0, 0",
+            "st.w $t1, $a0, 4",
+            "st.w $t2, $a0, 8",
+            "st.w $t3, $a0, 12",
+            "st.w $t4, $a0, 16",
+
+            // Looping over blocks
+            "addi.d $a1, $a1, 64",
+            "addi.d $a2, $a2, -1",
+            "bnez $a2, 42b",
+
+            // Restore stack register
+            "addi.d $sp, $sp, 64",
+
+            in("$a0") state,
+            inout("$a1") blocks.as_ptr() => _,
+            inout("$a2") blocks.len() => _,
+
+            in("$a4") K[0],
+            in("$a5") K[1],
+            in("$a6") K[2],
+            in("$a7") K[3],
+
+            // Clobbers
+            out("$t0") _,
+            out("$t1") _,
+            out("$t2") _,
+            out("$t3") _,
+            out("$t4") _,
+            out("$t5") _,
+            out("$t6") _,
+            out("$t7") _,
+            out("$t8") _,
+
+            options(preserves_flags),
+        );
+    }
+}
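For readers cross-checking the assembly: the `scheldule!` macro above implements the standard SHA-1 message-schedule recurrence over a 16-word ring buffer kept in the stack scratch space, and `rotri.w ..., 31` (rotate right by 31) is a left-rotate by 1. A portable Rust sketch of the same step, for illustration only:

    // w[i] = rotl1(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]), indices taken mod 16,
    // mirroring the four `ld.w`/`xor` pairs and the final `st.w` in the macro.
    fn schedule_step(w: &mut [u32; 16], i: usize) -> u32 {
        let t = (w[(i + 13) & 0xF] ^ w[(i + 8) & 0xF] ^ w[(i + 2) & 0xF] ^ w[i & 0xF])
            .rotate_left(1);
        w[i & 0xF] = t;
        t
    }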
diff --git a/rust/vendor/sha1/src/compress/soft.rs b/rust/vendor/sha1/src/compress/soft.rs
new file mode 100644
index 0000000..0b9fb27
--- /dev/null
+++ b/rust/vendor/sha1/src/compress/soft.rs
@@ -0,0 +1,260 @@
+#![allow(clippy::many_single_char_names)]
+use super::BLOCK_SIZE;
+use core::convert::TryInto;
+
+const K: [u32; 4] = [0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6];
+
+#[inline(always)]
+fn add(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
+    [
+        a[0].wrapping_add(b[0]),
+        a[1].wrapping_add(b[1]),
+        a[2].wrapping_add(b[2]),
+        a[3].wrapping_add(b[3]),
+    ]
+}
+
+#[inline(always)]
+fn xor(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
+    [a[0] ^ b[0], a[1] ^ b[1], a[2] ^ b[2], a[3] ^ b[3]]
+}
+
+#[inline]
+pub fn sha1_first_add(e: u32, w0: [u32; 4]) -> [u32; 4] {
+    let [a, b, c, d] = w0;
+    [e.wrapping_add(a), b, c, d]
+}
+
+fn sha1msg1(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
+    let [_, _, w2, w3] = a;
+    let [w4, w5, _, _] = b;
+    [a[0] ^ w2, a[1] ^ w3, a[2] ^ w4, a[3] ^ w5]
+}
+
+fn sha1msg2(a: [u32; 4], b: [u32; 4]) -> [u32; 4] {
+    let [x0, x1, x2, x3] = a;
+    let [_, w13, w14, w15] = b;
+
+    let w16 = (x0 ^ w13).rotate_left(1);
+    let w17 = (x1 ^ w14).rotate_left(1);
+    let w18 = (x2 ^ w15).rotate_left(1);
+    let w19 = (x3 ^ w16).rotate_left(1);
+
+    [w16, w17, w18, w19]
+}
+
+#[inline]
+fn sha1_first_half(abcd: [u32; 4], msg: [u32; 4]) -> [u32; 4] {
+    sha1_first_add(abcd[0].rotate_left(30), msg)
+}
+
+fn sha1_digest_round_x4(abcd: [u32; 4], work: [u32; 4], i: i8) -> [u32; 4] {
+    match i {
+        0 => sha1rnds4c(abcd, add(work, [K[0]; 4])),
+        1 => sha1rnds4p(abcd, add(work, [K[1]; 4])),
+        2 => sha1rnds4m(abcd, add(work, [K[2]; 4])),
+        3 => sha1rnds4p(abcd, add(work, [K[3]; 4])),
+        _ => unreachable!("unknown icosaround index"),
+    }
+}
+
+fn sha1rnds4c(abcd: [u32; 4], msg: [u32; 4]) -> [u32; 4] {
+    let [mut a, mut b, mut c, mut d] = abcd;
+    let [t, u, v, w] = msg;
+    let mut e = 0u32;
+
+    macro_rules! bool3ary_202 {
+        ($a:expr, $b:expr, $c:expr) => {
+            $c ^ ($a & ($b ^ $c))
+        };
+    } // Choose, MD5F, SHA1C
+
+    e = e
+        .wrapping_add(a.rotate_left(5))
+        .wrapping_add(bool3ary_202!(b, c, d))
+        .wrapping_add(t);
+    b = b.rotate_left(30);
+
+    d = d
+        .wrapping_add(e.rotate_left(5))
+        .wrapping_add(bool3ary_202!(a, b, c))
+        .wrapping_add(u);
+    a = a.rotate_left(30);
+
+    c = c
+        .wrapping_add(d.rotate_left(5))
+        .wrapping_add(bool3ary_202!(e, a, b))
+        .wrapping_add(v);
+    e = e.rotate_left(30);
+
+    b = b
+        .wrapping_add(c.rotate_left(5))
+        .wrapping_add(bool3ary_202!(d, e, a))
+        .wrapping_add(w);
+    d = d.rotate_left(30);
+
+    [b, c, d, e]
+}
+
+fn sha1rnds4p(abcd: [u32; 4], msg: [u32; 4]) -> [u32; 4] {
+    let [mut a, mut b, mut c, mut d] = abcd;
+    let [t, u, v, w] = msg;
+    let mut e = 0u32;
+
+    macro_rules! bool3ary_150 {
+        ($a:expr, $b:expr, $c:expr) => {
+            $a ^ $b ^ $c
+        };
+    } // Parity, XOR, MD5H, SHA1P
+
+    e = e
+        .wrapping_add(a.rotate_left(5))
+        .wrapping_add(bool3ary_150!(b, c, d))
+        .wrapping_add(t);
+    b = b.rotate_left(30);
+
+    d = d
+        .wrapping_add(e.rotate_left(5))
+        .wrapping_add(bool3ary_150!(a, b, c))
+        .wrapping_add(u);
+    a = a.rotate_left(30);
+
+    c = c
+        .wrapping_add(d.rotate_left(5))
+        .wrapping_add(bool3ary_150!(e, a, b))
+        .wrapping_add(v);
+    e = e.rotate_left(30);
+
+    b = b
+        .wrapping_add(c.rotate_left(5))
+        .wrapping_add(bool3ary_150!(d, e, a))
+        .wrapping_add(w);
+    d = d.rotate_left(30);
+
+    [b, c, d, e]
+}
+
+fn sha1rnds4m(abcd: [u32; 4], msg: [u32; 4]) -> [u32; 4] {
+    let [mut a, mut b, mut c, mut d] = abcd;
+    let [t, u, v, w] = msg;
+    let mut e = 0u32;
+
+    macro_rules! bool3ary_232 {
+        ($a:expr, $b:expr, $c:expr) => {
+            ($a & $b) ^ ($a & $c) ^ ($b & $c)
+        };
+    } // Majority, SHA1M
+
+    e = e
+        .wrapping_add(a.rotate_left(5))
+        .wrapping_add(bool3ary_232!(b, c, d))
+        .wrapping_add(t);
+    b = b.rotate_left(30);
+
+    d = d
+        .wrapping_add(e.rotate_left(5))
+        .wrapping_add(bool3ary_232!(a, b, c))
+        .wrapping_add(u);
+    a = a.rotate_left(30);
+
+    c = c
+        .wrapping_add(d.rotate_left(5))
+        .wrapping_add(bool3ary_232!(e, a, b))
+        .wrapping_add(v);
+    e = e.rotate_left(30);
+
+    b = b
+        .wrapping_add(c.rotate_left(5))
+        .wrapping_add(bool3ary_232!(d, e, a))
+        .wrapping_add(w);
+    d = d.rotate_left(30);
+
+    [b, c, d, e]
+}
+
+macro_rules! rounds4 {
+    ($h0:ident, $h1:ident, $wk:expr, $i:expr) => {
+        sha1_digest_round_x4($h0, sha1_first_half($h1, $wk), $i)
+    };
+}
+
+macro_rules! schedule {
+    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {
+        sha1msg2(xor(sha1msg1($v0, $v1), $v2), $v3)
+    };
+}
+
+macro_rules! schedule_rounds4 {
+    (
+        $h0:ident, $h1:ident,
+        $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
+        $i:expr
+    ) => {
+        $w4 = schedule!($w0, $w1, $w2, $w3);
+        $h1 = rounds4!($h0, $h1, $w4, $i);
+    };
+}
+
+#[inline(always)]
+fn sha1_digest_block_u32(state: &mut [u32; 5], block: &[u32; 16]) {
+    let mut w0 = [block[0], block[1], block[2], block[3]];
+    let mut w1 = [block[4], block[5], block[6], block[7]];
+    let mut w2 = [block[8], block[9], block[10], block[11]];
+    let mut w3 = [block[12], block[13], block[14], block[15]];
+    #[allow(clippy::needless_late_init)]
+    let mut w4;
+
+    let mut h0 = [state[0], state[1], state[2], state[3]];
+    let mut h1 = sha1_first_add(state[4], w0);
+
+    // Rounds 0..20
+    h1 = sha1_digest_round_x4(h0, h1, 0);
+    h0 = rounds4!(h1, h0, w1, 0);
+    h1 = rounds4!(h0, h1, w2, 0);
+    h0 = rounds4!(h1, h0, w3, 0);
+    schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 0);
+
+    // Rounds 20..40
+    schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 1);
+    schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 1);
+    schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 1);
+    schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 1);
+    schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 1);
+
+    // Rounds 40..60
+    schedule_rounds4!(h0, h1, w1, w2, w3, w4, w0, 2);
+    schedule_rounds4!(h1, h0, w2, w3, w4, w0, w1, 2);
+    schedule_rounds4!(h0, h1, w3, w4, w0, w1, w2, 2);
+    schedule_rounds4!(h1, h0, w4, w0, w1, w2, w3, 2);
+    schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 2);
+
+    // Rounds 60..80
+    schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 3);
+    schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 3);
+    schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 3);
+    schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 3);
+    schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 3);
+
+    let e = h1[0].rotate_left(30);
+    let [a, b, c, d] = h0;
+
+    state[0] = state[0].wrapping_add(a);
+    state[1] = state[1].wrapping_add(b);
+    state[2] = state[2].wrapping_add(c);
+    state[3] = state[3].wrapping_add(d);
+    state[4] = state[4].wrapping_add(e);
+}
+
+pub fn compress(state: &mut [u32; 5], blocks: &[[u8; BLOCK_SIZE]]) {
+    let mut block_u32 = [0u32; BLOCK_SIZE / 4];
+    // since LLVM can't properly use aliasing yet it will make
+    // unnecessary state stores without this copy
+    let mut state_cpy = *state;
+    for block in blocks.iter() {
+        for (o, chunk) in block_u32.iter_mut().zip(block.chunks_exact(4)) {
+            *o = u32::from_be_bytes(chunk.try_into().unwrap());
+        }
+        sha1_digest_block_u32(&mut state_cpy, &block_u32);
+    }
+    *state = state_cpy;
+}
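As a quick sanity check of the `compress` entry point above, here is a test-style sketch (not part of the diff; it assumes `BLOCK_SIZE == 64`) that hashes the empty message by hand-padding a single block and comparing against the well-known digest da39a3ee5e6b4b0d3255bfef95601890afd80709:

    #[test]
    fn empty_message_digest() {
        // SHA-1 initialization vector
        let mut state: [u32; 5] = [0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0];
        // Padding for the empty message: a 0x80 byte, zeros, and a zero
        // 64-bit big-endian bit-length field.
        let mut block = [0u8; 64];
        block[0] = 0x80;
        compress(&mut state, &[block]);
        assert_eq!(
            state,
            [0xDA39A3EE, 0x5E6B4B0D, 0x3255BFEF, 0x95601890, 0xAFD80709]
        );
    }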
diff --git a/rust/vendor/sha1/src/compress/x86.rs b/rust/vendor/sha1/src/compress/x86.rs
new file mode 100644
index 0000000..4dcd56b
--- /dev/null
+++ b/rust/vendor/sha1/src/compress/x86.rs
@@ -0,0 +1,112 @@
+//! SHA-1 `x86`/`x86_64` backend
+
+#![cfg(any(target_arch = "x86", target_arch = "x86_64"))]
+
+#[cfg(target_arch = "x86")]
+use core::arch::x86::*;
+#[cfg(target_arch = "x86_64")]
+use core::arch::x86_64::*;
+
+macro_rules! rounds4 {
+    ($h0:ident, $h1:ident, $wk:expr, $i:expr) => {
+        _mm_sha1rnds4_epu32($h0, _mm_sha1nexte_epu32($h1, $wk), $i)
+    };
+}
+
+macro_rules! schedule {
+    ($v0:expr, $v1:expr, $v2:expr, $v3:expr) => {
+        _mm_sha1msg2_epu32(_mm_xor_si128(_mm_sha1msg1_epu32($v0, $v1), $v2), $v3)
+    };
+}
+
+macro_rules! schedule_rounds4 {
+    (
+        $h0:ident, $h1:ident,
+        $w0:expr, $w1:expr, $w2:expr, $w3:expr, $w4:expr,
+        $i:expr
+    ) => {
+        $w4 = schedule!($w0, $w1, $w2, $w3);
+        $h1 = rounds4!($h0, $h1, $w4, $i);
+    };
+}
+
+#[target_feature(enable = "sha,sse2,ssse3,sse4.1")]
+unsafe fn digest_blocks(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
+    #[allow(non_snake_case)]
+    let MASK: __m128i = _mm_set_epi64x(0x0001_0203_0405_0607, 0x0809_0A0B_0C0D_0E0F);
+
+    let mut state_abcd = _mm_set_epi32(
+        state[0] as i32,
+        state[1] as i32,
+        state[2] as i32,
+        state[3] as i32,
+    );
+    let mut state_e = _mm_set_epi32(state[4] as i32, 0, 0, 0);
+
+    for block in blocks {
+        // SAFETY: we use only unaligned loads with this pointer
+        #[allow(clippy::cast_ptr_alignment)]
+        let block_ptr = block.as_ptr() as *const __m128i;
+
+        let mut w0 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(0)), MASK);
+        let mut w1 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(1)), MASK);
+        let mut w2 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(2)), MASK);
+        let mut w3 = _mm_shuffle_epi8(_mm_loadu_si128(block_ptr.offset(3)), MASK);
+        #[allow(clippy::needless_late_init)]
+        let mut w4;
+
+        let mut h0 = state_abcd;
+        let mut h1 = _mm_add_epi32(state_e, w0);
+
+        // Rounds 0..20
+        h1 = _mm_sha1rnds4_epu32(h0, h1, 0);
+        h0 = rounds4!(h1, h0, w1, 0);
+        h1 = rounds4!(h0, h1, w2, 0);
+        h0 = rounds4!(h1, h0, w3, 0);
+        schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 0);
+
+        // Rounds 20..40
+        schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 1);
+        schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 1);
+        schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 1);
+        schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 1);
+        schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 1);
+
+        // Rounds 40..60
+        schedule_rounds4!(h0, h1, w1, w2, w3, w4, w0, 2);
+        schedule_rounds4!(h1, h0, w2, w3, w4, w0, w1, 2);
+        schedule_rounds4!(h0, h1, w3, w4, w0, w1, w2, 2);
+        schedule_rounds4!(h1, h0, w4, w0, w1, w2, w3, 2);
+        schedule_rounds4!(h0, h1, w0, w1, w2, w3, w4, 2);
+
+        // Rounds 60..80
+        schedule_rounds4!(h1, h0, w1, w2, w3, w4, w0, 3);
+        schedule_rounds4!(h0, h1, w2, w3, w4, w0, w1, 3);
+        schedule_rounds4!(h1, h0, w3, w4, w0, w1, w2, 3);
+        schedule_rounds4!(h0, h1, w4, w0, w1, w2, w3, 3);
+        schedule_rounds4!(h1, h0, w0, w1, w2, w3, w4, 3);
+
+        state_abcd = _mm_add_epi32(state_abcd, h0);
+        state_e = _mm_sha1nexte_epu32(h1, state_e);
+    }
+
+    state[0] = _mm_extract_epi32(state_abcd, 3) as u32;
+    state[1] = _mm_extract_epi32(state_abcd, 2) as u32;
+    state[2] = _mm_extract_epi32(state_abcd, 1) as u32;
+    state[3] = _mm_extract_epi32(state_abcd, 0) as u32;
+    state[4] = _mm_extract_epi32(state_e, 3) as u32;
+}
+
+cpufeatures::new!(shani_cpuid, "sha", "sse2", "ssse3", "sse4.1");
+
+pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
+    // TODO: Replace with https://github.com/rust-lang/rfcs/pull/2725
+    // after stabilization
+    if shani_cpuid::get() {
+        unsafe {
+            digest_blocks(state, blocks);
+        }
+    } else {
+        super::soft::compress(state, blocks);
+    }
+}
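The `cpufeatures`-based dispatch above also works in no_std contexts. Where std is available, the same runtime check could instead be written with the standard detection macro; this is an illustrative alternative under that assumption (the function name is hypothetical), not what the crate ships:

    pub fn compress_with_std_detection(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
        if std::arch::is_x86_feature_detected!("sha")
            && std::arch::is_x86_feature_detected!("sse2")
            && std::arch::is_x86_feature_detected!("ssse3")
            && std::arch::is_x86_feature_detected!("sse4.1")
        {
            // SAFETY: the target features required by `digest_blocks` were
            // just verified at runtime.
            unsafe { digest_blocks(state, blocks) }
        } else {
            super::soft::compress(state, blocks);
        }
    }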