summaryrefslogtreecommitdiffstats
path: root/vendor/sha2/src
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/sha2/src')
-rw-r--r--vendor/sha2/src/sha256.rs3
-rw-r--r--vendor/sha2/src/sha256/loongarch64_asm.rs227
-rw-r--r--vendor/sha2/src/sha512.rs3
-rw-r--r--vendor/sha2/src/sha512/loongarch64_asm.rs242
4 files changed, 475 insertions, 0 deletions
diff --git a/vendor/sha2/src/sha256.rs b/vendor/sha2/src/sha256.rs
index a45331e17..8f8287836 100644
--- a/vendor/sha2/src/sha256.rs
+++ b/vendor/sha2/src/sha256.rs
@@ -17,6 +17,9 @@ cfg_if::cfg_if! {
mod soft;
mod aarch64;
use aarch64::compress;
+ } else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
+ mod loongarch64_asm;
+ use loongarch64_asm::compress;
} else {
mod soft;
use soft::compress;
diff --git a/vendor/sha2/src/sha256/loongarch64_asm.rs b/vendor/sha2/src/sha256/loongarch64_asm.rs
new file mode 100644
index 000000000..c80fce8bb
--- /dev/null
+++ b/vendor/sha2/src/sha256/loongarch64_asm.rs
@@ -0,0 +1,227 @@
+//! LoongArch64 assembly backend
+
+macro_rules! c { // Joins a whitespace-separated run of expressions into a single string with `concat!`.
+ ($($l:expr)*) => {
+ concat!($($l ,)*)
+ };
+}
+
+macro_rules! rounda { // Rounds 0-15: fetch message word $i from the input block, byte-reverse it into $a5, then run `roundtail!`.
+ ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
+ c!(
+ "ld.w $a5, $a1, (" $i " * 4);" // NOTE(review): the block is only byte-aligned; presumably relies on unaligned ld.w being supported - confirm
+ "revb.2h $a5, $a5;" // reverse the bytes inside each 16-bit half
+ "rotri.w $a5, $a5, 16;" // swap the two halves, completing the 32-bit byte reversal
+ roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
+ )
+ };
+}
+
+macro_rules! roundb { // Rounds 16 and up: build W[i] in $a5 from the 16-word ring buffer at $sp, then run `roundtail!`.
+ ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
+ c!(
+ "ld.w $a4, $sp, (((" $i " - 15) & 0xF) * 4);" // W[i-15]
+ "ld.w $a5, $sp, (((" $i " - 16) & 0xF) * 4);" // W[i-16]
+ "ld.w $a6, $sp, (((" $i " - 7) & 0xF) * 4);" // W[i-7]
+ "add.w $a5, $a5, $a6;"
+ "rotri.w $a6, $a4, 18;"
+ "srli.w $a7, $a4, 3;"
+ "rotri.w $a4, $a4, 7;"
+ "xor $a6, $a6, $a7;"
+ "xor $a4, $a4, $a6;" // sigma0(W[i-15]) = rotr 7 ^ rotr 18 ^ shr 3
+ "add.w $a5, $a5, $a4;"
+ "ld.w $a4, $sp, (((" $i " - 2) & 0xF) * 4);" // W[i-2]
+ "rotri.w $a6, $a4, 19;"
+ "srli.w $a7, $a4, 10;"
+ "rotri.w $a4, $a4, 17;"
+ "xor $a6, $a6, $a7;"
+ "xor $a4, $a4, $a6;" // sigma1(W[i-2]) = rotr 17 ^ rotr 19 ^ shr 10
+ "add.w $a5, $a5, $a4;" // $a5 = W[i-16] + W[i-7] + sigma0 + sigma1
+ roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
+ )
+ };
+}
+
+macro_rules! roundtail { // Shared round body: expects W[i] in $a5 and the K table pointer in $a3; uses $a4, $a6, $a7 as scratch.
+ ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
+ c!(
+ // Part 0: h += W[i] + K[i] + (rotr(e,6) ^ rotr(e,11) ^ rotr(e,25)) + (((g ^ f) & e) ^ g)
+ "rotri.w $a6, " $e ", 11;"
+ "rotri.w $a7, " $e ", 25;"
+ "rotri.w $a4, " $e ", 6;"
+ "xor $a6, $a6, $a7;"
+ "xor $a4, $a4, $a6;"
+ "xor $a6, " $g ", " $f ";"
+ "ld.w $a7, $a3, " $i " * 4;"
+ "and $a6, $a6, " $e ";"
+ "xor $a6, $a6, " $g ";"
+ "add.w $a4, $a4, $a6;"
+ "add.w $a4, $a4, $a7;"
+ "add.w " $h ", " $h ", $a5;"
+ "add.w " $h ", " $h ", $a4;"
+ // Part 1: d += h (the sum accumulated in Part 0)
+ "add.w " $d ", " $d ", " $h ";"
+ // Part 2: h += (rotr(a,2) ^ rotr(a,13) ^ rotr(a,22)) + (((c | b) & a) | (c & b))
+ "rotri.w $a6, " $a ", 13;"
+ "rotri.w $a7, " $a ", 22;"
+ "rotri.w $a4, " $a ", 2;"
+ "xor $a6, $a6, $a7;"
+ "xor $a4, $a4, $a6;"
+ "add.w " $h ", " $h ", $a4;"
+ "or $a4, " $c ", " $b ";"
+ "and $a6, " $c ", " $b ";"
+ "and $a4, $a4, " $a ";"
+ "or $a4, $a4, $a6;"
+ "add.w " $h ", " $h ", $a4;"
+ "st.w $a5, $sp, ((" $i " & 0xF) * 4);" // keep W[i] in the 16-word ring buffer for later rounds
+ )
+ };
+}
+
+pub fn compress(state: &mut [u32; 8], blocks: &[[u8; 64]]) { // Folds every 64-byte block into `state` with the inline-asm rounds below.
+ if blocks.is_empty() { // the asm loop tests its counter only after a full pass, so zero blocks must never reach it
+ return;
+ }
+
+ unsafe { // SAFETY: the asm reads/writes the 8 words of `state`, reads 64 bytes per block for `blocks.len()` (non-zero) blocks, and frees the 64 stack bytes it allocates. NOTE(review): assumes K32 holds at least 64 words - confirm
+ core::arch::asm!(
+ // Allocate 64 bytes of scratch stack space (the 16-word ring buffer the round macros index via $sp)
+ "addi.d $sp, $sp, -64;",
+
+ // Load the eight state words into $t0..$t7
+ "ld.w $t0, $a0, 0",
+ "ld.w $t1, $a0, 4",
+ "ld.w $t2, $a0, 8",
+ "ld.w $t3, $a0, 12",
+ "ld.w $t4, $a0, 16",
+ "ld.w $t5, $a0, 20",
+ "ld.w $t6, $a0, 24",
+ "ld.w $t7, $a0, 28",
+
+ "42:", // loop head: one pass per block
+
+ // Do 64 rounds of hashing; the register arguments rotate by one position each round
+ rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(30, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+
+ // Add the previous state (still in memory at $a0) into the working registers
+ "ld.w $a4, $a0, 0", // a
+ "ld.w $a5, $a0, 4", // b
+ "ld.w $a6, $a0, 8", // c
+ "ld.w $a7, $a0, 12", // d
+ "add.w $t0, $t0, $a4",
+ "add.w $t1, $t1, $a5",
+ "add.w $t2, $t2, $a6",
+ "add.w $t3, $t3, $a7",
+ "ld.w $a4, $a0, 16", // e
+ "ld.w $a5, $a0, 20", // f
+ "ld.w $a6, $a0, 24", // g
+ "ld.w $a7, $a0, 28", // h
+ "add.w $t4, $t4, $a4",
+ "add.w $t5, $t5, $a5",
+ "add.w $t6, $t6, $a6",
+ "add.w $t7, $t7, $a7",
+
+ // Save updated state
+ "st.w $t0, $a0, 0",
+ "st.w $t1, $a0, 4",
+ "st.w $t2, $a0, 8",
+ "st.w $t3, $a0, 12",
+ "st.w $t4, $a0, 16",
+ "st.w $t5, $a0, 20",
+ "st.w $t6, $a0, 24",
+ "st.w $t7, $a0, 28",
+
+ // Advance to the next block and loop while the block counter is non-zero
+ "addi.d $a1, $a1, 64",
+ "addi.d $a2, $a2, -1",
+ "bnez $a2, 42b",
+
+ // Restore stack register
+ "addi.d $sp, $sp, 64",
+
+ in("$a0") state,
+ inout("$a1") blocks.as_ptr() => _,
+ inout("$a2") blocks.len() => _,
+ in("$a3") crate::consts::K32.as_ptr(),
+
+ // Clobbers: round temporaries ($a4-$a7) and working state ($t0-$t7)
+ out("$a4") _,
+ out("$a5") _,
+ out("$a6") _,
+ out("$a7") _,
+ out("$t0") _,
+ out("$t1") _,
+ out("$t2") _,
+ out("$t3") _,
+ out("$t4") _,
+ out("$t5") _,
+ out("$t6") _,
+ out("$t7") _,
+
+ options(preserves_flags),
+ );
+ }
+}
diff --git a/vendor/sha2/src/sha512.rs b/vendor/sha2/src/sha512.rs
index af4178c0b..dfe0b454f 100644
--- a/vendor/sha2/src/sha512.rs
+++ b/vendor/sha2/src/sha512.rs
@@ -19,6 +19,9 @@ cfg_if::cfg_if! {
mod soft;
mod aarch64;
use aarch64::compress;
+ } else if #[cfg(all(feature = "loongarch64_asm", target_arch = "loongarch64"))] {
+ mod loongarch64_asm;
+ use loongarch64_asm::compress;
} else {
mod soft;
use soft::compress;
diff --git a/vendor/sha2/src/sha512/loongarch64_asm.rs b/vendor/sha2/src/sha512/loongarch64_asm.rs
new file mode 100644
index 000000000..557089def
--- /dev/null
+++ b/vendor/sha2/src/sha512/loongarch64_asm.rs
@@ -0,0 +1,242 @@
+//! LoongArch64 assembly backend
+
+macro_rules! c { // Joins a whitespace-separated run of expressions into a single string with `concat!`.
+ ($($l:expr)*) => {
+ concat!($($l ,)*)
+ };
+}
+
+macro_rules! rounda { // Rounds 0-15: fetch 64-bit message word $i from the input block, byte-reverse it into $a5, then run `roundtail!`.
+ ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
+ c!(
+ "ld.d $a5, $a1, (" $i " * 8);" // NOTE(review): the block is only byte-aligned; presumably relies on unaligned ld.d being supported - confirm
+ "revb.d $a5, $a5;" // reverse the byte order of the whole doubleword
+ roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
+ )
+ };
+}
+
+macro_rules! roundb { // Rounds 16 and up: build W[i] in $a5 from the 16-entry ring buffer at $sp, then run `roundtail!`.
+ ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
+ c!(
+ "ld.d $a4, $sp, (((" $i " - 15) & 0xF) * 8);" // W[i-15]
+ "ld.d $a5, $sp, (((" $i " - 16) & 0xF) * 8);" // W[i-16]
+ "ld.d $a6, $sp, (((" $i " - 7) & 0xF) * 8);" // W[i-7]
+ "add.d $a5, $a5, $a6;"
+ "rotri.d $a6, $a4, 8;"
+ "srli.d $a7, $a4, 7;"
+ "rotri.d $a4, $a4, 1;"
+ "xor $a6, $a6, $a7;"
+ "xor $a4, $a4, $a6;" // sigma0(W[i-15]) = rotr 1 ^ rotr 8 ^ shr 7
+ "add.d $a5, $a5, $a4;"
+ "ld.d $a4, $sp, (((" $i " - 2) & 0xF) * 8);" // W[i-2]
+ "rotri.d $a6, $a4, 61;"
+ "srli.d $a7, $a4, 6;"
+ "rotri.d $a4, $a4, 19;"
+ "xor $a6, $a6, $a7;"
+ "xor $a4, $a4, $a6;" // sigma1(W[i-2]) = rotr 19 ^ rotr 61 ^ shr 6
+ "add.d $a5, $a5, $a4;" // $a5 = W[i-16] + W[i-7] + sigma0 + sigma1
+ roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
+ )
+ };
+}
+
+macro_rules! roundtail { // Shared round body: expects W[i] in $a5 and the K table pointer in $a3; uses $a4, $a6, $a7 as scratch.
+ ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
+ c!(
+ // Part 0: h += W[i] + K[i] + (rotr(e,14) ^ rotr(e,18) ^ rotr(e,41)) + (((g ^ f) & e) ^ g)
+ "rotri.d $a6, " $e ", 18;"
+ "rotri.d $a7, " $e ", 41;"
+ "rotri.d $a4, " $e ", 14;"
+ "xor $a6, $a6, $a7;"
+ "xor $a4, $a4, $a6;"
+ "xor $a6, " $g ", " $f ";"
+ "ld.d $a7, $a3, " $i " * 8;"
+ "and $a6, $a6, " $e ";"
+ "xor $a6, $a6, " $g ";"
+ "add.d $a4, $a4, $a6;"
+ "add.d $a4, $a4, $a7;"
+ "add.d " $h ", " $h ", $a5;"
+ "add.d " $h ", " $h ", $a4;"
+ // Part 1: d += h (the sum accumulated in Part 0)
+ "add.d " $d ", " $d ", " $h ";"
+ // Part 2: h += (rotr(a,28) ^ rotr(a,34) ^ rotr(a,39)) + (((c | b) & a) | (c & b))
+ "rotri.d $a6, " $a ", 39;"
+ "rotri.d $a7, " $a ", 34;"
+ "rotri.d $a4, " $a ", 28;"
+ "xor $a6, $a6, $a7;"
+ "xor $a4, $a4, $a6;"
+ "add.d " $h ", " $h ", $a4;"
+ "or $a4, " $c ", " $b ";"
+ "and $a6, " $c ", " $b ";"
+ "and $a4, $a4, " $a ";"
+ "or $a4, $a4, $a6;"
+ "add.d " $h ", " $h ", $a4;"
+ "st.d $a5, $sp, ((" $i " & 0xF) * 8);" // keep W[i] in the 16-entry ring buffer for later rounds
+ )
+ };
+}
+
+pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) { // Folds every 128-byte block into `state` with the inline-asm rounds below.
+ if blocks.is_empty() { // the asm loop tests its counter only after a full pass, so zero blocks must never reach it
+ return;
+ }
+
+ unsafe { // SAFETY: the asm reads/writes the 8 words of `state`, reads 128 bytes per block for `blocks.len()` (non-zero) blocks, and frees the 128 stack bytes it allocates. NOTE(review): assumes K64 holds at least 80 words - confirm
+ core::arch::asm!(
+ // Allocate 128 bytes of scratch stack space (the 16-entry ring buffer the round macros index via $sp)
+ "addi.d $sp, $sp, -128;",
+
+ // Load the eight state words into $t0..$t7
+ "ld.d $t0, $a0, 0",
+ "ld.d $t1, $a0, 8",
+ "ld.d $t2, $a0, 16",
+ "ld.d $t3, $a0, 24",
+ "ld.d $t4, $a0, 32",
+ "ld.d $t5, $a0, 40",
+ "ld.d $t6, $a0, 48",
+ "ld.d $t7, $a0, 56",
+
+ "42:", // loop head: one pass per block
+
+ // Do 80 rounds of hashing; the register arguments rotate by one position each round
+ rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(30, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(64, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(65, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(66, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(67, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(68, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(69, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(70, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(71, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+ roundb!(72, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
+ roundb!(73, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
+ roundb!(74, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
+ roundb!(75, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
+ roundb!(76, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
+ roundb!(77, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
+ roundb!(78, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
+ roundb!(79, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
+
+ // Add the previous state (still in memory at $a0) into the working registers
+ "ld.d $a4, $a0, 0", // a
+ "ld.d $a5, $a0, 8", // b
+ "ld.d $a6, $a0, 16", // c
+ "ld.d $a7, $a0, 24", // d
+ "add.d $t0, $t0, $a4",
+ "add.d $t1, $t1, $a5",
+ "add.d $t2, $t2, $a6",
+ "add.d $t3, $t3, $a7",
+ "ld.d $a4, $a0, 32", // e
+ "ld.d $a5, $a0, 40", // f
+ "ld.d $a6, $a0, 48", // g
+ "ld.d $a7, $a0, 56", // h
+ "add.d $t4, $t4, $a4",
+ "add.d $t5, $t5, $a5",
+ "add.d $t6, $t6, $a6",
+ "add.d $t7, $t7, $a7",
+
+ // Save updated state
+ "st.d $t0, $a0, 0",
+ "st.d $t1, $a0, 8",
+ "st.d $t2, $a0, 16",
+ "st.d $t3, $a0, 24",
+ "st.d $t4, $a0, 32",
+ "st.d $t5, $a0, 40",
+ "st.d $t6, $a0, 48",
+ "st.d $t7, $a0, 56",
+
+ // Advance to the next block and loop while the block counter is non-zero
+ "addi.d $a1, $a1, 128",
+ "addi.d $a2, $a2, -1",
+ "bnez $a2, 42b",
+
+ // Restore stack register
+ "addi.d $sp, $sp, 128",
+
+ in("$a0") state,
+ inout("$a1") blocks.as_ptr() => _,
+ inout("$a2") blocks.len() => _,
+ in("$a3") crate::consts::K64.as_ptr(),
+
+ // Clobbers: round temporaries ($a4-$a7) and working state ($t0-$t7)
+ out("$a4") _,
+ out("$a5") _,
+ out("$a6") _,
+ out("$a7") _,
+ out("$t0") _,
+ out("$t1") _,
+ out("$t2") _,
+ out("$t3") _,
+ out("$t4") _,
+ out("$t5") _,
+ out("$t6") _,
+ out("$t7") _,
+
+ options(preserves_flags),
+ );
+ }
+}