//! LoongArch64 assembly backend

use core::arch::asm;

// Four 32-bit additive constants, one per group of rounds. `compress` pins
// them to registers $a4..$a7 and the round macros add them in via `roundtail!`.
// NOTE(review): the values match the standard SHA-1 round constants; the file
// itself does not name the algorithm — confirm against the caller.
const K: [u32; 4] = [0x5A827999, 0x6ED9EBA1, 0x8F1BBCDC, 0xCA62C1D6];

// Concatenates a whitespace-separated list of expressions (string literals,
// integer literals, nested macro calls) into one string literal via `concat!`.
// It lets the round macros splice register names and indices into asm text.
macro_rules! c {
    ($($l:expr)*) => {
        concat!($($l ,)*)
    };
}

// Round body for the first 16 rounds, where the message word comes straight
// from the input block.
//
// `$a`, `$b`, `$c`, `$d`, `$e` are the register names currently playing the
// five working-variable roles; `$i` is the round index (0..=15).
// Uses $t5 as scratch, reads the block through $a1, writes the 16-word scratch
// area at $sp, and adds the constant held in $a4.
macro_rules! round0a {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            // Load 32-bit word `$i` of the current block.
            "ld.w $t5, $a1, (" $i " * 4);"
            // Swap the bytes inside each 16-bit half, then swap the halves:
            // together this reverses the byte order of the 32-bit word.
            "revb.2h $t5, $t5;"
            "rotri.w $t5, $t5, 16;"
            // e += w[i]
            "add.w " $e ", " $e ", $t5;"
            // Keep w[i] in scratch slot `$i` for later schedule steps.
            "st.w $t5, $sp, (" $i " * 4);"
            // $t5 = ((c ^ d) & b) ^ d
            "xor $t5, " $c "," $d ";"
            "and $t5, $t5, " $b ";"
            "xor $t5, $t5, " $d ";"
            roundtail!($a, $b, $e, $i, "$a4")
        )
    };
}

// Message-schedule step for round `$i` (used from round 16 onwards).
//
// The scratch area at $sp is treated as a 16-entry ring indexed modulo 16.
// Computes w = rotate_left_1(w[i-3] ^ w[i-8] ^ w[i-14] ^ w[i-16]), adds it to
// the register named by `$e`, and stores it into ring slot `i & 0xF`.
// Uses $t5..$t8 as scratch; $t5 holds the new word only until the caller
// overwrites it.
macro_rules! scheldule {
    ($i:literal, $e:literal) => {
        c!(
            "ld.w $t5, $sp, (((" $i " - 3) & 0xF) * 4);"
            "ld.w $t6, $sp, (((" $i " - 8) & 0xF) * 4);"
            "ld.w $t7, $sp, (((" $i " - 14) & 0xF) * 4);"
            "ld.w $t8, $sp, (((" $i " - 16) & 0xF) * 4);"
            "xor $t5, $t5, $t6;"
            "xor $t5, $t5, $t7;"
            "xor $t5, $t5, $t8;"
            // Rotating a 32-bit value right by 31 equals rotating left by 1.
            "rotri.w $t5, $t5, 31;"
            // e += w[i]
            "add.w " $e "," $e ", $t5;"
            "st.w $t5, $sp, ((" $i " & 0xF) * 4);"
        )
    };
}

// Round body for rounds 16..=19: same boolean function and constant ($a4) as
// `round0a!`, but the message word comes from `scheldule!` instead of the
// input block.
macro_rules! round0b {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            scheldule!($i, $e)
            // $t5 = ((c ^ d) & b) ^ d
            "xor $t5," $c "," $d ";"
            "and $t5, $t5," $b ";"
            "xor $t5, $t5," $d ";"
            roundtail!($a, $b, $e, $i, "$a4")
        )
    };
}

// Round body for rounds 20..=39: scheduled word, boolean function b ^ c ^ d,
// constant taken from $a5.
macro_rules! round1 {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            scheldule!($i, $e)
            // $t5 = b ^ c ^ d
            "xor $t5," $b "," $c ";"
            "xor $t5, $t5," $d ";"
            roundtail!($a, $b, $e, $i, "$a5")
        )
    };
}

// Round body for rounds 40..=59: scheduled word, boolean function
// ((c | d) & b) | (c & d), constant taken from $a6.
// $t7 is reused as a second temporary here; `scheldule!` is finished with it
// by this point.
macro_rules! round2 {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            scheldule!($i, $e)
            // $t5 = (c | d) & b
            "or $t5," $c "," $d ";"
            "and $t5, $t5, " $b ";"
            // $t7 = c & d
            "and $t7," $c "," $d ";"
            // $t5 = $t5 | $t7
            "or $t5, $t5, $t7;"
            roundtail!($a, $b, $e, $i, "$a6")
        )
    };
}

// Round body for rounds 60..=79: identical instruction sequence to `round1!`
// (b ^ c ^ d) except that the constant is taken from $a7.
macro_rules! round3 {
    ($a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $i:literal) => {
        c!(
            scheldule!($i, $e)
            // $t5 = b ^ c ^ d
            "xor $t5," $b "," $c ";"
            "xor $t5, $t5," $d ";"
            roundtail!($a, $b, $e, $i, "$a7")
        )
    };
}

// Common tail of every round. On entry $t5 holds the boolean-function result
// and the message word has already been added to `$e`.
//
// `$k` names the register holding this round's constant. `$i` is accepted so
// that all round macros can forward the same argument list, but it is not
// referenced in the body.
macro_rules! roundtail {
    ($a:literal, $b:literal, $e:literal, $i:literal, $k:literal) => {
        c!(
            // b = rotate_right(b, 2), i.e. rotate_left(b, 30)
            "rotri.w " $b "," $b ", 2;"
            // e += f
            "add.w " $e "," $e ", $t5;"
            // e += k
            "add.w " $e "," $e "," $k ";"
            // e += rotate_right(a, 27), i.e. rotate_left(a, 5)
            "rotri.w $t5," $a ", 27;"
            "add.w " $e "," $e ", $t5;"
        )
    };
}

/// Runs the 80-round compression over every 64-byte block in `blocks`,
/// updating the five 32-bit words of `state` in place after each block.
///
/// Returns immediately without touching `state` when `blocks` is empty.
///
/// The assembly temporarily lowers `$sp` by 64 bytes to hold a 16-word
/// message-schedule ring and restores it before leaving the `asm!` block.
pub fn compress(state: &mut [u32; 5], blocks: &[[u8; 64]]) {
    // The loop below only tests the block counter *after* processing a block,
    // so a zero count must never reach the assembly.
    if blocks.is_empty() {
        return;
    }

    unsafe {
        asm!(
            // Allocate scratch stack space
            "addi.d $sp, $sp, -64;",
            // Load state
            "ld.w $t0, $a0, 0",
            "ld.w $t1, $a0, 4",
            "ld.w $t2, $a0, 8",
            "ld.w $t3, $a0, 12",
            "ld.w $t4, $a0, 16",
            // Top of the per-block loop.
            "42:",
            // Each successive round shifts the register-to-role assignment by
            // one instead of moving values between registers. The pattern has
            // period 5, so after 80 rounds the roles are back on $t0..$t4.
            //
            // Rounds 0..=15: message words read directly from the block.
            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 0),
            round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 1),
            round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 2),
            round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 3),
            round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 4),
            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 5),
            round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 6),
            round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 7),
            round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 8),
            round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 9),
            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 10),
            round0a!("$t4", "$t0", "$t1", "$t2", "$t3", 11),
            round0a!("$t3", "$t4", "$t0", "$t1", "$t2", 12),
            round0a!("$t2", "$t3", "$t4", "$t0", "$t1", 13),
            round0a!("$t1", "$t2", "$t3", "$t4", "$t0", 14),
            round0a!("$t0", "$t1", "$t2", "$t3", "$t4", 15),
            // Rounds 16..=19: same function/constant, scheduled message words.
            round0b!("$t4", "$t0", "$t1", "$t2", "$t3", 16),
            round0b!("$t3", "$t4", "$t0", "$t1", "$t2", 17),
            round0b!("$t2", "$t3", "$t4", "$t0", "$t1", 18),
            round0b!("$t1", "$t2", "$t3", "$t4", "$t0", 19),
            // Rounds 20..=39.
            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 20),
            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 21),
            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 22),
            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 23),
            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 24),
            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 25),
            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 26),
            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 27),
            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 28),
            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 29),
            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 30),
            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 31),
            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 32),
            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 33),
            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 34),
            round1!("$t0", "$t1", "$t2", "$t3", "$t4", 35),
            round1!("$t4", "$t0", "$t1", "$t2", "$t3", 36),
            round1!("$t3", "$t4", "$t0", "$t1", "$t2", 37),
            round1!("$t2", "$t3", "$t4", "$t0", "$t1", 38),
            round1!("$t1", "$t2", "$t3", "$t4", "$t0", 39),
            // Rounds 40..=59.
            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 40),
            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 41),
            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 42),
            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 43),
            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 44),
            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 45),
            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 46),
            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 47),
            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 48),
            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 49),
            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 50),
            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 51),
            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 52),
            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 53),
            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 54),
            round2!("$t0", "$t1", "$t2", "$t3", "$t4", 55),
            round2!("$t4", "$t0", "$t1", "$t2", "$t3", 56),
            round2!("$t3", "$t4", "$t0", "$t1", "$t2", 57),
            round2!("$t2", "$t3", "$t4", "$t0", "$t1", 58),
            round2!("$t1", "$t2", "$t3", "$t4", "$t0", 59),
            // Rounds 60..=79.
            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 60),
            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 61),
            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 62),
            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 63),
            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 64),
            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 65),
            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 66),
            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 67),
            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 68),
            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 69),
            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 70),
            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 71),
            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 72),
            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 73),
            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 74),
            round3!("$t0", "$t1", "$t2", "$t3", "$t4", 75),
            round3!("$t4", "$t0", "$t1", "$t2", "$t3", 76),
            round3!("$t3", "$t4", "$t0", "$t1", "$t2", 77),
            round3!("$t2", "$t3", "$t4", "$t0", "$t1", 78),
            round3!("$t1", "$t2", "$t3", "$t4", "$t0", 79),
            // Update state registers: reload the state words that were in
            // memory at the start of this block and add them to the working
            // registers. $t5 is reused for both the first and the fifth word.
            "ld.w $t5, $a0, 0", // a
            "ld.w $t6, $a0, 4", // b
            "ld.w $t7, $a0, 8", // c
            "ld.w $t8, $a0, 12", // d
            "add.w $t0, $t0, $t5",
            "ld.w $t5, $a0, 16", // e
            "add.w $t1, $t1, $t6",
            "add.w $t2, $t2, $t7",
            "add.w $t3, $t3, $t8",
            "add.w $t4, $t4, $t5",
            // Save updated state; $t0..$t4 stay live as the starting values
            // for the next block.
            "st.w $t0, $a0, 0",
            "st.w $t1, $a0, 4",
            "st.w $t2, $a0, 8",
            "st.w $t3, $a0, 12",
            "st.w $t4, $a0, 16",
            // Looping over blocks: advance the block pointer by 64 bytes,
            // decrement the remaining count, and repeat while it is non-zero.
            "addi.d $a1, $a1, 64",
            "addi.d $a2, $a2, -1",
            "bnez $a2, 42b",
            // Restore stack register
            "addi.d $sp, $sp, 64",
            // $a0: address of the state words; the register is not modified,
            // but the assembly loads and stores through it.
            in("$a0") state,
            // $a1 (block pointer) and $a2 (remaining block count) are
            // modified by the loop, so their final values are discarded.
            inout("$a1") blocks.as_ptr() => _,
            inout("$a2") blocks.len() => _,
            // Per-round-group constants referenced by the round macros.
            in("$a4") K[0],
            in("$a5") K[1],
            in("$a6") K[2],
            in("$a7") K[3],
            // Clobbers: working variables ($t0..$t4) and temporaries ($t5..$t8).
            out("$t0") _,
            out("$t1") _,
            out("$t2") _,
            out("$t3") _,
            out("$t4") _,
            out("$t5") _,
            out("$t6") _,
            out("$t7") _,
            out("$t8") _,
            options(preserves_flags),
        );
    }
}