summaryrefslogtreecommitdiffstats
path: root/vendor/sha2/src/sha512/loongarch64_asm.rs
blob: 557089def147be871319a7262d3ee46082410260 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
//! LoongArch64 assembly backend

macro_rules! c {
    ($($l:expr)*) => {
        concat!($($l ,)*)
    };
}

macro_rules! rounda {
    ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
        c!(
            "ld.d    $a5, $a1, (" $i " * 8);"
            "revb.d  $a5, $a5;"
            roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
        )
    };
}

macro_rules! roundb {
    ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
        c!(
            "ld.d    $a4, $sp, (((" $i " - 15) & 0xF) * 8);"
            "ld.d    $a5, $sp, (((" $i " - 16) & 0xF) * 8);"
            "ld.d    $a6, $sp, (((" $i " -  7) & 0xF) * 8);"
            "add.d   $a5, $a5, $a6;"
            "rotri.d $a6, $a4, 8;"
            "srli.d  $a7, $a4, 7;"
            "rotri.d $a4, $a4, 1;"
            "xor     $a6, $a6, $a7;"
            "xor     $a4, $a4, $a6;"
            "add.d   $a5, $a5, $a4;"
            "ld.d    $a4, $sp, (((" $i " -  2) & 0xF) * 8);"
            "rotri.d $a6, $a4, 61;"
            "srli.d  $a7, $a4,  6;"
            "rotri.d $a4, $a4, 19;"
            "xor     $a6, $a6, $a7;"
            "xor     $a4, $a4, $a6;"
            "add.d   $a5, $a5, $a4;"
            roundtail!($i, $a, $b, $c, $d, $e, $f, $g, $h)
        )
    };
}

macro_rules! roundtail {
    ($i:literal, $a:literal, $b:literal, $c:literal, $d:literal, $e:literal, $f:literal, $g:literal, $h:literal) => {
        c!(
            // Part 0
            "rotri.d $a6, " $e ", 18;"
            "rotri.d $a7, " $e ", 41;"
            "rotri.d $a4, " $e ", 14;"
            "xor     $a6, $a6, $a7;"
            "xor     $a4, $a4, $a6;"
            "xor     $a6, " $g ", " $f ";"
            "ld.d    $a7, $a3, " $i " * 8;"
            "and     $a6, $a6, " $e ";"
            "xor     $a6, $a6, " $g ";"
            "add.d   $a4, $a4, $a6;"
            "add.d   $a4, $a4, $a7;"
            "add.d   " $h ", " $h ", $a5;"
            "add.d   " $h ", " $h ", $a4;"
            // Part 1
            "add.d   " $d ", " $d ", " $h ";"
            // Part 2
            "rotri.d $a6, " $a ", 39;"
            "rotri.d $a7, " $a ", 34;"
            "rotri.d $a4, " $a ", 28;"
            "xor     $a6, $a6, $a7;"
            "xor     $a4, $a4, $a6;"
            "add.d   " $h ", " $h ", $a4;"
            "or      $a4, " $c ", " $b ";"
            "and     $a6, " $c ", " $b ";"
            "and     $a4, $a4, " $a ";"
            "or      $a4, $a4, $a6;"
            "add.d   " $h ", " $h ", $a4;"
            "st.d    $a5, $sp, ((" $i " & 0xF) * 8);"
        )
    };
}

pub fn compress(state: &mut [u64; 8], blocks: &[[u8; 128]]) {
    if blocks.is_empty() {
        return;
    }

    unsafe {
        core::arch::asm!(
            // Allocate scratch stack space
            "addi.d  $sp, $sp, -128;",

            // Load state
            "ld.d    $t0, $a0, 0",
            "ld.d    $t1, $a0, 8",
            "ld.d    $t2, $a0, 16",
            "ld.d    $t3, $a0, 24",
            "ld.d    $t4, $a0, 32",
            "ld.d    $t5, $a0, 40",
            "ld.d    $t6, $a0, 48",
            "ld.d    $t7, $a0, 56",

            "42:",

            // Do 64 rounds of hashing
            rounda!( 0, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            rounda!( 1, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            rounda!( 2, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            rounda!( 3, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            rounda!( 4, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            rounda!( 5, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            rounda!( 6, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            rounda!( 7, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            rounda!( 8, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            rounda!( 9, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            rounda!(10, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            rounda!(11, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            rounda!(12, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            rounda!(13, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            rounda!(14, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            rounda!(15, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(16, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(17, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(18, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            roundb!(19, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            roundb!(20, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            roundb!(21, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            roundb!(22, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            roundb!(23, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(24, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(25, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(26, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            roundb!(27, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            roundb!(28, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            roundb!(29, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            roundb!(30, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            roundb!(31, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(32, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(33, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(34, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            roundb!(35, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            roundb!(36, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            roundb!(37, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            roundb!(38, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            roundb!(39, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(40, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(41, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(42, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            roundb!(43, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            roundb!(44, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            roundb!(45, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            roundb!(46, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            roundb!(47, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(48, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(49, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(50, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            roundb!(51, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            roundb!(52, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            roundb!(53, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            roundb!(54, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            roundb!(55, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(56, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(57, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(58, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            roundb!(59, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            roundb!(60, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            roundb!(61, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            roundb!(62, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            roundb!(63, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(64, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(65, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(66, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            roundb!(67, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            roundb!(68, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            roundb!(69, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            roundb!(70, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            roundb!(71, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),
            roundb!(72, "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7"),
            roundb!(73, "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6"),
            roundb!(74, "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4", "$t5"),
            roundb!(75, "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3", "$t4"),
            roundb!(76, "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2", "$t3"),
            roundb!(77, "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1" , "$t2"),
            roundb!(78, "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0" , "$t1"),
            roundb!(79, "$t1" , "$t2", "$t3", "$t4", "$t5", "$t6", "$t7", "$t0"),

            // Update state registers
            "ld.d    $a4, $a0, 0",  // a
            "ld.d    $a5, $a0, 8",  // b
            "ld.d    $a6, $a0, 16",  // c
            "ld.d    $a7, $a0, 24", // d
            "add.d   $t0, $t0, $a4",
            "add.d   $t1, $t1, $a5",
            "add.d   $t2, $t2, $a6",
            "add.d   $t3, $t3, $a7",
            "ld.d    $a4, $a0, 32", // e
            "ld.d    $a5, $a0, 40", // f
            "ld.d    $a6, $a0, 48", // g
            "ld.d    $a7, $a0, 56", // h
            "add.d   $t4, $t4, $a4",
            "add.d   $t5, $t5, $a5",
            "add.d   $t6, $t6, $a6",
            "add.d   $t7, $t7, $a7",

            // Save updated state
            "st.d    $t0, $a0, 0",
            "st.d    $t1, $a0, 8",
            "st.d    $t2, $a0, 16",
            "st.d    $t3, $a0, 24",
            "st.d    $t4, $a0, 32",
            "st.d    $t5, $a0, 40",
            "st.d    $t6, $a0, 48",
            "st.d    $t7, $a0, 56",

            // Looping over blocks
            "addi.d  $a1, $a1, 128",
            "addi.d  $a2, $a2, -1",
            "bnez    $a2, 42b",

            // Restore stack register
            "addi.d  $sp, $sp, 128",

            in("$a0") state,
            inout("$a1") blocks.as_ptr() => _,
            inout("$a2") blocks.len() => _,
            in("$a3") crate::consts::K64.as_ptr(),

            // Clobbers
            out("$a4") _,
            out("$a5") _,
            out("$a6") _,
            out("$a7") _,
            out("$t0") _,
            out("$t1") _,
            out("$t2") _,
            out("$t3") _,
            out("$t4") _,
            out("$t5") _,
            out("$t6") _,
            out("$t7") _,

            options(preserves_flags),
        );
    }
}