/*
 * Origin: src/crypto/isa-l/isa-l_crypto/sha256_mb/aarch64/sha256_mb_x3_ce.S
 * (blob 6ed1591ba7deb22ec67f1eca84ccb41f6e3c9d19)
 */
/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
	.arch armv8-a+crypto
	.text
	.align	2
	.p2align 3,,7

/*
Macros
*/

/*
 * declare_var_vector_reg name, reg
 *
 * Gives SIMD register number <reg> three symbolic names, one per register
 * view used in this file:
 *   <name>_v  ->  v<reg>   (vector form, used with an arrangement such as .4s)
 *   <name>_q  ->  q<reg>   (128-bit form)
 *   <name>_s  ->  s<reg>   (32-bit scalar form)
 */
.macro	declare_var_vector_reg name:req,reg:req
	\name\()_v	.req	v\reg
	\name\()_q	.req	q\reg
	\name\()_s	.req	s\reg
.endm
/**
sha256_4_rounds_high msg, tmp0, tmp1

Performs four SHA-256 rounds on all three lanes (l0, l1, l2) without any
message-schedule update.  It is invoked directly for the late rounds
(starting at round 48) and is also the core of sha256_4_rounds_low.

Arguments are register-name stems; the lane prefix (l0_/l1_/l2_) and the
view suffix (_v/_q) are added inside the macro.
  msg   message register whose words are added to the round constants
        loaded here; the sum is left in tmp1 for the NEXT group of rounds
  tmp0  W+K sum prepared by the previous step; consumed by the four
        rounds executed here
  tmp1  receives msg + key

Implicit state:
  tmp (x register)   points at the next 16 bytes of round constants on
                     entry and is advanced by 16
  key                overwritten with those constants
  lN_tmp2            overwritten with a copy of lN_abcd taken before the
                     rounds, as required by sha256h2
  lN_abcd, lN_efgh   updated in place
*/
.macro sha256_4_rounds_high msg:req,tmp0:req,tmp1:req
	// fetch the next four round constants
	ldr		key_q , [tmp]
	// keep the pre-round abcd of each lane for sha256h2 below
	mov		l0_tmp2_v.16b,l0_abcd_v.16b
	mov		l1_tmp2_v.16b,l1_abcd_v.16b
	mov		l2_tmp2_v.16b,l2_abcd_v.16b
	add		tmp,tmp,16
	// tmp1 = msg + key, to be consumed by the following group of rounds
	add		l0_\tmp1\()_v.4s,l0_\msg\()_v.4s,key_v.4s
	add		l1_\tmp1\()_v.4s,l1_\msg\()_v.4s,key_v.4s
	add		l2_\tmp1\()_v.4s,l2_\msg\()_v.4s,key_v.4s
	// four rounds per lane using the previously prepared tmp0
	sha256h		l0_abcd_q,l0_efgh_q,l0_\tmp0\()_v.4s
	sha256h		l1_abcd_q,l1_efgh_q,l1_\tmp0\()_v.4s
	sha256h		l2_abcd_q,l2_efgh_q,l2_\tmp0\()_v.4s
	sha256h2	l0_efgh_q,l0_tmp2_q,l0_\tmp0\()_v.4s
	sha256h2	l1_efgh_q,l1_tmp2_q,l1_\tmp0\()_v.4s
	sha256h2	l2_efgh_q,l2_tmp2_q,l2_\tmp0\()_v.4s

.endm
/**
sha256_4_rounds_low msg0, msg1, msg2, msg3, tmp0, tmp1

Macro for rounds 0-47: performs four SHA-256 rounds on all three lanes and,
around them, updates message register msg0 with the SHA-256 message-schedule
instructions (sha256su0 before the rounds, sha256su1 after them).

Arguments are register-name stems, as for sha256_4_rounds_high.
  msg0        schedule register that is rewritten in place
  msg1        second operand of sha256su0; also the register that
              sha256_4_rounds_high adds to the next round constants
  msg2, msg3  operands of sha256su1
  tmp0        W+K sum consumed by the rounds executed here
  tmp1        receives msg1 + key for the next group of rounds

Successive invocations rotate the msg arguments by one position and swap
tmp0/tmp1.  Implicit state (tmp, key, lN_tmp2, lN_abcd, lN_efgh) is the
same as for sha256_4_rounds_high.
*/
.macro sha256_4_rounds_low msg0:req,msg1:req,msg2:req,msg3:req,tmp0:req,tmp1:req
	// first half of the schedule update of msg0
	sha256su0		l0_\msg0\()_v.4s,l0_\msg1\()_v.4s
	sha256su0		l1_\msg0\()_v.4s,l1_\msg1\()_v.4s
	sha256su0		l2_\msg0\()_v.4s,l2_\msg1\()_v.4s
	// four rounds with tmp0; prepares tmp1 = msg1 + next constants
	sha256_4_rounds_high	\msg1,\tmp0,\tmp1
	// second half of the schedule update of msg0
	sha256su1		l0_\msg0\()_v.4s,l0_\msg2\()_v.4s,l0_\msg3\()_v.4s
	sha256su1		l1_\msg0\()_v.4s,l1_\msg2\()_v.4s,l1_\msg3\()_v.4s
	sha256su1		l2_\msg0\()_v.4s,l2_\msg2\()_v.4s,l2_\msg3\()_v.4s
.endm


/*
 * Register allocation.
 *
 * Every name below gets _q, _v and _s aliases from declare_var_vector_reg.
 * Lanes are called l0, l1 and l2.
 *
 *   v0  - v5    working digest (abcd/efgh) of the three lanes
 *   v6  - v14   per-lane scratch (tmp0, tmp1, tmp2)
 *   v16 - v27   per-lane message schedule (msg0..msg3)
 *   v31         round constants
 */

/* Round constants for the current group of four rounds */
	declare_var_vector_reg	key,31

/* Working digest state */
	declare_var_vector_reg	l0_abcd,0
	declare_var_vector_reg	l0_efgh,1
	declare_var_vector_reg	l1_abcd,2
	declare_var_vector_reg	l1_efgh,3
	declare_var_vector_reg	l2_abcd,4
	declare_var_vector_reg	l2_efgh,5

/*
 * Digest as it was before the current block.
 * These names share registers with message registers:
 *   l0_*_saved -> v20/v21 (also l1_msg0/l1_msg1)
 *   l1_*_saved -> v16/v17 (also l0_msg0/l0_msg1)
 *   l2_*_saved -> v24/v25 (also l2_msg0/l2_msg1)
 * so they may only be loaded once msg0 and msg1 of every lane are dead.
 */
	declare_var_vector_reg	l0_abcd_saved,20
	declare_var_vector_reg	l0_efgh_saved,21
	declare_var_vector_reg	l1_abcd_saved,16
	declare_var_vector_reg	l1_efgh_saved,17
	declare_var_vector_reg	l2_abcd_saved,24
	declare_var_vector_reg	l2_efgh_saved,25

/* Temporary variables */
	declare_var_vector_reg	l0_tmp0,6
	declare_var_vector_reg	l0_tmp1,7
	declare_var_vector_reg	l0_tmp2,8
	declare_var_vector_reg	l1_tmp0,9
	declare_var_vector_reg	l1_tmp1,10
	declare_var_vector_reg	l1_tmp2,11
	declare_var_vector_reg	l2_tmp0,12
	declare_var_vector_reg	l2_tmp1,13
	declare_var_vector_reg	l2_tmp2,14

/* Message schedule, four registers (16 words) per lane */
	declare_var_vector_reg	l0_msg0,16
	declare_var_vector_reg	l0_msg1,17
	declare_var_vector_reg	l0_msg2,18
	declare_var_vector_reg	l0_msg3,19
	declare_var_vector_reg	l1_msg0,20
	declare_var_vector_reg	l1_msg1,21
	declare_var_vector_reg	l1_msg2,22
	declare_var_vector_reg	l1_msg3,23
	declare_var_vector_reg	l2_msg0,24
	declare_var_vector_reg	l2_msg1,25
	declare_var_vector_reg	l2_msg2,26
	declare_var_vector_reg	l2_msg3,27



/*
 * void sha256_mb_ce_x3(SHA256_JOB *, SHA256_JOB *, SHA256_JOB *, int);
 *
 * Runs the SHA-256 block function over three independent jobs ("lanes"
 * l0, l1, l2) in lock step, using the ARMv8 Crypto Extension instructions.
 *
 * In:
 *   x0, x1, x2  job pointers, one per lane.  For each job this routine
 *               - loads the 8 bytes at offset 0 as the address of the
 *                 input data, and
 *               - uses the 32 bytes at offsets 64..95 as the digest
 *                 state (abcd at +64, efgh at +80): read on entry,
 *                 written back after every block.
 *               The data pointer stored in the job is not updated.
 *   w3          len: number of 64-byte blocks to process in every lane.
 *               If len <= 0 the routine returns without reading or
 *               writing anything.
 *
 * Clobbers: x4-x7, v0-v7, v16-v27, v31 and the condition flags.
 * v8-v14 are used as scratch; d8-d15 are saved in a 64-byte stack frame
 * and restored before returning.
 */
/*
Arguments list
*/
	l0_job	.req	x0
	l1_job	.req	x1
	l2_job	.req	x2
	len	.req	w3
	l0_data	.req	x4
	l1_data	.req	x5
	l2_data	.req	x6
	tmp	.req	x7
	.global	sha256_mb_ce_x3
	.type	sha256_mb_ce_x3, %function
sha256_mb_ce_x3:
	// The loop below tests len only after a block has been processed,
	// so reject an empty request up front.
	cmp	len, 0
	ble	.Lsha256_mb_ce_x3_ret

	// push d8~d15 (only 64 bytes of stack are needed)
	stp	d8,d9,[sp,-64]!
	stp	d10,d11,[sp,16]
	stp	d12,d13,[sp,32]
	stp	d14,d15,[sp,48]

	// per lane: input pointer and current digest
	ldr	l0_data,   [l0_job]
	ldr	l0_abcd_q, [l0_job, 64]
	ldr	l0_efgh_q, [l0_job, 80]
	ldr	l1_data,   [l1_job]
	ldr	l1_abcd_q, [l1_job, 64]
	ldr	l1_efgh_q, [l1_job, 80]
	ldr	l2_data,   [l2_job]
	ldr	l2_abcd_q, [l2_job, 64]
	ldr	l2_efgh_q, [l2_job, 80]

.Lsha256_mb_ce_x3_loop:

	// load key addr; adrp/add is used because KEY lives in another
	// section and a plain adr only reaches +/-1 MiB
	adrp	tmp, KEY
	add	tmp, tmp, :lo12:KEY
	// load msgs: one 64-byte block per lane
	ld1	{l0_msg0_v.4s-l0_msg3_v.4s},[l0_data]
	ld1	{l1_msg0_v.4s-l1_msg3_v.4s},[l1_data]
	ld1	{l2_msg0_v.4s-l2_msg3_v.4s},[l2_data]
	// round constants for rounds 0-3
	ldr	key_q,[tmp]
	add	tmp,tmp,16
	// adjust loop parameter
	add	l0_data,l0_data,64
	add	l1_data,l1_data,64
	add	l2_data,l2_data,64
	sub	len, len, #1
	// The flags set here are consumed by the bgt at the bottom of the
	// loop; no instruction in between writes the condition flags.
	cmp	len, 0

	// The pre-block digest is not copied to registers here; it is
	// reloaded from the jobs once message registers become free (below).

	// byte-swap each 32-bit message word and prepare tmp0 = msg0 + key
	rev32	l0_msg0_v.16b,l0_msg0_v.16b
	rev32	l0_msg1_v.16b,l0_msg1_v.16b
	add	l0_tmp0_v.4s, l0_msg0_v.4s,key_v.4s
	rev32	l0_msg2_v.16b,l0_msg2_v.16b
	rev32	l0_msg3_v.16b,l0_msg3_v.16b

	rev32	l1_msg0_v.16b,l1_msg0_v.16b
	rev32	l1_msg1_v.16b,l1_msg1_v.16b
	add	l1_tmp0_v.4s, l1_msg0_v.4s,key_v.4s
	rev32	l1_msg2_v.16b,l1_msg2_v.16b
	rev32	l1_msg3_v.16b,l1_msg3_v.16b

	rev32	l2_msg0_v.16b,l2_msg0_v.16b
	rev32	l2_msg1_v.16b,l2_msg1_v.16b
	add	l2_tmp0_v.4s, l2_msg0_v.4s,key_v.4s
	rev32	l2_msg2_v.16b,l2_msg2_v.16b
	rev32	l2_msg3_v.16b,l2_msg3_v.16b

	sha256_4_rounds_low	msg0,msg1,msg2,msg3,tmp0,tmp1    /* rounds 0-3 */
	sha256_4_rounds_low	msg1,msg2,msg3,msg0,tmp1,tmp0
	sha256_4_rounds_low	msg2,msg3,msg0,msg1,tmp0,tmp1
	sha256_4_rounds_low	msg3,msg0,msg1,msg2,tmp1,tmp0

	sha256_4_rounds_low	msg0,msg1,msg2,msg3,tmp0,tmp1    /* rounds 16-19 */
	sha256_4_rounds_low	msg1,msg2,msg3,msg0,tmp1,tmp0
	sha256_4_rounds_low	msg2,msg3,msg0,msg1,tmp0,tmp1
	sha256_4_rounds_low	msg3,msg0,msg1,msg2,tmp1,tmp0

	sha256_4_rounds_low	msg0,msg1,msg2,msg3,tmp0,tmp1    /* rounds 32-35 */
	sha256_4_rounds_low	msg1,msg2,msg3,msg0,tmp1,tmp0
	sha256_4_rounds_low	msg2,msg3,msg0,msg1,tmp0,tmp1
	sha256_4_rounds_low	msg3,msg0,msg1,msg2,tmp1,tmp0

	sha256_4_rounds_high	msg1,tmp0,tmp1			/* rounds 48-51 */

	/* msg0 and msg1 are free now; their registers are shared with the
	   *_saved names, so reload the pre-block digest into them */
	ldr	l0_abcd_saved_q, [l0_job, 64]
	ldr	l1_abcd_saved_q, [l1_job, 64]
	ldr	l2_abcd_saved_q, [l2_job, 64]
	ldr	l0_efgh_saved_q, [l0_job, 80]
	ldr	l1_efgh_saved_q, [l1_job, 80]
	ldr	l2_efgh_saved_q, [l2_job, 80]

	sha256_4_rounds_high	msg2,tmp1,tmp0			/* rounds 52-55 */
	sha256_4_rounds_high	msg3,tmp0,tmp1			/* rounds 56-59 */

	/* rounds 60-63: open-coded, no further constants to load */
	mov		l0_tmp2_v.16b,l0_abcd_v.16b
	sha256h		l0_abcd_q,l0_efgh_q,l0_tmp1_v.4s
	sha256h2	l0_efgh_q,l0_tmp2_q,l0_tmp1_v.4s

	mov		l1_tmp2_v.16b,l1_abcd_v.16b
	sha256h		l1_abcd_q,l1_efgh_q,l1_tmp1_v.4s
	sha256h2	l1_efgh_q,l1_tmp2_q,l1_tmp1_v.4s

	mov		l2_tmp2_v.16b,l2_abcd_v.16b
	sha256h		l2_abcd_q,l2_efgh_q,l2_tmp1_v.4s
	sha256h2	l2_efgh_q,l2_tmp2_q,l2_tmp1_v.4s

	/* combine state: new digest = block result + pre-block digest */
	add	l0_abcd_v.4s,l0_abcd_v.4s,l0_abcd_saved_v.4s
	add	l0_efgh_v.4s,l0_efgh_v.4s,l0_efgh_saved_v.4s
	add	l1_abcd_v.4s,l1_abcd_v.4s,l1_abcd_saved_v.4s
	add	l1_efgh_v.4s,l1_efgh_v.4s,l1_efgh_saved_v.4s
	add	l2_abcd_v.4s,l2_abcd_v.4s,l2_abcd_saved_v.4s
	add	l2_efgh_v.4s,l2_efgh_v.4s,l2_efgh_saved_v.4s

	/* publish the digest; the next iteration reloads it from here */
	str	l0_abcd_q,	[l0_job, 64]
	str	l0_efgh_q,	[l0_job, 80]
	str	l1_abcd_q,	[l1_job, 64]
	str	l1_efgh_q,	[l1_job, 80]
	str	l2_abcd_q,	[l2_job, 64]
	str	l2_efgh_q,	[l2_job, 80]

	// flags come from the cmp near the top of the loop
	bgt	.Lsha256_mb_ce_x3_loop

	// pop d8~d15
	ldp	d10,d11,[sp,16]
	ldp	d12,d13,[sp,32]
	ldp	d14,d15,[sp,48]
	ldp	d8, d9, [sp], 64
.Lsha256_mb_ce_x3_ret:
	ret

	.size	sha256_mb_ce_x3, .-sha256_mb_ce_x3
	.section	.rol0_data.cst16,"aM",@progbits,16
	.align	4
KEY:
	.word 0x428A2F98
	.word 0x71374491
	.word 0xB5C0FBCF
	.word 0xE9B5DBA5
	.word 0x3956C25B
	.word 0x59F111F1
	.word 0x923F82A4
	.word 0xAB1C5ED5
	.word 0xD807AA98
	.word 0x12835B01
	.word 0x243185BE
	.word 0x550C7DC3
	.word 0x72BE5D74
	.word 0x80DEB1FE
	.word 0x9BDC06A7
	.word 0xC19BF174
	.word 0xE49B69C1
	.word 0xEFBE4786
	.word 0x0FC19DC6
	.word 0x240CA1CC
	.word 0x2DE92C6F
	.word 0x4A7484AA
	.word 0x5CB0A9DC
	.word 0x76F988DA
	.word 0x983E5152
	.word 0xA831C66D
	.word 0xB00327C8
	.word 0xBF597FC7
	.word 0xC6E00BF3
	.word 0xD5A79147
	.word 0x06CA6351
	.word 0x14292967
	.word 0x27B70A85
	.word 0x2E1B2138
	.word 0x4D2C6DFC
	.word 0x53380D13
	.word 0x650A7354
	.word 0x766A0ABB
	.word 0x81C2C92E
	.word 0x92722C85
	.word 0xA2BFE8A1
	.word 0xA81A664B
	.word 0xC24B8B70
	.word 0xC76C51A3
	.word 0xD192E819
	.word 0xD6990624
	.word 0xF40E3585
	.word 0x106AA070
	.word 0x19A4C116
	.word 0x1E376C08
	.word 0x2748774C
	.word 0x34B0BCB5
	.word 0x391C0CB3
	.word 0x4ED8AA4A
	.word 0x5B9CCA4F
	.word 0x682E6FF3
	.word 0x748F82EE
	.word 0x78A5636F
	.word 0x84C87814
	.word 0x8CC70208
	.word 0x90BEFFFA
	.word 0xA4506CEB
	.word 0xBEF9A3F7
	.word 0xC67178F2