summaryrefslogtreecommitdiffstats
path: root/src/isa-l/crc/aarch64/crc32_aarch64_common.h
blob: a2ef22aea37db36fad5c0c111e29b5d8a99f740a (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
/**********************************************************************
  Copyright(c) 2020 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/




// crc32_hw_common poly_type
//
// Expands to a complete routine body (it finishes with ret) that folds LEN
// bytes starting at BUF into wCRC using the crc32_u8 / crc32_u16 / crc32_u32 /
// crc32_u64 helper macros.
//
// Required at the expansion site (none of these are defined in this macro):
//   BUF, LEN, wCRC, wdata, data0..data3 register names and the crc32_uN helpers.
//
// poly_type: when it is the literal text crc32, wCRC is inverted (mvn) on entry
//            and inverted again while it is moved to w0. For any other value
//            the CRC is used and returned without inversion.
// Result:    w0.
// Clobbers:  BUF, LEN, wCRC, wdata, data0..data3 and the condition flags.
//
// The labels below have fixed names (no per-expansion suffix), so expanding this
// macro twice in one assembly unit would define the same labels twice.
.macro	crc32_hw_common		poly_type

.ifc	\poly_type,crc32
	mvn		wCRC,wCRC
.endif
	// Empty buffer: skip straight to the result move without touching memory.
	cbz		LEN, .zero_length_ret
	// Alignment prologue, step 1: consume one byte if address bit 0 is set.
	tbz		BUF, 0, .align_short
	ldrb		wdata,[BUF],1
	sub		LEN,LEN,1
	crc32_u8	wCRC,wCRC,wdata
.align_short:
	// Step 2: take the halfword path only if address bit 1 is set AND LEN > 1.
	// When bit 1 is clear, ccmp loads NZCV with 0, which makes hi false.
	tst		BUF,2
	ccmp		LEN,1,0,ne
	bhi		.align_short_2
	// Step 3: take the word path only if address bit 2 is set AND LEN > 3.
	tst		BUF,4
	ccmp		LEN,3,0,ne
	bhi		.align_word

.align_finish:
	// BUF is 8-byte aligned here unless LEN was too small to finish aligning.

	cmp		LEN, 63
	bls		.loop_16B
.loop_64B:
	// 64 bytes per iteration: four ldp loads interleaved with eight crc32_u64
	// steps. The prfm hints use offsets 2048 and 256 from the already advanced
	// BUF. The cmp below sets the flags used by the closing branch, so nothing
	// in between may change NZCV (crc32_u64 is defined elsewhere; presumably it
	// leaves the flags alone - verify).
	ldp		data0, data1, [BUF],#16
	prfm		pldl2keep,[BUF,2048]
	sub		LEN,LEN,#64
	ldp		data2, data3, [BUF],#16
	prfm		pldl1keep,[BUF,256]
	cmp		LEN,#64
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	ldp		data0, data1, [BUF],#16
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	ldp		data2, data3, [BUF],#16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	// NOTE(review): bge is a signed condition while the entry test above uses
	// the unsigned bls. The two only differ when the top bit of LEN is set;
	// confirm which behaviour is wanted before changing it.
	bge		.loop_64B

.loop_16B:
	// Unrolled tail: up to three 16-byte steps. LEN is at most 63 when this
	// label is reached through the unsigned test at .align_finish.
	cmp		LEN, 15
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16
	cmp		LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16
	cmp		LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16   // third step: fewer than 16 bytes must remain, so no compare
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
.less_16B:
	// Remaining bytes are consumed in steps of 8, 4, 2 and 1.
	cmp		LEN, 7
	bls		.less_8B
	ldr		data0, [BUF], 8
	sub		LEN, LEN, #8
	crc32_u64	wCRC, wCRC, data0
.less_8B:
	cmp		LEN, 3
	bls		.less_4B
	ldr		wdata, [BUF], 4
	sub		LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
.less_4B:
	cmp		LEN, 1
	bls		.less_2B
	ldrh		wdata, [BUF], 2
	sub		LEN, LEN, #2
	crc32_u16	wCRC, wCRC, wdata
.less_2B:
	cbz		LEN, .zero_length_ret
	// Last byte: BUF and LEN are not updated because nothing follows.
	ldrb		wdata, [BUF]
	crc32_u8	wCRC, wCRC, wdata
.zero_length_ret:
	// Move the result to w0, undoing the entry inversion for poly_type crc32.
.ifc	\poly_type,crc32
	mvn		w0, wCRC
.else
	mov		w0, wCRC
.endif
	ret
.align_short_2:
	// Out-of-line part of the alignment prologue: consume a halfword, then
	// continue into the word step only if address bit 2 is set AND LEN > 3.
	// When bit 2 is clear, ccmp loads NZCV with 0, which makes ls true.
	ldrh		wdata, [BUF], 2
	sub		LEN, LEN, 2
	tst		BUF, 4
	crc32_u16	wCRC, wCRC, wdata
	ccmp		LEN, 3, 0, ne
	bls		.align_finish
.align_word:
	ldr		wdata, [BUF], 4
	sub		LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
	b .align_finish
.endm

// crc32_3crc_fold poly_type
//
// Expands to a complete routine body (it finishes with ret) that folds LEN
// bytes starting at BUF into wCRC. While at least 1024 bytes remain, each
// 1024-byte block is split into three lanes whose CRCs are computed
// independently and then merged; the remainder goes through the same
// 64/16/8/4/2/1-byte tail used by the plain hardware path.
//
// Lane layout of one 1024-byte block (offsets from the block start):
//   lane 0: bytes   0..343  (8 + 21*16), continues the running CRC in crc0
//   lane 1: bytes 344..679  (21*16),     crc1 starts from 0
//   lane 2: bytes 680..1023 (21*16 + 8), crc2 starts from 0
//
// Required at the expansion site (none of these are defined in this macro):
//   BUF, LEN, wCRC, wdata, data0..data3, const_adr, dconst0/dconst1,
//   vconst0/vconst1, ptr_crc0..2, crc0..2, xcrc0/xcrc1, crcN_data0/1,
//   dtmp0/dtmp1, vtmp0/vtmp1 and the crc32_uN helper macros.
//   The tail code carries on with BUF and wCRC without any copy, so
//   presumably ptr_crc0 names the same register as BUF and crc0 the same
//   register as wCRC - verify at the expansion site.
//
// poly_type: when it is the literal text crc32, wCRC is inverted (mvn) on entry
//            and again while it is moved to w0, and the first constant pair is
//            emitted. Any other value skips the inversions and emits the second
//            pair.
// Result:    w0.
//
// The named labels and the .set symbols (offset, ptr_offset, l1_offset) have
// fixed names, so expanding this macro twice in one assembly unit would define
// the labels twice.
.macro	crc32_3crc_fold poly_type
.ifc	\poly_type,crc32
	mvn		wCRC,wCRC
.endif
	// Empty buffer: skip straight to the result move without touching memory.
	cbz		LEN, .zero_length_ret
	// Alignment prologue, step 1: consume one byte if address bit 0 is set.
	tbz		BUF, 0, .align_short
	ldrb		wdata,[BUF],1
	sub		LEN,LEN,1
	crc32_u8	wCRC,wCRC,wdata
.align_short:
	// Step 2: halfword path only if address bit 1 is set AND LEN > 1
	// (ccmp loads NZCV with 0 when the bit is clear, so hi is false).
	tst		BUF,2
	ccmp		LEN,1,0,ne
	bhi		.align_short_2
	// Step 3: word path only if address bit 2 is set AND LEN > 3.
	tst		BUF,4
	ccmp		LEN,3,0,ne
	bhi		.align_word

.align_finish:
	// Fewer than 1024 bytes: go directly to the tail at 1:. The adr sits
	// between the compare and the branch; it does not affect the flags.
	cmp	LEN,1023
	adr	const_adr, .Lconstants
	bls	1f
	// Merge multipliers, loaded once and kept for every block.
	ldp	dconst0,dconst1,[const_adr]
2:
	// ---- one 1024-byte block ------------------------------------------
	// Lane 0 takes the leading 8 bytes so that lane 1 and lane 2 can start
	// 336 and 672 bytes after the advanced ptr_crc0.
	ldr		crc0_data0,[ptr_crc0],8
	prfm		pldl2keep,[ptr_crc0,3*1024-8]
	mov		crc1,0
	mov		crc2,0
	add		ptr_crc1,ptr_crc0,336
	add		ptr_crc2,ptr_crc0,336*2
	crc32_u64	crc0,crc0,crc0_data0
	// Assembly-time bookkeeping for the prefetch addresses:
	//   ptr_offset = bytes ptr_crc0 has advanced since the block start
	//   offset     = 64-byte step of the pldl2keep target
	//   l1_offset  = 64-byte step of the pldl1keep target
	// Subtracting ptr_offset cancels the pointer advance, so the pldl2keep
	// targets are block start + 3*1024 + offset and the pldl1keep targets
	// are block start + 2*1024 + l1_offset.
	.set		offset,0
	.set		ptr_offset,8
	// 21 rounds in total (5 + 10 + 6), 16 bytes per lane per round. The three
	// groups differ only in which prefetch hints they issue.
	// Rounds 1-5: pldl2keep only.
	.rept		5
	ldp		crc0_data0,crc0_data1,[ptr_crc0],16
	ldp		crc1_data0,crc1_data1,[ptr_crc1],16
	.set		offset,offset+64
	.set		ptr_offset,ptr_offset+16
	prfm		pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset]
	crc32_u64	crc0,crc0,crc0_data0
	crc32_u64	crc0,crc0,crc0_data1
	ldp		crc2_data0,crc2_data1,[ptr_crc2],16
	crc32_u64	crc1,crc1,crc1_data0
	crc32_u64	crc1,crc1,crc1_data1
	crc32_u64	crc2,crc2,crc2_data0
	crc32_u64	crc2,crc2,crc2_data1
	.endr
	.set		l1_offset,0
	// Rounds 6-15: pldl2keep and pldl1keep.
	.rept		10
	ldp		crc0_data0,crc0_data1,[ptr_crc0],16
	ldp		crc1_data0,crc1_data1,[ptr_crc1],16
	.set		offset,offset+64
	.set		ptr_offset,ptr_offset+16
	prfm		pldl2keep,[ptr_crc0,3*1024-ptr_offset+offset]
	prfm		pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset]
	.set		l1_offset,l1_offset+64
	crc32_u64	crc0,crc0,crc0_data0
	crc32_u64	crc0,crc0,crc0_data1
	ldp		crc2_data0,crc2_data1,[ptr_crc2],16
	crc32_u64	crc1,crc1,crc1_data0
	crc32_u64	crc1,crc1,crc1_data1
	crc32_u64	crc2,crc2,crc2_data0
	crc32_u64	crc2,crc2,crc2_data1
	.endr

	// Rounds 16-21: pldl1keep only.
	.rept		6
	ldp		crc0_data0,crc0_data1,[ptr_crc0],16
	ldp		crc1_data0,crc1_data1,[ptr_crc1],16
	.set		ptr_offset,ptr_offset+16
	prfm		pldl1keep,[ptr_crc0,2*1024-ptr_offset+l1_offset]
	.set		l1_offset,l1_offset+64
	crc32_u64	crc0,crc0,crc0_data0
	crc32_u64	crc0,crc0,crc0_data1
	ldp		crc2_data0,crc2_data1,[ptr_crc2],16
	crc32_u64	crc1,crc1,crc1_data0
	crc32_u64	crc1,crc1,crc1_data1
	crc32_u64	crc2,crc2,crc2_data0
	crc32_u64	crc2,crc2,crc2_data1
	.endr
	// ---- merge ---------------------------------------------------------
	// Lane 2 still owes its trailing 8 bytes. crc0 and crc1 are moved to the
	// vector side, carry-less multiplied (pmull) by the first and second
	// constant respectively, and the low 64 bits of each product are run
	// through crc32_u64 with a zero seed. The three values are then xored.
	// The constants are presumably the shift factors that move crc0 and crc1
	// past the bytes that follow their lanes - TODO confirm against the
	// generator used for .Lconstants.
	ldr		crc2_data0,[ptr_crc2]
	fmov		dtmp0,xcrc0
	fmov		dtmp1,xcrc1
	crc32_u64	crc2,crc2,crc2_data0
	// ptr_crc0 has consumed 8 + 336 bytes; move it to the next block start.
	add		ptr_crc0,ptr_crc0,1024-(336+8)
	pmull		vtmp0.1q,vtmp0.1d,vconst0.1d
	sub		LEN,LEN,1024
	pmull		vtmp1.1q,vtmp1.1d,vconst1.1d
	// Flags for the loop branch are set here; the fmov / eor instructions
	// below do not modify NZCV.
	cmp		LEN,1024
	fmov		xcrc0,dtmp0
	fmov		xcrc1,dtmp1
	crc32_u64	crc0,wzr,xcrc0
	crc32_u64	crc1,wzr,xcrc1

	eor		crc0,crc0,crc2
	eor		crc0,crc0,crc1

	bhs	2b
1:
	// ---- tail: fewer than 1024 bytes remain ----------------------------
	cmp		LEN, 63
	bls		.loop_16B
.loop_64B:
	// 64 bytes per iteration: four ldp loads interleaved with eight
	// crc32_u64 steps. The cmp sets the flags used by the closing branch.
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#64
	ldp		data2, data3, [BUF],#16
	cmp		LEN,#64
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	ldp		data0, data1, [BUF],#16
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	ldp		data2, data3, [BUF],#16
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	crc32_u64	wCRC, wCRC, data2
	crc32_u64	wCRC, wCRC, data3
	// NOTE(review): bge is a signed condition, whereas the 1024-byte loop
	// above closes with the unsigned bhs and the entry tests use bls. The
	// two only differ when the top bit of LEN is set; confirm which
	// behaviour is wanted before unifying them.
	bge		.loop_64B

.loop_16B:
	// Unrolled: up to three 16-byte steps (LEN is at most 63 when this label
	// is reached through the unsigned test at 1:).
	cmp		LEN, 15
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16
	cmp		LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16
	cmp		LEN,15
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
	bls		.less_16B
	ldp		data0, data1, [BUF],#16
	sub		LEN,LEN,#16   // third step: fewer than 16 bytes must remain, so no compare
	crc32_u64	wCRC, wCRC, data0
	crc32_u64	wCRC, wCRC, data1
.less_16B:
	// Remaining bytes are consumed in steps of 8, 4, 2 and 1.
	cmp		LEN, 7
	bls		.less_8B
	ldr		data0, [BUF], 8
	sub		LEN, LEN, #8
	crc32_u64	wCRC, wCRC, data0
.less_8B:
	cmp		LEN, 3
	bls		.less_4B
	ldr		wdata, [BUF], 4
	sub		LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
.less_4B:
	cmp		LEN, 1
	bls		.less_2B
	ldrh		wdata, [BUF], 2
	sub		LEN, LEN, #2
	crc32_u16	wCRC, wCRC, wdata
.less_2B:
	cbz		LEN, .zero_length_ret
	// Last byte: BUF and LEN are not updated because nothing follows.
	ldrb		wdata, [BUF]
	crc32_u8	wCRC, wCRC, wdata
.zero_length_ret:
	// Move the result to w0, undoing the entry inversion for poly_type crc32.
.ifc	\poly_type,crc32
	mvn		w0, wCRC
.else
	mov		w0, wCRC
.endif
	ret
.align_short_2:
	// Out-of-line part of the alignment prologue: consume a halfword, then
	// continue into the word step only if address bit 2 is set AND LEN > 3
	// (ccmp loads NZCV with 0 when the bit is clear, so ls is true).
	ldrh		wdata, [BUF], 2
	sub		LEN, LEN, 2
	tst		BUF, 4
	crc32_u16	wCRC, wCRC, wdata
	ccmp		LEN, 3, 0, ne
	bls		.align_finish
.align_word:
	ldr		wdata, [BUF], 4
	sub		LEN, LEN, #4
	crc32_u32	wCRC, wCRC, wdata
	b .align_finish
	// Literal pair read by the ldp after .align_finish. It follows an
	// unconditional branch, so the alignment padding is never executed.
	// Without the directive only instruction (4-byte) alignment would be
	// guaranteed for these 64-bit values.
	.p2align	3
.Lconstants:
.ifc	\poly_type,crc32
	.quad		0xb486819b
	.quad		0x76278617
.else
	.quad		0xe417f38a
	.quad		0x8f158014
.endif

.endm