/*
 * path: root/src/isa-l/igzip/aarch64/isal_update_histogram.S
 * blob: abcec0f142bfc32756bba1a0cffd35f654cd54ee
 */
/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
	.arch armv8-a+crc
	.text
	.align	2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

/*
declare Macros
*/

/*
 * declare_generic_reg name, reg, default
 *
 * Defines three .req aliases for general-purpose register number <reg>:
 *   x_<name>  is the 64-bit view  x<reg>
 *   w_<name>  is the 32-bit view  w<reg>
 *   <name>    is <default><reg>, where <default> is the width prefix
 *             (x or w) that the bare name should stand for
 */
.macro	declare_generic_reg name:req, reg:req, default:req
	x_\name		.req	x\reg
	w_\name		.req	w\reg
	\name		.req	\default\reg
.endm

/*
 * convert_dist_to_dist_sym dist, tmp0, tmp1
 *
 * Replaces the match distance held in w_<dist> with its distance symbol.
 *   dist > 32768  : result is -1 (all ones)
 *   dist <= 4     : result is dist - 1 (a distance of 0 therefore gives -1)
 *   otherwise     : with v = dist - 1 and s = 30 - clz(v),
 *                   result is (v >> s) + 2 * s
 *
 * Clobbers: w_<tmp0> always, w_<tmp1> on the long path, condition flags.
 *
 * The exit label carries the assembler macro-expansion counter, so the
 * macro may be expanded any number of times in one translation unit
 * without producing a duplicate symbol.
 */
.macro convert_dist_to_dist_sym dist:req,tmp0:req,tmp1:req
	mov     w_\tmp0, w_\dist
	mov     w_\dist, -1			// default result for out-of-range input
	cmp     w_\tmp0, 32768
	bhi     .dist2code_done_\@
	sub     w_\dist, w_\tmp0, #1		// v = dist - 1
	cmp     w_\tmp0, 4
	bls     .dist2code_done_\@		// small distances map to v directly
	clz     w_\tmp1, w_\dist
	mov     w_\tmp0, 30
	sub     w_\tmp0, w_\tmp0, w_\tmp1	// s = 30 - clz(v)
	lsr     w_\dist, w_\dist, w_\tmp0	// v >> s
	add     w_\dist, w_\dist, w_\tmp0, lsl 1	// + 2 * s
.dist2code_done_\@:
.endm

/*
 * convert_length_to_len_sym length, length_out, tmp0
 *
 * w_<length_out> = 256 + (32-bit table entry number w_<length>)
 *
 * The table is addressed through the .len_to_code_tab_lanchor symbol with a
 * page-relative adrp plus :lo12: pair.  w_<length> is zero-extended and
 * scaled by 4 (one word per entry).  No range check is made on w_<length>.
 *
 * Clobbers: x_<tmp0> (left holding the table address).
 */
.macro convert_length_to_len_sym length:req,length_out:req,tmp0:req
	adrp	x_\tmp0, .len_to_code_tab_lanchor
	add	x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
	ldr	w_\length_out, [x_\tmp0, w_\length, uxtw #2]
	add	w_\length_out, w_\length_out, #256
.endm

/*
 * len_to_code_tab: read-only lookup table of 264 32-bit words (1056 bytes,
 * matching the .size directive below).
 *
 * convert_length_to_len_sym loads the word at index <length> and adds 256
 * to it.  Entries for indices 3..258 hold 0x01..0x1d; the three leading and
 * five trailing entries are zero (presumably unused indices and padding --
 * confirm against the callers before relying on them).
 */
        .section        .rodata
        .align  4
/* anchor symbol equal to the table start; the macro addresses the table through it */
.len_to_code_tab_lanchor = . + 0
        .type   len_to_code_tab, %object
        .size   len_to_code_tab, 1056
len_to_code_tab:
        /* indices 0..2 */
        .word 0x00, 0x00, 0x00
        /* indices 3..258, eight entries per line */
        .word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
        .word 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
        .word 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
        .word 0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
        .word 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
        .word 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
        .word 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
        .word 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
        .word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
        .word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
        .word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
        .word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
        .word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
        .word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
        .word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
        .word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
        .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
        .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
        .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
        .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
        .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
        .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
        .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
        .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
        .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
        .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
        .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
        .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
        .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
        .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
        .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
        .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
        /* indices 259..263 */
        .word 0x00, 0x00, 0x00, 0x00, 0x00

	.text
	.arch armv8-a+crc
	.global	isal_update_histogram_aarch64
	.type	isal_update_histogram_aarch64, %function

/*
 * C prototype:
 *   void isal_update_histogram_aarch64(uint8_t *start_stream, int length,
 *                                      struct isal_huff_histogram *histogram);
 */

	/* incoming arguments, x0-x2 */
	declare_generic_reg	start_stream,		0,x
	declare_generic_reg	length,			1,x
	declare_generic_reg	histogram,		2,x

	/* the same three registers under neutral names, passed as macro operands */
	declare_generic_reg	param0,			0,x
	declare_generic_reg	param1,			1,x
	declare_generic_reg	param2,			2,x

	/* callee-saved registers x19-x23, spilled in the function prologue */
	declare_generic_reg	current,		19,x
	declare_generic_reg	last_seen,		20,x
	declare_generic_reg	end_stream,		21,x
	declare_generic_reg	loop_end_iter,		22,x
	declare_generic_reg	histogram_saved,	23,x
	declare_generic_reg	lit_len_histogram,	23,x	// same register as histogram_saved

	/* caller-saved scratch registers, in register-number order */
	declare_generic_reg	match_length,		3,w
	declare_generic_reg	end,			4,x
	declare_generic_reg	tmp0,			5,w
	declare_generic_reg	tmp1,			6,w
	declare_generic_reg	dist,			7,x
	declare_generic_reg	literal,		8,x
	declare_generic_reg	next_hash,		9,x
	declare_generic_reg	start_stream_saved,	10,x
	declare_generic_reg	D,			11,w
	declare_generic_reg	dist_histogram,		12,x

/* table sizes (in entries) */
.equ	LIT_LEN, 286
.equ	DIST_LEN, 30

/* byte offsets from the histogram pointer, and the size of the hash table */
.equ	lit_len_offset, 0
.equ	dist_offset, (8*LIT_LEN)                   // 2288
.equ	hash_offset, (dist_offset + 8*DIST_LEN)    // 2528
.equ	hash_table_size, (8*1024*2)                // 16384

/*
 * isal_update_histogram_aarch64
 *
 * In:   x0 = start_stream, w1 = length (signed 32-bit), x2 = histogram
 * Out:  nothing is returned in registers
 *
 * Behaviour as implemented below:
 *   - returns immediately when length <= 0;
 *   - clears hash_table_size bytes at histogram + hash_offset with memset;
 *   - main loop, for every position with at least 4 readable bytes: hashes
 *     the 4 bytes (crc32cw, low 13 bits), looks up and replaces the 16-bit
 *     position stored for that hash, and derives a 16-bit distance.  When
 *     the distance is in 1..32767 and compare_max_258_bytes reports a match
 *     longer than 3, one distance counter and one length counter are
 *     incremented and the position advances by the match length; otherwise
 *     the counter of the current byte is incremented and the position
 *     advances by one;
 *   - the bytes left at the end (at most 3) are counted as literals;
 *   - counter 256 of the literal/length table is incremented once
 *     (presumably the end-of-block symbol).
 * All counters are 64-bit (indexed with lsl 3).
 *
 * Stack: 64-byte frame holding x29, x30 and x19-x23.
 * Calls: memset.  compare_max_258_bytes is a macro defined outside this file;
 *        it is assumed to leave the match length in match_length -- confirm
 *        its clobber list when changing register assignments.
 */
isal_update_histogram_aarch64:
	cmp	w_length, 0
	ble	.done				// nothing to do, no frame built yet

	stp	x29, x30, [sp, -64]!
	mov	x29, sp
	stp	x19, x20, [sp, 16]
	stp	x21, x22, [sp, 32]
	str	x23, [sp, 48]

	add	last_seen, histogram, hash_offset
	add	end_stream, start_stream, w_length, sxtw
	mov	current, start_stream
	sub	loop_end_iter, end_stream, #3	// positions below this have 4 readable bytes
	mov	histogram_saved, histogram

	/* memset(last_seen, 0, hash_table_size) */
	mov	x0, last_seen
	mov	w1, 0
	mov	x2, hash_table_size
	bl	memset

	cmp	current, loop_end_iter
	bcs	.loop_end			// fewer than 4 input bytes

	/* These three live in caller-saved registers, so they are set up only
	   after the memset call. */
	mov	start_stream_saved, current
	add	dist_histogram, histogram_saved, dist_offset
	mov	D, 32766			// largest accepted value of dist - 1
	b	.loop

	.align 2
	/* literal path: count the byte at current and advance by one */
.loop_2nd_stream:
	and	literal, literal, 0xff
	mov	current, next_hash
	cmp	loop_end_iter, current		// flags are consumed by the bls below

	ldr	x0, [lit_len_histogram, literal, lsl 3]
	add	x0, x0, 1
	str	x0, [lit_len_histogram, literal, lsl 3]
	bls	.loop_end

.loop:
	ldr	w_literal, [current]
	add	next_hash, current, 1

	mov	w0, w_literal
	crc32cw	w0, wzr, w0

	ubfiz	x0, x0, 1, 13			// byte offset of the 16-bit hash slot
	sub	x2, current, start_stream_saved
	ldrh	w_dist, [last_seen, x0]		// previous position for this hash
	strh	w2, [last_seen, x0]		// record the current position
	sub	w2, w2, w_dist
	and	w_dist, w2, 65535		// distance modulo 2^16

	sub	w0, w_dist, #1
	cmp	w0, D
	bhi	.loop_2nd_stream		// dist is 0 or above 32767: literal

	sub	w2, w_end_stream, w_current	// bytes available for the comparison
	mov	x1, current
	sub	x0, current, w_dist, uxth	// candidate match start
	compare_max_258_bytes param0,param1,param2,match_length,tmp0,tmp1

	cmp	match_length, 3
	bls	.loop_2nd_stream		// too short: treat as literal

	/* end = min(current + 3, loop_end_iter) */
	add	end, current, 3
	cmp	end, loop_end_iter
	csel	end, end, loop_end_iter, ls
	cmp	end, next_hash
	bls	.skip_inner_loop

	.align 3
	/* record the positions next_hash .. end - 1 in the hash table */
.inner_loop:
	ldr	w0, [next_hash]
	crc32cw	w0, wzr, w0

	ubfiz	x0, x0, 1, 13
	sub	x1, next_hash, start_stream_saved
	add	next_hash, next_hash, 1
	cmp	next_hash, end
	strh	w1, [last_seen, x0]
	bne	.inner_loop

.skip_inner_loop:
	/* dist_histogram[distance symbol] += 1 */
	convert_dist_to_dist_sym dist, tmp0, tmp1
	uxtw	x2, w_dist
	ldr	x1, [dist_histogram, x2, lsl 3]
	add	x1, x1, 1
	str	x1, [dist_histogram, x2, lsl 3]

	/* lit_len_histogram[length symbol] += 1 */
	convert_length_to_len_sym match_length,tmp1,tmp0
	uxtw	x0, w_tmp1
	ldr	x1, [lit_len_histogram, x0, lsl 3]
	add	x1, x1, 1
	str	x1, [lit_len_histogram, x0, lsl 3]

	/* current += match_length (zero-extended from 32 bits) */
	add	current, current, w_match_length, uxtw
	cmp	loop_end_iter, current
	bhi	.loop

	.align 3
	/* Every path reaching this label has current >= end_stream - 3, so at
	   most three bytes remain; count each of them as a literal. */
.loop_end:
	cmp	end_stream, current
	bls	.loop_fold_end

.loop_tail_literals:
	ldrb	w1, [current], 1		// load the byte, then advance current
	ldr	x0, [lit_len_histogram, x1, lsl 3]
	add	x0, x0, 1
	str	x0, [lit_len_histogram, x1, lsl 3]
	cmp	end_stream, current
	bhi	.loop_tail_literals

.loop_fold_end:
	/* lit_len_histogram[256] += 1 */
	ldr	x0, [lit_len_histogram, (256*8)]
	add	x0, x0, 1
	str	x0, [lit_len_histogram, (256*8)]

	ldr	x23, [sp, 48]
	ldp	x19, x20, [sp, 16]
	ldp	x21, x22, [sp, 32]
	ldp	x29, x30, [sp], 64
	ret
	.align 2
.done:
	ret
	.size	isal_update_histogram_aarch64, .-isal_update_histogram_aarch64