summaryrefslogtreecommitdiffstats
path: root/src/isa-l/igzip/aarch64/gen_icf_map.S
blob: fe04ee4c318e9463963414d16d08ef3579a7aa37 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
	.arch armv8-a+crc+crypto
	.text
	.align	2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

/*
 * Helper macros
 */

/*
 * declare_generic_reg name, reg, default
 *
 * Binds three symbolic register names to general-purpose register number
 * <reg> using .req:
 *   x_<name>  the 64-bit view  (x<reg>)
 *   w_<name>  the 32-bit view  (w<reg>)
 *   <name>    the view selected by <default>, which is the letter x or w
 */
.macro	declare_generic_reg name:req,reg:req,default:req
	x_\name		.req	x\reg
	w_\name		.req	w\reg
	\name		.req	\default\reg
.endm

/*
 * tzbytecnt param0, param1
 *
 * Counts the trailing zero BYTES of a 64-bit value.
 *
 * Both arguments are alias names previously created with
 * declare_generic_reg; the macro uses their x_ and w_ forms.
 *
 *   In:      x_<param0> = 64-bit value
 *   Out:     w_<param0> = number of trailing zero bytes, 0..8
 *            (the upper 32 bits of x_<param0> are cleared by the w write)
 *   Scratch: x_<param1> (left holding the trailing zero BIT count)
 *   Flags:   not modified
 *
 * RBIT followed by CLZ yields the trailing zero bit count.  CLZ of an
 * all-zero 64-bit operand returns 64, and 64 >> 3 is 8, so a zero input
 * needs no separate compare/select.
 */
.macro tzbytecnt param0:req,param1:req
	rbit	x_\param1, x_\param0
	clz	x_\param1, x_\param1
	lsr	w_\param0, w_\param1, 3
.endm

/*
 * write_deflate_icf param0, param1, param2, param3
 *
 * Packs three fields into one 32-bit word and stores it.
 *
 * All arguments are alias names previously created with
 * declare_generic_reg; the macro uses their w_ and x_ forms.
 * The four aliases must name four different registers.
 *
 *   x_<param0>  address that receives the 32-bit word
 *   w_<param1>  value for the low bits; overwritten with the packed word
 *   w_<param2>  value merged in shifted left by 10
 *   w_<param3>  value merged in shifted left by 19
 *
 * The fields are combined with OR only, so each input must already fit
 * its field.  Flags are not modified.
 */
.macro write_deflate_icf param0:req,param1:req,param2:req,param3:req
	orr	w_\param1, w_\param1, w_\param2, lsl 10
	orr	w_\param1, w_\param1, w_\param3, lsl 19
	str	w_\param1, [x_\param0]
.endm

	.align	2
	.global	gen_icf_map_h1_aarch64
	.type	gen_icf_map_h1_aarch64, %function

	/*
	 * Register map for gen_icf_map_h1_aarch64.
	 *
	 * Each line creates <name>, w_<name> and x_<name> for one register
	 * number.  Several names intentionally share a register: the
	 * incoming arguments in registers 0-2 are copied to stream_saved,
	 * matches_icf_lookup and input_size at the start of the function,
	 * after which registers 0-3 are reused as temporaries, macro
	 * operands and the return value.  Only registers 0-15 are named.
	 */

	/* arguments */
	declare_generic_reg	stream_param,			0,x
	declare_generic_reg	matches_icf_lookup_param,	1,x
	declare_generic_reg	input_size_param,		2,x

	/* operand names handed to the tzbytecnt and write_deflate_icf macros */
	declare_generic_reg	param0,				0,x
	declare_generic_reg	param1,				1,x
	declare_generic_reg	param2,				2,x
	declare_generic_reg	param3,				3,x

	/* return */
	declare_generic_reg	ret_val,			0,x

	/*
	 * variables
	 * input_size shares register 3 with param3/tmp3; it is consumed
	 * before the main loop starts using register 3 as a temporary.
	 * hash_mask, hist_size, literal_32, literal_1 and dist default to
	 * the 32-bit (w) view.
	 */
	declare_generic_reg	input_size,			3,x
	declare_generic_reg	next_in,			4,x
	declare_generic_reg	matches_icf_lookup,		6,x
	declare_generic_reg	hash_table,			7,x
	declare_generic_reg	end_in,				8,x
	declare_generic_reg	file_start,			9,x
	declare_generic_reg	hash_mask,			10,w
	declare_generic_reg	hist_size,			11,w
	declare_generic_reg	stream_saved,			12,x
	declare_generic_reg	literal_32,			13,w
	declare_generic_reg	literal_1,			14,w
	declare_generic_reg	dist,				15,w

	/* temporaries; tmp0-tmp3 are the same registers as param0-param3 */
	declare_generic_reg	tmp_has_hist,			0,w
	declare_generic_reg	tmp_offset_hash_table,		1,x
	declare_generic_reg	tmp0,				0,x
	declare_generic_reg	tmp1,				1,x
	declare_generic_reg	tmp2,				2,x
	declare_generic_reg	tmp3,				3,x
	declare_generic_reg	tmp5,				5,x

/* constant */
.equ	ISAL_LOOK_AHEAD, 288	/* inputs shorter than this make the function return 0 */
.equ	SHORTEST_MATCH, 4	/* matches shorter than this many bytes are emitted as literals */
.equ	LEN_OFFSET, 254		/* added to a match length before it is packed into an entry */

/* mask */
.equ	mask_10bit, 1023	/* low 10 bits of a packed 32-bit entry */
.equ	mask_lit_dist, 0x7800	/* 0x1e << 10: the middle-field value written for literal entries */

/*
 * offset of struct isal_zstream
 * Hard-coded byte offsets; NOTE(review): they must agree with the C
 * definition of the structure - confirm whenever that layout changes.
 * The function in this file references only offset_next_in,
 * offset_total_in, offset_level_buf, offset_state_dist_mask and
 * offset_state_has_hist.
 */
.equ	offset_next_in, 0
.equ	offset_avail_in, 8
.equ	offset_total_in, 12	/* read as a 32-bit value */
.equ	offset_next_out, 16
.equ	offset_avail_out, 24
.equ	offset_total_out, 28
.equ	offset_hufftables, 32
.equ	offset_level, 40
.equ	offset_level_buf_size, 44
.equ	offset_level_buf, 48
.equ	offset_end_of_stream, 56
.equ	offset_flush, 58
.equ	offset_gzip_flag, 60
.equ	offset_hist_bits, 62
.equ	offset_state, 64
.equ	offset_state_block_end, 72
.equ	offset_state_dist_mask, 76	/* loaded as a pair: hist_size from +0, hash_mask from +4 */
.equ	offset_state_has_hist, 135	/* single byte; tested for zero and set to 1 */

/*
 * offset of struct level_buf
 * Added to the level_buf pointer to reach the hash table, which the
 * function accesses as an array of 16-bit entries.
 */
.equ	offset_hash_map_hash_table, 4712

/*
 * uint64_t gen_icf_map_h1_aarch64(struct isal_zstream *stream,
 *				   struct deflate_icf *matches_icf_lookup,
 *				   uint64_t input_size)
 *
 * AArch64 routine with the same prototype as the C function
 * gen_icf_map_h1_base().
 *
 * For each input position the routine hashes the next 4 bytes with
 * CRC32C, swaps the current position into the 16-bit hash table slot,
 * derives a distance from the previous slot content and compares 8 bytes
 * at the current position with 8 bytes at (position - distance).  If at
 * least SHORTEST_MATCH leading bytes agree, a match entry is written,
 * otherwise a literal entry.  One 32-bit entry is written per position.
 * Processing stops ISAL_LOOK_AHEAD bytes before the end of the input.
 *
 * In:    x0 = stream, x1 = matches_icf_lookup, x2 = input_size
 * Out:   x0 = number of positions processed (next_in - stream->next_in);
 *	       0 when input_size < ISAL_LOOK_AHEAD
 * Side effects:
 *	  - writes entries to matches_icf_lookup
 *	  - updates the hash table inside stream->level_buf
 *	  - sets the has_hist byte of the stream to 1 if it was 0
 *	  - stream->next_in is read but not written
 * Clobbers: x1-x15, NZCV (no callee-saved register is used)
 * Stack:    16 bytes for the x29/x30 frame record (not on the fast exit)
 */

gen_icf_map_h1_aarch64:
	cmp	input_size_param, (ISAL_LOOK_AHEAD-1)
	bls	.fast_exit			/* input_size < ISAL_LOOK_AHEAD */
	stp	x29, x30, [sp, -16]!

	/* move the arguments out of x0/x1 so those can serve as temporaries */
	mov	stream_saved, stream_param
	mov	matches_icf_lookup, matches_icf_lookup_param
	mov	x29, sp

	ldrb	tmp_has_hist, [stream_saved, offset_state_has_hist]
	mov	tmp_offset_hash_table, offset_hash_map_hash_table
	ldr	end_in, [stream_saved, offset_next_in]	/* holds stream->next_in until .while_check2 */
	mov	input_size, input_size_param
	ldr	hash_table, [stream_saved, offset_level_buf]
	ldr	w_file_start, [stream_saved, offset_total_in]
	ldp	hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
	add	hash_table, hash_table, tmp_offset_hash_table
	sub	file_start, end_in, file_start	/* file_start = next_in - total_in */
	cbz	tmp_has_hist, .igzip_no_hist
	b	.while_check1

	.align 3
.igzip_no_hist:
	/*
	 * No history yet: the first byte is always emitted as a literal.
	 * The entry is (byte | mask_lit_dist); the byte was zero-extended
	 * by ldrb, so the remaining bits of the word are zero.
	 */
	ldrb	w_tmp1, [end_in]
	add	next_in, end_in, 1
	orr	w_tmp0, w_tmp1, mask_lit_dist
	str	w_tmp0, [matches_icf_lookup], 4	/* store, then advance to the next entry */

	/* seed the hash table with the first position */
	ldr	w_tmp0, [end_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp5, w_tmp0, hash_mask
	sub	x_tmp1, end_in, file_start
	mov	w_tmp2, 1
	mov	ret_val, 1			/* result if the loop below does not run */
	strh	w_tmp1, [hash_table, x_tmp5, lsl 1]
	strb	w_tmp2, [stream_saved, offset_state_has_hist]
	b	.while_check2

.while_check1:
	mov	next_in, end_in
	mov	ret_val, 0			/* result if the loop below does not run */

.while_check2:
	/* end_in = stream->next_in + input_size - ISAL_LOOK_AHEAD */
	sub	input_size, input_size, ISAL_LOOK_AHEAD
	add	end_in, end_in, input_size
	cmp	next_in, end_in
	bcs	.exit				/* nothing to process; x0 already holds 0 or 1 */
	mov	literal_32, 32
	mov	literal_1, 1
	b	.while_loop

	.align 3
.new_match_found:
	/*
	 * On entry: w_tmp0 = match length in bytes, w_tmp2 = dist - 1,
	 * w_tmp3 = 0.  Build the length field in w_tmp1, the distance code
	 * in w_tmp2 and the distance extra bits in w_tmp3.
	 */
	clz	w_tmp5, w_tmp2
	add	w_tmp1, w_tmp0, LEN_OFFSET
	sub	w_tmp5, literal_32, w_tmp5	/* number of significant bits in dist - 1 */
	cmp	dist, 2
	sub	w_tmp5, w_tmp5, #2		/* n = extra-bit count; flags from cmp are kept */
	bls	.skip_compute_dist_icf_code	/* dist <= 2: code = dist - 1, no extra bits */

	lsl	w_tmp3, literal_1, w_tmp5
	sub	w_tmp3, w_tmp3, #1		/* (1 << n) - 1 */
	lsr	w_tmp0, w_tmp2, w_tmp5		/* (dist - 1) >> n */
	and	w_tmp3, w_tmp3, w_tmp2		/* extra bits = (dist - 1) & ((1 << n) - 1) */
	add	w_tmp2, w_tmp0, w_tmp5, lsl 1	/* code = ((dist - 1) >> n) + 2 * n */

.skip_compute_dist_icf_code:
	mov	param0, matches_icf_lookup
	write_deflate_icf param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	beq	.save_with_exit

.while_loop:
	/* hash of the 4 bytes at next_in, masked to the table size */
	ldr	w_tmp0, [next_in]
	crc32cw	w_tmp0, wzr, w_tmp0

	and	w_tmp0, w_tmp0, hash_mask
	sub	x_tmp1, next_in, file_start	/* current position relative to file_start */
	lsl	x_tmp0, x_tmp0, 1		/* byte offset of the 16-bit slot */
	sub	w_tmp2, w_tmp1, #1
	ldrh	w_tmp3, [hash_table, x_tmp0]	/* position previously stored for this hash */
	strh	w_tmp1, [hash_table, x_tmp0]	/* replace it with the current position */
	sub	w_tmp2, w_tmp2, w_tmp3
	and	w_tmp2, w_tmp2, hist_size	/* w_tmp2 = dist - 1 */
	add	dist, w_tmp2, 1			/* dist = ((pos - prev - 1) & hist_size) + 1 */

	/* count how many leading bytes agree between next_in and next_in - dist */
	ldr	x_tmp0, [next_in]
	sub	x_tmp1, next_in, w_dist, uxtw
	ldr	x_tmp1, [x_tmp1]
	eor	x_tmp0, x_tmp1, x_tmp0
	tzbytecnt	param0,param1		/* w_tmp0 = matching byte count, 0..8 */

	cmp	w_tmp0, (SHORTEST_MATCH-1)
	mov	w_tmp3, 0			/* default: no distance extra bits */
	bhi	.new_match_found

	/* literal entry: low field = input byte, middle field = 0x1e, high field = 0 */
	ldrb	w_param1, [next_in]
	mov	x_param0, matches_icf_lookup
	mov	w_param3, 0
	mov	w_param2, 0x1e
	write_deflate_icf param0,param1,param2,param3

	add	next_in, next_in, 1
	add	matches_icf_lookup, matches_icf_lookup, 4
	cmp	next_in, end_in
	bne	.while_loop

.save_with_exit:
	/* return next_in - stream->next_in */
	ldr	ret_val, [stream_saved, offset_next_in]
	sub	ret_val, next_in, ret_val

.exit:
	ldp	x29, x30, [sp], 16
	ret

	.align 3
.fast_exit:
	/* taken before the frame record is pushed, so there is nothing to pop */
	mov	ret_val, 0
	ret
	.size	gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64