summaryrefslogtreecommitdiffstats
path: root/src/isa-l/igzip/igzip_icf_finish.asm
blob: 231ac0667997e4f92fa98491f06d29e1a57c62d6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions
;  are met:
;    * Redistributions of source code must retain the above copyright
;      notice, this list of conditions and the following disclaimer.
;    * Redistributions in binary form must reproduce the above copyright
;      notice, this list of conditions and the following disclaimer in
;      the documentation and/or other materials provided with the
;      distribution.
;    * Neither the name of Intel Corporation nor the names of its
;      contributors may be used to endorse or promote products derived
;      from this software without specific prior written permission.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
;  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
;  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
;  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
;  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
;  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
;  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

%include "options.asm"
%include "lz0a_const.asm"
%include "data_struct2.asm"
%include "bitbuf2.asm"
%include "huffman.asm"
%include "igzip_compare_types.asm"

%include "stdmac.asm"
%include "reg_sizes.asm"

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Register allocation for isal_deflate_icf_finish.
; Several names map onto the same physical register; within each group only
; one alias can hold a live value at any given point in the routine.

; rax: input dword at the current position / general scratch
%define curr_data	rax
%define tmp1		rax

; rbx: hash-table entry read back, length code, and scratch
%define f_index		rbx
%define code		rbx
%define tmp4		rbx
%define tmp5		rbx
%define tmp6		rbx

; rcx: scratch and computed hash value
%define tmp2		rcx
%define hash		rcx

; rdx: scratch
%define tmp3		rdx

; rsi: pointer to the stream structure (copied from the argument register)
%define stream		rsi

; rdi: index of the current input position (added to file_start)
%define f_i		rdi

; rbp: hash mask in the main loop (code_len2 appears only in comments below)
%define code_len2	rbp
%define hmask1		rbp

; r8: output write pointer
%define m_out_buf	r8

; r9: level buffer base; the hash table and histograms are addressed off it
%define level_buf	r9

; r10: match distance, later reused for the hash mask on the match path
%define dist 		r10
%define hmask2		r10

; r12: assembled match record, and the input end index outside the match path
%define code2		r12
%define f_end_i		r12

; r13: next_in - total_in, so that [file_start + f_i] is the current byte
%define file_start	r13

; r14: match length produced by the compare macro
%define len		r14

; r15: loaded from [stream + _hufftables]
%define hufftables	r15

; Address expressions relative to level_buf; used inside [ ] operands.
%define hash_table level_buf + _hash8k_hash_table
%define lit_len_hist level_buf + _hist_lit_len
%define dist_hist level_buf + _hist_dist

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Offsets of the local variables addressed as [rsp + offset] after the
; routine's "sub rsp, stack_size". Each slot is 8 bytes wide.
f_end_i_mem_offset	equ 0    ; loop bound for f_i: input end minus LAST_BYTES_COUNT
				 ; during the main loop, the full input end afterwards
m_out_end		equ 8    ; icf_buf_next + icf_buf_avail_out - 4
m_out_start		equ 16   ; value of icf_buf_next on entry
dist_mask_offset	equ 24   ; copy of the stream's internal-state dist_mask
hash_mask_offset	equ 32   ; copy of the stream's internal-state hash_mask
stack_size		equ 5*8  ; five slots

%xdefine METHOD hash_hist

[bits 64]
default rel
section .text

; void isal_deflate_icf_finish ( isal_zstream *stream )
; arg 1: addr of stream (taken from rcx when built for win64, rdi otherwise)
;
; Consumes the input that remains in the stream, starting at index
; f_i = stream total_in. For each position it either emits a literal record
; or, when the hash table yields a usable earlier position, a combined
; length/distance record. Records are emitted with write_dword at m_out_buf,
; which starts at [level_buf + _icf_buf_next]. Every record also increments
; a counter in lit_len_hist, and match records one in dist_hist.
;
; The main loop (.loop2) stops LAST_BYTES_COUNT bytes before the end of the
; input; the remaining bytes are emitted as literals by .final_bytes.
;
; On exit total_in, block_end, next_in, avail_in, icf_buf_next and
; icf_buf_avail_out are written back. The internal state is set to
; ZSTATE_CREATE_HDR when output space ran out with input still pending, or
; when all input was consumed and end_of_stream or a flush is requested.
;
; NOTE(review): compute_hash, compare, get_dist_icf_code, write_dword,
; PUSH_ALL, POP_ALL and endbranch are macros from the included files; what
; is said about them below is inferred from how they are used here.
global isal_deflate_icf_finish_ %+ METHOD %+ _01
isal_deflate_icf_finish_ %+ METHOD %+ _01:
	endbranch
	PUSH_ALL	rbx, rsi, rdi, rbp, r12, r13, r14, r15
	sub	rsp, stack_size		; room for the five 8-byte local slots

	; The stream pointer arrives in a different register per output format.
%ifidn __OUTPUT_FORMAT__, win64
	mov	stream, rcx
%else
	mov	stream, rdi
%endif

	; Fetch the distance and hash masks and set up the output window:
	;   m_out_buf           = [level_buf + _icf_buf_next]
	;   [rsp + m_out_start] = initial m_out_buf
	;   [rsp + m_out_end]   = m_out_buf + icf_buf_avail_out - 4
	mov	tmp2 %+ d, dword [stream + _internal_state_dist_mask]
	mov	tmp3 %+ d, dword [stream + _internal_state_hash_mask]
	mov	level_buf, [stream + _level_buf]
	mov	m_out_buf, [level_buf + _icf_buf_next]
	mov	[rsp + m_out_start], m_out_buf
	mov	tmp1, [level_buf + _icf_buf_avail_out]
	add	tmp1, m_out_buf
	sub	tmp1, 4

	; The masks were loaded with 32-bit moves, so the upper halves of the
	; 64-bit values stored here are zero.
	mov     [rsp + dist_mask_offset], tmp2
	mov	[rsp + hash_mask_offset], tmp3
	mov	[rsp + m_out_end], tmp1

	; NOTE(review): hufftables is not referenced again in the code visible
	; in this file; confirm whether any included macro relies on it.
	mov	hufftables, [stream + _hufftables]

	; file_start = next_in - total_in, so [file_start + f_i] addresses the
	; input byte at index f_i.
	mov	file_start, [stream + _next_in]

	mov	f_i %+ d, dword [stream + _total_in]
	sub	file_start, f_i

	; f_end_i = total_in + avail_in - LAST_BYTES_COUNT (main-loop bound)
	mov	f_end_i %+ d, dword [stream + _avail_in]
	add	f_end_i, f_i

	sub	f_end_i, LAST_BYTES_COUNT
	mov	[rsp + f_end_i_mem_offset], f_end_i
	; for (f_i = f_start_i; f_i < f_end_i; f_i++) {
	; Signed compare: the bound can be negative when fewer than
	; LAST_BYTES_COUNT bytes of input exist in total.
	cmp	f_i, f_end_i
	jge	.end_loop_2

	mov	curr_data %+ d, [file_start + f_i]

	; With no history yet, the first position cannot match anything: record
	; its hash, mark the stream as having history and emit it as a literal.
	cmp	byte [stream + _internal_state_has_hist], IGZIP_NO_HIST
	jne	.skip_write_first_byte

	; Output full check (see .loop2)
	cmp	m_out_buf, [rsp + m_out_end]
	ja	.end_loop_2

	mov	hmask1 %+ d, [rsp + hash_mask_offset]
	compute_hash	hash, curr_data
	and	hash %+ d, hmask1 %+ d
	mov	[hash_table + 2 * hash], f_i %+ w
	mov	byte [stream + _internal_state_has_hist], IGZIP_HIST
	jmp	.encode_literal

.skip_write_first_byte:

.loop2:
	; Reload both masks each iteration: tmp3 is reused as scratch further
	; down in the loop body.
	mov	tmp3 %+ d, [rsp + dist_mask_offset]
	mov	hmask1 %+ d, [rsp + hash_mask_offset]
	; Stop when the output is full: m_out_buf > m_out_end means fewer than
	; 4 bytes of output space remain.
	cmp	m_out_buf, [rsp + m_out_end]
	ja	.end_loop_2

	; hash = compute_hash(state->file_start + f_i) & hash_mask;
	mov	curr_data %+ d, [file_start + f_i]
	compute_hash	hash, curr_data
	and	hash %+ d, hmask1 %+ d

	; f_index = state->head[hash];   (table entries are 16 bits wide)
	movzx	f_index %+ d, word [hash_table + 2 * hash]

	; state->head[hash] = (uint16_t) f_i;
	mov	[hash_table + 2 * hash], f_i %+ w

	; dist = f_i - f_index; // mod 64k
	mov	dist %+ d, f_i %+ d
	sub	dist %+ d, f_index %+ d
	and	dist %+ d, 0xFFFF

	; Accept the candidate only if (dist - 1) < dist_mask, unsigned.
	; dist == 0 wraps dist - 1 to 0xFFFFFFFF and so takes the literal path.
	mov	tmp1 %+ d, dist %+ d
	sub	tmp1 %+ d, 1
	cmp	tmp1 %+ d, tmp3 %+ d
	jae	.encode_literal

	; Bytes left up to the real end of input (the stored bound is
	; LAST_BYTES_COUNT short of it):
	; len = f_end_i - f_i;
	mov	tmp4, [rsp + f_end_i_mem_offset]
	sub	tmp4, f_i
	add	tmp4, LAST_BYTES_COUNT

	; if (len > 258) len = 258;
	; cmov takes no immediate operand, hence the constant in memory.
	cmp	tmp4, 258
	cmovg	tmp4, [c258]

	; len = compare(state->file_start + f_i,
	;               state->file_start + f_i - dist, len);
	; tmp1 = current position, tmp2 = candidate position, tmp4 = limit,
	; result in len, tmp3 passed as scratch.
	lea	tmp1, [file_start + f_i]
	mov	tmp2, tmp1
	sub	tmp2, dist
	compare	tmp4, tmp1, tmp2, len, tmp3

	; if (len >= SHORTEST_MATCH) {
	cmp	len, SHORTEST_MATCH
	jb	.encode_literal

	;; encode as dist/len

	; code2 = distance part of the record, computed from dist - 1
	dec	dist
	get_dist_icf_code	dist, code2, tmp3 ;; clobbers dist, rcx

	;; get_len_code: the length symbol is len + 254
	lea	code, [len + 254]

	; hmask2 shares r10 with dist, which is no longer needed here.
	mov	hmask2 %+ d, [rsp + hash_mask_offset]

	; Merge the length symbol into the record and count it.
	or	code2, code
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*code]

	; Advance f_i past the whole match. Only positions k = old f_i + 1 and
	; k + 1 are entered into the hash table, and only when the new f_i is
	; still below the main-loop bound (unsigned compare).
	; NOTE(review): the bound check presumably also keeps the two dword
	; loads below inside the input buffer - confirm.
	lea	tmp3, [f_i + 1]	; tmp3 <= k
	add	f_i, len
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jae	.skip_hash_update

	; only update hash twice

	; tmp6 shares rbx with code, which was already merged into code2.
	; hash = compute_hash(state->file_start + k) & hash_mask;
	mov	tmp6 %+ d, dword [file_start + tmp3]
	compute_hash	hash, tmp6
	and	hash %+ d, hmask2 %+ d
	; state->head[hash] = k;
	mov	[hash_table + 2 * hash], tmp3 %+ w

	add	tmp3, 1

	; hash = compute_hash(state->file_start + k) & hash_mask;
	mov	tmp6 %+ d, dword [file_start + tmp3]
	compute_hash	hash, tmp6
	and	hash %+ d, hmask2 %+ d
	; state->head[hash] = k;
	mov	[hash_table + 2 * hash], tmp3 %+ w

.skip_hash_update:
	; Emit the match record, then pull the 5-bit field at DIST_OFFSET back
	; out of it to index the distance histogram.
	write_dword	code2, m_out_buf
	shr	code2, DIST_OFFSET
	and	code2, 0x1F
	inc	dword [dist_hist + HIST_ELEM_SIZE*code2]
	; continue while f_i is below the main-loop bound
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.loop2
	jmp	.end_loop_2

.encode_literal:
	; Emit the byte at f_i as a literal: count it in lit_len_hist, tag the
	; value with LIT and write the record.
	movzx	tmp5, byte [file_start + f_i]
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
	or	tmp5, LIT
	write_dword	tmp5, m_out_buf
	; continue
	add	f_i, 1
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.loop2

.end_loop_2:
	; Move the stored bound forward to the real end of the input.
	mov	f_end_i, [rsp + f_end_i_mem_offset]
	add	f_end_i, LAST_BYTES_COUNT
	mov	[rsp + f_end_i_mem_offset], f_end_i
	; if ((f_i >= f_end_i) && ! state->bitbuf.is_full()) {
	cmp	f_i, f_end_i
	jge	.input_end

	; tmp5 is fully overwritten by the movzx at the top of the loop below.
	xor	tmp5, tmp5
.final_bytes:
	; Remaining bytes are emitted as literals only, as long as at least
	; 4 bytes of output space are left.
	cmp	m_out_buf, [rsp + m_out_end]
	ja	.out_end

	movzx	tmp5, byte [file_start + f_i]
	inc	dword [lit_len_hist + HIST_ELEM_SIZE*tmp5]
	or	tmp5, LIT
	write_dword	tmp5, m_out_buf

	inc	f_i
	cmp	f_i, [rsp + f_end_i_mem_offset]
	jl	.final_bytes

.input_end:
	; All input consumed: change state only if the stream is ending or a
	; flush was requested.
	cmp	word [stream + _end_of_stream], 0
	jne	.out_end
	cmp	word [stream + _flush], _NO_FLUSH
	jne	.out_end
	jmp .end

.out_end:
	mov	dword [stream + _internal_state_state], ZSTATE_CREATE_HDR
.end:
	;; Update input buffer
	; total_in = block_end = f_i; next_in = file_start + f_i;
	; avail_in = (end of input) - f_i
	mov	f_end_i, [rsp + f_end_i_mem_offset]
	mov	[stream + _total_in], f_i %+ d
	mov	[stream + _internal_state_block_end], f_i %+ d

	add	file_start, f_i
	mov	[stream + _next_in], file_start
	sub	f_end_i, f_i
	mov	[stream + _avail_in], f_end_i %+ d

	;; Update output buffer
	mov	[level_buf + _icf_buf_next], m_out_buf

	;    len = bytes written = m_out_buf - initial m_out_buf
	sub	m_out_buf, [rsp + m_out_start]

	;    icf_buf_avail_out -= len;
	sub	[level_buf + _icf_buf_avail_out], m_out_buf

	add	rsp, stack_size
	POP_ALL
	ret

section .data
	; The constant is read as a qword, so give it qword alignment.
	align 8
; 258 held in memory for the "cmovg tmp4, [c258]" length cap: cmov has no
; immediate-operand form.
c258:	dq	258