summaryrefslogtreecommitdiffstats
path: root/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S
blob: 46847d3443169cbf6a50233166507a82c442f877 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
/**********************************************************************
  Copyright(c) 2019 Arm Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Arm Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
	// AArch64 (ARMv8-A) GNU assembler source. The file is run through the
	// C preprocessor first: it relies on the include and define directives
	// below.
	.arch armv8-a
	.text
	.align	2
#include "lz0a_const_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

// Selects how the distance tables in rfc_lookup_table are read:
//   1 - preload them into v1-v7 and look entries up with the NEON TBL
//       instruction
//   0 - read them from memory with ldrb/ldrh
#define ENABLE_TBL_INSTRUCTION	1

/*
 * Assembly-time structure layout helpers.
 *
 * START_STRUCT(name)       resets the running offset and the running maximum
 *                          alignment to 0.
 * FIELD(name,size,align)   rounds the running offset up to a multiple of
 *                          align (align must be a power of two for the mask
 *                          arithmetic to work), defines the symbol name as
 *                          that offset, advances the offset by size and
 *                          records align if it is the largest seen so far.
 * END_STRUCT(name)         publishes _<name>_size (the running offset; it is
 *                          NOT rounded up to the structure alignment) and
 *                          _<name>_align (the largest field alignment).
 * CONST(name,value)        defines an absolute symbol.
 *
 * The statements inside one macro are separated with ';' so that a whole
 * expansion fits on one logical line.
 */
#define FIELD(name,size,align) \
	.set _FIELD_OFFSET,(_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)); \
	.equ name,_FIELD_OFFSET ;					\
	.set _FIELD_OFFSET,_FIELD_OFFSET + size;			\
	.if align > _STRUCT_ALIGN;					\
		.set _STRUCT_ALIGN, align;				\
	.endif;

#define START_STRUCT(name) .set _FIELD_OFFSET,0;.set _STRUCT_ALIGN,0;

#define END_STRUCT(name)    .set _##name##_size , _FIELD_OFFSET;\
			    .set _##name##_align,_STRUCT_ALIGN

#define CONST(name,value)  .equ name,value

// Number of low input bits used to index the short lookup table of the
// literal/length code (LONG) and of the distance code (SHORT).
#define ISAL_DECODE_LONG_BITS 12
#define ISAL_DECODE_SHORT_BITS 10

// Sizing of the long-code (overflow) tables. With the values above:
//   L_REM = 9, L_DUP = 502, L_UNUSED = 465, L_SIZE = 1253
//   S_REM = 5, S_DUP = 26,  S_UNUSED = 21,  S_SIZE = 77
// The *_ALIGNED / MAX_LONG_CODE_* values round those sizes up to a multiple
// of 16 entries: 1264 and 80.
// NOTE(review): the 21 and 15 are presumably the longest code lengths the
// two tables have to cover (21 equals MAX_LIT_LEN_CODE_LEN, 15 equals
// ISAL_DEF_MAX_CODE_LEN, both defined further down) - confirm against the
// C headers.
#define L_REM (21 - ISAL_DECODE_LONG_BITS)
#define S_REM (15 - ISAL_DECODE_SHORT_BITS)
#define L_DUP ((1 << L_REM) - (L_REM + 1))
#define S_DUP ((1 << S_REM) - (S_REM + 1))
#define L_UNUSED ((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1)
#define S_UNUSED ((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1)
#define L_SIZE (286 + L_DUP + L_UNUSED)
#define S_SIZE (30 + S_DUP + S_UNUSED)
// HUFF_CODE_*_LONG_ALIGNED and MAX_LONG_CODE_* are defined by the same
// expression; only MAX_LONG_CODE_* is used by the layouts in this file.
#define HUFF_CODE_LARGE_LONG_ALIGNED (L_SIZE + (-L_SIZE & 0xf))
#define HUFF_CODE_SMALL_LONG_ALIGNED (S_SIZE + (-S_SIZE & 0xf))
#define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf))
#define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf))
// Size in bytes of one entry of each lookup table.
#define LARGE_SHORT_CODE_SIZE 4
#define LARGE_LONG_CODE_SIZE 2
#define SMALL_SHORT_CODE_SIZE 2
#define SMALL_LONG_CODE_SIZE 2


// inflate_huff_code (large variant, used for the literal/length code)
//   _short_code_lookup_large : (1 << ISAL_DECODE_LONG_BITS) entries of
//                              LARGE_SHORT_CODE_SIZE bytes
//   _long_code_lookup_large  : MAX_LONG_CODE_LARGE entries of
//                              LARGE_LONG_CODE_SIZE bytes
// Each table is aligned to the size of its own entry. The two alignment
// arguments used to be swapped between the fields; the resulting offsets,
// structure size and structure alignment are the same either way, so this
// only makes the declaration say what it means.
START_STRUCT( inflate_huff_code_large )
	//	 name	size	align
	FIELD ( _short_code_lookup_large,	LARGE_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_LONG_BITS)),	LARGE_SHORT_CODE_SIZE )
	FIELD ( _long_code_lookup_large,	LARGE_LONG_CODE_SIZE*MAX_LONG_CODE_LARGE,	LARGE_LONG_CODE_SIZE )
END_STRUCT(inflate_huff_code_large)

// inflate_huff_code (small variant, used for the distance code)
//   _short_code_lookup_small : (1 << ISAL_DECODE_SHORT_BITS) entries of
//                              SMALL_SHORT_CODE_SIZE bytes
//   _long_code_lookup_small  : MAX_LONG_CODE_SMALL entries of
//                              SMALL_LONG_CODE_SIZE bytes
START_STRUCT( inflate_huff_code_small )
	//	 name	size	align
	FIELD ( _short_code_lookup_small,	SMALL_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_SHORT_BITS)),	SMALL_SHORT_CODE_SIZE )
	FIELD ( _long_code_lookup_small,	SMALL_LONG_CODE_SIZE*MAX_LONG_CODE_SMALL,	SMALL_LONG_CODE_SIZE )
END_STRUCT(inflate_huff_code_small)


// inflate_state
// Byte offsets of the decoder state fields accessed through the state
// pointer.
// NOTE(review): this presumably has to mirror the C definition of
// struct inflate_state field for field - confirm against the C header
// whenever either side changes.
// The decoder in this file depends on the following properties of the layout:
//   - _next_out is at offset 0 (it is accessed as [state])
//   - (_avail_out,_total_out), (_next_in,_read_in),
//     (_avail_in,_read_in_length), (_write_overflow_lits,_write_overflow_len)
//     and (_copy_overflow_len,_copy_overflow_dist) are adjacent pairs, since
//     each pair is moved with one ldp/stp (or one 8-byte str)
//   - _lit_huff_code is a multiple of 4 and _dist_huff_code a multiple of 2,
//     since they are folded into scaled table indices
START_STRUCT( inflate_state )
	//	 name	size	align
	FIELD ( _next_out,	8,	8 )
	FIELD ( _avail_out,	4,	4 )
	FIELD ( _total_out,	4,	4 )
	FIELD ( _next_in,	8,	8 )
	FIELD ( _read_in,	8,	8 )
	FIELD ( _avail_in,	4,	4 )
	FIELD ( _read_in_length,	4,	4 )
	FIELD ( _lit_huff_code,	_inflate_huff_code_large_size,	_inflate_huff_code_large_align )
	FIELD ( _dist_huff_code,	_inflate_huff_code_small_size,	_inflate_huff_code_small_align )
	FIELD ( _block_state,	4,	4 )
	FIELD ( _dict_length,	4,	4 )
	FIELD ( _bfinal,	4,	4 )
	FIELD ( _crc_flag,	4,	4 )
	FIELD ( _crc,	4,	4 )
	FIELD ( _hist_bits,	4,	4 )
	FIELD ( _type0_block_len,	4,	4 )
	FIELD ( _write_overflow_lits,	4,	4 )
	FIELD ( _write_overflow_len,	4,	4 )
	FIELD ( _copy_overflow_len,	4,	4 )
	FIELD ( _copy_overflow_dist,	4,	4 )
END_STRUCT(inflate_state)

// Offsets of the four lookup tables measured from the start of inflate_state.
CONST( _lit_huff_code_short_code_lookup , _lit_huff_code+_short_code_lookup_large )
CONST( _lit_huff_code_long_code_lookup , _lit_huff_code+_long_code_lookup_large )
CONST( _dist_huff_code_short_code_lookup , _dist_huff_code+_short_code_lookup_small )
CONST( _dist_huff_code_long_code_lookup , _dist_huff_code+_long_code_lookup_small )
// Values of the block_state field. The decoder below runs only while the
// field equals ISAL_BLOCK_CODED, and at an end-of-block symbol it writes
// ISAL_BLOCK_INPUT_DONE (final block) or ISAL_BLOCK_NEW_HDR (otherwise).
CONST( ISAL_BLOCK_NEW_HDR , 0 )
CONST( ISAL_BLOCK_HDR , 1 )
CONST( ISAL_BLOCK_TYPE0 , 2 )
CONST( ISAL_BLOCK_CODED , 3 )
CONST( ISAL_BLOCK_INPUT_DONE , 4 )
CONST( ISAL_BLOCK_FINISH , 5 )

/* Inflate Return values */
/* The decoder in this file returns ISAL_DECOMP_OK, ISAL_END_INPUT, */
/* ISAL_OUT_OVERFLOW, ISAL_INVALID_SYMBOL or ISAL_INVALID_LOOKBACK in w0. */
#define ISAL_DECOMP_OK 0	/* No errors encountered while decompressing */
#define ISAL_END_INPUT 1	/* End of input reached */
#define ISAL_OUT_OVERFLOW 2	/* End of output reached */
#define ISAL_NAME_OVERFLOW 3	/* End of gzip name buffer reached */
#define ISAL_COMMENT_OVERFLOW 4	/* End of gzip comment buffer reached */
#define ISAL_EXTRA_OVERFLOW 5	/* End of extra buffer reached */
#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */
#define ISAL_INVALID_BLOCK -1	/* Invalid deflate block found */
#define ISAL_INVALID_SYMBOL -2	/* Invalid deflate symbol found */
#define ISAL_INVALID_LOOKBACK -3	/* Invalid lookback distance found */
#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */
#define ISAL_UNSUPPORTED_METHOD -5	/* Gzip/zlib wrapper specifies unsupported compress method */
#define ISAL_INCORRECT_CHECKSUM -6 /* Incorrect checksum found */


// Fewest buffered bits the decoder wants before a symbol lookup; at or
// below this count it first tries to refill the bit buffer.
#define ISAL_DEF_MAX_CODE_LEN 15

// Entry of the literal/length short table (32 bits), as decoded below:
//   bits  0-24  packed symbols, or the base index into the long table when
//               the flag bit is set
//   bit   25    flag: the entry redirects to the long table
//   bits 26-27  number of packed symbols (entry without the flag bit)
//   bits 26-31  number of input bits that select the long entry (entry with
//               the flag bit)
//   bits 28-31  code length in bits (entry without the flag bit)
// Entry of the literal/length long table (16 bits):
//   bits  0-9   symbol
//   bits 10-15  code length in bits
#define LARGE_SHORT_SYM_LEN 25
#define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1)
#define LARGE_LONG_SYM_LEN 10
#define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1)
#define LARGE_SHORT_CODE_LEN_OFFSET 28
#define LARGE_LONG_CODE_LEN_OFFSET 10
#define LARGE_FLAG_BIT_OFFSET 25
#define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
#define LARGE_SYM_COUNT_OFFSET 26
#define LARGE_SYM_COUNT_LEN 2
#define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
#define LARGE_SHORT_MAX_LEN_OFFSET 26

// Entry of the distance short table (16 bits), as decoded below:
//   bits  0-8   symbol, or the base index into the long table when the flag
//               bit is set
//   bit   10    flag: the entry redirects to the long table
//   bits 11-15  code length (no flag) or number of selecting bits (flag)
// Entry of the distance long table (16 bits):
//   bits 10-15  code length in bits
#define SMALL_SHORT_SYM_LEN 9
#define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
#define SMALL_LONG_SYM_LEN 9
#define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
#define SMALL_SHORT_CODE_LEN_OFFSET 11
#define SMALL_LONG_CODE_LEN_OFFSET 10
#define SMALL_FLAG_BIT_OFFSET 10
#define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)

// Fields of a decoded distance symbol. The decoder keeps only the low
// DIST_SYM_LEN bits (DIST_SYM_MASK).
#define DIST_SYM_OFFSET 0
#define DIST_SYM_LEN 5
#define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1)
#define DIST_SYM_EXTRA_OFFSET 5
#define DIST_SYM_EXTRA_LEN 4
#define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)

#define MAX_LIT_LEN_CODE_LEN 21
#define MAX_LIT_LEN_COUNT (MAX_LIT_LEN_CODE_LEN + 2)
// Largest literal/length value the decoder accepts; anything above is
// reported as ISAL_INVALID_SYMBOL.
#define MAX_LIT_LEN_SYM 512
#define LIT_LEN_ELEMS 514

// Substituted for a table entry whose code length is 0.
#define INVALID_SYMBOL 0x1FFF
#define INVALID_CODE 0xFFFFFF

#define MIN_DEF_MATCH 3

// The derived values are parenthesised so that they keep their meaning when
// a macro is used inside a larger expression (for example multiplied or
// shifted).
#define TRIPLE_SYM_FLAG 0
#define DOUBLE_SYM_FLAG (TRIPLE_SYM_FLAG + 1)
#define SINGLE_SYM_FLAG (DOUBLE_SYM_FLAG + 1)
#define DEFAULT_SYM_FLAG TRIPLE_SYM_FLAG

#define SINGLE_SYM_THRESH (2 * 1024)
#define DOUBLE_SYM_THRESH (4 * 1024)


/*
declare Macros

declare_generic_reg name, reg, default
	Gives register number reg three aliases:
		name	the register in its default width; default is the
			letter x (64 bit) or w (32 bit)
		w_name	the 32-bit view of the same register
		x_name	the 64-bit view of the same register
*/

.macro	declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm


// Append one input byte to the bit buffer:
//	if (read_in_length <= 56 && avail_in != 0) {
//		read_in |= (uint64_t)*next_in++ << read_in_length;
//		avail_in--;
//		read_in_length += 8;
//	}
// When either condition fails the macro branches forward to the next local
// label 1:, which the expansion site has to provide (this macro does not
// define it).
// Clobbers: temp, condition flags.
.macro	inflate_in_load_read_byte
	cmp	read_in_length,56
	bgt	1f
	cbz	avail_in,1f
	ldrb	w_temp,[next_in],1
	sub	avail_in,avail_in,1
	lsl	temp,temp,x_read_in_length
	orr	read_in,read_in,temp
	add	read_in_length,read_in_length,8
	uxtw	read_in_length,read_in_length

.endm

// Refill the bit buffer read_in from the input stream.
//   - Nothing is done when read_in_length is already above 63.
//   - With at least 8 input bytes available (label 2) one 8-byte load is
//     OR-ed in above the bits already buffered and only
//     new_bytes = 8 - (read_in_length + 7) / 8 whole bytes are accounted
//     for in next_in, avail_in and read_in_length.
//   - Otherwise the input is appended one byte at a time, at most 7 times,
//     stopping early when the buffer holds more than 56 bits or the input
//     is exhausted.
// Uses the numeric local labels 1 and 2, so it can be expanded repeatedly.
// Clobbers: temp, new_bytes, condition flags.
.macro	inflate_in_load

	cmp	read_in_length, 63
	bgt	1f

	/*if (state->avail_in >= 8) */
	cmp	avail_in, 7
	bhi	2f

	// loop max 7 times
	// while (state->read_in_length < 57 && state->avail_in > 0)
	inflate_in_load_read_byte
	inflate_in_load_read_byte
	inflate_in_load_read_byte
	inflate_in_load_read_byte
	inflate_in_load_read_byte
	inflate_in_load_read_byte
	inflate_in_load_read_byte
	b 1f
2:
	// new_bytes = 8 - (read_in_length + 7) / 8
	add	new_bytes,read_in_length,7
	mov	w_temp,8
	lsr	new_bytes,new_bytes,3
	sub	new_bytes,w_temp,new_bytes
	// read_in |= *(uint64_t *)next_in << read_in_length
	ldr	temp,[next_in]
	lsl	temp,temp,x_read_in_length
	orr	read_in,read_in,temp
	// consume new_bytes whole bytes
	add	next_in,next_in,new_bytes,uxtb
	add	read_in_length,read_in_length,new_bytes,lsl 3
	sub	avail_in,avail_in,new_bytes

1:
.endm

// One step of the unrolled match copy: copy 4 bytes from [arg1] to
// [next_out], advance both pointers, subtract 4 from repeat_length and
// branch to load_byte_less_than_4 once 3 or fewer bytes remain.
// Clobbers: w_arg0, condition flags.
.macro copy_word
	sub	repeat_length,repeat_length,#4
	ldr	w_arg0, [arg1],4
	cmp	repeat_length, 3
	str	w_arg0, [next_out],4
	bls	load_byte_less_than_4
.endm


	.global	decode_huffman_code_block_stateless_aarch64
	.type	decode_huffman_code_block_stateless_aarch64, %function
/*
	int decode_huffman_code_block_stateless_aarch64(
				struct inflate_state *state,
				uint8_t * start_out)

	The status is returned in w0 (one of the ISAL_* return values defined
	above); every exit path of the routine sets it.
*/
	// Register aliases used by the routine. Each declaration creates
	// name, w_name and x_name for the given register number.
	// x19-x28 are callee-saved under AAPCS64.
	// NOTE(review): push_stack/pop_stack are defined outside this file and
	// presumably save and restore them - confirm in stdmac_aarch64.h.
	// NOTE(review): x18 (start_out) is a platform-reserved register on
	// some operating systems - confirm for every target this is built for.
	declare_generic_reg	arg0		0, x	// 1st argument, later copy scratch
	declare_generic_reg	arg1		1, x	// 2nd argument, later copy source
	declare_generic_reg	arg2		2, x	// end pointer of the byte copy

	declare_generic_reg	state,		11,x	// copy of the state pointer
	declare_generic_reg	start_out,	18,x	// copy of the start_out argument

	declare_generic_reg	read_in,	3,x	// bit buffer
	declare_generic_reg	read_in_length,	4,w	// number of bits in read_in
	declare_generic_reg	sym_count,	5,w	// symbols packed in next_lits
	// next_bits and next_lits share register 6
	declare_generic_reg	next_bits,	6,w	// table index
	declare_generic_reg	next_lits,	6,w	// packed decoded symbols
	declare_generic_reg	avail_in,	20,w
	declare_generic_reg	next_in,	23,x

	declare_generic_reg	temp,		16,x	//local temp variable
	declare_generic_reg	new_bytes,	7,w	//temp variable
	declare_generic_reg	copy_overflow_length,		28,w



	declare_generic_reg	block_state,	8,w
	declare_generic_reg	block_state_adr,9,x	// address of state->block_state
	declare_generic_reg	look_back_dist,	10,w	// match distance
	declare_generic_reg	bfinal,		22,x

	declare_generic_reg	next_out,	12,x
	declare_generic_reg	avail_out,	13,w
	declare_generic_reg	total_out,	14,w

	declare_generic_reg	rfc_table,	15,x	// pointer into rfc_lookup_table
	// next_sym and next_dist share register 17
	declare_generic_reg	next_sym,	17,w	// raw table entry
	declare_generic_reg	next_dist,	17,w	// decoded distance symbol
	declare_generic_reg	bit_count,	19,w	// bits consumed by a code

	declare_generic_reg	bit_mask,	21,w
	declare_generic_reg	next_lit,	24,w	// symbol being processed
	declare_generic_reg	write_overflow_len,25,w
	declare_generic_reg	write_overflow_lits,26,w
	declare_generic_reg	repeat_length,27,w	// match length

/*
	Decode literal/length and distance symbols for as long as
	state->block_state equals ISAL_BLOCK_CODED.

	In:	x0 = state (struct inflate_state *)
		x1 = start_out, lowest address a match may copy from
	Out:	w0 = ISAL_DECOMP_OK		block_state left ISAL_BLOCK_CODED
		     ISAL_END_INPUT		input ran out inside a symbol; the
						state in memory is the snapshot taken
						before that symbol was started
		     ISAL_OUT_OVERFLOW		output buffer is full
		     ISAL_INVALID_SYMBOL	undecodable symbol
		     ISAL_INVALID_LOOKBACK	match reaches below start_out

	Working registers are the aliases declared above this routine.
	v0-v7 are used when ENABLE_TBL_INSTRUCTION is set.
	push_stack/pop_stack come from outside this file and are presumed to
	save and restore the callee-saved registers used here.
*/
decode_huffman_code_block_stateless_aarch64:
	//save registers
	push_stack

	//load variables
	mov	state,arg0
	mov	block_state,_block_state
	mov	start_out,arg1
	add	block_state_adr,state,block_state,uxtw
	ldr	block_state,	[block_state_adr]
	// 32-bit load: bfinal is a 4-byte field and crc_flag is stored right
	// behind it. A 64-bit load would place crc_flag in bits 32-63 of the
	// register and break every later test of bfinal against zero.
	ldr	w_bfinal,	[block_state_adr,_bfinal-_block_state]

	ldr	next_out, [state]
	ldp	avail_out,total_out,[state,_avail_out]
	ldp	next_in,  read_in,  [state,_next_in]
	ldp	avail_in, read_in_length, [state,_avail_in]
	ldp	write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]

	//init rfc_table
	adrp	rfc_table,rfc_lookup_table
	add	rfc_table,rfc_table,:lo12:rfc_lookup_table
#if ENABLE_TBL_INSTRUCTION
	// v1    = index offsets (0x00, 0x20) used to fetch both bytes of a
	//         distance base
	// v2-v3 = extra-bit counts per distance symbol
	// v4-v7 = distance bases: low bytes in v4-v5, high bytes in v6-v7
	ld1	{v1.16b,v2.16b,v3.16b},[rfc_table]
	add	rfc_table,rfc_table,48
	ld1	{v4.16b-v7.16b},[rfc_table]

#endif

	/*
		state->copy_overflow_length = 0;
		state->copy_overflow_distance = 0;
	*/
	mov	x_copy_overflow_length,xzr
	// one 8-byte store clears both adjacent 4-byte fields
	str	xzr,[block_state_adr,_copy_overflow_len-_block_state]

	/* while (state->block_state == ISAL_BLOCK_CODED) */
block_state_loop:
	cmp	block_state ,ISAL_BLOCK_CODED
	bne	exit_func_success

	inflate_in_load

	/* save state here  */
	// This snapshot is what the caller sees when the symbol started below
	// cannot be completed (end_input returns without storing anything).
	str	next_out, [state]
	stp	avail_out,total_out,[state,_avail_out]
	stp	next_in,  read_in,  [state,_next_in]
	stp	avail_in, read_in_length, [state,_avail_in]
	stp	write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]

	/*
	decode_next_lit_len(&next_lits, &sym_count,
				state, &state->lit_huff_code,
				&temp_dat, &temp_bytes);
	*/
	cmp	read_in_length,ISAL_DEF_MAX_CODE_LEN
	ble	inflate_in_load_decode
decode_next_lit_len_start:
	and	x_next_bits,read_in,((1 << ISAL_DECODE_LONG_BITS) - 1)
	/*next_sym = huff_code->short_code_lookup[next_bits];*/
	// the table offset is folded into the index, which is scaled by 4
	add	next_bits,next_bits,_lit_huff_code>>2
	ldr	next_sym,[state,x_next_bits,lsl 2]
	/*if ((next_sym & LARGE_FLAG_BIT) == 0) {*/
	tbnz	next_sym,LARGE_FLAG_BIT_OFFSET,long_code_lookup_routine
	lsr	bit_count,next_sym,LARGE_SHORT_CODE_LEN_OFFSET
	sub	read_in_length,read_in_length,bit_count
	lsr	read_in,read_in,x_bit_count
	// a code length of 0 marks an unusable entry: substitute
	// INVALID_SYMBOL, whose symbol-count field is 0
	mov	temp,INVALID_SYMBOL
	cmp	bit_count,0
	csel	next_sym,next_sym,w_temp,ne
	ubfx	sym_count,next_sym,LARGE_SYM_COUNT_OFFSET,LARGE_SYM_COUNT_LEN
	and	next_lits,next_sym,LARGE_SHORT_SYM_MASK
	b	decode_next_lit_len_end
long_code_lookup_routine:
	// bit_mask = (1 << (next_sym >> LARGE_SHORT_MAX_LEN_OFFSET)) - 1
	lsr	bit_mask,next_sym,LARGE_SHORT_MAX_LEN_OFFSET
	mov	sym_count,1
	and	next_sym,next_sym,LARGE_SHORT_SYM_MASK
	mov	temp,LARGE_LONG_SYM_MASK
	lsl	bit_mask,sym_count,bit_mask
	sub	bit_mask,bit_mask,1
	and	x_next_bits,read_in,x_bit_mask
	// index = base index + (next_bits >> ISAL_DECODE_LONG_BITS)
	add	next_bits,next_sym,next_bits,lsr ISAL_DECODE_LONG_BITS
	mov	next_sym,(_lit_huff_code+_long_code_lookup_large)>>1
	add	next_bits,next_bits,next_sym
	ldrh	next_sym,[state,x_next_bits,lsl 1]
	lsr	bit_count,next_sym,LARGE_LONG_CODE_LEN_OFFSET
	sub	read_in_length,read_in_length,bit_count
	and	next_lits,next_sym,w_temp
	lsr	read_in,read_in,x_bit_count
	// a code length of 0 yields the all-ones symbol, which is above
	// MAX_LIT_LEN_SYM and is therefore rejected further down
	cmp	bit_count,0
	csel	next_lits,next_lits,w_temp,ne
decode_next_lit_len_end:

	/* if (sym_count == 0) */
	cbz	sym_count,invalid_symbol
	// read_in_length < 0: the code needed more bits than were available
	tbnz	read_in_length,31, end_input

	/* while (sym_count > 0) start */
sym_count_loop:
	and	next_lit,next_lits , 0xffff

	/*if (next_lit < 256 || sym_count > 1) {*/
	// the branch is taken only when next_lit > 255 and sym_count == 1
	cmp	next_lit,255
	ccmp	sym_count,1,0,hi
	beq	next_lit_256

	/* if (state->avail_out < 1) { */
	cbnz	avail_out,sym_count_adjust

	// No room for the literal: record the pending symbols, then look at
	// the last symbol of the pack (shift by 8 * (sym_count - 1)).
	mov	write_overflow_len,sym_count
	lsl	sym_count,sym_count,3
	mov	write_overflow_lits,next_lits
	sub	sym_count,sym_count,8
	lsr	next_lits,next_lits,sym_count
	mov	sym_count,1
	cmp	next_lits,255
	bls	isal_out_overflow
	// the last symbol is not a literal, so it is not counted as pending
	cmp	next_lits,256
	sub	write_overflow_len,write_overflow_len,1
	beq	isal_out_overflow_1
	b	sym_count_loop

sym_count_adjust:
	/*
		while (sym_count > 0) end
		next_lits >>= 8;
		sym_count--;
	*/
	subs	sym_count,sym_count,1
	lsr	next_lits,next_lits,8
	strb	next_lit,[next_out],1
	sub	avail_out,avail_out,1
	add	total_out,total_out,1
	// flags still come from the subs above
	bne	sym_count_loop
	b	block_state_loop

next_lit_256:
	/* if (next_lit == 256) { */
	cmp	next_lit,256
	beq	next_lit_eq_256


	/*
	if (next_lit <= MAX_LIT_LEN_SYM)
		sym_count must be 1
	*/
	cmp	next_lit,MAX_LIT_LEN_SYM
	bhi	invalid_symbol
	sub	repeat_length,next_lit,254
	/*
	    next_dist =
	    decode_next_dist(state, &state->dist_huff_code, &temp_dat,
			     &temp_bytes);
	*/
	cmp	read_in_length,ISAL_DEF_MAX_CODE_LEN
	ble	inflate_in_load_decode_next_dist
decode_next_dist_start:
	and	x_next_bits,read_in,((1 << ISAL_DECODE_SHORT_BITS) - 1)
	// the table offset is folded into the index, which is scaled by 2
	mov	next_sym,_dist_huff_code>>1
	add	next_bits,next_bits,next_sym
	ldrh	next_sym, [state,x_next_bits,lsl 1]
	tbz	next_sym,SMALL_FLAG_BIT_OFFSET,decode_next_dist_flag
	// flagged entry: redirect to the long table
	// bit_mask = (1 << ((next_sym - SMALL_FLAG_BIT) >> SMALL_SHORT_CODE_LEN_OFFSET)) - 1
	sub	bit_mask,next_sym,SMALL_FLAG_BIT
	mov	temp,1
	asr	bit_mask,bit_mask,SMALL_SHORT_CODE_LEN_OFFSET
	and	next_sym,next_sym,SMALL_SHORT_SYM_MASK
	lsl	bit_mask,w_temp,bit_mask
	sub	bit_mask,bit_mask,1
	and	x_next_bits,read_in,x_bit_mask
	add	next_bits,next_sym,next_bits,lsr ISAL_DECODE_SHORT_BITS
	mov	next_sym,(_dist_huff_code + _long_code_lookup_small)>>1
	add	next_bits,next_bits,next_sym
	ldrh	next_sym,[state,x_next_bits,lsl 1]
	lsr	bit_count,next_sym,SMALL_LONG_CODE_LEN_OFFSET
	b	decode_next_dist_adjust
decode_next_dist_flag:
	lsr	bit_count,next_sym,SMALL_SHORT_CODE_LEN_OFFSET
decode_next_dist_adjust:
	sub	read_in_length,read_in_length,bit_count
	lsr	read_in,read_in,x_bit_count
	cbnz	bit_count,decode_next_dist_end
	// code length 0: unusable entry
	sub	read_in_length,read_in_length,next_sym
	mov	next_sym,INVALID_SYMBOL
decode_next_dist_end:
	and	next_sym,next_sym,DIST_SYM_MASK

	tbnz	read_in_length,31,end_input_1
	// distance symbols above 29 are invalid (this also catches the
	// masked INVALID_SYMBOL)
	cmp	next_dist,29
	bhi	invalid_symbol


#if ENABLE_TBL_INSTRUCTION
	// bit_count = extra-bit count of next_dist; only byte 0 of v0 is used
	ins	v0.b[0],next_dist
	tbl	v0.8b,{v2.16b,v3.16b},v0.8b
	umov	bit_count,v0.b[0]
#else
	ldrb	bit_count,[rfc_table,next_dist,sxtw]
#endif

	/*inflate_in_read_bits(state,
		 dist_extra_bit_count, &temp_dat,
		 &temp_bytes);
	*/
	inflate_in_load
	// look_back_dist = read_in & ((1 << bit_count) - 1), then drop the bits
	mov	temp,1
	lsl	temp,temp,x_bit_count
	sub	read_in_length,read_in_length,bit_count
	sub	temp,temp,1
	and	x_look_back_dist,temp,read_in
	lsr	read_in,read_in,x_bit_count
#if ENABLE_TBL_INSTRUCTION
	// byte 0 of v0 indexes the low byte of the distance base, byte 1
	// (index + 0x20, from v1) the high byte; h[0] is the 16-bit base
	dup	v0.8b,next_dist
	add	v0.8b,v1.8b,v0.8b
	tbl	v0.8b,{v4.16b-v7.16b},v0.8b
	umov	next_dist,v0.h[0]
#else
	// the 16-bit bases follow the 32 extra-bit count bytes
	add	next_dist,next_dist,16
	ldrh	next_dist,[rfc_table,x_next_dist,lsl 1]
#endif
	add	look_back_dist,look_back_dist,next_dist

	/*
		if (state->read_in_length < 0) {
	*/
	tbnz	read_in_length,31,end_input_1

	/*
	if (state->next_out - look_back_dist < start_out) {
	*/
	sub	temp,next_out,x_look_back_dist
	cmp	temp,start_out
	bcc	isal_invalid_lookback
	/*
		if (state->avail_out < repeat_length) {
	*/
	cmp	avail_out , repeat_length
	bcs	decompress_data_start
	// the match does not fit: remember the remainder and copy only what
	// fits
	sub	copy_overflow_length,repeat_length,avail_out
	stp	copy_overflow_length,look_back_dist,[block_state_adr,_copy_overflow_len-_block_state]
	mov	repeat_length,avail_out

decompress_data_start:
	add	total_out,total_out,repeat_length
	sub	avail_out,avail_out,repeat_length
	sub	arg1,next_out,x_look_back_dist
	// When the distance does not exceed the length the source overlaps
	// bytes that this copy is about to write, so copy byte by byte.
	cmp	look_back_dist,repeat_length
	bls	byte_copy_start


	cbz	repeat_length,decompress_data_end
	cmp     repeat_length, 3
	bls     load_byte_less_than_4 //0.5% will jump
	// 64 word copies per pass: one here, 62 from copy_word, one more below
load_byte_4:
	sub     repeat_length, repeat_length, #4
	ldr     w_arg0, [arg1],4
	cmp	repeat_length, 3
	str     w_arg0, [next_out],4
	bls     load_byte_less_than_4
	.rept	62
	copy_word
	.endr
	sub     repeat_length, repeat_length, #4
	ldr     w_arg0, [arg1],4
	cmp	repeat_length, 4
	str     w_arg0, [next_out],4
	bge	load_byte_4
	// 0 to 3 bytes left: bit 0 selects a byte copy, bit 1 a halfword copy
load_byte_less_than_4:
	tbz	repeat_length,0,load_byte_2
	ldrb    w_arg0, [arg1],1
	sub     repeat_length, repeat_length, #1
	strb    w_arg0, [next_out],1
load_byte_2:
	tbz	repeat_length,1,decompress_data_end
	ldrh     w_arg0, [arg1],2
	strh     w_arg0, [next_out],2
decompress_data_end:



	/*
	if (state->copy_overflow_length > 0)
	*/
	cmp	copy_overflow_length,0
	bgt	isal_out_overflow
	b	block_state_loop
next_lit_eq_256:
	/*
		state->block_state = state->bfinal ?
				ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
	*/
	// when bfinal is 0 the csel copies that 0, which is ISAL_BLOCK_NEW_HDR
	mov	block_state, ISAL_BLOCK_INPUT_DONE
	cmp	w_bfinal,0
	csel	block_state, block_state, w_bfinal, ne
	str	block_state, [block_state_adr]

	b	block_state_loop
exit_func_success:
	mov	w0 , 0
	// common exit: write the working registers back, w0 holds the status
exit_func:
	str	next_out, [state]
	stp	avail_out,total_out,[state,_avail_out]
	stp	next_in,  read_in,  [state,_next_in]
	stp	avail_in, read_in_length, [state,_avail_in]
	stp	write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]

	pop_stack
	ret
	// Input ran out inside a symbol. Nothing is written back, so the
	// caller sees the snapshot stored at the top of block_state_loop.
end_input_1:
end_input:
	mov	w0,ISAL_END_INPUT
	pop_stack
	ret

invalid_symbol:
	// exit_func writes the modified working registers back to the state
	mov	w0,	ISAL_INVALID_SYMBOL
	b exit_func
	// Output is full and the next pending symbol is the end-of-block
	// marker: update block_state as next_lit_eq_256 does, then report the
	// overflow.
isal_out_overflow_1:

	cmp	w_bfinal,0
	mov	block_state, ISAL_BLOCK_INPUT_DONE
	csel	block_state, block_state, wzr, ne
	str	block_state, [block_state_adr]
isal_out_overflow:
	mov	w0, ISAL_OUT_OVERFLOW

	b	exit_func
isal_invalid_lookback:
	mov	w0, ISAL_INVALID_LOOKBACK
	b	exit_func
	// out-of-line refills, kept off the fall-through path
inflate_in_load_decode:
	inflate_in_load
	b	decode_next_lit_len_start
inflate_in_load_decode_next_dist:
	inflate_in_load
	b	decode_next_dist_start
	// Byte-wise forward copy for overlapping matches; arg2 is the address
	// of the last byte to write.
byte_copy_start:
	add	arg2,next_out,x_repeat_length
	cmp	arg2, next_out
	beq	decompress_data_end
	sub	arg2,arg2,1
byte_copy_loop:
	ldrb	w_arg0, [arg1] , 1
	// compare before the store post-increments next_out
	cmp	arg2, next_out
	strb	w_arg0, [next_out],1
	bne	byte_copy_loop
	b	decompress_data_end
	.size	decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64

	// Distance decoding tables, indexed by distance symbol. The decoder
	// accepts symbols 0 to 29; each table is padded with zeros to 32
	// entries.
	// Layout with ENABLE_TBL_INSTRUCTION set (loaded into v1-v7):
	//   16 bytes  index offsets 0x00 and 0x20, added to the symbol to select
	//             the low and the high byte of its distance base
	//   32 bytes  number of extra bits per symbol
	//   32 bytes  low bytes of the distance bases
	//   32 bytes  high bytes of the distance bases
	// Layout otherwise:
	//   32 bytes  number of extra bits per symbol
	//   32 halfwords  distance bases
	.type	rfc_lookup_table, %object

rfc_lookup_table:
#if ENABLE_TBL_INSTRUCTION
	.byte	0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
	.zero	8
#endif
	//dist_extra_bit_count
	.byte	0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02
	.byte	0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06
	.byte	0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a
	.byte	0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00
	//dist_start
#if ENABLE_TBL_INSTRUCTION
	// low bytes of symbols 0-15, then of symbols 16-31
	.byte 0x01,0x02,0x03,0x04,0x05,0x07,0x09,0x0d,0x11,0x19,0x21,0x31,0x41,0x61,0x81,0xc1
	.byte 0x01,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00
	// high bytes of symbols 0-15, then of symbols 16-31
	.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
	.byte 0x01,0x01,0x02,0x03,0x04,0x06,0x08,0x0c,0x10,0x18,0x20,0x30,0x40,0x60,0x00,0x00
#else
	.short	0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
	.short	0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
	.short	0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
	.short	0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
#endif
	.size	rfc_lookup_table, . - rfc_lookup_table