summaryrefslogtreecommitdiffstats
path: root/src/isa-l/igzip/aarch64
diff options
context:
space:
mode:
Diffstat (limited to 'src/isa-l/igzip/aarch64')
-rw-r--r--src/isa-l/igzip/aarch64/bitbuf2_aarch64.h57
-rw-r--r--src/isa-l/igzip/aarch64/data_struct_aarch64.h226
-rw-r--r--src/isa-l/igzip/aarch64/encode_df.S159
-rw-r--r--src/isa-l/igzip/aarch64/gen_icf_map.S266
-rw-r--r--src/isa-l/igzip/aarch64/huffman_aarch64.h173
-rw-r--r--src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S689
-rw-r--r--src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S261
-rw-r--r--src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S264
-rw-r--r--src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S95
-rw-r--r--src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S32
-rw-r--r--src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S178
-rw-r--r--src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c188
-rw-r--r--src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S50
-rw-r--r--src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S194
-rw-r--r--src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S364
-rw-r--r--src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S397
-rw-r--r--src/isa-l/igzip/aarch64/isal_update_histogram.S311
-rw-r--r--src/isa-l/igzip/aarch64/lz0a_const_aarch64.h72
-rw-r--r--src/isa-l/igzip/aarch64/options_aarch64.h71
-rw-r--r--src/isa-l/igzip/aarch64/stdmac_aarch64.h57
20 files changed, 4104 insertions, 0 deletions
diff --git a/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h b/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h
new file mode 100644
index 000000000..88eb18dfd
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h
@@ -0,0 +1,57 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __BITBUF2_AARCH64_H__
+#define __BITBUF2_AARCH64_H__
+#include "options_aarch64.h"
+
+#ifdef __ASSEMBLY__
+.macro update_bits stream:req,code:req,code_len:req,m_bits:req,m_bit_count:req \
+ m_out_buf:req
+ // Append code_len bits of code to the bit buffer, flush whole bytes, save the state.
+ lsl x_\code,x_\code,x_\m_bit_count // move code above the bits already queued
+ orr x_\m_bits,x_\code,x_\m_bits
+ add x_\m_bit_count,x_\code_len,x_\m_bit_count
+ // All 8 bytes are stored; the pointer below advances only by the whole bytes.
+ str x_\m_bits,[x_\m_out_buf]
+ // From here on code and code_len are scratch registers (both are clobbered).
+ and w_\code,w_\m_bit_count,-8 // code = number of bits flushed (multiple of 8)
+ lsr w_\code_len,w_\m_bit_count,3 // code_len = number of whole bytes flushed
+ add x_\m_out_buf,x_\m_out_buf,w_\code_len,uxtw
+ sub w_\m_bit_count,w_\m_bit_count,w_\code // keep the 0..7 leftover bits
+ lsr x_\m_bits,x_\m_bits,x_\code
+ // Write the state back through the stream argument (was a hard-coded symbol).
+ str x_\m_bits,[\stream,_internal_state_bitbuf_m_bits]
+ str w_\m_bit_count,[\stream,_internal_state_bitbuf_m_bit_count]
+ str x_\m_out_buf,[\stream,_internal_state_bitbuf_m_out_buf]
+ // stream must name a base-address register; the other arguments name x_/w_ alias pairs.
+
+.endm
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/data_struct_aarch64.h b/src/isa-l/igzip/aarch64/data_struct_aarch64.h
new file mode 100644
index 000000000..71160fe1b
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/data_struct_aarch64.h
@@ -0,0 +1,226 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#ifndef __AARCH64_DATA_STRUCT_H__
+#define __AARCH64_DATA_STRUCT_H__
+#ifdef __ASSEMBLY__
+
+#define FIELD(name,size,align) /* bind name to the next offset that honours align */ \
+ .set _FIELD_OFFSET,(_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)); /* round up; mask form needs a power-of-two align */ \
+ .equ name,_FIELD_OFFSET ; \
+ .set _FIELD_OFFSET,_FIELD_OFFSET + size; /* advance past this field */ \
+ .if align > _STRUCT_ALIGN; /* remember the largest field alignment */ \
+ .set _STRUCT_ALIGN, align; \
+ .endif;
+// START_STRUCT resets the running offset and alignment; its name argument is not used.
+#define START_STRUCT(name) .set _FIELD_OFFSET,0;.set _STRUCT_ALIGN,0;
+// END_STRUCT publishes _<name>_size (running offset, not padded to the alignment) and _<name>_align.
+#define END_STRUCT(name) .set _##name##_size,_FIELD_OFFSET;\
+ .set _##name##_align,_STRUCT_ALIGN
+// CONST defines a plain assembler constant with .equ.
+#define CONST(name,value) .equ name,value
+
+
+/// BitBuf2: bit-buffer field offsets (resulting size 40, align 8)
+START_STRUCT(BitBuf2)
+ /// name size align
+ FIELD ( _m_bits, 8, 8 ) // offset 0
+ FIELD ( _m_bit_count, 4, 4 ) // offset 8
+ FIELD ( _m_out_buf, 8, 8 ) // offset 16 (4 bytes of padding precede it)
+ FIELD ( _m_out_end, 8, 8 ) // offset 24
+ FIELD ( _m_out_start, 8, 8 ) // offset 32
+END_STRUCT(BitBuf2)
+
+
+/// isal_mod_hist: two arrays of HIST_ELEM_SIZE-byte elements (resulting size 2172, align 4)
+#define HIST_ELEM_SIZE 4
+START_STRUCT(isal_mod_hist)
+ /// name size align
+ FIELD ( _d_hist, 30*HIST_ELEM_SIZE, HIST_ELEM_SIZE ) // offset 0, 30 elements
+ FIELD ( _ll_hist, 513*HIST_ELEM_SIZE, HIST_ELEM_SIZE ) // offset 120, 513 elements
+END_STRUCT(isal_mod_hist)
+
+
+/// hufftables_icf: two arrays of HUFF_CODE_SIZE-byte entries (resulting size 2176, align 4)
+#define HUFF_CODE_SIZE 4
+START_STRUCT(hufftables_icf)
+ /// name size align
+ FIELD ( _dist_table, 31 * HUFF_CODE_SIZE, HUFF_CODE_SIZE ) // offset 0, 31 entries
+ FIELD ( _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE ) // offset 124, 513 entries
+END_STRUCT(hufftables_icf)
+
+
+/// hash8k_buf: a single table of 2-byte entries (IGZIP_HASH8K_HASH_SIZE is defined outside this header)
+START_STRUCT(hash8k_buf)
+ /// name size align
+ FIELD ( _hash8k_table, 2 * IGZIP_HASH8K_HASH_SIZE, 2 ) // offset 0
+END_STRUCT(hash8k_buf)
+
+
+/// hash_map_buf: hash table followed by the match buffer (align 8 because of the 8-byte fields)
+START_STRUCT(hash_map_buf)
+ /// name size align
+ FIELD ( _hash_table, 2 * IGZIP_HASH_MAP_HASH_SIZE, 2 ) // offset 0, 2-byte entries
+ FIELD ( _matches_next, 8, 8 ) // 8-byte field, presumably a pointer - confirm against the C struct
+ FIELD ( _matches_end, 8, 8 ) // 8-byte field, presumably a pointer - confirm against the C struct
+ FIELD ( _matches, 4*4*1024, 4 ) // 4096 entries of 4 bytes
+ FIELD ( _overflow, 4*LA, 4 ) // LA entries of 4 bytes (LA is defined outside this header)
+END_STRUCT(hash_map_buf)
+
+
+/// level_buf: offsets below follow from the sizes and alignments of the structs defined above
+#define DEF_MAX_HDR_SIZE 328
+START_STRUCT(level_buf)
+ /// name size align
+ FIELD ( _encode_tables, _hufftables_icf_size, _hufftables_icf_align ) // offset 0
+ FIELD ( _hist, _isal_mod_hist_size, _isal_mod_hist_align ) // offset 2176
+ FIELD ( _deflate_hdr_count, 4, 4 ) // offset 4348
+ FIELD ( _deflate_hdr_extra_bits,4, 4 ) // offset 4352
+ FIELD ( _deflate_hdr, DEF_MAX_HDR_SIZE, 1 ) // offset 4356
+ FIELD ( _icf_buf_next, 8, 8 ) // offset 4688 (4 bytes of padding precede it)
+ FIELD ( _icf_buf_avail_out, 8, 8 ) // offset 4696
+ FIELD ( _icf_buf_start, 8, 8 ) // offset 4704
+ FIELD ( _lvl_extra, _hash_map_buf_size, _hash_map_buf_align ) // offset 4712, sized for hash_map_buf
+END_STRUCT(level_buf)
+
+
+CONST( _hash8k_hash_table , _lvl_extra + _hash8k_table ) // hash8k_buf view of _lvl_extra
+CONST( _hash_map_hash_table , _lvl_extra + _hash_table ) // hash_map_buf view of _lvl_extra
+CONST( _hash_map_matches_next , _lvl_extra + _matches_next )
+CONST( _hash_map_matches_end , _lvl_extra + _matches_end )
+CONST( _hash_map_matches , _lvl_extra + _matches )
+CONST( _hist_lit_len , _hist+_ll_hist ) // level_buf-relative offset of _ll_hist
+CONST( _hist_dist , _hist+_d_hist ) // level_buf-relative offset of _d_hist
+
+
+/// isal_zstate: offsets relative to the start of the state (noted where they are not obvious)
+START_STRUCT(isal_zstate)
+ /// name size align
+ FIELD ( _total_in_start,4, 4 ) // offset 0
+ FIELD ( _block_next, 4, 4 )
+ FIELD ( _block_end, 4, 4 )
+ FIELD ( _dist_mask, 4, 4 ) // offset 12
+ FIELD ( _hash_mask, 4, 4 ) // offset 16, directly after _dist_mask
+ FIELD ( _state, 4, 4 )
+ FIELD ( _bitbuf, _BitBuf2_size, _BitBuf2_align ) // offset 24, embedded BitBuf2 (40 bytes)
+ FIELD ( _crc, 4, 4 ) // offset 64
+ FIELD ( _has_wrap_hdr, 1, 1 ) // offset 68
+ FIELD ( _has_eob_hdr, 1, 1 ) // offset 69
+ FIELD ( _has_eob, 1, 1 ) // offset 70
+ FIELD ( _has_hist, 1, 1 ) // offset 71
+ FIELD ( _has_level_buf_init, 2, 2 ) // offset 72
+ FIELD ( _count, 4, 4 ) // offset 76 (2 bytes of padding precede it)
+ FIELD ( _tmp_out_buff, 16, 1 ) // offset 80
+ FIELD ( _tmp_out_start, 4, 4 ) // offset 96
+ FIELD ( _tmp_out_end, 4, 4 )
+ FIELD ( _b_bytes_valid, 4, 4 )
+ FIELD ( _b_bytes_processed, 4, 4 )
+ FIELD ( _buffer, BSIZE, 1 ) // offset 112, BSIZE bytes (BSIZE is defined outside this header)
+ FIELD ( _head, IGZIP_LVL0_HASH_SIZE*2, 2 ) // 2-byte entries
+END_STRUCT(isal_zstate)
+
+
+
+CONST( _bitbuf_m_bits , _bitbuf+_m_bits ) // isal_zstate-relative offsets of the BitBuf2 fields
+CONST( _bitbuf_m_bit_count , _bitbuf+_m_bit_count )
+CONST( _bitbuf_m_out_buf , _bitbuf+_m_out_buf )
+CONST( _bitbuf_m_out_end , _bitbuf+_m_out_end )
+CONST( _bitbuf_m_out_start , _bitbuf+_m_out_start )
+
+
+/// isal_zstream: stream descriptor with the isal_zstate embedded at offset 64
+START_STRUCT(isal_zstream)
+ /// name size align
+ FIELD ( _next_in, 8, 8 ) // offset 0
+ FIELD ( _avail_in, 4, 4 ) // offset 8
+ FIELD ( _total_in, 4, 4 ) // offset 12
+ FIELD ( _next_out, 8, 8 ) // offset 16
+ FIELD ( _avail_out, 4, 4 ) // offset 24
+ FIELD ( _total_out, 4, 4 ) // offset 28
+ FIELD ( _hufftables, 8, 8 ) // offset 32
+ FIELD ( _level, 4, 4 ) // offset 40
+ FIELD ( _level_buf_size, 4, 4 ) // offset 44
+ FIELD ( _level_buf, 8, 8 ) // offset 48
+ FIELD ( _end_of_stream, 2, 2 ) // offset 56
+ FIELD ( _flush, 2, 2 ) // offset 58
+ FIELD ( _gzip_flag, 2, 2 ) // offset 60
+ FIELD ( _hist_bits, 2, 2 ) // offset 62
+ FIELD ( _internal_state, _isal_zstate_size, _isal_zstate_align ) // offset 64
+END_STRUCT(isal_zstream)
+
+
+
+CONST( _internal_state_total_in_start , _internal_state+_total_in_start ) // stream-relative offsets of the state fields
+CONST( _internal_state_block_next , _internal_state+_block_next )
+CONST( _internal_state_block_end , _internal_state+_block_end )
+CONST( _internal_state_b_bytes_valid , _internal_state+_b_bytes_valid )
+CONST( _internal_state_b_bytes_processed , _internal_state+_b_bytes_processed )
+CONST( _internal_state_crc , _internal_state+_crc )
+CONST( _internal_state_dist_mask , _internal_state+_dist_mask )
+CONST( _internal_state_hash_mask , _internal_state+_hash_mask )
+CONST( _internal_state_bitbuf , _internal_state+_bitbuf )
+CONST( _internal_state_state , _internal_state+_state )
+CONST( _internal_state_count , _internal_state+_count )
+CONST( _internal_state_tmp_out_buff , _internal_state+_tmp_out_buff )
+CONST( _internal_state_tmp_out_start , _internal_state+_tmp_out_start )
+CONST( _internal_state_tmp_out_end , _internal_state+_tmp_out_end )
+CONST( _internal_state_has_wrap_hdr , _internal_state+_has_wrap_hdr )
+CONST( _internal_state_has_eob , _internal_state+_has_eob )
+CONST( _internal_state_has_eob_hdr , _internal_state+_has_eob_hdr )
+CONST( _internal_state_has_hist , _internal_state+_has_hist )
+CONST( _internal_state_has_level_buf_init , _internal_state+_has_level_buf_init )
+CONST( _internal_state_buffer , _internal_state+_buffer )
+CONST( _internal_state_head , _internal_state+_head )
+CONST( _internal_state_bitbuf_m_bits , _internal_state+_bitbuf_m_bits ) // stream-relative offsets of the bit-buffer fields
+CONST( _internal_state_bitbuf_m_bit_count , _internal_state+_bitbuf_m_bit_count )
+CONST( _internal_state_bitbuf_m_out_buf , _internal_state+_bitbuf_m_out_buf )
+CONST( _internal_state_bitbuf_m_out_end , _internal_state+_bitbuf_m_out_end )
+CONST( _internal_state_bitbuf_m_out_start , _internal_state+_bitbuf_m_out_start )
+
+/// Internal states: consecutive values 0..10, each defined as the previous one plus 1
+CONST( ZSTATE_NEW_HDR , 0 )
+CONST( ZSTATE_HDR , (ZSTATE_NEW_HDR + 1) )
+CONST( ZSTATE_CREATE_HDR , (ZSTATE_HDR + 1) )
+CONST( ZSTATE_BODY , (ZSTATE_CREATE_HDR + 1) )
+CONST( ZSTATE_FLUSH_READ_BUFFER , (ZSTATE_BODY + 1) )
+CONST( ZSTATE_FLUSH_ICF_BUFFER , (ZSTATE_FLUSH_READ_BUFFER + 1) )
+CONST( ZSTATE_TYPE0_HDR , (ZSTATE_FLUSH_ICF_BUFFER + 1) )
+CONST( ZSTATE_TYPE0_BODY , (ZSTATE_TYPE0_HDR + 1) )
+CONST( ZSTATE_SYNC_FLUSH , (ZSTATE_TYPE0_BODY + 1) )
+CONST( ZSTATE_FLUSH_WRITE_BUFFER , (ZSTATE_SYNC_FLUSH + 1) )
+CONST( ZSTATE_TRL , (ZSTATE_FLUSH_WRITE_BUFFER + 1) ) // value 10
+
+CONST( _NO_FLUSH , 0 ) // flush mode values
+CONST( _SYNC_FLUSH , 1 )
+CONST( _FULL_FLUSH , 2 )
+CONST( _STORED_BLK , 0 )
+CONST( IGZIP_NO_HIST , 0 ) // history flag values
+CONST( IGZIP_HIST , 1 )
+CONST( IGZIP_DICT_HIST , 2 )
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/encode_df.S b/src/isa-l/igzip/aarch64/encode_df.S
new file mode 100644
index 000000000..6dddddf0a
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/encode_df.S
@@ -0,0 +1,159 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a+crc
+ .text
+ .align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req // give register number reg three aliases
+ \name .req \default\reg // bare name, width prefix (w or x) taken from default
+ w_\name .req w\reg // explicit 32-bit view
+ x_\name .req x\reg // explicit 64-bit view
+.endm
+
+ .global encode_deflate_icf_aarch64
+ .type encode_deflate_icf_aarch64, %function
+
+/*
+ C prototype of the routine with the same argument order (arguments arrive in x0-x3):
+ struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
+ struct deflate_icf *end_in, struct BitBuf2 *bb,
+ struct hufftables_icf *hufftables)
+ The updated next_in is returned in x0.
+*/
+
+ // parameters (x0-x3)
+ declare_generic_reg next_in, 0,x
+ declare_generic_reg end_in, 1,x
+ declare_generic_reg bb, 2,x
+ declare_generic_reg hufftables, 3,x
+
+ // local variables (x4-x13)
+ declare_generic_reg bb_out_end, 4,x
+ declare_generic_reg bb_bit_count, 5,w
+ declare_generic_reg dist_extra, 6,x
+ declare_generic_reg dist_lit_table, 7,x
+ declare_generic_reg code_and_extra, 8,x
+ declare_generic_reg bb_out_buf, 9,x
+ declare_generic_reg bb_bits, 10,x
+ declare_generic_reg d_length, 11,x
+ declare_generic_reg l_length, 12,x
+ declare_generic_reg d_extra_bit_count, 13,x
+
+ declare_generic_reg code_sum, 4,x // shares x4 with bb_out_end
+ declare_generic_reg count_sum, 7,x // shares x7 with dist_lit_table
+
+ declare_generic_reg tmp0, 14,x
+ declare_generic_reg tmp1, 15,x
+
+// bit buffer offsets: byte offsets of the fields accessed through bb
+.equ offset_m_bits, 0
+.equ offset_m_bit_count, 8
+.equ offset_m_out_buf, 16
+.equ offset_m_out_end, 24
+
+encode_deflate_icf_aarch64:
+ cmp next_in, end_in
+ bcs .Ldone // no input records: return next_in unchanged
+
+ ldp bb_out_buf, bb_out_end, [bb, offset_m_out_buf]
+ cmp bb_out_end, bb_out_buf
+ bcc .Ldone // write pointer is already beyond m_out_end
+
+ ldr bb_bit_count, [bb, offset_m_bit_count]
+ ldr bb_bits, [bb, offset_m_bits]
+ b .Lloop_start
+
+ .align 3
+.Lloop:
+ ldr bb_out_end, [bb, offset_m_out_end] // x4 doubled as code_sum, so reload it
+ cmp bb_out_end, bb_out_buf
+ bcc .Ldone // stop once the write pointer passes m_out_end
+
+.Lloop_start:
+ ldrh w_code_and_extra, [next_in]
+ add next_in, next_in, 4 // consume one 4-byte record
+ ldr w_dist_lit_table, [next_in, -4]
+ and code_and_extra, code_and_extra, 1023 // record bits 0-9
+
+ ldrh w_dist_extra, [next_in, -2]
+ add code_and_extra, code_and_extra, 31 // skip the first 31 table entries
+ ubfx x_dist_lit_table, x_dist_lit_table, 10, 9 // record bits 10-18
+ add x_tmp0, hufftables, code_and_extra, lsl 2
+ ubfx x_dist_extra, x_dist_extra, 3, 13 // record bits 19-31
+ lsl x_dist_lit_table, x_dist_lit_table, 2 // byte offset of a 4-byte entry
+
+ ldr w_code_and_extra, [hufftables, code_and_extra, lsl 2]
+ add x_d_extra_bit_count, hufftables, x_dist_lit_table
+ ldrb w_l_length, [x_tmp0, 3] // byte 3 of the first entry: its bit length
+ and code_and_extra, code_and_extra, 0xffffff // low 24 bits: bits to emit
+ ldrh w_code_sum, [hufftables, x_dist_lit_table] // low 16 bits of the second entry
+ ldrb w_d_length, [x_d_extra_bit_count, 3] // byte 3: its bit length
+ add w_l_length, w_l_length, bb_bit_count // bit position after the first code
+ ldrb w_d_extra_bit_count, [x_d_extra_bit_count, 2] // byte 2: extra-bit count
+
+ lsl x_tmp0, code_and_extra, x_bb_bit_count // first code at the current position
+ add bb_bit_count, w_d_length, w_l_length // bit position after both codes
+ lsl x_code_sum, x_code_sum, x_l_length // second code after the first
+ orr x_code_sum, x_code_sum, x_tmp0
+ add w_count_sum, w_d_extra_bit_count, bb_bit_count // new total bit count
+ lsl x_bb_bit_count, x_dist_extra, x_bb_bit_count // extra bits after both codes
+
+ orr x_bb_bit_count, x_bb_bit_count, bb_bits
+ orr x_tmp0, x_code_sum, x_bb_bit_count // me->m_bits => x_tmp0
+ str x_tmp0, [bb, offset_m_bits] // me->m_bits => x_tmp0
+ str w_count_sum, [bb, offset_m_bit_count]
+ // Store 8 bytes, then advance the pointer by the whole bytes only.
+ str x_tmp0, [bb_out_buf] // me->m_bits => x_tmp0
+ ldr bb_bit_count, [bb, offset_m_bit_count]
+ ldr bb_bits, [bb, offset_m_bits]
+ and w_tmp0, bb_bit_count, -8 // bits => w_tmp0
+ ldr bb_out_buf, [bb, offset_m_out_buf]
+ lsr w_tmp1, bb_bit_count, 3 // bits/8 => w_tmp1
+ lsr bb_bits, bb_bits, x_tmp0 // bits => x_tmp0
+ sub bb_bit_count, bb_bit_count, w_tmp0 // bits => w_tmp0
+ add bb_out_buf, bb_out_buf, x_tmp1 // bits/8 => x_tmp1
+ str bb_bits, [bb,offset_m_bits]
+ str bb_bit_count, [bb, offset_m_bit_count]
+ str bb_out_buf, [bb, offset_m_out_buf]
+
+ cmp end_in, next_in
+ bhi .Lloop // more records remain
+
+.Ldone:
+ ret
+ .size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64
diff --git a/src/isa-l/igzip/aarch64/gen_icf_map.S b/src/isa-l/igzip/aarch64/gen_icf_map.S
new file mode 100644
index 000000000..5ee2532e6
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/gen_icf_map.S
@@ -0,0 +1,266 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc+crypto
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req // give register number reg three aliases
+ \name .req \default\reg // bare name, width prefix (w or x) taken from default
+ w_\name .req w\reg // explicit 32-bit view
+ x_\name .req x\reg // explicit 64-bit view
+.endm
+
+.macro tzbytecnt param0:req,param1:req // w_param0 = number of zero low-order bytes of x_param0
+ rbit x_\param1, x_\param0
+ cmp x_\param0, 0 // condition flags are clobbered
+ clz x_\param1, x_\param1 // trailing zero bits of the input
+ mov w_\param0, 8 // result when the input is zero
+ lsr w_\param1, w_\param1, 3 // bits to whole bytes, param1 is scratch
+ csel w_\param0, w_\param1, w_\param0, ne
+.endm
+
+.macro write_deflate_icf param0:req,param1:req,param2:req,param3:req // pack three fields into one 32-bit record
+ orr w_\param1, w_\param1, w_\param3, lsl 19 // param3 goes to bits 19 and up
+ orr w_\param1, w_\param1, w_\param2, lsl 10 // param2 goes to bits 10 and up
+ str w_\param1, [x_\param0] // param0 is the destination address, param1 is clobbered
+.endm
+
+ .align 2
+ .global gen_icf_map_h1_aarch64
+ .type gen_icf_map_h1_aarch64, %function
+
+ /* arguments (x0-x2) */
+ declare_generic_reg stream_param, 0,x
+ declare_generic_reg matches_icf_lookup_param, 1,x
+ declare_generic_reg input_size_param, 2,x
+
+ declare_generic_reg param0, 0,x // param0-param3 name x0-x3 for the macro calls
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+ declare_generic_reg param3, 3,x
+
+ /* return (x0) */
+ declare_generic_reg ret_val, 0,x
+
+ /* variables */
+ declare_generic_reg input_size, 3,x // shares x3 with tmp3 and param3
+ declare_generic_reg next_in, 4,x
+ declare_generic_reg matches_icf_lookup, 6,x
+ declare_generic_reg hash_table, 7,x
+ declare_generic_reg end_in, 8,x
+ declare_generic_reg file_start, 9,x
+ declare_generic_reg hash_mask, 10,w
+ declare_generic_reg hist_size, 11,w
+ declare_generic_reg stream_saved, 12,x
+ declare_generic_reg literal_32, 13,w
+ declare_generic_reg literal_1, 14,w
+ declare_generic_reg dist, 15,w
+
+ declare_generic_reg tmp_has_hist, 0,w // the temporaries below reuse x0-x3
+ declare_generic_reg tmp_offset_hash_table, 1,x
+ declare_generic_reg tmp0, 0,x
+ declare_generic_reg tmp1, 1,x
+ declare_generic_reg tmp2, 2,x
+ declare_generic_reg tmp3, 3,x
+ declare_generic_reg tmp5, 5,x
+
+/* constants */
+.equ ISAL_LOOK_AHEAD, 288
+.equ SHORTEST_MATCH, 4
+.equ LEN_OFFSET, 254
+
+/* masks */
+.equ mask_10bit, 1023
+.equ mask_lit_dist, 0x7800
+
+/* byte offsets into struct isal_zstream */
+.equ offset_next_in, 0
+.equ offset_avail_in, 8
+.equ offset_total_in, 12
+.equ offset_next_out, 16
+.equ offset_avail_out, 24
+.equ offset_total_out, 28
+.equ offset_hufftables, 32
+.equ offset_level, 40
+.equ offset_level_buf_size, 44
+.equ offset_level_buf, 48
+.equ offset_end_of_stream, 56
+.equ offset_flush, 58
+.equ offset_gzip_flag, 60
+.equ offset_hist_bits, 62
+.equ offset_state, 64
+.equ offset_state_block_end, 72
+.equ offset_state_dist_mask, 76
+.equ offset_state_has_hist, 135
+
+/* byte offset into struct level_buf */
+.equ offset_hash_map_hash_table, 4712
+
+/*
+uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
+ struct deflate_icf *matches_icf_lookup, uint64_t input_size)
+*/
+
+gen_icf_map_h1_aarch64:
+ cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287
+ bls .Lfast_exit // fewer than ISAL_LOOK_AHEAD bytes: return 0
+ stp x29, x30, [sp, -16]!
+
+ mov stream_saved, stream_param // x0-x3 are reused as temporaries below
+ mov matches_icf_lookup, matches_icf_lookup_param
+ mov x29, sp
+
+ ldrb tmp_has_hist, [stream_saved, offset_state_has_hist]
+ mov tmp_offset_hash_table, offset_hash_map_hash_table
+ ldr end_in, [stream_saved, offset_next_in] // end_in holds the input start for now
+ mov input_size, input_size_param
+ ldr hash_table, [stream_saved, offset_level_buf]
+ ldr w_file_start, [stream_saved, offset_total_in]
+ ldp hist_size, hash_mask, [stream_saved, offset_state_dist_mask] // two adjacent words
+ add hash_table, hash_table, tmp_offset_hash_table
+ sub file_start, end_in, file_start // file_start = input start - total_in
+ cbz tmp_has_hist, .Ligzip_no_hist
+ b .Lwhile_check1
+
+ .align 3
+.Ligzip_no_hist:
+ ldrb w_tmp1, [end_in] // no history yet: emit the first byte as a literal
+ add next_in, end_in, 1
+ ldrh w_tmp0, [matches_icf_lookup]
+ bfi w_tmp0, w_tmp1, 0, 10
+ strh w_tmp0, [matches_icf_lookup]
+ ldr w_tmp0, [matches_icf_lookup]
+ and w_tmp0, w_tmp0, mask_10bit
+ orr w_tmp0, w_tmp0, mask_lit_dist // 0x1e in bits 10 and up, same as the literal path
+ str w_tmp0, [matches_icf_lookup], 4
+ ldr w_tmp0, [end_in]
+ crc32cw w_tmp0, wzr, w_tmp0 // hash of the first 4 input bytes
+
+ and w_tmp5, w_tmp0, hash_mask
+ sub x_tmp1, end_in, file_start
+ mov w_tmp2, 1
+ mov x_tmp0, 1 // return value if the loop is not entered
+ strh w_tmp1, [hash_table, x_tmp5, lsl 1]
+ strb w_tmp2, [stream_saved, offset_state_has_hist]
+ b .Lwhile_check2
+
+.Lwhile_check1:
+ mov next_in, end_in
+ mov x_tmp0, 0 // return value if the loop is not entered
+
+.Lwhile_check2:
+ sub input_size, input_size, ISAL_LOOK_AHEAD
+ add end_in, end_in, input_size // end_in = input start + input_size - ISAL_LOOK_AHEAD
+ cmp next_in, end_in
+ bcs .Lexit
+ mov literal_32, 32
+ mov literal_1, 1
+ b .Lwhile_loop
+
+ .align 3
+.Lnew_match_found:
+ clz w_tmp5, w_tmp2 // w_tmp2 = dist - 1, w_tmp0 = matching byte count
+ add w_tmp1, w_tmp0, LEN_OFFSET
+ sub w_tmp5, literal_32, w_tmp5
+ cmp dist, 2
+ sub w_tmp5, w_tmp5, #2 // number of extra bits, flags from cmp are kept
+ bls .Lskip_compute_dist_icf_code
+
+ lsl w_tmp3, literal_1, w_tmp5
+ sub w_tmp3, w_tmp3, #1
+ lsr w_tmp0, w_tmp2, w_tmp5
+ and w_tmp3, w_tmp3, w_tmp2 // extra bits = low bits of dist - 1
+ add w_tmp2, w_tmp0, w_tmp5, lsl 1 // code = high bits + 2 * extra-bit count
+
+.Lskip_compute_dist_icf_code:
+ mov param0, matches_icf_lookup
+ write_deflate_icf param0,param1,param2,param3
+
+ add next_in, next_in, 1
+ add matches_icf_lookup, matches_icf_lookup, 4
+ cmp next_in, end_in
+ beq .Lsave_with_exit
+
+.Lwhile_loop:
+ ldr w_tmp0, [next_in]
+ crc32cw w_tmp0, wzr, w_tmp0 // hash of the next 4 input bytes
+
+ and w_tmp0, w_tmp0, hash_mask
+ sub x_tmp1, next_in, file_start // current position
+ lsl x_tmp0, x_tmp0, 1 // byte offset of a 2-byte table entry
+ sub w_tmp2, w_tmp1, #1
+ ldrh w_tmp3, [hash_table, x_tmp0] // previous position with this hash
+ strh w_tmp1, [hash_table, x_tmp0] // replace it with the current position
+ sub w_tmp2, w_tmp2, w_tmp3
+ and w_tmp2, w_tmp2, hist_size // hist_size holds the dist_mask word
+ add dist, w_tmp2, 1
+ ldr x_tmp0, [next_in]
+ sub x_tmp1, next_in, x_dist, uxtw
+ ldr x_tmp1, [x_tmp1] // 8 bytes at next_in - dist
+ eor x_tmp0, x_tmp1, x_tmp0
+ tzbytecnt param0,param1
+
+ cmp w_tmp0, (SHORTEST_MATCH-1)
+ mov w_tmp3, 0 // extra bits default to 0 on both paths
+ bhi .Lnew_match_found
+
+ ldrb w_param1, [next_in] // literal: byte value, 0x1e, no extra bits
+ mov x_param0, matches_icf_lookup
+ mov w_param3, 0
+ mov w_param2, 0x1e
+ write_deflate_icf param0,param1,param2,param3
+
+ add next_in, next_in, 1
+ add matches_icf_lookup, matches_icf_lookup, 4
+ cmp next_in, end_in
+ bne .Lwhile_loop
+
+.Lsave_with_exit:
+ ldr ret_val, [stream_saved, offset_next_in]
+ sub ret_val, next_in, ret_val // bytes processed
+
+.Lexit:
+ ldp x29, x30, [sp], 16
+ ret
+
+ .align 3
+.Lfast_exit:
+ mov ret_val, 0
+ ret
+ .size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
diff --git a/src/isa-l/igzip/aarch64/huffman_aarch64.h b/src/isa-l/igzip/aarch64/huffman_aarch64.h
new file mode 100644
index 000000000..4ceae23f4
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/huffman_aarch64.h
@@ -0,0 +1,173 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __HUFFMAN_AARCH64_H__
+#define __HUFFMAN_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+#ifdef LONGER_HUFFTABLE
+ #if (D > 8192)
+ #error History D is larger than 8K
+ #else
+ #define DIST_TABLE_SIZE 8192
+ #define DECODE_OFFSET 26
+ #endif
+#else
+ #define DIST_TABLE_SIZE 2
+ #define DECODE_OFFSET 0
+#endif
+
+#define LEN_TABLE_SIZE 256
+#define LIT_TABLE_SIZE 257
+
+#define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) // 328 + 8 = 336, assuming ISAL_DEF_MAX_HDR_SIZE is 328 (defined elsewhere)
+#define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) // 336 - 4 = 332
+#define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) // 336 + 2*4 - 4*3 = 332 when DIST_TABLE_SIZE is 2
+#define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
+#define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
+#define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
+#define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
+
+#define IGZIP_DECODE_OFFSET 0
+#define IGZIP_DIST_TABLE_SIZE 2
+
+.macro get_len_code hufftables:req,length:req,code:req,code_len:req,tmp0:req // look up one 32-bit entry by length
+ add x_\tmp0,\hufftables,LEN_TABLE_OFFSET // tmp0 is clobbered
+ ldr w_\code_len,[x_\tmp0,x_\length,lsl 2]
+ lsr w_\code, w_\code_len , 5 // code = entry bits 5 and up
+ and x_\code_len,x_\code_len,0x1f // code_len = entry bits 0-4
+.endm
+
+.macro get_lit_code hufftables:req,lit:req,code:req,code_len:req // look up code and code length by lit
+ add x_\code,\hufftables,LIT_TABLE_OFFSET
+ ldrh w_\code,[x_\code,x_\lit,lsl 1] // 2-byte code entries
+ add x_\code_len,\hufftables,LIT_TABLE_SIZES_OFFSET
+ ldrb w_\code_len,[x_\code_len,x_\lit] // 1-byte length entries
+.endm
+
+.macro get_dist_code hufftables:req,dist:req,code:req,code_len:req,tmp0:req,tmp1:req,tmp2:req
+ cmp \dist,DIST_TABLE_SIZE // the bare dist alias is compared, as before
+ bhi .Lcompute_dist_code_\@ // too large for the table: compute the code
+ add x_\tmp0,\hufftables,DIST_TABLE_OFFSET
+ ldr w_\code_len,[x_\tmp0,x_\dist,lsl 2]
+ lsr w_\code, w_\code_len , 5 // code = entry bits 5 and up
+ and x_\code_len,x_\code_len,0x1f // code_len = entry bits 0-4
+ b .Lend_get_dist_code_\@
+.Lcompute_dist_code_\@: // labels are unique per expansion
+ and w_\dist,w_\dist,0xffff // dist is modified on this path
+ sub w_\dist,w_\dist,1
+ clz w_\tmp0,w_\dist
+ mov w_\tmp1,30
+ sub w_\tmp0,w_\tmp1,w_\tmp0 //tmp0 == num_extra_bits
+ mov w_\tmp1,1
+ lsl w_\tmp1,w_\tmp1,w_\tmp0
+ sub w_\tmp1,w_\tmp1,1
+ and w_\tmp1,w_\tmp1,w_\dist //tmp1=extra_bits
+ asr w_\dist,w_\dist,w_\tmp0
+ lsl w_\tmp2,w_\tmp0,1
+ add w_\tmp2,w_\dist,w_\tmp2 //tmp2=sym
+
+ add x_\code,\hufftables,DCODE_TABLE_OFFSET - IGZIP_DECODE_OFFSET*2
+ add x_\code_len,\hufftables,DCODE_TABLE_SIZE_OFFSET - IGZIP_DECODE_OFFSET
+ ldrh w_\code,[x_\code,x_\tmp2,lsl 1]
+ ldrb w_\code_len,[x_\code_len,x_\tmp2]
+ lsl w_\tmp1,w_\tmp1,w_\code_len // extra bits go above the code
+ orr w_\code,w_\code,w_\tmp1
+ add w_\code_len,w_\code_len,w_\tmp0 // total length = code length + extra bits
+
+ // end of the computed path
+.Lend_get_dist_code_\@:
+.endm
+
+
+.macro compare_258_bytes str0:req,str1:req,match_length:req,tmp0:req,tmp1:req
+ mov x_\match_length,0
+.Lcompare_258_loop_\@: // label is unique per expansion
+ ldr x_\tmp0,[x_\str0,x_\match_length] // 8 bytes are read per step
+ ldr x_\tmp1,[x_\str1,x_\match_length]
+ eor x_\tmp0,x_\tmp1,x_\tmp0
+ rbit x_\tmp0,x_\tmp0
+ clz x_\tmp0,x_\tmp0
+ lsr x_\tmp0,x_\tmp0,3 // equal low-order bytes in this chunk (0..8)
+ add x_\match_length,x_\match_length,x_\tmp0
+ // Loop again only while match_length <= 257 and all 8 bytes matched.
+
+ cmp x_\match_length,257
+ ccmp x_\tmp0,8,0,ls
+ beq .Lcompare_258_loop_\@
+ // The last chunk may overshoot, so the result is clamped to 258.
+ cmp x_\match_length,258
+ mov x_\tmp1,258
+ csel x_\match_length,x_\match_length,x_\tmp1,ls
+.endm
+
+.macro compare_max_258_bytes str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
+ mov x_\match_length,0
+ mov x_\tmp0,258
+ cmp x_\max_length,x_\tmp0
+ csel x_\max_length,x_\max_length,x_\tmp0,ls // max_length is clamped to 258 in place
+.Lcompare_max_258_loop_\@: // label is unique per expansion
+ ldr x_\tmp0,[x_\str0,x_\match_length] // 8 bytes are read per step
+ ldr x_\tmp1,[x_\str1,x_\match_length]
+ eor x_\tmp0,x_\tmp1,x_\tmp0
+ rbit x_\tmp0,x_\tmp0
+ clz x_\tmp0,x_\tmp0
+ lsr x_\tmp0,x_\tmp0,3 // equal low-order bytes in this chunk (0..8)
+ add x_\match_length,x_\match_length,x_\tmp0
+ // Loop again only while max_length > match_length and all 8 bytes matched.
+
+ cmp x_\max_length,x_\match_length
+ ccmp x_\tmp0,8,0,hi
+ beq .Lcompare_max_258_loop_\@
+ // The last chunk may overshoot, so the result is clamped to max_length.
+ cmp x_\match_length,x_\max_length
+ csel x_\match_length,x_\match_length,x_\max_length,ls
+.endm
+
+.macro compare_aarch64 str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
+ mov x_\match_length,0
+.Lcompare_loop_\@: // label is unique per expansion
+ ldr x_\tmp0,[x_\str0,x_\match_length] // 8 bytes are read per step
+ ldr x_\tmp1,[x_\str1,x_\match_length]
+ eor x_\tmp0,x_\tmp1,x_\tmp0
+ rbit x_\tmp0,x_\tmp0
+ clz x_\tmp0,x_\tmp0
+ lsr x_\tmp0,x_\tmp0,3 // equal low-order bytes in this chunk (0..8)
+ add x_\match_length,x_\match_length,x_\tmp0
+ // Loop again only while max_length > match_length and all 8 bytes matched.
+ cmp x_\max_length,x_\match_length
+ ccmp x_\tmp0,8,0,hi
+ beq .Lcompare_loop_\@
+ // The last chunk may overshoot, so the result is clamped to max_length.
+ cmp x_\match_length,x_\max_length
+ csel x_\match_length,x_\match_length,x_\max_length,ls
+.endm
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S b/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S
new file mode 100644
index 000000000..46847d344
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S
@@ -0,0 +1,689 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a // baseline ARMv8-A; the NEON ld1/tbl/ins/umov used below need no extra extension flag
+ .text
+ .align 2 // GNU as on AArch64 takes a power of two here: 4-byte alignment
+#include "lz0a_const_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+#define ENABLE_TBL_INSTRUCTION 1 /* 1: distance tables are looked up with NEON tbl; 0: with scalar ldrb/ldrh */
+ /* FIELD(name,size,align): round _FIELD_OFFSET up to align (a power of two), bind name to that offset, advance by size, and record the largest align seen in _STRUCT_ALIGN */
+#define FIELD(name,size,align) \
+ .set _FIELD_OFFSET,(_FIELD_OFFSET + (align) - 1) & (~ ((align)-1)); \
+ .equ name,_FIELD_OFFSET ; \
+ .set _FIELD_OFFSET,_FIELD_OFFSET + size; \
+ .if align > _STRUCT_ALIGN; \
+ .set _STRUCT_ALIGN, align; \
+ .endif;
+ /* START_STRUCT(name): reset the running offset and alignment; the name argument is not used by the expansion */
+#define START_STRUCT(name) .set _FIELD_OFFSET,0;.set _STRUCT_ALIGN,0;
+ /* END_STRUCT(name): publish _name_size and _name_align; size is the raw running offset, not rounded up to the alignment */
+#define END_STRUCT(name) .set _##name##_size , _FIELD_OFFSET;\
+ .set _##name##_align,_STRUCT_ALIGN
+ /* CONST(name,value): plain assembler constant */
+#define CONST(name,value) .equ name,value
+
+#define ISAL_DECODE_LONG_BITS 12 /* index width of the lit/len short lookup table (used as a 12-bit mask in the decoder) */
+#define ISAL_DECODE_SHORT_BITS 10 /* index width of the distance short lookup table */
+ /* sizing of the long-code tables; NOTE(review): presumably mirrors the C definitions in igzip_lib.h - confirm they stay in sync */
+#define L_REM (21 - ISAL_DECODE_LONG_BITS) /* 21 equals MAX_LIT_LEN_CODE_LEN */
+#define S_REM (15 - ISAL_DECODE_SHORT_BITS) /* 15 equals ISAL_DEF_MAX_CODE_LEN */
+#define L_DUP ((1 << L_REM) - (L_REM + 1))
+#define S_DUP ((1 << S_REM) - (S_REM + 1))
+#define L_UNUSED ((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1)
+#define S_UNUSED ((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1)
+#define L_SIZE (286 + L_DUP + L_UNUSED)
+#define S_SIZE (30 + S_DUP + S_UNUSED)
+#define HUFF_CODE_LARGE_LONG_ALIGNED (L_SIZE + (-L_SIZE & 0xf)) /* L_SIZE rounded up to a multiple of 16 entries */
+#define HUFF_CODE_SMALL_LONG_ALIGNED (S_SIZE + (-S_SIZE & 0xf)) /* S_SIZE rounded up to a multiple of 16 entries */
+#define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf)) /* same value as HUFF_CODE_LARGE_LONG_ALIGNED */
+#define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf)) /* same value as HUFF_CODE_SMALL_LONG_ALIGNED */
+#define LARGE_SHORT_CODE_SIZE 4 /* bytes per entry; the decoder reads these with a 32-bit ldr */
+#define LARGE_LONG_CODE_SIZE 2 /* bytes per entry; read with ldrh */
+#define SMALL_SHORT_CODE_SIZE 2 /* bytes per entry; read with ldrh */
+#define SMALL_LONG_CODE_SIZE 2 /* bytes per entry; read with ldrh */
+
+
+// inflate_huff_code
+START_STRUCT( inflate_huff_code_large ) // offsets of the literal/length lookup tables
+ // name size align
+ FIELD ( _short_code_lookup_large, LARGE_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_LONG_BITS)), LARGE_LONG_CODE_SIZE ) // 4096 entries of 4 bytes; NOTE(review): align argument is the LONG entry size, looks swapped with the next field
+ FIELD ( _long_code_lookup_large, LARGE_LONG_CODE_SIZE*MAX_LONG_CODE_LARGE, LARGE_SHORT_CODE_SIZE ) // 2-byte entries; the swap does not change any offset because the first field size is a multiple of 4
+END_STRUCT(inflate_huff_code_large)
+
+// inflate_huff_code
+START_STRUCT( inflate_huff_code_small ) // offsets of the distance lookup tables
+ // name size align
+ FIELD ( _short_code_lookup_small, SMALL_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_SHORT_BITS)), SMALL_LONG_CODE_SIZE ) // 1024 entries of 2 bytes
+ FIELD ( _long_code_lookup_small, SMALL_LONG_CODE_SIZE*MAX_LONG_CODE_SMALL, SMALL_SHORT_CODE_SIZE ) // 2-byte entries
+END_STRUCT(inflate_huff_code_small)
+
+
+// inflate_state
+START_STRUCT( inflate_state ) // field offsets used by the decoder; NOTE(review): presumably must mirror the C struct inflate_state - confirm
+ // name size align
+ FIELD ( _next_out, 8, 8 ) // offset 0: the decoder accesses it as [state] without an offset
+ FIELD ( _avail_out, 4, 4 ) // adjacent to _total_out: accessed together with one ldp/stp
+ FIELD ( _total_out, 4, 4 )
+ FIELD ( _next_in, 8, 8 ) // adjacent to _read_in: accessed together with one ldp/stp
+ FIELD ( _read_in, 8, 8 )
+ FIELD ( _avail_in, 4, 4 ) // adjacent to _read_in_length: accessed together with one ldp/stp
+ FIELD ( _read_in_length, 4, 4 )
+ FIELD ( _lit_huff_code, _inflate_huff_code_large_size, _inflate_huff_code_large_align )
+ FIELD ( _dist_huff_code, _inflate_huff_code_small_size, _inflate_huff_code_small_align )
+ FIELD ( _block_state, 4, 4 ) // the decoder addresses the following fields relative to this one
+ FIELD ( _dict_length, 4, 4 )
+ FIELD ( _bfinal, 4, 4 ) // 4 bytes only: a 64-bit access here also covers _crc_flag
+ FIELD ( _crc_flag, 4, 4 )
+ FIELD ( _crc, 4, 4 )
+ FIELD ( _hist_bits, 4, 4 )
+ FIELD ( _type0_block_len, 4, 4 )
+ FIELD ( _write_overflow_lits, 4, 4 ) // adjacent to _write_overflow_len: accessed together with one ldp/stp
+ FIELD ( _write_overflow_len, 4, 4 )
+ FIELD ( _copy_overflow_len, 4, 4 ) // adjacent to _copy_overflow_dist: cleared together by one 64-bit str of xzr
+ FIELD ( _copy_overflow_dist, 4, 4 )
+END_STRUCT(inflate_state)
+
+CONST( _lit_huff_code_short_code_lookup , _lit_huff_code+_short_code_lookup_large ) // table offsets measured from the start of inflate_state
+CONST( _lit_huff_code_long_code_lookup , _lit_huff_code+_long_code_lookup_large )
+CONST( _dist_huff_code_short_code_lookup , _dist_huff_code+_short_code_lookup_small )
+CONST( _dist_huff_code_long_code_lookup , _dist_huff_code+_long_code_lookup_small )
+CONST( ISAL_BLOCK_NEW_HDR , 0 ) // block_state values; the decoder relies on NEW_HDR being 0 when it selects wzr or a zero bfinal
+CONST( ISAL_BLOCK_HDR , 1 )
+CONST( ISAL_BLOCK_TYPE0 , 2 )
+CONST( ISAL_BLOCK_CODED , 3 ) // the only state in which the decode loop runs
+CONST( ISAL_BLOCK_INPUT_DONE , 4 )
+CONST( ISAL_BLOCK_FINISH , 5 )
+
+/* Inflate Return values */
+#define ISAL_DECOMP_OK 0 /* No errors encountered while decompressing */
+#define ISAL_END_INPUT 1 /* End of input reached */
+#define ISAL_OUT_OVERFLOW 2 /* End of output reached */
+#define ISAL_NAME_OVERFLOW 3 /* End of gzip name buffer reached */
+#define ISAL_COMMENT_OVERFLOW 4 /* End of gzip comment buffer reached */
+#define ISAL_EXTRA_OVERFLOW 5 /* End of extra buffer reached */
+#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */
+#define ISAL_INVALID_BLOCK -1 /* Invalid deflate block found */
+#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */
+#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */
+#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */
+#define ISAL_UNSUPPORTED_METHOD -5 /* Gzip/zlib wrapper specifies unsupported compress method */
+#define ISAL_INCORRECT_CHECKSUM -6 /* Incorrect checksum found */
+
+
+#define ISAL_DEF_MAX_CODE_LEN 15 /* the decoder refills its bit buffer when read_in_length is at or below this */
+#define LARGE_SHORT_SYM_LEN 25
+#define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1) /* low 25 bits of a lit/len short entry: the packed symbol(s) */
+#define LARGE_LONG_SYM_LEN 10
+#define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1) /* low 10 bits of a lit/len long entry: the symbol */
+#define LARGE_SHORT_CODE_LEN_OFFSET 28 /* short entry: code length is in the bits from 28 upward */
+#define LARGE_LONG_CODE_LEN_OFFSET 10 /* long entry: code length is in the bits from 10 upward */
+#define LARGE_FLAG_BIT_OFFSET 25 /* short entry: bit 25 set means continue in the long table */
+#define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET)
+#define LARGE_SYM_COUNT_OFFSET 26 /* short entry: number of packed symbols, 2 bits wide */
+#define LARGE_SYM_COUNT_LEN 2
+#define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1)
+#define LARGE_SHORT_MAX_LEN_OFFSET 26 /* flagged short entry: bits from 26 upward give the bit width used to index the long table */
+ /* distance table entry format */
+#define SMALL_SHORT_SYM_LEN 9
+#define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1)
+#define SMALL_LONG_SYM_LEN 9
+#define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1)
+#define SMALL_SHORT_CODE_LEN_OFFSET 11
+#define SMALL_LONG_CODE_LEN_OFFSET 10
+#define SMALL_FLAG_BIT_OFFSET 10 /* short entry: bit 10 set means continue in the long table */
+#define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET)
+ /* decoded distance symbol format */
+#define DIST_SYM_OFFSET 0
+#define DIST_SYM_LEN 5
+#define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1) /* the decoder keeps only these 5 bits of a distance symbol */
+#define DIST_SYM_EXTRA_OFFSET 5
+#define DIST_SYM_EXTRA_LEN 4
+#define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1)
+ /* lit/len symbol space */
+#define MAX_LIT_LEN_CODE_LEN 21
+#define MAX_LIT_LEN_COUNT (MAX_LIT_LEN_CODE_LEN + 2)
+#define MAX_LIT_LEN_SYM 512 /* symbols above this are rejected as invalid by the decoder */
+#define LIT_LEN_ELEMS 514
+ /* sentinels substituted when a table entry has a zero code length */
+#define INVALID_SYMBOL 0x1FFF
+#define INVALID_CODE 0xFFFFFF
+ /* a length symbol s in 257..512 encodes a repeat length of s - 254, so the shortest match is 3 */
+#define MIN_DEF_MATCH 3
+
+#define TRIPLE_SYM_FLAG 0 /* symbol-packing modes; not referenced by the assembly in this file */
+#define DOUBLE_SYM_FLAG (TRIPLE_SYM_FLAG + 1) /* parenthesised so the value survives use inside a larger expression */
+#define SINGLE_SYM_FLAG (DOUBLE_SYM_FLAG + 1) /* parenthesised for the same reason */
+#define DEFAULT_SYM_FLAG TRIPLE_SYM_FLAG
+ /* NOTE(review): thresholds presumably select the packing mode by block size - confirm against the C table builder */
+#define SINGLE_SYM_THRESH (2 * 1024)
+#define DOUBLE_SYM_THRESH (4 * 1024)
+
+
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req // bind name to register number reg in the default width (w or x), plus explicit w_name and x_name views
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+
+.macro inflate_in_load_read_byte // one step of the byte-wise refill; branches to a caller-provided 1: label when done
+ cmp read_in_length,56 // stop once more than 56 bits are buffered (signed compare)
+ bgt 1f
+ cbz avail_in,1f // stop when the input is exhausted
+ ldrb w_temp,[next_in],1 // fetch one byte and advance next_in
+ sub avail_in,avail_in,1
+ lsl temp,temp,x_read_in_length // place the byte above the bits already buffered
+ orr read_in,read_in,temp
+ add read_in_length,read_in_length,8
+ uxtw read_in_length,read_in_length // NOTE(review): looks redundant, the 32-bit add above already clears the upper half - confirm
+ // no 1: label is defined here; the expanding code must supply it after the last step
+.endm
+
+.macro inflate_in_load // refill the read_in bit buffer from next_in; clobbers temp, new_bytes and flags
+ // nothing to do when 64 or more bits are already buffered
+ cmp read_in_length, 63
+ bgt 1f
+
+ /*if (state->avail_in >= 8) */
+ cmp avail_in, 7
+ bhi 2f
+
+ // loop max 7 times
+ // while (state->read_in_length < 57 && state->avail_in > 0)
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ inflate_in_load_read_byte
+ b 1f
+2: // fast path: at least 8 input bytes are available
+ add new_bytes,read_in_length,7
+ mov w_temp,8
+ lsr new_bytes,new_bytes,3
+ sub new_bytes,w_temp,new_bytes // new_bytes = 8 - (read_in_length + 7) / 8
+ ldr temp,[next_in] // always loads 8 bytes; only new_bytes of them are consumed
+ lsl temp,temp,x_read_in_length
+ orr read_in,read_in,temp
+ add next_in,next_in,new_bytes,uxtb
+ add read_in_length,read_in_length,new_bytes,lsl 3
+ sub avail_in,avail_in,new_bytes
+
+1: // common exit, also the target of the branches inside inflate_in_load_read_byte
+.endm
+
+.macro copy_word // one unrolled step of the match copy: move 4 bytes from arg1 to next_out; clobbers w_arg0 and flags
+ sub repeat_length,repeat_length,#4
+ ldr w_arg0, [arg1],4 // post-increment source
+ cmp repeat_length, 3
+ str w_arg0, [next_out],4 // post-increment destination
+ bls load_byte_less_than_4 // 3 or fewer bytes left: finish in the tail code of the decoder
+.endm
+
+
+ .global decode_huffman_code_block_stateless_aarch64
+ .type decode_huffman_code_block_stateless_aarch64, %function
+/*
+ void decode_huffman_code_block_stateless_aarch64(
+ struct inflate_state *state,
+ uint8_t * start_out)
+*/ // NOTE(review): the prototype above says void, but every exit path sets w0 to an ISAL_* code
+ declare_generic_reg arg0 0, x
+ declare_generic_reg arg1 1, x
+ declare_generic_reg arg2 2, x
+ // arg0..arg2 are reused as scratch by the match copy once the arguments are moved away
+ declare_generic_reg state, 11,x
+ declare_generic_reg start_out, 18,x // NOTE(review): x18 is the platform register under AAPCS64 and is reserved on some OSes - confirm target platforms
+
+ declare_generic_reg read_in, 3,x
+ declare_generic_reg read_in_length, 4,w
+ declare_generic_reg sym_count, 5,w
+ declare_generic_reg next_bits, 6,w
+ declare_generic_reg next_lits, 6,w // same register as next_bits
+ declare_generic_reg avail_in, 20,w
+ declare_generic_reg next_in, 23,x
+
+ declare_generic_reg temp, 16,x //local temp variable
+ declare_generic_reg new_bytes, 7,w //temp variable
+ declare_generic_reg copy_overflow_length, 28,w
+
+
+
+ declare_generic_reg block_state, 8,w
+ declare_generic_reg block_state_adr,9,x
+ declare_generic_reg look_back_dist, 10,w
+ declare_generic_reg bfinal, 22,x // default view is 64-bit although the _bfinal field is 4 bytes; w_bfinal is the field-sized view
+
+ declare_generic_reg next_out, 12,x
+ declare_generic_reg avail_out, 13,w
+ declare_generic_reg total_out, 14,w
+
+ declare_generic_reg rfc_table, 15,x
+ declare_generic_reg next_sym, 17,w
+ declare_generic_reg next_dist, 17,w // same register as next_sym
+ declare_generic_reg bit_count, 19,w
+
+ declare_generic_reg bit_mask, 21,w
+ declare_generic_reg next_lit, 24,w
+ declare_generic_reg write_overflow_len,25,w
+ declare_generic_reg write_overflow_lits,26,w
+ declare_generic_reg repeat_length,27,w
+
+decode_huffman_code_block_stateless_aarch64: // in: x0 = state, x1 = start_out; on exit w0 holds an ISAL_* code
+ //save registers
+ push_stack
+
+ //load variables
+ mov state,arg0
+ mov block_state,_block_state
+ mov start_out,arg1
+ add block_state_adr,state,block_state,uxtw // block_state_adr = &state->block_state
+ ldr block_state, [block_state_adr]
+ ldr w_bfinal, [block_state_adr,_bfinal-_block_state] // 32-bit load: a 64-bit load would also pull in the adjacent _crc_flag
+
+ ldr next_out, [state]
+ ldp avail_out,total_out,[state,_avail_out]
+ ldp next_in, read_in, [state,_next_in]
+ ldp avail_in, read_in_length, [state,_avail_in]
+ ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+
+ //init rfc_table
+ adrp rfc_table,rfc_lookup_table
+ add rfc_table,rfc_table,:lo12:rfc_lookup_table
+#if ENABLE_TBL_INSTRUCTION
+ ld1 {v1.16b,v2.16b,v3.16b},[rfc_table] // v1 = lane index offsets, v2-v3 = extra-bit counts
+ add rfc_table,rfc_table,48
+ ld1 {v4.16b-v7.16b},[rfc_table] // v4-v5 = low bytes of dist_start, v6-v7 = high bytes
+
+#endif
+
+ /*
+ state->copy_overflow_length = 0;
+ state->copy_overflow_distance = 0;
+ */
+ mov x_copy_overflow_length,xzr
+ str xzr,[block_state_adr,_copy_overflow_len-_block_state] // one 64-bit store clears both adjacent 4-byte fields
+
+ /* while (state->block_state == ISAL_BLOCK_CODED) */
+block_state_loop:
+ cmp block_state ,ISAL_BLOCK_CODED
+ bne exit_func_success
+
+ inflate_in_load
+
+ /* save state here */
+ str next_out, [state] // this snapshot is what the caller sees if the iteration ends in end_input
+ stp avail_out,total_out,[state,_avail_out]
+ stp next_in, read_in, [state,_next_in]
+ stp avail_in, read_in_length, [state,_avail_in]
+ stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+
+ /*
+ decode_next_lit_len(&next_lits, &sym_count,
+ state, &state->lit_huff_code,
+ &temp_dat, &temp_bytes);
+ */
+ cmp read_in_length,ISAL_DEF_MAX_CODE_LEN
+ ble inflate_in_load_decode
+decode_next_lit_len_start:
+ and x_next_bits,read_in,((1 << ISAL_DECODE_LONG_BITS) - 1)
+ /*next_sym = huff_code->short_code_lookup[next_bits];*/
+ add next_bits,next_bits,_lit_huff_code>>2 // index in 4-byte units from the start of state
+ ldr next_sym,[state,x_next_bits,lsl 2]
+ /*if ((next_sym & LARGE_FLAG_BIT) == 0) {*/
+ tbnz next_sym,LARGE_FLAG_BIT_OFFSET,long_code_lookup_routine
+ lsr bit_count,next_sym,LARGE_SHORT_CODE_LEN_OFFSET
+ sub read_in_length,read_in_length,bit_count
+ lsr read_in,read_in,x_bit_count
+ mov temp,INVALID_SYMBOL
+ cmp bit_count,0
+ csel next_sym,next_sym,w_temp,ne // zero code length: substitute INVALID_SYMBOL, whose count field is 0
+ ubfx sym_count,next_sym,LARGE_SYM_COUNT_OFFSET,LARGE_SYM_COUNT_LEN
+ and next_lits,next_sym,LARGE_SHORT_SYM_MASK
+ b decode_next_lit_len_end
+long_code_lookup_routine:
+ lsr bit_mask,next_sym,LARGE_SHORT_MAX_LEN_OFFSET
+ mov sym_count,1 // a long code always yields exactly one symbol
+ and next_sym,next_sym,LARGE_SHORT_SYM_MASK
+ mov temp,LARGE_LONG_SYM_MASK // symbol mask; also the value selected below for a zero code length
+ lsl bit_mask,sym_count,bit_mask
+ sub bit_mask,bit_mask,1 // bit_mask = (1 << max_len) - 1
+ and x_next_bits,read_in,x_bit_mask
+ add next_bits,next_sym,next_bits,lsr ISAL_DECODE_LONG_BITS
+ mov next_sym,(_lit_huff_code+_long_code_lookup_large)>>1 // index in 2-byte units from the start of state
+ add next_bits,next_bits,next_sym
+ ldrh next_sym,[state,x_next_bits,lsl 1]
+ lsr bit_count,next_sym,LARGE_LONG_CODE_LEN_OFFSET
+ sub read_in_length,read_in_length,bit_count
+ and next_lits,next_sym,w_temp
+ lsr read_in,read_in,x_bit_count
+ cmp bit_count,0
+ csel next_lits,next_lits,w_temp,ne // zero code length: 1023 exceeds MAX_LIT_LEN_SYM and is rejected below
+decode_next_lit_len_end:
+
+ /* if (sym_count == 0) */
+ cbz sym_count,invalid_symbol
+ tbnz read_in_length,31, end_input // negative bit count: more bits were consumed than were available
+
+ /* while (sym_count > 0) start */
+sym_count_loop:
+ and next_lit,next_lits , 0xffff
+
+ /*if (next_lit < 256 || sym_count > 1) {*/
+ cmp next_lit,255
+ ccmp sym_count,1,0,hi
+ beq next_lit_256 // taken only when next_lit > 255 and sym_count == 1
+
+ /* if (state->avail_out < 1) { */
+ cbnz avail_out,sym_count_adjust
+
+ mov write_overflow_len,sym_count // no output space: remember the pending symbols
+ lsl sym_count,sym_count,3
+ mov write_overflow_lits,next_lits
+ sub sym_count,sym_count,8
+ lsr next_lits,next_lits,sym_count // next_lits >>= 8 * (sym_count - 1): keep the last packed symbol
+ mov sym_count,1
+ cmp next_lits,255
+ bls isal_out_overflow
+ cmp next_lits,256
+ sub write_overflow_len,write_overflow_len,1
+ beq isal_out_overflow_1 // the last symbol is end-of-block
+ b sym_count_loop
+
+sym_count_adjust:
+ /*
+ while (sym_count > 0) end
+ next_lits >>= 8;
+ sym_count--;
+ */
+ subs sym_count,sym_count,1
+ lsr next_lits,next_lits,8
+ strb next_lit,[next_out],1 // emit the literal
+ sub avail_out,avail_out,1
+ add total_out,total_out,1
+ bne sym_count_loop // flags still come from the subs above
+ b block_state_loop
+
+next_lit_256:
+ /* if (next_lit == 256) { */
+ cmp next_lit,256
+ beq next_lit_eq_256
+
+
+ /*
+ if (next_lit <= MAX_LIT_LEN_SYM)
+ sym_count must be 1
+ */
+ cmp next_lit,MAX_LIT_LEN_SYM
+ bhi invalid_symbol
+ sub repeat_length,next_lit,254 // length symbol -> repeat length
+ /*
+ next_dist =
+ decode_next_dist(state, &state->dist_huff_code, &temp_dat,
+ &temp_bytes);
+ */
+ cmp read_in_length,ISAL_DEF_MAX_CODE_LEN
+ ble inflate_in_load_decode_next_dist
+decode_next_dist_start:
+ and x_next_bits,read_in,((1 << ISAL_DECODE_SHORT_BITS) - 1)
+ mov next_sym,_dist_huff_code>>1 // index in 2-byte units from the start of state
+ add next_bits,next_bits,next_sym
+ ldrh next_sym, [state,x_next_bits,lsl 1]
+ tbz next_sym,SMALL_FLAG_BIT_OFFSET,decode_next_dist_flag
+ sub bit_mask,next_sym,SMALL_FLAG_BIT // flagged entry: continue in the long table
+ mov temp,1
+ asr bit_mask,bit_mask,SMALL_SHORT_CODE_LEN_OFFSET
+ and next_sym,next_sym,SMALL_SHORT_SYM_MASK
+ lsl bit_mask,w_temp,bit_mask
+ sub bit_mask,bit_mask,1 // bit_mask = (1 << max_len) - 1
+ and x_next_bits,read_in,x_bit_mask
+ add next_bits,next_sym,next_bits,lsr ISAL_DECODE_SHORT_BITS
+ mov next_sym,(_dist_huff_code + _long_code_lookup_small)>>1
+ add next_bits,next_bits,next_sym
+ ldrh next_sym,[state,x_next_bits,lsl 1]
+ lsr bit_count,next_sym,SMALL_LONG_CODE_LEN_OFFSET
+ b decode_next_dist_adjust
+decode_next_dist_flag:
+ lsr bit_count,next_sym,SMALL_SHORT_CODE_LEN_OFFSET
+decode_next_dist_adjust:
+ sub read_in_length,read_in_length,bit_count
+ lsr read_in,read_in,x_bit_count
+ cbnz bit_count,decode_next_dist_end
+ sub read_in_length,read_in_length,next_sym // zero code length: treat as an invalid symbol
+ mov next_sym,INVALID_SYMBOL
+decode_next_dist_end:
+ and next_sym,next_sym,DIST_SYM_MASK // next_dist shares this register; INVALID_SYMBOL masks to 31
+
+ tbnz read_in_length,31,end_input_1
+ cmp next_dist,29 // distance symbols above 29 are rejected
+ bhi invalid_symbol
+
+
+#if ENABLE_TBL_INSTRUCTION
+ ins v0.b[0],next_dist
+ tbl v0.8b,{v2.16b,v3.16b},v0.8b // extra-bit count for this distance symbol
+ umov bit_count,v0.b[0]
+#else
+ ldrb bit_count,[rfc_table,next_dist,sxtw]
+#endif
+
+ /*inflate_in_read_bits(state,
+ dist_extra_bit_count, &temp_dat,
+ &temp_bytes);
+ */
+ inflate_in_load
+ mov temp,1
+ lsl temp,temp,x_bit_count
+ sub read_in_length,read_in_length,bit_count
+ sub temp,temp,1 // mask of bit_count low bits
+ and x_look_back_dist,temp,read_in // the extra bits
+ lsr read_in,read_in,x_bit_count
+#if ENABLE_TBL_INSTRUCTION
+ dup v0.8b,next_dist
+ add v0.8b,v1.8b,v0.8b // lane 0 indexes the low-byte half, lane 1 the high-byte half (+0x20)
+ tbl v0.8b,{v4.16b-v7.16b},v0.8b
+ umov next_dist,v0.h[0] // 16-bit dist_start value
+#else
+ add next_dist,next_dist,16
+ ldrh next_dist,[rfc_table,x_next_dist,lsl 1]
+#endif
+ add look_back_dist,look_back_dist,next_dist // distance = dist_start + extra bits
+
+ /*
+ if (state->read_in_length < 0) {
+ */
+ tbnz read_in_length,31,end_input_1
+
+ /*
+ if (state->next_out - look_back_dist < start_out) {
+ */
+ sub temp,next_out,x_look_back_dist
+ cmp temp,start_out
+ bcc isal_invalid_lookback
+ /*
+ if (state->avail_out < repeat_length) {
+ */
+ cmp avail_out , repeat_length
+ bcs decompress_data_start
+ sub copy_overflow_length,repeat_length,avail_out // record the part that does not fit
+ stp copy_overflow_length,look_back_dist,[block_state_adr,_copy_overflow_len-_block_state]
+ mov repeat_length,avail_out
+
+decompress_data_start:
+ add total_out,total_out,repeat_length
+ sub avail_out,avail_out,repeat_length
+ sub arg1,next_out,x_look_back_dist // arg1 = copy source
+ #if 1
+ cmp look_back_dist,repeat_length
+ bls byte_copy_start
+ #else
+ b byte_copy_start
+ #endif
+
+ // from here the distance exceeds the length, so source and destination do not overlap
+ cbz repeat_length,decompress_data_end
+ cmp repeat_length, 3
+ bls load_byte_less_than_4 //0.5% will jump
+load_byte_4:
+ sub repeat_length, repeat_length, #4
+ ldr w_arg0, [arg1],4
+ cmp repeat_length, 3
+ str w_arg0, [next_out],4
+ bls load_byte_less_than_4
+ .rept 62
+ copy_word
+ .endr
+ sub repeat_length, repeat_length, #4
+ ldr w_arg0, [arg1],4
+ cmp repeat_length, 4
+ str w_arg0, [next_out],4
+ bge load_byte_4
+load_byte_less_than_4:
+ tbz repeat_length,0,load_byte_2 // bit 0 of the remainder: one odd byte
+ ldrb w_arg0, [arg1],1
+ sub repeat_length, repeat_length, #1
+ strb w_arg0, [next_out],1
+load_byte_2:
+ tbz repeat_length,1,decompress_data_end // bit 1 of the remainder: one halfword
+ ldrh w_arg0, [arg1],2
+ strh w_arg0, [next_out],2
+decompress_data_end:
+
+
+
+ /*
+ if (state->copy_overflow_length > 0)
+ */
+ cmp copy_overflow_length,0
+ bgt isal_out_overflow
+ b block_state_loop
+next_lit_eq_256:
+ /*
+ state->block_state = state->bfinal ?
+ ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
+ */
+ mov block_state, ISAL_BLOCK_INPUT_DONE
+ cmp w_bfinal,0
+ csel block_state, block_state, w_bfinal, ne // bfinal == 0 selects 0, which is ISAL_BLOCK_NEW_HDR
+ str block_state, [block_state_adr]
+
+ b block_state_loop
+exit_func_success:
+ mov w0 , 0
+exit_func: // write the register copies back to state and return w0
+ str next_out, [state]
+ stp avail_out,total_out,[state,_avail_out]
+ stp next_in, read_in, [state,_next_in]
+ stp avail_in, read_in_length, [state,_avail_in]
+ stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+
+ pop_stack
+ ret
+end_input_1:
+end_input: // no write-back here: state keeps the snapshot stored at the top of this iteration
+ mov w0,ISAL_END_INPUT
+ pop_stack
+ ret
+
+invalid_symbol:
+ /*
+ below variable was changed
+ */
+ str next_out, [state] // NOTE(review): exit_func repeats these same stores, so this group looks redundant
+ stp avail_out,total_out,[state,_avail_out]
+ stp next_in, read_in, [state,_next_in]
+ stp avail_in, read_in_length, [state,_avail_in]
+ stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+ mov w0, ISAL_INVALID_SYMBOL
+ b exit_func
+isal_out_overflow_1:
+ // output is full and the pending symbol is end-of-block: advance block_state before reporting overflow
+ cmp w_bfinal,0 // compare the 4-byte bfinal only, never the neighbouring _crc_flag
+ mov block_state, ISAL_BLOCK_INPUT_DONE
+ csel block_state, block_state, wzr, ne // wzr is ISAL_BLOCK_NEW_HDR
+ str block_state, [block_state_adr]
+isal_out_overflow:
+ mov w0, ISAL_OUT_OVERFLOW
+
+ b exit_func
+isal_invalid_lookback:
+ mov w0, ISAL_INVALID_LOOKBACK
+ b exit_func
+inflate_in_load_decode: // out-of-line refill before a lit/len decode
+ inflate_in_load
+ b decode_next_lit_len_start
+inflate_in_load_decode_next_dist: // out-of-line refill before a distance decode
+ inflate_in_load
+ b decode_next_dist_start
+byte_copy_start: // distance <= length: copy byte by byte so that earlier output bytes are re-read
+ add arg2,next_out,x_repeat_length
+ cmp arg2, next_out
+ beq decompress_data_end // zero-length copy
+ sub arg2,arg2,1 // arg2 = address of the last destination byte
+byte_copy_loop:
+ ldrb w_arg0, [arg1] , 1
+ cmp arg2, next_out // compared before the post-increment store below
+ strb w_arg0, [next_out],1
+ bne byte_copy_loop
+ b decompress_data_end
+ .size decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
+
+ .type rfc_lookup_table, %object
+ // NOTE(review): no section switch is visible, so this read-only table presumably lands in .text next to the code
+rfc_lookup_table:
+#if ENABLE_TBL_INSTRUCTION
+ .byte 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 // loaded into v1: per-lane index offsets, +0x00 for the low byte and +0x20 for the high byte
+ .zero 8
+#endif
+ //dist_extra_bit_count
+ .byte 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02 // indexed by distance symbol 0..29, padded to 32 entries
+ .byte 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06
+ .byte 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a
+ .byte 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00
+ //dist_start
+#if ENABLE_TBL_INSTRUCTION
+ .byte 0x01,0x02,0x03,0x04,0x05,0x07,0x09,0x0d,0x11,0x19,0x21,0x31,0x41,0x61,0x81,0xc1 // low bytes of dist_start, symbols 0..15
+ .byte 0x01,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00 // low bytes, symbols 16..29
+ .byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 // high bytes, symbols 0..15
+ .byte 0x01,0x01,0x02,0x03,0x04,0x06,0x08,0x0c,0x10,0x18,0x20,0x30,0x40,0x60,0x00,0x00 // high bytes, symbols 16..29
+#else
+ .short 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d // same values as 16-bit entries, read with ldrh at index symbol + 16
+ .short 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
+ .short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
+ .short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
+#endif
+ .size rfc_lookup_table, . - rfc_lookup_table
diff --git a/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S
new file mode 100644
index 000000000..254f74c61
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S
@@ -0,0 +1,261 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc // +crc is needed for the crc32cw instruction used as the hash function below
+ .text
+ .align 2 // GNU as on AArch64 takes a power of two here: 4-byte alignment
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req // bind name to register number reg in the default width (w or x), plus explicit w_name and x_name views
+ \name .req \default\reg
+ w_\name .req w\reg
+ x_\name .req x\reg
+.endm
+
+.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
+ m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
+ // write input/output progress back to the stream; clobbers every register argument except stream; tmp1 is unused
+ //m_out_buf=bytes_written
+ sub x_\m_out_buf,x_\m_out_buf,x_\m_out_start
+ cmp x_\next_in,x_\start_in // use the macro arguments, not the global aliases of the caller
+ bls .Lskip_has_hist_\@
+ mov w_\tmp0,1 // input was consumed: set has_hist (low byte only) to 1
+ strb w_\tmp0,[x_\stream,_internal_state_has_hist]
+.Lskip_has_hist_\@: // label is unique per expansion
+ ldr w_\tmp0,[x_\stream,_total_in]
+ ldr x_\m_out_start,[x_\stream,_next_out] //m_out_start = next_out
+
+ str x_\next_in,[x_\stream,_next_in]
+ sub x_\start_in,x_\next_in,x_\start_in // start_in = bytes consumed
+ sub x_\end_in,x_\end_in,x_\next_in // end_in = bytes still available
+ add w_\tmp0,w_\tmp0,w_\start_in
+ stp w_\end_in,w_\tmp0,[x_\stream,_avail_in] // avail_in and total_in are stored as an adjacent pair
+ //next_in=avail_out,start_in=total_out
+ ldp w_\next_in,w_\start_in,[x_\stream,_avail_out]
+ add x_\m_out_start,x_\m_out_start,x_\m_out_buf
+ str x_\m_out_start,[x_\stream,_next_out] // next_out += bytes_written
+ add w_\start_in,w_\start_in,w_\m_out_buf // total_out += bytes_written
+ sub w_\next_in,w_\next_in,w_\m_out_buf // avail_out -= bytes_written
+ stp w_\next_in,w_\start_in,[x_\stream,_avail_out]
+.endm
+
+
+ .global isal_deflate_body_aarch64
+ .type isal_deflate_body_aarch64, %function
+/*
+ void isal_deflate_body_aarch64(struct isal_zstream *stream)
+*/
+ declare_generic_reg stream, 0,x //struct isal_zstream *stream
+ declare_generic_reg state, 8,x //&stream->state
+ declare_generic_reg avail_in, 9,w
+ declare_generic_reg end_of_stream, 10,w //can be used in loop
+
+ declare_generic_reg hash_mask, 11,w
+ declare_generic_reg match_length, 12,w
+ declare_generic_reg hufftables, 13,x
+
+ declare_generic_reg m_out_buf, 14,x
+ declare_generic_reg m_out_start, 15,x
+ declare_generic_reg m_out_end, 16,x
+ declare_generic_reg m_bits, 17,x
+ declare_generic_reg m_bit_count, 18,w // NOTE(review): x18 is the platform register under AAPCS64 and is reserved on some OSes - confirm target platforms
+
+ declare_generic_reg start_in, 19,x
+ declare_generic_reg end_in, 20,x
+ declare_generic_reg next_in, 21,x
+ declare_generic_reg loop_end_cnt, 22,x
+
+ declare_generic_reg literal, 23,w
+ declare_generic_reg hash, 24,w
+ declare_generic_reg dist, 25,w
+
+ declare_generic_reg last_seen, 26,x
+ declare_generic_reg file_start, 27,x
+ declare_generic_reg hist_size, 28,w
+
+ declare_generic_reg tmp0, 5 ,w
+ declare_generic_reg tmp1, 6 ,w
+ declare_generic_reg tmp2, 7 ,w
+
+ declare_generic_reg code, 3,x
+ declare_generic_reg code_len, 24,x // same register as hash
+ declare_generic_reg code2, 10,x // same register as end_of_stream
+ declare_generic_reg code_len2, 4,x
+
+
+isal_deflate_body_aarch64: // in: x0 = stream; hashes 4-byte windows, emits match or literal codes, then updates the stream
+ //save registers
+ push_stack
+ ldr avail_in, [stream, _avail_in]
+ cbz avail_in, exit_save_state // no input: only the flush state may need updating
+
+ // set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+ ldr w_m_out_end,[stream,_avail_out]
+ ldr m_out_buf,[stream,_next_out]
+ add m_out_end,m_out_buf,w_m_out_end,uxtw
+ sub m_out_end,m_out_end , 8 // keep 8 bytes of slack at the end of the output buffer
+ mov m_out_start,m_out_buf
+ stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
+ str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
+ ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
+ ldr m_bits ,[stream,_internal_state_bitbuf_m_bits]
+
+
+ //init variables
+ //last_seen=&stream.internal_state.head = _internal_state+_head
+ add last_seen,stream,65536 // NOTE(review): two adds, presumably because the offset does not fit one add immediate - confirm
+ add last_seen,last_seen,_internal_state+_head -65536
+
+
+ //start_in=stream->next_in;next_in=start_in
+ ldr start_in,[stream,_next_in]
+ mov next_in,start_in
+ add end_in,start_in,avail_in,uxtw //avail_in reg is free now
+ sub loop_end_cnt,end_in,289 //loop end
+ cmp next_in,loop_end_cnt // flags are consumed by the bhi below; nothing in between sets flags
+
+
+ //file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ ldr w_file_start,[stream,_total_in]
+ sub file_start,next_in,w_file_start,uxtw
+
+ //uint32_t hist_size = state->dist_mask;
+ ldr hist_size,[stream,_internal_state + _dist_mask]
+
+ //uint32_t hash_mask = state->hash_mask;
+ ldr hash_mask,[stream,_internal_state + _hash_mask]
+
+ ldr hufftables,[stream,_hufftables]
+
+ bhi main_loop_end // too little input for a single main-loop iteration
+main_loop_start:
+ //is_full(&state->bitbuf)
+ cmp m_out_buf,m_out_end
+ bhi update_state_exit
+
+ ldr literal,[next_in] // 4 input bytes: hashed here, low byte reused as the literal
+ crc32cw hash,wzr,literal
+ and hash,hash,hash_mask
+
+ ///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
+ sub x_dist,next_in,file_start
+ //last_seen[hash] = (uint64_t) (next_in - file_start);
+ strh dist,[last_seen,x_hash,lsl 1]
+ sub dist,dist,w_tmp0
+ and dist,dist,0xffff
+
+ sub w_tmp0,dist,1
+ cmp hist_size,w_tmp0
+ bls get_lit_code // dist - 1 >= hist_size (unsigned, so dist == 0 too): emit a literal
+
+ ///match_length = compare258(next_in - dist, next_in, 258);
+ sub x_tmp2,next_in,x_dist
+ compare_258_bytes tmp2,next_in,match_length,tmp0,tmp1
+ cmp match_length,3
+ bls get_lit_code // matches of 3 bytes or fewer are emitted as a literal
+
+ sub x_tmp0,next_in,file_start // also hash positions next_in+1 and next_in+2 into last_seen
+ ldr literal,[next_in,1]
+ crc32cw hash,wzr,literal
+ and hash,hash,hash_mask
+ add tmp0,tmp0,1
+ strh tmp0,[last_seen,x_hash,lsl 1]
+ //call_print_b hash,dist,last_seen
+
+ ldr literal,[next_in,2]
+ crc32cw hash,wzr,literal
+ and hash,hash,hash_mask
+ add tmp0,tmp0,1
+ strh tmp0,[last_seen,x_hash,lsl 1]
+
+ //get_len_code(stream->hufftables, match_length, &code,
+ // &code_len);
+ get_len_code hufftables,match_length,code,code_len,tmp0 // code_len reuses the hash register, which is dead from here
+
+ //get_dist_code(stream->hufftables, dist, &code2, &code_len2);
+ get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
+
+ //code |= code2 << code_len;
+ //code_len += code_len2;
+ lsl code2,code2,code_len
+ orr code,code,code2
+ add code_len,code_len,code_len2
+
+ //next_in += match_length;
+ add next_in,next_in,match_length,uxtw
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+
+
+
+ cmp next_in,loop_end_cnt
+ bls main_loop_start
+ b main_loop_end
+get_lit_code:
+ //get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+ and literal,literal,0xff // literal still holds the word loaded at next_in on both paths that reach here
+ get_lit_code hufftables,literal,code,code_len
+
+ //next_in++;
+ add next_in,next_in,1
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+ cmp next_in,loop_end_cnt
+ bls main_loop_start
+
+main_loop_end:
+ //update state here
+
+ //load end_of_stream and flush together
+ ldr w_end_of_stream, [stream, _end_of_stream] // NOTE(review): one 32-bit load; assumes end_of_stream and flush are adjacent 16-bit fields - confirm
+ //(stream->end_of_stream || stream->flush != 0)
+ cbz w_end_of_stream, update_state_exit
+ mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
+ str w_tmp0, [stream, _internal_state+_state]
+update_state_exit:
+ update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
+exit_ret:
+ pop_stack
+ ret
+exit_save_state: // reached only when avail_in == 0
+ ldr w_end_of_stream, [stream, _end_of_stream]
+ cbz w_end_of_stream, exit_ret //(stream->end_of_stream || stream->flush != 0)
+ mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
+ str w_tmp0, [stream, _internal_state+_state]
+ b exit_ret
+ .size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
diff --git a/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S
new file mode 100644
index 000000000..e5842b5bc
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S
@@ -0,0 +1,264 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+
+/*
+declare Macros
+*/
+
+// declare_generic_reg name, reg, default
+//   Binds three aliases to general register number "reg":
+//     x_<name> -> x<reg>, w_<name> -> w<reg>, <name> -> <default><reg>
+//   where "default" is the width prefix (x or w) used for the plain name.
+.macro declare_generic_reg name:req,reg:req,default:req
+ x_\name .req x\reg
+ w_\name .req w\reg
+ \name .req \default\reg
+.endm
+
+// update_state: write the input/output progress of one deflate pass back
+// into the stream structure.
+//   stream      - alias of the stream pointer register
+//   start_in    - alias holding next_in as it was on entry (clobbered)
+//   next_in     - alias holding the current input position (clobbered)
+//   end_in      - alias holding the end of the input (clobbered)
+//   m_out_buf   - alias holding the current output position (clobbered)
+//   m_out_start - alias holding the output start position (clobbered)
+//   tmp0        - scratch alias
+//   tmp1        - accepted for interface compatibility, not used
+// The comparison now goes through the macro parameters instead of the
+// hard-coded alias names, and the skip label is made unique per expansion
+// so the macro can be expanded more than once in a file.
+.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
+ m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
+
+ //m_out_buf=bytes_written
+ sub x_\m_out_buf,x_\m_out_buf,x_\m_out_start
+ //has_hist is set to 1 only when some input was consumed
+ cmp x_\next_in,x_\start_in
+ bls skip_has_hist_\@
+ mov w_\tmp0,1
+ strb w_\tmp0,[x_\stream,_internal_state_has_hist]
+skip_has_hist_\@:
+ ldr w_\tmp0,[\stream,_total_in]
+ ldr x_\m_out_start,[\stream,_next_out] //m_out_start = next_out
+
+ str x_\next_in,[\stream,_next_in]
+ //start_in = bytes consumed, end_in = bytes still available
+ sub x_\start_in,x_\next_in,x_\start_in
+ sub x_\end_in,x_\end_in,x_\next_in
+ add w_\tmp0,w_\tmp0,w_\start_in
+ //stores the pair (remaining input, old total_in + consumed) at _avail_in;
+ //NOTE(review): relies on total_in being the word right after avail_in
+ stp w_\end_in,w_\tmp0,[\stream,_avail_in]
+ //next_in=avail_out,start_in=total_out
+ ldp w_\next_in,w_\start_in,[\stream,_avail_out]
+ add x_\m_out_start,x_\m_out_start,x_\m_out_buf
+ str x_\m_out_start,[\stream,_next_out]
+ add w_\start_in,w_\start_in,w_\m_out_buf
+ sub w_\next_in,w_\next_in,w_\m_out_buf
+ stp w_\next_in,w_\start_in,[\stream,_avail_out]
+.endm
+ // Symbol and register-alias declarations for isal_deflate_finish_aarch64.
+ // Each declare_generic_reg line creates <name>, w_<name> and x_<name>
+ // aliases for the given register number.
+ .global isal_deflate_finish_aarch64
+ .arch armv8-a+crc
+ .type isal_deflate_finish_aarch64, %function
+/*
+ void isal_deflate_finish_aarch64(struct isal_zstream *stream)
+*/
+ declare_generic_reg stream, 0,x //struct isal_zstream *stream
+ declare_generic_reg state, 8,x //&stream->state
+ declare_generic_reg avail_in, 9,w
+ declare_generic_reg end_of_stream, 10,w //can be used in loop
+
+ declare_generic_reg hash_mask, 11,w
+ declare_generic_reg match_length, 12,w
+ declare_generic_reg hufftables, 13,x
+
+ // bit-buffer state kept in registers while encoding
+ declare_generic_reg m_out_buf, 14,x
+ declare_generic_reg m_out_start, 15,x
+ declare_generic_reg m_out_end, 16,x
+ declare_generic_reg m_bits, 17,x
+ declare_generic_reg m_bit_count, 18,w
+
+ // input cursor and loop bound
+ declare_generic_reg start_in, 19,x
+ declare_generic_reg end_in, 20,x
+ declare_generic_reg next_in, 21,x
+ declare_generic_reg loop_end_cnt, 22,x
+
+ declare_generic_reg literal, 23,w
+ declare_generic_reg hash, 24,w
+ declare_generic_reg dist, 25,w
+
+ declare_generic_reg last_seen, 26,x
+ declare_generic_reg file_start, 27,x
+ declare_generic_reg hist_size, 28,w
+
+ // scratch registers
+ declare_generic_reg tmp0, 5 ,w
+ declare_generic_reg tmp1, 6 ,w
+ declare_generic_reg tmp2, 7 ,w
+
+ // Huffman code scratch. code_len shares register 24 with hash and
+ // code2 shares register 10 with end_of_stream, so those pairs must not
+ // be live at the same time.
+ declare_generic_reg code, 3,x
+ declare_generic_reg code_len, 24,x
+ declare_generic_reg code2, 10,x
+ declare_generic_reg code_len2, 4,x
+
+// isal_deflate_finish_aarch64: encode the remaining input of the stream.
+//   In:  x0 = stream pointer (prototype is in the comment above the aliases)
+//   Out: nothing returned; progress and state are written into *stream.
+// Flow: a hashing/match loop runs while at least 4 input bytes remain, a
+// literal-only loop handles the last bytes, then the end-of-block symbol
+// (literal 256) is written if the output buffer is not full.
+isal_deflate_finish_aarch64:
+ //save registers
+ push_stack
+
+ // set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+ ldr w_m_out_end,[stream,_avail_out]
+ ldr m_out_buf,[stream,_next_out]
+ add m_out_end,m_out_buf,w_m_out_end,uxtw
+ sub m_out_end,m_out_end , 8
+ mov m_out_start,m_out_buf
+ stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
+ str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
+ ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
+ ldr m_bits ,[stream,_internal_state_bitbuf_m_bits]
+
+ //init variables
+ //last_seen=&stream.internal_state.head = _internal_state+_head
+ //the offset is added in two steps (65536, then the remainder)
+ add last_seen,stream,65536
+ add last_seen,last_seen,_internal_state+_head -65536
+
+
+ //start_in=stream->next_in;next_in=start_in
+ ldr avail_in, [stream, _avail_in]
+ ldr start_in,[stream,_next_in]
+ mov next_in,start_in
+ add end_in,start_in,avail_in,uxtw //avail_in reg is free now
+ ldr hufftables,[stream,_hufftables]
+ //no input at all: go straight to writing the end-of-block symbol
+ cbz avail_in, update_not_full
+
+
+ sub loop_end_cnt,end_in,4 //loop end
+ //flags from this compare are consumed by the bhi below; the loads
+ //and the sub in between do not modify the flags
+ cmp next_in,loop_end_cnt
+
+
+ //file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
+ ldr w_file_start,[stream,_total_in]
+ sub file_start, next_in, w_file_start, uxtw
+
+ //uint32_t hist_size = state->dist_mask;
+ ldr hist_size,[stream,_internal_state + _dist_mask]
+
+ //uint32_t hash_mask = state->hash_mask;
+ ldr hash_mask,[stream,_internal_state + _hash_mask]
+
+ //fewer than 4 bytes of input: skip the hashing loop
+ bhi main_loop_end
+main_loop_start:
+ //is_full(&state->bitbuf)
+ cmp m_out_buf,m_out_end
+ bhi update_state_exit
+
+ //hash the next 4 input bytes (next_in <= end_in - 4 here)
+ ldr literal,[next_in]
+ crc32cw hash,wzr,literal
+ and hash,hash,hash_mask
+
+ ///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
+ ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash]
+ sub x_dist,next_in,file_start
+ //last_seen[hash] = (uint64_t) (next_in - file_start);
+ strh dist,[last_seen,x_hash,lsl 1]
+ sub dist,dist,w_tmp0
+ and dist,dist,0xffff
+
+ //emit a literal unless (dist - 1) < hist_size (unsigned, so dist == 0
+ //also takes the literal path)
+ sub w_tmp0,dist,1
+ cmp hist_size,w_tmp0
+ bls get_lit_code
+
+ /// match_length = compare258(next_in - dist, next_in, 258);
+ sub x_tmp2,next_in,x_dist
+ //x_hash is reused here to hold the number of input bytes left
+ sub x_hash,end_in,next_in
+ compare_max_258_bytes tmp2,next_in,hash,match_length,tmp0,tmp1
+ //matches of length 3 or less are emitted as a literal
+ cmp match_length,3
+ bls get_lit_code
+
+ get_len_code hufftables,match_length,code,code_len,tmp0
+ get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2
+
+ //code |= code2 << code_len;
+ //code_len += code_len2;
+ lsl code2,code2,code_len
+ orr code,code,code2
+ add code_len,code_len,code_len2
+
+ //next_in += match_length;
+ add next_in,next_in,match_length,uxtw
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+
+ cmp next_in,loop_end_cnt
+ bls main_loop_start
+ b main_loop_end
+get_lit_code:
+ //get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
+ and literal,literal,0xff
+ get_lit_code hufftables,literal,code,code_len
+
+ //next_in++;
+ add next_in,next_in,1
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+ cmp next_in,loop_end_cnt
+ bls main_loop_start
+main_loop_end:
+ //tail: encode the remaining bytes (up to end_in - 1) as literals
+ sub loop_end_cnt,end_in,1
+ cmp next_in,loop_end_cnt
+ bhi update_not_full
+second_loop_start:
+ cmp m_out_buf,m_out_end
+ bhi update_state_exit
+ //load exactly one byte: as few as one input byte may remain here, so a
+ //32-bit load would read up to 3 bytes past end_in. ldrb zero-extends,
+ //which also makes the former "and 0xff" unnecessary.
+ ldrb literal,[next_in]
+ get_lit_code hufftables,literal,code,code_len
+ //next_in++;
+ add next_in,next_in,1
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+ cmp next_in,loop_end_cnt
+ bls second_loop_start
+
+update_not_full:
+ //all input consumed: write the end-of-block symbol if there is room
+ cmp m_out_buf,m_out_end
+ bhi update_state_exit
+
+ mov literal,256
+ get_lit_code hufftables,literal,code,code_len
+
+ //write_bits(&state->bitbuf, code, code_len);
+ update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf
+ ldrh w_end_of_stream, [stream, _end_of_stream]
+ mov w_tmp0,1
+ strb w_tmp0,[stream,_internal_state_has_eob]
+ //state = (end_of_stream == 1) ? ZSTATE_TRL : ZSTATE_SYNC_FLUSH
+ cmp w_end_of_stream,w_tmp0
+ mov w_tmp0, ZSTATE_TRL
+ mov w_tmp1, ZSTATE_SYNC_FLUSH
+ csel w_tmp0,w_tmp0,w_tmp1,eq
+ str w_tmp0, [stream, _internal_state+_state]
+
+update_state_exit:
+ //write input/output progress back into the stream and return
+ update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
+ pop_stack
+ ret
+
+ .size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
diff --git a/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S
new file mode 100644
index 000000000..40251dab4
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S
@@ -0,0 +1,95 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+/*
+declare Macros
+*/
+
+// declare_generic_reg name, reg, default
+//   Binds three aliases to general register number "reg":
+//     x_<name> -> x<reg>, w_<name> -> w<reg>, <name> -> <default><reg>
+//   where "default" is the width prefix (x or w) used for the plain name.
+.macro declare_generic_reg name:req,reg:req,default:req
+ x_\name .req x\reg
+ w_\name .req w\reg
+ \name .req \default\reg
+.endm
+
+
+
+ // isal_deflate_hash_aarch64: fill the hash table with positions of a
+ // dictionary. For each position in [dict, dict + dict_len - 4) the 4 bytes
+ // at that position are hashed with crc32cw (seed 0), masked with
+ // hash_mask, and a running 16-bit index is stored in hash_table[hash].
+ // Leaf function: no stack usage, only x0-x7 are touched.
+ .global isal_deflate_hash_aarch64
+ .type isal_deflate_hash_aarch64, %function
+/*
+ void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask,
+ uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+*/
+ // arguments
+ declare_generic_reg hash_table, 0,x
+ declare_generic_reg hash_mask, 1,w
+ declare_generic_reg current_index, 2,w
+ declare_generic_reg dict, 3,x
+ declare_generic_reg dict_len, 4,w
+
+ // locals: next_in shares register 3 with dict; hash and literal share
+ // register 2 with current_index (which is only read once, at entry)
+ declare_generic_reg next_in 3,x
+ declare_generic_reg end_in 6,x
+ declare_generic_reg ind 5,w
+ declare_generic_reg hash 2,w
+ declare_generic_reg literal 2,w
+#define SHORTEST_MATCH #4
+
+isal_deflate_hash_aarch64:
+ // ind = (current_index - dict_len) & 0xffff
+ sub ind, current_index, dict_len
+ and ind,ind,0xffff
+
+
+ // end_in = dict + dict_len - SHORTEST_MATCH (computed in 64 bits)
+ uxtw x_dict_len, dict_len
+ sub x_dict_len, x_dict_len, SHORTEST_MATCH
+ add end_in, dict, x_dict_len
+
+
+
+ // nothing to do when next_in >= end_in (unsigned)
+ cmp next_in, end_in
+ bcs exit_func
+
+ // w7 holds the constant crc seed 0
+ mov w7, 0
+loop_start:
+ ldr literal, [next_in]
+ add next_in, next_in, 1
+ // flags set here are consumed by the bne at the end of the loop; the
+ // instructions in between do not modify the flags
+ cmp next_in, end_in
+ crc32cw hash, w7, literal
+ and hash, hash, hash_mask
+ // hash_table[hash] = ind (16-bit store), then ind++
+ strh ind, [hash_table, x_hash, lsl 1]
+ add ind,ind,1
+ bne loop_start
+exit_func:
+
+ ret
+ .size isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64
diff --git a/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S b/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S
new file mode 100644
index 000000000..4f2fe22aa
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S
@@ -0,0 +1,32 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "aarch64_multibinary.h"
+
+// Instantiate the mbin_interface macro (from aarch64_multibinary.h) for
+// decode_huffman_code_block_stateless; presumably this emits the
+// runtime-dispatched entry point -- confirm against the macro definition.
+mbin_interface decode_huffman_code_block_stateless
diff --git a/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S b/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S
new file mode 100644
index 000000000..78d23940d
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S
@@ -0,0 +1,178 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crypto
+ .text
+ .align 3
+
+/*
+Macros
+*/
+
+// declare_var_vector_reg name, reg
+//   Binds four aliases to SIMD register number "reg":
+//     <name>_v -> v<reg>, <name>_q -> q<reg>,
+//     <name>_d -> d<reg>, <name>_s -> s<reg>
+.macro declare_var_vector_reg name:req,reg:req
+ \name\()_v .req v\reg
+ \name\()_q .req q\reg
+ \name\()_d .req d\reg
+ \name\()_s .req s\reg
+.endm
+
+// mod_adler dest, tmp
+//   Reduces the 32-bit register "dest" in place:
+//     q    = (dest * const_div1) >> 47
+//     dest = dest - q * const_div2
+//   The caller must have loaded const_div1/const_div2 beforehand, and a
+//   64-bit alias named <tmp>_x must exist for the scratch register "tmp".
+.macro mod_adler dest:req,tmp:req
+ umull \tmp\()_x,\dest,const_div1
+ lsr \tmp\()_x,\tmp\()_x,47
+ msub \dest,\tmp,const_div2,\dest
+.endm
+
+/*
+ uint32_t adler32_neon(uint32_t adler32, uint8_t * start, uint32_t length);
+
+ Computes an Adler-32 style checksum over [start, start + length), seeded
+ with "adler32" (low 16 bits = first sum, high 16 bits = second sum), and
+ returns the updated value in w0. Whole 32-byte blocks are processed with
+ NEON, the remaining bytes with a scalar loop.
+
+ NOTE(review): the prototype above is the original comment; the code reads
+ the length from the full 64-bit x2 -- confirm the declared type of the
+ caller-side prototype.
+*/
+/*
+Arguments list
+*/
+ adler32 .req w0
+ start .req x1
+ length .req x2
+ .global adler32_neon
+ .type adler32_neon, %function
+adler32_neon:
+/*
+local variables
+*/
+ declare_var_vector_reg factor0 , 6
+ declare_var_vector_reg factor1 , 7
+ declare_var_vector_reg d0 , 4
+ declare_var_vector_reg d1 , 5
+ declare_var_vector_reg adacc , 2
+ declare_var_vector_reg s2acc , 3
+ declare_var_vector_reg zero , 16
+ declare_var_vector_reg adler , 17
+ // sum2 and tmp2 share v20; tmp2 is consumed before sum2 is written
+ declare_var_vector_reg sum2 , 20
+ declare_var_vector_reg tmp2 , 20
+
+ adler0 .req w4
+ adler1 .req w5
+ adler0_x .req x4
+ adler1_x .req x5
+ end .req x0
+ tmp .req w8
+ tmp_x .req x8
+ tmp1_x .req x9
+ loop_cnt .req x10
+ loop_const .req x11
+ const_div1 .req w6
+ const_div2 .req w7
+ // const_div1 = 0x80078071, const_div2 = 65521: operands of mod_adler
+ mov const_div1, 32881
+ movk const_div1, 0x8007, lsl 16
+ mov const_div2, 65521
+ // split the seed into the two running sums
+ and adler0, adler32, 0xffff
+ lsr adler1, adler32, 16
+
+ // loop_cnt = number of whole 32-byte blocks
+ lsr loop_cnt,length,5
+ adrp x3,factors
+ add x3,x3,:lo12:factors
+ ld1 {factor0_v.16b-factor1_v.16b},[x3]
+
+ // end overwrites x0 (adler32), which has already been split above
+ add end,start,length
+ cbz loop_cnt,final_accum32
+ // at most 173 blocks are accumulated between two modulo reductions
+ mov loop_const,173
+
+ movi v16.4s,0
+
+
+
+
+great_than_32:
+ // loop_const = min(loop_cnt, 173)
+ cmp loop_cnt,173
+ csel loop_const,loop_cnt,loop_const,le
+ // lane 0 of each accumulator starts from the current scalar sums
+ mov adacc_v.16b,zero_v.16b
+ mov s2acc_v.16b,zero_v.16b
+ ins adacc_v.s[0],adler0
+ ins s2acc_v.s[0],adler1
+ // tmp_x = end of this run of blocks
+ add tmp_x,start,loop_const,lsl 5
+
+accum32_neon:
+ // Load exactly the block being processed and advance start by 32.
+ // The previous code preloaded the following block on every iteration,
+ // so the last iteration read up to 32 bytes past the end of the input.
+ ld1 {d0_v.16b-d1_v.16b},[start],32
+
+ // s2acc += 32 * adacc (adacc as it was before this block)
+ shl tmp2_v.4s,adacc_v.4s,5
+ add s2acc_v.4s,s2acc_v.4s,tmp2_v.4s
+
+ // adacc += sum of the 32 bytes
+ uaddlp adler_v.8h,d0_v.16b
+ uadalp adler_v.8h,d1_v.16b
+ uadalp adacc_v.4s,adler_v.8h
+
+ // s2acc += sum of byte[i] * (32 - i), weights taken from "factors"
+ umull sum2_v.8h,factor0_v.8b ,d0_v.8b
+ umlal2 sum2_v.8h,factor0_v.16b,d0_v.16b
+ umlal sum2_v.8h,factor1_v.8b ,d1_v.8b
+ umlal2 sum2_v.8h,factor1_v.16b,d1_v.16b
+ uadalp s2acc_v.4s,sum2_v.8h
+
+ cmp start,tmp_x
+ bne accum32_neon
+
+ // horizontal add of the lanes, then reduce both sums
+ uaddlv adacc_d,adacc_v.4s
+ uaddlv s2acc_d,s2acc_v.4s
+ fmov adler0_x,adacc_d
+ fmov adler1_x,s2acc_d
+
+ mod_adler adler0,tmp
+ mod_adler adler1,tmp
+ sub loop_cnt,loop_cnt,loop_const
+ cbnz loop_cnt,great_than_32
+
+final_accum32:
+ // scalar tail: the remaining length % 32 bytes
+ and length,length,31
+ cbz length,end_func
+
+accum32_body:
+ cmp start,end
+ beq end_func
+ ldrb tmp,[start],1
+ add adler0,adler0,tmp
+ add adler1,adler1,adler0
+ b accum32_body
+
+end_func:
+ mod_adler adler0,tmp
+ mod_adler adler1,tmp
+ // result = (adler1 << 16) | adler0
+ orr w0,adler0,adler1,lsl 16
+ ret
+
+ .size adler32_neon, .-adler32_neon
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+// per-byte weights 32,31,...,1 for the 32 bytes of a block (in memory order)
+factors:
+ .quad 0x191a1b1c1d1e1f20
+ .quad 0x1112131415161718
+ .quad 0x090a0b0c0d0e0f10
+ .quad 0x0102030405060708
+
diff --git a/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c b/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c
new file mode 100644
index 000000000..183010c22
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c
@@ -0,0 +1,188 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+/* Dispatcher for isal_adler32: adler32_neon when AT_HWCAP reports
+ * HWCAP_ASIMD, otherwise PROVIDER_BASIC(adler32). */
+DEFINE_INTERFACE_DISPATCHER(isal_adler32)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_ASIMD))
+  return PROVIDER_BASIC(adler32);
+
+ return PROVIDER_INFO(adler32_neon);
+}
+
+/* Dispatcher for isal_deflate_body: the aarch64 routine when AT_HWCAP
+ * reports HWCAP_CRC32, otherwise PROVIDER_BASIC(isal_deflate_body). */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_body)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_body);
+
+ return PROVIDER_INFO(isal_deflate_body_aarch64);
+}
+
+/* Dispatcher for isal_deflate_finish: the aarch64 routine when AT_HWCAP
+ * reports HWCAP_CRC32, otherwise PROVIDER_BASIC(isal_deflate_finish). */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_finish);
+
+ return PROVIDER_INFO(isal_deflate_finish_aarch64);
+}
+
+/* Dispatcher for isal_deflate_icf_body_lvl1: the aarch64 hash_hist body
+ * when AT_HWCAP reports HWCAP_CRC32, otherwise the PROVIDER_BASIC one. */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
+
+ return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
+}
+
+/* Dispatcher for isal_deflate_icf_finish_lvl1: the aarch64 hash_hist finish
+ * when AT_HWCAP reports HWCAP_CRC32, otherwise the PROVIDER_BASIC one. */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
+
+ return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
+}
+
+/* Dispatcher for isal_deflate_icf_body_lvl2: the aarch64 hash_hist body
+ * when AT_HWCAP reports HWCAP_CRC32, otherwise the PROVIDER_BASIC one. */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist);
+
+ return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64);
+}
+
+/* Dispatcher for isal_deflate_icf_finish_lvl2: the aarch64 hash_hist finish
+ * when AT_HWCAP reports HWCAP_CRC32, otherwise the PROVIDER_BASIC one. */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist);
+
+ return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64);
+}
+
+/*
+ * Dispatcher for isal_deflate_icf_body_lvl3.
+ * Always selects icf_body_hash1_fillgreedy_lazy. The earlier HWCAP_CRC32
+ * probe returned this same provider on both branches, so the probe and the
+ * branch were dead code and have been removed.
+ */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3)
+{
+ return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy);
+}
+
+/* Dispatcher for isal_deflate_icf_finish_lvl3: with HWCAP_CRC32 it selects
+ * isal_deflate_icf_finish_hash_map_base, otherwise
+ * PROVIDER_BASIC(isal_deflate_icf_finish_hash_map).
+ * NOTE(review): both paths may resolve to the same function, depending on
+ * how PROVIDER_BASIC expands -- confirm. */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map);
+
+ return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base);
+}
+
+/* Dispatcher for set_long_icf_fg: unconditionally selects the aarch64
+ * implementation; no hardware capability is probed. */
+DEFINE_INTERFACE_DISPATCHER(set_long_icf_fg)
+{
+ return PROVIDER_INFO(set_long_icf_fg_aarch64);
+}
+
+/* Dispatcher for encode_deflate_icf: unconditionally selects the aarch64
+ * implementation; no hardware capability is probed. */
+DEFINE_INTERFACE_DISPATCHER(encode_deflate_icf)
+{
+ return PROVIDER_INFO(encode_deflate_icf_aarch64);
+}
+
+/* Dispatcher for isal_update_histogram: the aarch64 routine when AT_HWCAP
+ * reports HWCAP_CRC32, otherwise PROVIDER_BASIC(isal_update_histogram). */
+DEFINE_INTERFACE_DISPATCHER(isal_update_histogram)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_update_histogram);
+
+ return PROVIDER_INFO(isal_update_histogram_aarch64);
+}
+
+/* Dispatcher for gen_icf_map_lh1: gen_icf_map_h1_aarch64 when AT_HWCAP
+ * reports HWCAP_CRC32, otherwise PROVIDER_BASIC(gen_icf_map_h1). */
+DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(gen_icf_map_h1);
+
+ return PROVIDER_INFO(gen_icf_map_h1_aarch64);
+}
+
+/* Dispatcher for isal_deflate_hash_lvl0: isal_deflate_hash_aarch64 when
+ * AT_HWCAP reports HWCAP_CRC32, otherwise PROVIDER_BASIC(isal_deflate_hash). */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_hash);
+
+ return PROVIDER_INFO(isal_deflate_hash_aarch64);
+}
+
+/* Dispatcher for isal_deflate_hash_lvl1: isal_deflate_hash_aarch64 when
+ * AT_HWCAP reports HWCAP_CRC32, otherwise PROVIDER_BASIC(isal_deflate_hash). */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_hash);
+
+ return PROVIDER_INFO(isal_deflate_hash_aarch64);
+}
+
+/* Dispatcher for isal_deflate_hash_lvl2: isal_deflate_hash_aarch64 when
+ * AT_HWCAP reports HWCAP_CRC32, otherwise PROVIDER_BASIC(isal_deflate_hash). */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_hash);
+
+ return PROVIDER_INFO(isal_deflate_hash_aarch64);
+}
+
+/* Dispatcher for isal_deflate_hash_lvl3: isal_deflate_hash_aarch64 when
+ * AT_HWCAP reports HWCAP_CRC32, otherwise PROVIDER_BASIC(isal_deflate_hash). */
+DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(isal_deflate_hash);
+
+ return PROVIDER_INFO(isal_deflate_hash_aarch64);
+}
+
+/* Dispatcher for decode_huffman_code_block_stateless: the aarch64 routine
+ * when AT_HWCAP reports HWCAP_CRC32, otherwise the PROVIDER_BASIC one. */
+DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless)
+{
+ const unsigned long hwcap = getauxval(AT_HWCAP);
+
+ if (!(hwcap & HWCAP_CRC32))
+  return PROVIDER_BASIC(decode_huffman_code_block_stateless);
+
+ return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64);
+}
diff --git a/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S b/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S
new file mode 100644
index 000000000..57d5230a5
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S
@@ -0,0 +1,50 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "aarch64_multibinary.h"
+
+
+// One mbin_interface instantiation per runtime-dispatched igzip entry point.
+// The macro comes from aarch64_multibinary.h; presumably each line emits the
+// public symbol that forwards to the provider picked by the matching
+// dispatcher -- confirm against the macro definition.
+mbin_interface isal_deflate_icf_body_lvl1
+mbin_interface isal_deflate_icf_body_lvl2
+mbin_interface isal_deflate_icf_body_lvl3
+mbin_interface isal_deflate_icf_finish_lvl1
+mbin_interface isal_deflate_icf_finish_lvl2
+mbin_interface isal_deflate_icf_finish_lvl3
+mbin_interface isal_update_histogram
+mbin_interface encode_deflate_icf
+mbin_interface set_long_icf_fg
+mbin_interface gen_icf_map_lh1
+mbin_interface isal_deflate_hash_lvl0
+mbin_interface isal_deflate_hash_lvl1
+mbin_interface isal_deflate_hash_lvl2
+mbin_interface isal_deflate_hash_lvl3
+
+mbin_interface isal_deflate_body
+mbin_interface isal_deflate_finish
+mbin_interface isal_adler32
diff --git a/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S b/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
new file mode 100644
index 000000000..13f9b087d
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
@@ -0,0 +1,194 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+
+// declare_generic_reg name, reg, default
+//   Binds three aliases to general register number "reg":
+//     x_<name> -> x<reg>, w_<name> -> w<reg>, <name> -> <default><reg>
+//   where "default" is the width prefix (x or w) used for the plain name.
+.macro declare_generic_reg name:req,reg:req,default:req
+ x_\name .req x\reg
+ w_\name .req w\reg
+ \name .req \default\reg
+.endm
+
+ // set_long_icf_fg_aarch64: walk the match_lookup entries for
+ // [next_in, next_in + processed) and, for entries whose length field is
+ // above LEN_OFFSET + 8 - 1, re-measure the match with compare_aarch64 and
+ // rewrite the following entries with the longer (capped) lengths.
+ // Non-leaf: calls memcpy to copy the distance-start table to the stack.
+ .text
+ .align 2
+ .global set_long_icf_fg_aarch64
+ .type set_long_icf_fg_aarch64, %function
+
+/*
+void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
+ struct deflate_icf *match_lookup)
+*/
+
+ /* arguments */
+ declare_generic_reg next_in_param, 0,x
+ declare_generic_reg processed_param, 1,x
+ declare_generic_reg input_size_param, 2,x
+ declare_generic_reg match_lookup_param, 3,x
+
+ // x0-x2 under the names passed to compare_aarch64
+ declare_generic_reg param0, 0,x
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+
+ /* local variable */
+ declare_generic_reg len, 7,w
+ declare_generic_reg dist_code, 8,w
+ declare_generic_reg shortest_match_len 9,w
+ declare_generic_reg len_max, 10,w
+ declare_generic_reg dist_extra, 11,w
+ declare_generic_reg const_8, 13,x
+ // x19-x23 are saved in the prologue and restored at .done
+ declare_generic_reg next_in, 20,x
+ declare_generic_reg dist_start, 21,x
+ declare_generic_reg end_processed, 22,x
+ declare_generic_reg end_in, 23,x
+ declare_generic_reg match_lookup, 19,x
+
+ declare_generic_reg match_length, 4,w
+ declare_generic_reg tmp0, 5,w
+ declare_generic_reg tmp1, 6,w
+
+/* constant */
+.equ DIST_START_SIZE, 128
+.equ ISAL_LOOK_AHEAD, 288
+.equ LEN_OFFSET, 254
+.equ SHORTEST_MATCH, 4
+.equ LEN_MAX_CONST, 512
+
+set_long_icf_fg_aarch64:
+ // 192-byte frame: x29/x30 at 0, x19-x23 at 16..55, and the 128-byte
+ // dist_start copy at offset 64 (x21 = x29 + 64)
+ stp x29, x30, [sp, -192]!
+ add x29, sp, 0
+ stp x21, x22, [sp, 32]
+ add x21, x29, 64
+ stp x19, x20, [sp, 16]
+ str x23, [sp, 48]
+
+ // move the arguments into registers that survive the memcpy call
+ add end_processed, next_in_param, processed_param
+ mov next_in, next_in_param
+ add end_in, next_in_param, input_size_param
+ mov match_lookup, match_lookup_param
+
+ // memcpy(dist_start, .data_dist_start, DIST_START_SIZE)
+ adrp x1, .data_dist_start
+ mov x2, DIST_START_SIZE // 128
+ add x1, x1, :lo12:.data_dist_start
+ mov x0, dist_start
+ bl memcpy
+
+ // end_in = min(end_in, end_processed + ISAL_LOOK_AHEAD) (unsigned)
+ add x_tmp0, end_processed, ISAL_LOOK_AHEAD // 288
+ cmp end_in, x_tmp0
+ csel end_in, end_in, x_tmp0, cc
+ // nothing to do when next_in >= end_processed
+ cmp next_in, end_processed
+ bcs .done
+
+ mov const_8, 8
+ mov len_max, LEN_MAX_CONST // 512
+ mov shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
+ b .while_outer_loop
+
+ .align 2
+.while_outer_check:
+ // advance one input byte / one 4-byte lookup entry
+ add next_in, next_in, 1
+ add match_lookup, match_lookup, 4
+ cmp end_processed, next_in
+ bls .done
+
+.while_outer_loop:
+ // len = low 16 bits of the entry masked with LIT_LEN_MASK
+ ldrh len, [match_lookup]
+ and len, len, LIT_LEN_MASK // 1023
+ cmp len, (LEN_OFFSET + 8 - 1) // 261
+ bls .while_outer_check
+
+ // dist_code = entry bits 10..18, dist_extra = entry bits 19..31
+ ldr dist_code, [match_lookup]
+ add x1, next_in, 8
+ ldrh dist_extra, [match_lookup, 2]
+ // w2 = end_in - (next_in + 8), computed in 32 bits
+ sub w2, w_end_in, w1
+ ubfx x_dist_code, x_dist_code, 10, 9
+ ubfx x_dist_extra, x_dist_extra, 3, 13
+ uxtw x0, dist_code
+ ldr w0, [dist_start, x0, lsl 2]
+ // x0 = next_in + 8 - (dist_start[dist_code] + dist_extra)
+ add w0, dist_extra, w0
+ sub x0, const_8, x0
+ add x0, next_in, x0
+
+ compare_aarch64 param0,param1,param2,match_length,tmp0,tmp1
+ mov w0, w_match_length
+
+ // w0 = match_length + LEN_OFFSET + 8; skip unless it exceeds len
+ add w0, w0, (LEN_OFFSET + 8) // 262
+ cmp w0, len
+ bls .while_outer_check
+
+ // w2 = dist fields repositioned at bits 10.. and 19.. of an entry
+ lsl w2, dist_extra, 19
+ orr w2, w2, dist_code, lsl 10
+
+ .align 3
+.while_inner_loop:
+ // store min(w0, LEN_MAX_CONST) | w2 into the current entry, then
+ // step to the next entry with the length reduced by one
+ cmp w0, LEN_MAX_CONST // 512
+ add next_in, next_in, 1
+ csel w1, w0, len_max, ls
+ sub w0, w0, #1
+ orr w1, w1, w2
+ str w1, [match_lookup]
+ ldrh w1, [match_lookup, 4]!
+
+ // continue while max(next entry length, LEN_OFFSET + 3) < w0
+ and w1, w1, LIT_LEN_MASK // 1023
+ cmp w1, (LEN_OFFSET + SHORTEST_MATCH - 1) // 257
+ csel w1, w1, shortest_match_len, cs
+ cmp w1, w0
+ bcc .while_inner_loop
+
+ add next_in, next_in, 1
+ add match_lookup, match_lookup, 4
+ cmp end_processed, next_in
+ bhi .while_outer_loop
+
+.done:
+ // restore callee-saved registers and release the frame
+ ldp x19, x20, [sp, 16]
+ ldp x21, x22, [sp, 32]
+ ldr x23, [sp, 48]
+ ldp x29, x30, [sp], 192
+ ret
+ .size set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
+
+ // 32-word (128-byte) table copied to the stack on entry and indexed by
+ // dist_code
+ .section .rodata
+ .align 3
+ .set .data_dist_start,. + 0
+.real_data_dist_start:
+ .word 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
+ .word 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
+ .word 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
+ .word 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
diff --git a/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S b/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S
new file mode 100644
index 000000000..3daaa1ba3
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S
@@ -0,0 +1,364 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg // plain name: register number reg viewed with the default width letter (x or w)
+ w_\name .req w\reg // w_ prefixed name: 32-bit view of the same register
+ x_\name .req x\reg // x_ prefixed name: 64-bit view of the same register
+.endm
+
+ .global isal_deflate_icf_body_hash_hist_aarch64
+ .type isal_deflate_icf_body_hash_hist_aarch64, %function
+/*
+void isal_deflate_icf_body_hash_hist_aarch64(struct isal_zstream *stream);
+*/
+
+/* constant */
+
+/* offset of struct isal_zstream */
+.equ offset_next_in, 0
+.equ offset_avail_in, 8
+.equ offset_total_in, 12
+.equ offset_next_out, 16
+.equ offset_avail_out, 24
+.equ offset_total_out, 28
+.equ offset_hufftables, 32
+.equ offset_level, 40
+.equ offset_level_buf_size, 44
+.equ offset_level_buf, 48
+.equ offset_end_of_stream, 56 // the code below reads 32 bits here, which also covers offset_flush
+.equ offset_flush, 58
+.equ offset_gzip_flag, 60
+.equ offset_hist_bits, 62
+.equ offset_state, 64 // start of the embedded state; the state register is set to stream + 64
+.equ offset_state_block_end, 72 // relative to the stream (offset_state + 8)
+.equ offset_state_has_hist, 135 // relative to the stream (offset_state + 71)
+
+/* offset of struct level_buf */
+.equ offset_encode_tables, 0
+.equ offset_hist, 2176
+.equ offset_hist_d_hist, 2176 // same offset as offset_hist
+.equ offset_hist_ll_hist, 2296
+.equ offset_deflate_hdr_count, 4348
+.equ offset_deflate_hdr_extra_bits, 4352
+.equ offset_deflate_hdr, 4356
+.equ offset_icf_buf_next, 4688
+.equ offset_icf_buf_avail_out, 4696
+.equ offset_icf_buf_start, 4704
+.equ offset_hash8k, 4712
+.equ offset_hash_hist, 4712 // same offset as offset_hash8k
+
+/* offset of struct isal_zstate */
+.equ offset_dist_mask, 12 // relative to the state register, not to the stream
+.equ offset_hash_mask, 16 // loaded together with dist_mask by a single ldp
+
+/* macros*/
+.equ ISAL_LOOK_AHEAD, 288 // the main loop stops once this many bytes or fewer remain
+
+ /* arguments */
+ declare_generic_reg stream, 0,x // incoming argument; x0 is reused as scratch once stream_saved is set
+ declare_generic_reg stream_saved, 11,x
+
+ declare_generic_reg param0, 0,x // x0-x2 as handed to the compare macro
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+
+ /* local variable */
+ declare_generic_reg level_buf, 18,x // NOTE(review): x18 is the AAPCS64 platform register and is reserved on some operating systems - confirm the supported targets
+ declare_generic_reg avail_in, 13,w // shares register 13 with end_in
+ declare_generic_reg end_in, 13,x
+ declare_generic_reg start_in, 19,x
+ declare_generic_reg next_in, 9,x
+ declare_generic_reg next_in_iter, 14,x // kept at next_in + 1 while scanning
+ declare_generic_reg state, 24,x
+ declare_generic_reg hist_size, 22,w
+ declare_generic_reg hash_mask, 21,w
+ declare_generic_reg start_out, 12,x // shares register 12 with end_out
+ declare_generic_reg end_out, 12,x
+ declare_generic_reg next_out, 8,x
+ declare_generic_reg file_start, 20,x
+ declare_generic_reg last_seen, 15,x // base of the 16-bit hash table
+ declare_generic_reg total_in, 25,x
+ declare_generic_reg NULL_DIST_SYM, 23,w // holds the constant 30
+ declare_generic_reg match_length, 3,x
+ declare_generic_reg dist, 7,x
+ declare_generic_reg dist_inc, 26,w // dist - 1
+ declare_generic_reg literal, 10,x
+
+ declare_generic_reg tmp0, 4,x // scratch registers handed to the compare macro
+ declare_generic_reg tmp1, 5,x
+
+isal_deflate_icf_body_hash_hist_aarch64: // in: x0 = stream; turns input into 4-byte icf entries and updates the histograms while more than ISAL_LOOK_AHEAD bytes remain
+ stp x29, x30, [sp, -80]! // 80-byte frame: x29/x30 at 0, x19-x23 at 16..48, x24 at 56, x25/x26 at 64
+ add x29, sp, 0
+ str x24, [sp, 56] // x24 (state) is the only callee-saved register written on the early-exit path
+
+ ldr avail_in, [stream, offset_avail_in]
+ cbnz avail_in, .stream_available
+
+ ldr w1, [stream, offset_end_of_stream] // w1 keeps two values of end_of_stream and flush
+ cbz w1, .done // no input and both fields zero: nothing to do
+
+ add state, stream, offset_state
+ b .state_flush_read_buffer
+
+ .align 2
+.stream_available:
+ stp x19, x20, [x29, 16] // save the remaining callee-saved registers used below
+ stp x21, x22, [x29, 32]
+ str x23, [x29, 48]
+ stp x25, x26, [x29, 64]
+
+ ldr level_buf, [stream, offset_level_buf]
+ add state, stream, offset_state // 64
+ mov stream_saved, stream // x0 is reused as scratch from here on
+ ldr start_in, [stream, offset_next_in] // 0
+ ldr w_total_in, [stream, offset_total_in]
+
+ mov x0, offset_hash_hist
+ add last_seen, level_buf, x0 // last_seen = level_buf + offset_hash_hist
+
+ ldr x0, [level_buf, offset_icf_buf_avail_out] // 4696
+ ldr start_out, [level_buf, offset_icf_buf_next] // 4688
+
+ mov next_in, start_in
+ and x0, x0, -4 // round down to a multiple of 4, the number of bytes written per entry below
+ ldp hist_size, hash_mask, [state, offset_dist_mask] // 12, hash_mask comes from the following word (offset_hash_mask)
+ add end_in, start_in, avail_in, uxtw // end_in overwrites avail_in (same register)
+ mov next_out, start_out
+ add end_out, start_out, x0 // end_out overwrites start_out (same register)
+
+ add x0, next_in, ISAL_LOOK_AHEAD // 288
+ sub file_start, start_in, w_total_in, uxtw // file_start = start_in - total_in
+ mov NULL_DIST_SYM, 30
+ add next_in_iter, next_in, 1
+ cmp end_in, x0
+ bls .while_loop_end // loop only while end_in > next_in + ISAL_LOOK_AHEAD
+
+ .align 3
+.while_loop:
+ cmp next_out, end_out
+ bcs .state_create_hdr // no room left for another entry
+
+ ldr w_literal, [next_in] // 4 input bytes
+ mov w0, w_literal
+ crc32cw w0, wzr, w0 // hash = crc32c of the 4 bytes with a zero seed
+
+ and w0, w0, hash_mask
+ sub x1, next_in, file_start // position of next_in relative to file_start
+ lsl x0, x0, 1 // byte offset into the 16-bit table
+
+ ldrh w_dist, [last_seen, x0] // position previously stored for this hash
+ strh w1, [last_seen, x0] // store the low 16 bits of the current position
+ sub w1, w1, w_dist
+ and w_dist, w1, 65535 // dist = (position - previous) modulo 65536
+
+ sub dist_inc, w_dist, #1
+ cmp dist_inc, hist_size
+ bcc .dist_vs_hist_size // dist - 1 < hist_size (unsigned): try a match
+
+.while_latter_part: // emit one literal; next_in_iter still equals next_in + 1 here
+ and w_literal, w_literal, 255
+ mov next_in, next_in_iter
+ add next_out, next_out, 4
+ add x1, level_buf, w_literal, uxtb 2
+ ldr w0, [x1, 2296] // 2296 equals offset_hist_ll_hist: count the literal
+ add w0, w0, 1
+ str w0, [x1, 2296]
+ ldrh w0, [next_out, -4] // the entry is updated field by field
+ bfi w0, w_literal, 0, 10 // bits 0-9 = literal
+ strh w0, [next_out, -4]
+ ldr w0, [next_out, -4]
+ bfi w0, NULL_DIST_SYM, 10, 9 // bits 10-18 = 30
+ str w0, [next_out, -4]
+ ubfx x0, x0, 16, 3 // keep bits 16-18 so that bits 19-31 of the entry are written as zero
+ strh w0, [next_out, -2]
+
+.while_loop_check:
+ add x0, next_in, ISAL_LOOK_AHEAD // 288
+ add next_in_iter, next_in, 1
+ cmp end_in, x0
+ bhi .while_loop
+ b .while_loop_end
+
+ .align 2
+.dist_vs_hist_size:
+ mov x1, next_in
+ mov w2, 258
+ sub x0, next_in, w_dist, uxth // x0 = next_in - dist, the candidate match
+ compare_258_bytes param0,param1,match_length,tmp0,tmp1 // macro defined outside this file; the result is read from match_length
+
+ and w1, w_match_length, 65535 // 0xffff
+ cmp w1, 3
+ bls .while_latter_part // fewer than 4 matching bytes: emit a literal instead
+
+ ldr w0, [next_in, 1] // 4 bytes at old next_in + 1
+ mov x4, next_in
+ add next_in, next_in, w1, uxth // advance past the match
+ crc32cw w0, wzr, w0
+
+ and w0, hash_mask, w0
+ sub next_in_iter, next_in_iter, file_start
+ strh w_next_in_iter, [last_seen, x0, lsl 1] // store the position of old next_in + 1
+ ldr w0, [x4, 2]! // x4 = old next_in + 2
+ crc32cw w0, wzr, w0
+
+ and w0, hash_mask, w0
+ and w_match_length, w_match_length, 65535 // 0xffff
+ sub x4, x4, file_start
+
+ // get_len_icf_code
+ add w_match_length, w_match_length, 254
+ // get_dist_icf_code, first part
+ mov w1, 0 // w1 => dist_extra
+ strh w4, [last_seen, x0, lsl 1] // store the position of old next_in + 2
+ cmp w_dist, 2
+ ubfiz x0, match_length, 2, 17 // x0 = length code * 4
+ add x0, level_buf, x0
+ bhi .compute_dist_icf_code // flags still come from cmp w_dist, 2
+
+.match_length_end:
+ // handle level_buf->hist
+ ldr w2, [x0, offset_hist_ll_hist] // 2296, ll_hist
+ add x4, level_buf, dist_inc, uxtw 2 // d_hist
+ add next_out, next_out, 4
+ add w2, w2, 1 // ll_hist
+ str w2, [x0, offset_hist_ll_hist] // 2296, ll_hist
+ ldr w0, [x4, offset_hist_d_hist] // 2176, d_hist
+ add w0, w0, 1 // d_hist
+ str w0, [x4, offset_hist_d_hist] // 2176, d_hist
+
+ // write_deflate_icf
+ ldrh w0, [next_out, -4]
+ bfi w0, w3, 0, 10 // bits 0-9 = length code (w3 is w_match_length)
+ strh w0, [next_out, -4]
+ ldr w0, [next_out, -4]
+ bfi w0, dist_inc, 10, 9 // bits 10-18 = distance code
+ str w0, [next_out, -4]
+ lsr w0, w0, 16
+ bfi w0, w1, 3, 13 // w1 => dist_extra
+ strh w0, [next_out, -2]
+ b .while_loop_check
+
+ .align 2
+// get_dist_icf_code, 2nd part
+.compute_dist_icf_code:
+ clz w1, dist_inc
+ mov w2, 30
+ sub w2, w2, w1 // w2 = 30 - clz(dist - 1), the number of extra bits
+ mov w1, 1
+ lsl w1, w1, w2
+ sub w1, w1, #1
+ and w1, w1, dist_inc // w1 = low w2 bits of dist - 1
+ lsr dist_inc, dist_inc, w2
+ add dist_inc, dist_inc, w2, lsl 1 // code = ((dist - 1) >> w2) + 2 * w2
+ and w1, w1, 8191 // keep 13 bits
+ b .match_length_end
+
+.while_loop_end:
+ sub x19, next_in, x19 // x19 (start_in) becomes the number of bytes consumed
+ cmp x19, 0
+ ble .skip_igzip_hist2
+
+ mov w0, 1
+ strb w0, [stream_saved, offset_state_has_hist] // 135
+
+.skip_igzip_hist2:
+ add w19, w_total_in, w19 // new total_in
+ ldr w0, [stream_saved, offset_end_of_stream] // 56
+ sub x12, end_out, next_out
+ asr x12, x12, 2 // x12 => end_out - next_out
+ str next_in, [stream_saved]
+ str w19, [stream_saved, offset_total_in] // 12
+ sub next_in, end_in, next_in // x9 becomes the number of input bytes left
+ str w19, [stream_saved, offset_state_block_end] // 72
+
+ ldp x25, x26, [x29, 64]
+ ldr x23, [x29, 48]
+ ldp x21, x22, [x29, 32]
+ ldp x19, x20, [x29, 16]
+
+ str w9, [stream_saved, offset_avail_in] // 8
+ str next_out, [level_buf, offset_icf_buf_next] // 4688
+ str x12, [level_buf, offset_icf_buf_avail_out] // 4696, x12 => end_out - next_out; NOTE(review): stored as bytes >> 2 while the value read at entry is used as a byte count - confirm against the C implementation
+ cbnz w0, .state_flush_read_buffer // the 32-bit load at 56 also covers offset_flush
+ b .done
+
+ .align 2
+.state_create_hdr:
+ mov w0, 2
+ str w0, [x24, 20] // word at state + 20 = 2 (presumably the create-header state, per the label - confirm against the state enum)
+ sub start_in, next_in, start_in // bytes consumed
+ cmp start_in, 0
+ ble .skip_igzip_hist
+
+ mov w0, 1
+ strb w0, [stream_saved, offset_state_has_hist] // 135
+
+.skip_igzip_hist:
+ add w_total_in, w_total_in, w19 // w19 holds the consumed byte count
+ sub x12, end_out, next_out
+ asr x12, x12, 2 // x12 => end_out - next_out
+ str next_in, [stream_saved]
+ sub next_in, end_in, next_in // x9 becomes the number of input bytes left
+ str w_total_in, [stream_saved, offset_total_in] // 12
+ str w_total_in, [stream_saved, offset_state_block_end] // 72
+
+ ldp x25, x26, [x29, 64]
+ ldr x23, [x29, 48]
+ ldp x21, x22, [x29, 32]
+ ldp x19, x20, [x29, 16]
+
+ str w9, [stream_saved, offset_avail_in] // 8
+ str next_out, [level_buf, offset_icf_buf_next] // 4688
+ str x12, [level_buf, offset_icf_buf_avail_out] // 4696, x12 => end_out - next_out
+ b .done
+
+.state_flush_read_buffer:
+ mov w0, 4
+ str w0, [x24, 20] // word at state + 20 = 4 (presumably the flush-read-buffer state, per the label - confirm)
+
+.done:
+ ldr x24, [sp, 56]
+ ldp x29, x30, [sp], 80
+ ret
+
+ .size isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
diff --git a/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S b/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
new file mode 100644
index 000000000..bb2baa22f
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
@@ -0,0 +1,397 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg // plain name: register number reg viewed with the default width letter (x or w)
+ w_\name .req w\reg // w_ prefixed name: 32-bit view of the same register
+ x_\name .req x\reg // x_ prefixed name: 64-bit view of the same register
+.endm
+
+/*
+void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
+*/
+
+/* constant */
+
+/* offset of struct isal_zstream */
+.equ offset_next_in, 0
+.equ offset_avail_in, 8
+.equ offset_total_in, 12
+.equ offset_next_out, 16
+.equ offset_avail_out, 24
+.equ offset_total_out, 28
+.equ offset_hufftables, 32
+.equ offset_level, 40
+.equ offset_level_buf_size, 44
+.equ offset_level_buf, 48
+.equ offset_end_of_stream, 56 // the code below reads 32 bits here, which also covers offset_flush
+.equ offset_flush, 58
+.equ offset_gzip_flag, 60
+.equ offset_hist_bits, 62
+.equ offset_state, 64 // start of the embedded state; the state register is set to stream + 64
+.equ offset_state_block_end, 72 // relative to the stream (offset_state + 8)
+.equ offset_state_state, 84 // relative to the stream (offset_state + offset_state_of_zstate)
+.equ offset_state_has_hist, 135 // relative to the stream (offset_state + 71)
+
+/* offset of struct level_buf */
+.equ offset_encode_tables, 0
+.equ offset_hist, 2176
+.equ offset_hist_d_hist, 2176 // same offset as offset_hist
+.equ offset_hist_ll_hist, 2296
+.equ offset_deflate_hdr_count, 4348
+.equ offset_deflate_hdr_extra_bits, 4352
+.equ offset_deflate_hdr, 4356
+.equ offset_icf_buf_next, 4688
+.equ offset_icf_buf_avail_out, 4696
+.equ offset_icf_buf_start, 4704
+.equ offset_hash8k, 4712
+.equ offset_hash_hist, 4712 // same offset as offset_hash8k
+
+/* offset of struct isal_zstate */
+.equ offset_dist_mask, 12 // relative to the state register, not to the stream
+.equ offset_hash_mask, 16 // loaded together with dist_mask by a single ldp
+.equ offset_state_of_zstate, 20 // the state word, relative to the state register
+
+/* macros*/
+.equ ISAL_LOOK_AHEAD, 288 // not referenced in the visible part of this file
+
+ /* arguments */
+ declare_generic_reg stream, 0,x // incoming argument; x0 is reused as scratch once stream_saved is set
+
+ declare_generic_reg param0, 0,x // x0-x6 carry the arguments of the local helper routines
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+ declare_generic_reg param3, 3,x
+ declare_generic_reg param4, 4,x
+ declare_generic_reg param5, 5,x // same register as next_out_iter
+ declare_generic_reg param6, 6,x
+
+ /* local variable */
+ declare_generic_reg stream_saved, 15,x
+ declare_generic_reg level_buf, 13,x
+ declare_generic_reg start_in, 21,x
+ declare_generic_reg start_out, 22,x
+ declare_generic_reg state, 23,x
+ declare_generic_reg end_out, 12,x
+ declare_generic_reg end_in, 11,x // first holds avail_in (as w_end_in), then the end pointer
+ declare_generic_reg next_in, 8,x
+ declare_generic_reg next_out, 10,x
+ declare_generic_reg next_out_iter, 5,x // same register as param5
+ declare_generic_reg file_start, 18,x // NOTE(review): x18 is the AAPCS64 platform register and is reserved on some operating systems - confirm the supported targets
+ declare_generic_reg last_seen, 14,x // base of the 16-bit hash table
+
+ declare_generic_reg literal_code, 9,w
+ declare_generic_reg hash_mask, 19,w
+ declare_generic_reg hist_size, 20,w
+ declare_generic_reg dist, 7,w
+ declare_generic_reg dist_inc, 24,w // dist - 1, later the distance code
+
+ declare_generic_reg tmp0, 25,x // tmp0-tmp3 live in callee-saved registers, so they survive the bl calls below
+ declare_generic_reg tmp1, 26,x
+ declare_generic_reg tmp2, 27,x
+ declare_generic_reg tmp3, 28,x
+
+ .align 2
+ .type write_deflate_icf_constprop, %function
+write_deflate_icf_constprop: // in: x0 = address of a 4-byte icf entry, w1 = value for bits 0-9; writes 30 to bits 10-18 and zero to bits 19-31; clobbers w1, w2, w3
+ ldrh w2, [x0]
+ mov w3, 30 // constant stored in bits 10-18
+ bfi w2, w1, 0, 10 // bits 0-9 = w1
+ strh w2, [x0]
+ ldr w1, [x0]
+ bfi w1, w3, 10, 9 // bits 10-18 = 30
+ str w1, [x0]
+ ubfx x1, x1, 16, 3 // keep bits 16-18 so that bits 19-31 are written as zero
+ strh w1, [x0, 2]
+ ret
+ .size write_deflate_icf_constprop, .-write_deflate_icf_constprop
+
+ .align 2
+ .type write_deflate_icf, %function
+write_deflate_icf: // in: x0 = address of a 4-byte icf entry, w1 = bits 0-9, w2 = bits 10-18, w3 = bits 19-31; clobbers w1, w4
+ ldrh w4, [x0]
+ bfi w4, w1, 0, 10 // bits 0-9 = w1
+ strh w4, [x0]
+ ldr w1, [x0]
+ bfi w1, w2, 10, 9 // bits 10-18 = w2
+ str w1, [x0]
+ lsr w1, w1, 16 // upper halfword of the entry
+ bfi w1, w3, 3, 13 // bits 19-31 of the entry = low 13 bits of w3
+ strh w1, [x0, 2]
+ ret
+ .size write_deflate_icf, .-write_deflate_icf
+
+ .align 2
+ .type update_state, %function
+update_state: // in: x0 = stream, x1 = start_in, x2 = next_in, x3 = end_in, x5 = next_out, x6 = end_out; x4 is overwritten; clobbers x1, x3, x4, x6, x7
+ sub x7, x2, x1 // x7 = bytes consumed (next_in - start_in)
+ ldr x4, [x0, offset_level_buf] // 48
+ cmp x7, 0
+ ble .L48 // nothing consumed: leave has_hist untouched
+ mov w1, 1
+ strb w1, [x0, offset_state_has_hist] // 135
+.L48:
+ ldr w1, [x0, offset_total_in] // 12
+ sub x6, x6, x5 // end_out - next_out in bytes
+ str x2, [x0, offset_next_in] // 0
+ sub x3, x3, x2 // input bytes left (end_in - next_in)
+ add w1, w1, w7 // new total_in
+ stp w3, w1, [x0, offset_avail_in] // 8: avail_in, then total_in in the following word (12)
+ str w1, [x0, offset_state_block_end] // 72
+ asr x6, x6, 2 // byte difference divided by the 4-byte entry size
+ str x5, [x4, offset_icf_buf_next] // 4688
+ str x6, [x4, offset_icf_buf_avail_out] // 4696
+ ret
+ .size update_state, .-update_state
+
+ .align 2
+ .global isal_deflate_icf_finish_hash_hist_aarch64
+ .type isal_deflate_icf_finish_hash_hist_aarch64, %function
+isal_deflate_icf_finish_hash_hist_aarch64: // in: x0 = stream; turns the remaining input into 4-byte icf entries, then tail-calls update_state
+ ldr w_end_in, [stream, 8] // stream->avail_in
+ cbz w_end_in, .stream_not_available // handled without building a stack frame
+
+ stp x29, x30, [sp, -96]! // 96-byte frame: x29/x30 plus x19-x28
+ add x29, sp, 0
+ stp x19, x20, [sp, 16]
+ stp x21, x22, [sp, 32]
+ stp x23, x24, [sp, 48]
+ stp x25, x26, [sp, 64]
+ stp x27, x28, [sp, 80]
+
+ mov stream_saved, stream // x0 is reused as scratch from here on
+ ldr level_buf, [stream, offset_level_buf] // 48
+ ldr start_in, [stream, offset_next_in] // 0
+ ldr start_out, [level_buf, offset_icf_buf_next] // 4688
+ add state, stream, offset_state // 64
+ ldr end_out, [level_buf, offset_icf_buf_avail_out] // 4696
+ mov next_in, start_in
+ ldr w_file_start, [stream, offset_total_in] // 12
+ mov tmp0, offset_hash_hist // 4712
+ add last_seen, level_buf, tmp0
+ add end_in, start_in, w_end_in, uxtw // end_in = start_in + avail_in
+ and end_out, end_out, -4 // round down to a multiple of 4, the entry size used below
+ mov next_out, start_out
+ ldp hist_size, hash_mask, [state, offset_dist_mask] // 12, hash_mask comes from the following word (offset_hash_mask)
+ sub file_start, start_in, file_start // file_start = start_in - total_in
+ add end_out, start_out, end_out
+ mov next_out_iter, next_out
+
+ add x0, next_in, 3
+ cmp end_in, x0 // x0 <= next_in + 3
+ bls .while_first_end // fewer than 4 bytes left: skip the hashing loop
+
+ .p2align 3
+.while_first:
+ cmp next_out, end_out
+ bcs .save_and_update_state // no room left for another entry
+ ldr literal_code, [next_in] // 4 input bytes
+ mov w0, literal_code
+ crc32cw w0, wzr, w0 // hash = crc32c of the 4 bytes with a zero seed
+ and w0, w0, hash_mask
+ sub x2, next_in, file_start // position of next_in relative to file_start
+ lsl x0, x0, 1 // byte offset into the 16-bit table
+ ldrh dist, [last_seen, x0] // position previously stored for this hash
+ strh w2, [last_seen, x0] // store the low 16 bits of the current position
+ sub w2, w2, dist
+ and w_dist, w2, 65535 // dist = (position - previous) modulo 65536
+ sub dist_inc, dist, #1
+ cmp dist_inc, hist_size
+ bcs .skip_compare258 // dist - 1 >= hist_size (unsigned): emit a literal
+
+ mov x2, 0 // NOTE(review): redundant, the 32-bit write on the next line already zeroes the upper half of x2
+ sub w2, w_end_in, w8 // w2 = end_in - next_in (w8 is w_next_in)
+ mov x1, next_in
+ sub x0, next_in, w_dist, uxth // x0 = next_in - dist, the candidate match
+
+ compare_max_258_bytes param0,param1,param2,tmp2,tmp0,tmp1 // macro defined outside this file; the result is read from tmp2
+ mov w0, w_tmp2
+ and w2, w0, 65535
+
+ cmp w2, 3
+ bhi .while_first_match_length // 4 or more matching bytes
+
+.skip_compare258:
+ and literal_code, literal_code, 255 // get_lit_icf_code
+ add next_in, next_in, 1
+ mov w1, literal_code
+ mov x0, next_out
+ add x_literal_code, level_buf, literal_code, uxtb 2 // level_buf->hist.ll_hist
+
+ ldr w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296
+
+ bl write_deflate_icf_constprop // write_deflate_icf
+
+ add next_out, next_out, 4
+.while_first_check:
+ add x0, next_in, 3
+ mov next_out_iter, next_out // keep x5 equal to next_out for the exit paths
+ cmp end_in, x0
+ bhi .while_first
+
+.while_first_end:
+ cmp next_in, end_in
+ bcs .while_2nd_end // these flags are still live at .while_2nd_end
+
+ cmp next_out, end_out
+ bcc .while_2nd_handle
+ b .save_and_update_state_2nd
+
+ .p2align 2
+.while_2nd: // byte-by-byte tail: only next_out_iter (x5) advances in this loop
+ cmp end_out, next_out_iter
+ bls .save_and_update_state_2nd
+
+.while_2nd_handle:
+ ldrb w2, [next_in], 1
+ mov x0, next_out_iter
+ add next_out_iter, next_out_iter, 4
+ mov w1, w2
+ add x2, level_buf, w2, uxtb 2
+
+ ldr w_tmp0, [x2, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x2, offset_hist_ll_hist] // 2296
+
+ bl write_deflate_icf_constprop
+ cmp end_in, next_in
+ bne .while_2nd
+
+ mov next_in, end_in
+ b .end_of_stream_check_and_exit
+
+ .p2align 2
+.while_first_match_length:
+ and w0, w0, 65535
+ mov w3, 0 // w3 = extra bits, stays 0 when dist <= 2
+ add w1, w0, 254 // get_len_icf_code
+ cmp dist, 2
+ bhi .compute_dist_icf_code
+
+.while_first_match_length_end:
+ ubfiz x_tmp2, x1, 2, 17 // length code * 4
+ add x_tmp1, level_buf, dist_inc, uxtw 2 // distance code * 4
+ add x_tmp2, level_buf, x_tmp2
+
+ add next_in, next_in, w2, uxth // advance past the match (w2 = match length)
+ mov w2, dist_inc
+
+ ldr w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296
+
+ mov x0, next_out
+ ldr w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176
+
+ bl write_deflate_icf // x0 = entry, w1 = length code, w2 = distance code, w3 = extra bits
+ add next_out, next_out, 4
+ b .while_first_check
+
+// compute_dist_icf_code
+ .p2align 2
+.compute_dist_icf_code:
+ clz w3, dist_inc
+ mov w0, 30
+ sub w0, w0, w3 // w0 = 30 - clz(dist - 1), the number of extra bits
+
+ mov w3, 1
+ lsl w3, w3, w0
+ sub w3, w3, #1
+ and w3, w3, dist_inc // w3 = low w0 bits of dist - 1
+ lsl w4, w0, 1
+ lsr dist_inc, dist_inc, w0
+ add dist_inc, dist_inc, w4 // code = ((dist - 1) >> w0) + 2 * w0
+ b .while_first_match_length_end
+
+.while_2nd_end:
+ beq .end_of_stream_check_and_exit // flags from cmp next_in, end_in: equal means all input was consumed
+ mov param6, end_out
+ b .update_state
+
+.end_of_stream_check_and_exit:
+ ldr w_tmp0, [stream_saved, offset_end_of_stream] // 56
+ cbz w_tmp0, .update_state_2nd // the 32-bit load also covers offset_flush
+ b .save_and_update_state_2nd
+
+ .p2align 3
+.save_and_update_state_2nd:
+ mov w_tmp0, 2
+ str w_tmp0, [state, offset_state_of_zstate] // 20
+.update_state_2nd:
+ mov param6, end_out
+ b .update_state
+
+ .p2align 2
+.save_and_update_state:
+ mov param6, end_out
+ mov param5, next_out
+ mov w_tmp0, 2
+ str w_tmp0, [state, offset_state_of_zstate] // 20
+.update_state: // arguments are copied out of the callee-saved registers before those are restored
+ mov param4, start_out
+ mov param1, start_in
+ mov param3, end_in
+ mov param2, next_in
+ mov param0, stream_saved
+
+ ldp x19, x20, [sp, 16]
+ ldp x21, x22, [sp, 32]
+ ldp x23, x24, [sp, 48]
+ ldp x25, x26, [sp, 64]
+ ldp x27, x28, [sp, 80]
+ ldp x29, x30, [sp], 96
+
+ b update_state // tail call: update_state returns directly to our caller
+
+ .p2align 2
+.stream_not_available:
+ ldr w1, [stream, offset_end_of_stream] // 56
+ cbz w1, .done // the 32-bit load also covers offset_flush
+
+ mov w1, 2
+ str w1, [stream, offset_state_state] // 84
+.done:
+ ret
+
+ .size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
diff --git a/src/isa-l/igzip/aarch64/isal_update_histogram.S b/src/isa-l/igzip/aarch64/isal_update_histogram.S
new file mode 100644
index 000000000..abcec0f14
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_update_histogram.S
@@ -0,0 +1,311 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+ .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req
+ \name .req \default\reg // plain name: register number reg viewed with the default width letter (x or w)
+ w_\name .req w\reg // w_ prefixed name: 32-bit view of the same register
+ x_\name .req x\reg // x_ prefixed name: 64-bit view of the same register
+.endm
+
+.macro convert_dist_to_dist_sym dist:req,tmp0:req,tmp1:req
+ mov w_\tmp0, w_\dist // in/out: dist holds the distance on entry and the distance symbol on exit; tmp0 and tmp1 are clobbered
+ mov w_\dist, -1 // result when the distance is above 32768
+ cmp w_\tmp0, 32768
+ bhi .dist2code_done_\@ // the label carries the expansion counter so the macro can be expanded more than once
+ sub w_\dist, w_\tmp0, #1 // distances 1-4 map to symbols 0-3
+ cmp w_\tmp0, 4
+ bls .dist2code_done_\@
+ clz w_\tmp1, w_\dist
+ mov w_\tmp0, 30
+ sub w_\tmp0, w_\tmp0, w_\tmp1 // number of extra bits = 30 - clz(distance - 1)
+ lsr w_\dist, w_\dist, w_\tmp0
+ add w_\dist, w_\dist, w_\tmp0, lsl 1 // symbol = ((distance - 1) >> extra bits) + 2 * extra bits
+.dist2code_done_\@:
+.endm
+
+.macro convert_length_to_len_sym length:req,length_out:req,tmp0:req
+ adrp x_\tmp0, .len_to_code_tab_lanchor // tmp0 = address of the length table (page part)
+ add x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
+ ldr w_\length_out, [x_\tmp0, w_\length, uxtw 2] // table entry indexed by the length, 4 bytes per entry
+ add w_\length_out, w_\length_out, 256 // result = table entry + 256
+.endm
+
+ .section .rodata
+ .align 4
+.len_to_code_tab_lanchor = . + 0 // anchor used by convert_length_to_len_sym; same address as len_to_code_tab
+ .type len_to_code_tab, %object
+ .size len_to_code_tab, 1056 // 264 words of 4 bytes
+len_to_code_tab: // indexed by match length; the macro adds 256 to the entry it reads
+ .word 0x00, 0x00, 0x00 // lengths 0-2
+ .word 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 // lengths 3-10, eight lengths per line from here on
+ .word 0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
+ .word 0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
+ .word 0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
+ .word 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
+ .word 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
+ .word 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
+ .word 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
+ .word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+ .word 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+ .word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+ .word 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+ .word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+ .word 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+ .word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+ .word 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+ .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ .word 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+ .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ .word 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+ .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ .word 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+ .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+ .word 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d // lengths 251-258; only length 258 maps to 0x1d
+ .word 0x00, 0x00, 0x00, 0x00, 0x00 // indices 259-263, zero padding
+
+ .text
+ .global isal_update_histogram_aarch64
+ .arch armv8-a+crc
+ .type isal_update_histogram_aarch64, %function
+
+/*
+void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
+ struct isal_huff_histogram *histogram);
+*/
+
+ /* arguments */
+ declare_generic_reg start_stream, 0,x
+ declare_generic_reg length, 1,x // tested as a signed 32-bit value (w_length)
+ declare_generic_reg histogram, 2,x
+
+ declare_generic_reg param0, 0,x // x0-x2 as handed to the compare macro
+ declare_generic_reg param1, 1,x
+ declare_generic_reg param2, 2,x
+
+ /* local variable */
+ declare_generic_reg start_stream_saved, 10,x // caller-saved, so it is set only after the memset call
+ declare_generic_reg histogram_saved, 23,x // shares register 23 with lit_len_histogram
+ declare_generic_reg current, 19,x
+ declare_generic_reg last_seen, 20,x // base of the 16-bit hash table
+ declare_generic_reg end_stream, 21,x
+ declare_generic_reg loop_end_iter, 22,x // end_stream - 3
+ declare_generic_reg dist_histogram, 12,x
+ declare_generic_reg lit_len_histogram, 23,x // lit_len_offset is 0, so this is the histogram base itself
+ declare_generic_reg literal, 8,x
+ declare_generic_reg next_hash, 9,x
+ declare_generic_reg end, 4,x
+ declare_generic_reg dist, 7,x
+ declare_generic_reg D, 11,w // holds the constant 32766
+ declare_generic_reg match_length, 3,w
+
+ declare_generic_reg tmp0, 5,w
+ declare_generic_reg tmp1, 6,w
+
+/* constant */
+.equ LIT_LEN, 286
+.equ DIST_LEN, 30
+
+.equ lit_len_offset, 0
+.equ dist_offset, (8*LIT_LEN) // 2288
+.equ hash_offset, (dist_offset + 8*DIST_LEN) // 2528
+.equ hash_table_size, (8*1024*2) // 16384
+
+isal_update_histogram_aarch64: // in: x0 = start_stream, w1 = length, x2 = histogram; counts literal, length and distance symbols in 64-bit counters
+ cmp w_length, 0
+ ble .done // length <= 0: return without touching the histogram
+
+ stp x29, x30, [sp, -64]!
+ add x29, sp, 0
+ stp x19, x20, [sp, 16]
+ stp x21, x22, [sp, 32]
+ str x23, [sp, 48]
+
+ add last_seen, histogram, hash_offset // the hash table lives inside the histogram structure
+ add end_stream, start_stream, w_length, sxtw
+ mov current, start_stream
+ sub loop_end_iter, end_stream, #3 // the main loop loads 4 bytes per position
+ mov histogram_saved, histogram // everything needed after memset is now in x19-x23
+
+ mov x0, last_seen
+ mov w1, 0
+ mov x2, hash_table_size
+ bl memset // zero the hash table
+
+ cmp current, loop_end_iter
+ bcs .loop_end // too short for the main loop: count the bytes as literals
+
+ mov start_stream_saved, current
+ add dist_histogram, histogram_saved, dist_offset
+ mov D, 32766
+ b .loop
+
+ .align 2
+.loop_2nd_stream: // count one literal and advance by one byte
+ and literal, literal, 0xff
+ mov current, next_hash // next_hash is current + 1 here
+ cmp loop_end_iter, current // flags are consumed by the bls below; ldr/add/str leave them untouched
+
+ ldr x0, [lit_len_histogram, literal, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, literal, lsl 3]
+ bls .loop_end
+
+.loop:
+ ldr w_literal, [current] // 4 input bytes
+ add next_hash, current, 1
+
+ mov w0, w_literal
+ crc32cw w0, wzr, w0 // hash = crc32c of the 4 bytes with a zero seed
+
+ ubfiz x0, x0, 1, 13 // (hash & 0x1fff) * 2: byte offset into the 16-bit table
+ sub x2, current, start_stream_saved // position relative to the start of the stream
+ ldrh w_dist, [last_seen, x0] // position previously stored for this hash
+ strh w2, [last_seen, x0] // store the low 16 bits of the current position
+ sub w2, w2, w_dist
+ and w_dist, w2, 65535 // dist = (position - previous) modulo 65536
+
+ sub w0, w_dist, #1
+ cmp w0, D
+ bhi .loop_2nd_stream // dist - 1 > 32766 (unsigned, so dist == 0 as well): count a literal
+
+ sub w2, w_end_stream, w_current // bytes left in the stream
+ mov x1, current
+ sub x0, current, w_dist, uxth // x0 = current - dist, the candidate match
+ compare_max_258_bytes param0,param1,param2,match_length,tmp0,tmp1 // macro defined outside this file; the result is read from match_length
+
+ cmp match_length, 3
+ bls .loop_2nd_stream // fewer than 4 matching bytes: count a literal
+
+ add end, current, 3
+ cmp end, loop_end_iter
+ csel end, end, loop_end_iter, ls // end = min(current + 3, loop_end_iter)
+ cmp end, next_hash
+ bls .skip_inner_loop
+
+ .align 3
+.inner_loop: // store the positions from current + 1 up to end - 1 in the hash table
+ ldr w0, [next_hash]
+ crc32cw w0, wzr, w0
+
+ ubfiz x0, x0, 1, 13
+ sub x1, next_hash, start_stream_saved
+ add next_hash, next_hash, 1
+ cmp next_hash, end
+ strh w1, [last_seen, x0]
+ bne .inner_loop
+
+.skip_inner_loop:
+ convert_dist_to_dist_sym dist, tmp0, tmp1 // w_dist becomes the distance symbol
+ uxtw x2, w_dist
+ ldr x1, [dist_histogram, x2, lsl 3]
+ add x1, x1, 1
+ str x1, [dist_histogram, x2, lsl 3]
+
+ convert_length_to_len_sym match_length,tmp1,tmp0 // w_tmp1 = table entry for the length + 256
+ uxtw x0, w_tmp1
+ ldr x1, [lit_len_histogram, x0, lsl 3]
+ add x1, x1, 1
+ str x1, [lit_len_histogram, x0, lsl 3]
+
+ sub match_length, match_length, #1 // the 32-bit write zeroes the upper half of x3
+ add x3, x3, 1 // x3 = match length again, now as a 64-bit value
+ add current, current, x3 // skip the matched bytes
+ cmp loop_end_iter, current
+ bhi .loop
+
+ .align 3
+// fold the last for loop
+.loop_end: // count up to four remaining bytes as literals, one unrolled step per byte
+ cmp end_stream, current
+ bls .loop_fold_end
+
+ mov x0, current
+ ldrb w1, [x0], 1
+ cmp end_stream, x0 // flags are consumed by the bls after the counter update
+ ldr x0, [lit_len_histogram, x1, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, x1, lsl 3]
+ bls .loop_fold_end
+
+ ldrb w1, [current, 1]
+ add x0, current, 2
+ cmp end_stream, x0
+ ldr x0, [lit_len_histogram, x1, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, x1, lsl 3]
+ bls .loop_fold_end
+
+ ldrb w1, [current, 2]
+ add x0, current, 3
+ cmp end_stream, x0
+ ldr x0, [lit_len_histogram, x1, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, x1, lsl 3]
+ bls .loop_fold_end
+
+ ldrb w1, [current, 3]
+ ldr x0, [lit_len_histogram, x1, lsl 3]
+ add x0, x0, 1
+ str x0, [lit_len_histogram, x1, lsl 3]
+
+.loop_fold_end:
+ ldr x0, [lit_len_histogram, (256*8)] // count symbol 256 once per call (end-of-block in DEFLATE)
+ add x0, x0, 1
+ str x0, [lit_len_histogram, (256*8)]
+
+ ldr x23, [sp, 48]
+ ldp x19, x20, [sp, 16]
+ ldp x21, x22, [sp, 32]
+ ldp x29, x30, [sp], 64
+ ret
+ .align 2
+.done: // early exit taken before any frame was built
+ ret
+ .size isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
diff --git a/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h b/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h
new file mode 100644
index 000000000..d55ec09dc
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h
@@ -0,0 +1,72 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __LZ0A_CONST_AARCH64_H__
+#define __LZ0A_CONST_AARCH64_H__
+#include "options_aarch64.h"
+
+#ifdef __ASSEMBLY__
+.set K , 1024 // Multiplier (1 K = 1024) used by the 8 * K hash-size constants in this header
+.set D , IGZIP_HIST_SIZE // Amount of history
+.set LA , 18 * 16 // Max look-ahead (18 * 16 = 288 bytes), rounded up to 32 byte boundary
+.set BSIZE , 2*IGZIP_HIST_SIZE + LA // Nominal buffer size: twice the history size plus the look-ahead
+
+/// Constants for stateless compression
+#define LAST_BYTES_COUNT 3 // Bytes to prevent reading out of array bounds
+#define LA_STATELESS 258 // No round up since no data is copied to a buffer
+
+.set IGZIP_LVL0_HASH_SIZE , (8 * K) // 8 * 1024 = 8192
+.set IGZIP_HASH8K_HASH_SIZE , (8 * K) // 8 * 1024 = 8192
+.set IGZIP_HASH_HIST_HASH_SIZE , IGZIP_HIST_SIZE // Same value as the history size
+.set IGZIP_HASH_MAP_HASH_SIZE , IGZIP_HIST_SIZE // Same value as the history size
+
+#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1) // Size minus one: a contiguous low-bit mask only when the size is a power of two
+#define HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1) // Size minus one (0x1FFF for the 8192 size)
+#define HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_HASH_SIZE - 1) // Size minus one; follows IGZIP_HIST_SIZE
+#define HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1) // Size minus one; follows IGZIP_HIST_SIZE
+
+.set MIN_DEF_MATCH , 3 // Minimum length of a match in deflate
+.set SHORTEST_MATCH , 4 // One more than MIN_DEF_MATCH; NOTE(review): users are not visible in this header, confirm intent
+
+.set SLOP , 8 // NOTE(review): users are not visible in this header; presumably a margin in bytes, confirm
+
+#define ICF_CODE_BYTES 4 // NOTE(review): presumably the size of one packed code word (4 bytes = 32 bits), confirm
+#define LIT_LEN_BIT_COUNT 10 // Field width in bits; DIST_OFFSET equals this, so the next field starts at bit 10
+#define DIST_LIT_BIT_COUNT 9 // Field width in bits; added to DIST_OFFSET to form EXTRA_BITS_OFFSET
+
+#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1) // 0x3FF (low 10 bits set)
+#define LIT_DIST_MASK ((1 << DIST_LIT_BIT_COUNT) - 1) // 0x1FF (low 9 bits set)
+
+#define DIST_OFFSET LIT_LEN_BIT_COUNT // Evaluates to 10
+#define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT) // Evaluates to 19 (10 + 9)
+#define LIT (0x1E << DIST_OFFSET) // 0x1E shifted to DIST_OFFSET: 0x1E << 10 = 0x7800
+
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/options_aarch64.h b/src/isa-l/igzip/aarch64/options_aarch64.h
new file mode 100644
index 000000000..32db918f3
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/options_aarch64.h
@@ -0,0 +1,71 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __OPTIONS_AARCH64_H__
+#define __OPTIONS_AARCH64_H__
+
+
+#ifdef __ASSEMBLY__
+
+/// Options:dir
+/// m - reschedule mem reads
+/// e b - bitbuff style
+/// t s x - compare style
+/// h - limit hash updates
+/// l - use longer huffman table
+/// f - fix cache read
+
+#ifndef IGZIP_HIST_SIZE // Default used when no value was defined before this header
+#define IGZIP_HIST_SIZE (32 * 1024)
+#endif
+
+#if (IGZIP_HIST_SIZE > (32 * 1024)) // Clamp any larger value down to 32 * 1024
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (32 * 1024)
+#endif
+
+#ifdef LONGER_HUFFTABLE // With LONGER_HUFFTABLE defined, clamp further to 8 * 1024
+#if (IGZIP_HIST_SIZE > 8 * 1024)
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (8 * 1024)
+#endif // IGZIP_HIST_SIZE > 8 * 1024
+#endif // LONGER_HUFFTABLE
+
+/// (h) limit hash update
+#define LIMIT_HASH_UPDATE // Defined unconditionally (no value) inside the __ASSEMBLY__ section
+
+/// (f) fix cache read problem
+#define FIX_CACHE_READ // Defined unconditionally (no value) inside the __ASSEMBLY__ section
+
+#define ISAL_DEF_MAX_HDR_SIZE 328 // NOTE(review): presumably mirrors a same-named constant on the C side; confirm the two stay in sync
+
+
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/stdmac_aarch64.h b/src/isa-l/igzip/aarch64/stdmac_aarch64.h
new file mode 100644
index 000000000..39afbc640
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/stdmac_aarch64.h
@@ -0,0 +1,57 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __STDMAC_AARCH64_H__
+#define __STDMAC_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+
+#define DEBUG_STACK 144 // Frame size in bytes (multiple of 16); push_stack writes only offsets 0-95 of it
+
+.macro push_stack // Allocate a DEBUG_STACK-byte frame and save x29/x30 plus x19-x28 into it
+ stp x29, x30,[sp,0-DEBUG_STACK]! // Pre-index: sp -= DEBUG_STACK, then x29/x30 stored at [sp, 0]
+ mov x29, sp // x29 = base of the new frame
+ stp x19, x20, [sp, 16] // Register pairs stored at 16-byte steps from here on
+ stp x21, x22, [sp, 32]
+ stp x23, x24, [sp, 48]
+ stp x25, x26, [sp, 64]
+ stp x27, x28, [sp, 80] // Last save ends at offset 95; offsets 96-143 are not written by this macro
+.endm
+.macro pop_stack // Reload x19-x28 and x29/x30 from the frame and release DEBUG_STACK bytes; emits no ret
+ ldp x19, x20, [sp, 16] // Offsets match the stores in push_stack, so sp must hold the value push_stack left
+ ldp x21, x22, [sp, 32]
+ ldp x23, x24, [sp, 48]
+ ldp x25, x26, [sp, 64]
+ ldp x27, x28, [sp, 80]
+
+ ldp x29, x30, [sp], DEBUG_STACK // Post-index: load x29/x30 from [sp, 0], then sp += DEBUG_STACK
+.endm
+
+#endif
+#endif