diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/isa-l/igzip/aarch64 | |
parent | Initial commit. (diff) | |
download | ceph-upstream/18.2.2.tar.xz ceph-upstream/18.2.2.zip |
Adding upstream version 18.2.2. (tag: upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/isa-l/igzip/aarch64')
20 files changed, 4093 insertions, 0 deletions
diff --git a/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h b/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h new file mode 100644 index 000000000..88eb18dfd --- /dev/null +++ b/src/isa-l/igzip/aarch64/bitbuf2_aarch64.h @@ -0,0 +1,57 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#ifndef __BITBUF2_AARCH64_H__
+#define __BITBUF2_AARCH64_H__
+#include "options_aarch64.h"
+
+#ifdef __ASSEMBLY__
+/*
+ * update_bits - append a code to the bit buffer, flush whole bytes and
+ * write the bit-buffer state back to the stream.
+ *
+ * Arguments are register *names*; the macro uses the x_<name>/w_<name>
+ * views of each, except `stream`, which is used as given and must name a
+ * 64-bit base register (or an alias of one).
+ *
+ *   stream       base register of the isal_zstream whose internal state
+ *                receives the updated m_bits / m_bit_count / m_out_buf
+ *   code         in: bits to append          (clobbered)
+ *   code_len     in: number of bits in code  (clobbered)
+ *   m_bits       in/out: pending bits
+ *   m_bit_count  in/out: number of pending bits
+ *   m_out_buf    in/out: output write pointer
+ *
+ * An 8-byte store is always issued at the current write pointer; only
+ * m_bit_count / 8 bytes of it are consumed by advancing the pointer.
+ *
+ * The write-back previously addressed a hard-coded symbol named `stream`
+ * and ignored the macro argument; it now uses the argument, which is
+ * identical for callers that pass `stream`.
+ */
+.macro update_bits stream:req,code:req,code_len:req,m_bits:req,m_bit_count:req \
+                m_out_buf:req
+
+        // m_bits |= code << m_bit_count; m_bit_count += code_len
+        lsl     x_\code,x_\code,x_\m_bit_count
+        orr     x_\m_bits,x_\code,x_\m_bits
+        add     x_\m_bit_count,x_\code_len,x_\m_bit_count
+
+        // write the whole 64-bit accumulator at the output pointer
+        str     x_\m_bits,[x_\m_out_buf]
+
+        // code     = m_bit_count rounded down to a multiple of 8 (bits flushed)
+        // code_len = m_bit_count / 8                             (bytes flushed)
+        and     w_\code,w_\m_bit_count,-8
+        lsr     w_\code_len,w_\m_bit_count,3
+        add     x_\m_out_buf,x_\m_out_buf,w_\code_len,uxtw
+        sub     w_\m_bit_count,w_\m_bit_count,w_\code
+        lsr     x_\m_bits,x_\m_bits,x_\code
+
+        // persist the bit-buffer state in stream->internal_state.bitbuf
+        str     x_\m_bits,[\stream,_internal_state_bitbuf_m_bits]
+        str     w_\m_bit_count,[\stream,_internal_state_bitbuf_m_bit_count]
+        str     x_\m_out_buf,[\stream,_internal_state_bitbuf_m_out_buf]
+
+
+.endm
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/data_struct_aarch64.h b/src/isa-l/igzip/aarch64/data_struct_aarch64.h new file mode 100644 index 000000000..5f8676d34 --- /dev/null +++ b/src/isa-l/igzip/aarch64/data_struct_aarch64.h @@ -0,0 +1,215 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#ifndef __AARCH64_DATA_STRUCT_H__
+#define __AARCH64_DATA_STRUCT_H__
+#ifdef __ASSEMBLY__
+
+// Assembler-side struct layout helpers.
+//
+//   start_struct NAME        reset the running offset and alignment
+//   field NAME, SIZE, ALIGN  define symbol NAME as the aligned offset of the
+//                            field, then advance the running offset by SIZE
+//   end_struct NAME          define _NAME_size and _NAME_align
+//
+// The running state lives in the assembler symbols _FIELD_OFFSET and
+// _STRUCT_ALIGN, so struct definitions cannot be nested or interleaved.
+// NOTE(review): the layouts below presumably mirror the C structs of the
+// same names -- confirm against the C headers whenever either side changes.
+
+// start_struct: begin a new layout; `name` is accepted but not used here.
+.macro start_struct name:req
+        .set _FIELD_OFFSET,0
+        .set _STRUCT_ALIGN,0
+.endm
+// end_struct: publish the final offset as _<name>_size and the largest field
+// alignment seen as _<name>_align. The size is NOT rounded up to the
+// struct alignment.
+.macro end_struct name:req
+        .set _\name\()_size,_FIELD_OFFSET
+        .set _\name\()_align,_STRUCT_ALIGN
+.endm
+// field: round _FIELD_OFFSET up to a multiple of `align` (the mask form only
+// rounds correctly when `align` is a power of two), bind `name` to that
+// offset, advance by `size`, and track the maximum alignment.
+.macro field name:req, size:req, align:req
+        .set _FIELD_OFFSET,(_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1))
+        .set \name,_FIELD_OFFSET
+        .set _FIELD_OFFSET,_FIELD_OFFSET + \size
+        .if \align > _STRUCT_ALIGN
+                .set _STRUCT_ALIGN, \align
+        .endif
+.endm
+
+/// BitBuf2
+start_struct BitBuf2
+        /// name size align
+        field _m_bits, 8, 8
+        field _m_bit_count, 4, 4
+        field _m_out_buf, 8, 8          // padded up to offset 16 by the 8-byte alignment
+        field _m_out_end, 8, 8
+        field _m_out_start, 8, 8
+end_struct BitBuf2
+
+/// isal_mod_hist
+#define HIST_ELEM_SIZE 4
+start_struct isal_mod_hist
+        /// name size align
+        field _d_hist, 30*HIST_ELEM_SIZE, HIST_ELEM_SIZE
+        field _ll_hist, 513*HIST_ELEM_SIZE, HIST_ELEM_SIZE
+end_struct isal_mod_hist
+
+/// hufftables_icf
+#define HUFF_CODE_SIZE 4
+start_struct hufftables_icf
+        /// name size align
+        field _dist_table, 31 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
+        field _lit_len_table, 513 * HUFF_CODE_SIZE, HUFF_CODE_SIZE
+end_struct hufftables_icf
+
+/// hash8k_buf
+// IGZIP_HASH8K_HASH_SIZE is not defined in this header; it must be provided
+// before this header is processed.
+start_struct hash8k_buf
+        /// name size align
+        field _hash8k_table, 2 * IGZIP_HASH8K_HASH_SIZE, 2
+end_struct hash8k_buf
+
+/// hash_map_buf
+// IGZIP_HASH_MAP_HASH_SIZE and LA are not defined in this header either.
+start_struct hash_map_buf
+        /// name size align
+        field _hash_table, 2 * IGZIP_HASH_MAP_HASH_SIZE, 2
+        field _matches_next, 8, 8
+        field _matches_end, 8, 8
+        field _matches, 4*4*1024, 4
+        field _overflow, 4*LA, 4
+end_struct hash_map_buf
+
+/// level_buf
+#define DEF_MAX_HDR_SIZE 328
+start_struct level_buf
+        /// name size align
+        field _encode_tables, _hufftables_icf_size, _hufftables_icf_align
+        field _hist, _isal_mod_hist_size, _isal_mod_hist_align
+        field _deflate_hdr_count, 4, 4
+        field _deflate_hdr_extra_bits,4, 4
+        field _deflate_hdr, DEF_MAX_HDR_SIZE, 1
+        field _icf_buf_next, 8, 8
+        field _icf_buf_avail_out, 8, 8
+        field _icf_buf_start, 8, 8
+        // sized and aligned as hash_map_buf; the hash8k alias below reuses
+        // the same starting offset
+        field _lvl_extra, _hash_map_buf_size, _hash_map_buf_align
+end_struct level_buf
+
+// Offsets of nested members measured from the start of level_buf.
+.set _hash8k_hash_table , _lvl_extra + _hash8k_table
+.set _hash_map_hash_table , _lvl_extra + _hash_table
+.set _hash_map_matches_next , _lvl_extra + _matches_next
+.set _hash_map_matches_end , _lvl_extra + _matches_end
+.set _hash_map_matches , _lvl_extra + _matches
+.set _hist_lit_len , _hist+_ll_hist
+.set _hist_dist , _hist+_d_hist
+
+/// isal_zstate
+// BSIZE and IGZIP_LVL0_HASH_SIZE are not defined in this header.
+start_struct isal_zstate
+        /// name size align
+        field _total_in_start,4, 4
+        field _block_next, 4, 4
+        field _block_end, 4, 4
+        field _dist_mask, 4, 4
+        field _hash_mask, 4, 4
+        field _state, 4, 4
+        field _bitbuf, _BitBuf2_size, _BitBuf2_align
+        field _crc, 4, 4
+        field _has_wrap_hdr, 1, 1
+        field _has_eob_hdr, 1, 1
+        field _has_eob, 1, 1
+        field _has_hist, 1, 1
+        field _has_level_buf_init, 2, 2
+        field _count, 4, 4
+        field _tmp_out_buff, 16, 1
+        field _tmp_out_start, 4, 4
+        field _tmp_out_end, 4, 4
+        field _b_bytes_valid, 4, 4
+        field _b_bytes_processed, 4, 4
+        field _buffer, BSIZE, 1
+        field _head, IGZIP_LVL0_HASH_SIZE*2, 2
+end_struct isal_zstate
+
+// Offsets of the BitBuf2 members measured from the start of isal_zstate.
+.set _bitbuf_m_bits , _bitbuf+_m_bits
+.set _bitbuf_m_bit_count , _bitbuf+_m_bit_count
+.set _bitbuf_m_out_buf , _bitbuf+_m_out_buf
+.set _bitbuf_m_out_end , _bitbuf+_m_out_end
+.set _bitbuf_m_out_start , _bitbuf+_m_out_start
+
+/// isal_zstream
+start_struct isal_zstream
+        /// name size align
+        field _next_in, 8, 8
+        field _avail_in, 4, 4
+        field _total_in, 4, 4
+        field _next_out, 8, 8
+        field _avail_out, 4, 4
+        field _total_out, 4, 4
+        field _hufftables, 8, 8
+        field _level, 4, 4
+        field _level_buf_size, 4, 4
+        field _level_buf, 8, 8
+        field _end_of_stream, 2, 2
+        field _flush, 2, 2
+        field _gzip_flag, 2, 2
+        field _hist_bits, 2, 2
+        field _internal_state, _isal_zstate_size, _isal_zstate_align
+end_struct isal_zstream
+
+// Offsets of isal_zstate members measured from the start of isal_zstream.
+.set _internal_state_total_in_start , _internal_state+_total_in_start
+.set _internal_state_block_next , _internal_state+_block_next
+.set _internal_state_block_end , _internal_state+_block_end
+.set _internal_state_b_bytes_valid , _internal_state+_b_bytes_valid
+.set _internal_state_b_bytes_processed , _internal_state+_b_bytes_processed
+.set _internal_state_crc , _internal_state+_crc
+.set _internal_state_dist_mask , _internal_state+_dist_mask
+.set _internal_state_hash_mask , _internal_state+_hash_mask
+.set _internal_state_bitbuf , _internal_state+_bitbuf
+.set _internal_state_state , _internal_state+_state
+.set _internal_state_count , _internal_state+_count
+.set _internal_state_tmp_out_buff , _internal_state+_tmp_out_buff
+.set _internal_state_tmp_out_start , _internal_state+_tmp_out_start
+.set _internal_state_tmp_out_end , _internal_state+_tmp_out_end
+.set _internal_state_has_wrap_hdr , _internal_state+_has_wrap_hdr
+.set _internal_state_has_eob , _internal_state+_has_eob
+.set _internal_state_has_eob_hdr , _internal_state+_has_eob_hdr
+.set _internal_state_has_hist , _internal_state+_has_hist
+.set _internal_state_has_level_buf_init , _internal_state+_has_level_buf_init
+.set _internal_state_buffer , _internal_state+_buffer
+.set _internal_state_head , _internal_state+_head
+.set _internal_state_bitbuf_m_bits , _internal_state+_bitbuf_m_bits
+.set _internal_state_bitbuf_m_bit_count , _internal_state+_bitbuf_m_bit_count
+.set _internal_state_bitbuf_m_out_buf , _internal_state+_bitbuf_m_out_buf
+.set _internal_state_bitbuf_m_out_end , _internal_state+_bitbuf_m_out_end
+.set _internal_state_bitbuf_m_out_start , _internal_state+_bitbuf_m_out_start
+
+/// Internal States
+// Consecutive values starting at 0; each state is the previous one plus 1.
+.set ZSTATE_NEW_HDR , 0
+.set ZSTATE_HDR , (ZSTATE_NEW_HDR + 1)
+.set ZSTATE_CREATE_HDR , (ZSTATE_HDR + 1)
+.set ZSTATE_BODY , (ZSTATE_CREATE_HDR + 1)
+.set ZSTATE_FLUSH_READ_BUFFER , (ZSTATE_BODY + 1)
+.set ZSTATE_FLUSH_ICF_BUFFER , (ZSTATE_FLUSH_READ_BUFFER + 1)
+.set ZSTATE_TYPE0_HDR , (ZSTATE_FLUSH_ICF_BUFFER + 1)
+.set ZSTATE_TYPE0_BODY , (ZSTATE_TYPE0_HDR + 1)
+.set ZSTATE_SYNC_FLUSH , (ZSTATE_TYPE0_BODY + 1)
+.set ZSTATE_FLUSH_WRITE_BUFFER , (ZSTATE_SYNC_FLUSH + 1)
+.set ZSTATE_TRL , (ZSTATE_FLUSH_WRITE_BUFFER + 1)
+
+// Flush modes, stored-block marker and history states.
+.set _NO_FLUSH , 0
+.set _SYNC_FLUSH , 1
+.set _FULL_FLUSH , 2
+.set _STORED_BLK , 0
+.set IGZIP_NO_HIST , 0
+.set IGZIP_HIST , 1
+.set IGZIP_DICT_HIST , 2
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/encode_df.S b/src/isa-l/igzip/aarch64/encode_df.S new file mode 100644 index 000000000..6dddddf0a --- /dev/null +++ b/src/isa-l/igzip/aarch64/encode_df.S @@ -0,0 +1,159 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+        .arch armv8-a+crc
+        .text
+        .align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+
+// declare_generic_reg name, reg, default
+// Creates three register aliases for register number `reg`:
+//   <name>   -> <default><reg>   (default is `x` or `w`)
+//   w_<name> -> w<reg>
+//   x_<name> -> x<reg>
+.macro declare_generic_reg name:req,reg:req,default:req
+        \name .req \default\reg
+        w_\name .req w\reg
+        x_\name .req x\reg
+.endm
+
+        .global encode_deflate_icf_aarch64
+        .type encode_deflate_icf_aarch64, %function
+
+/*
+        struct deflate_icf *encode_deflate_icf_base(struct deflate_icf *next_in,
+                struct deflate_icf *end_in, struct BitBuf2 *bb,
+                struct hufftables_icf *hufftables)
+
+*/
+// Register roles (x0-x15 only; no stack is used and no calls are made).
+// The value left in x0 on return is the advanced next_in pointer.
+
+        // parameters
+        declare_generic_reg next_in, 0,x
+        declare_generic_reg end_in, 1,x
+        declare_generic_reg bb, 2,x
+        declare_generic_reg hufftables, 3,x
+
+        // local variable
+        declare_generic_reg bb_out_end, 4,x
+        declare_generic_reg bb_bit_count, 5,w
+        declare_generic_reg dist_extra, 6,x
+        declare_generic_reg dist_lit_table, 7,x
+        declare_generic_reg code_and_extra, 8,x
+        declare_generic_reg bb_out_buf, 9,x
+        declare_generic_reg bb_bits, 10,x
+        declare_generic_reg d_length, 11,x
+        declare_generic_reg l_length, 12,x
+        declare_generic_reg d_extra_bit_count, 13,x
+
+        // code_sum shares x4 with bb_out_end and count_sum shares x7 with
+        // dist_lit_table; each pair is never live at the same time below.
+        declare_generic_reg code_sum, 4,x
+        declare_generic_reg count_sum, 7,x
+
+        declare_generic_reg tmp0, 14,x
+        declare_generic_reg tmp1, 15,x
+
+// bit buffer offset
+// NOTE(review): hard-coded; these equal the _m_bits/_m_bit_count/_m_out_buf/
+// _m_out_end offsets that the BitBuf2 layout in data_struct_aarch64.h yields.
+.equ offset_m_bits, 0
+.equ offset_m_bit_count, 8
+.equ offset_m_out_buf, 16
+.equ offset_m_out_end, 24
+
+encode_deflate_icf_aarch64:
+        // nothing to do if next_in >= end_in (unsigned compare)
+        cmp     next_in, end_in
+        bcs     .done
+
+        // stop immediately if m_out_end < m_out_buf
+        ldp     bb_out_buf, bb_out_end, [bb, offset_m_out_buf]
+        cmp     bb_out_end, bb_out_buf
+        bcc     .done
+
+        ldr     bb_bit_count, [bb, offset_m_bit_count]
+        ldr     bb_bits, [bb, offset_m_bits]
+        b       .loop_start
+
+        .align 3
+.loop:
+        // per-iteration output check: leave when m_out_end < m_out_buf
+        ldr     bb_out_end, [bb, offset_m_out_end]
+        cmp     bb_out_end, bb_out_buf
+        bcc     .done
+
+.loop_start:
+        // Fetch one 32-bit input element and split it:
+        //   bits [9:0]   -> literal/length symbol (code_and_extra)
+        //   bits [18:10] -> distance symbol       (dist_lit_table)
+        //   bits [31:19] -> distance extra bits   (dist_extra)
+        // next_in is advanced first, hence the negative offsets.
+        ldrh    w_code_and_extra, [next_in]
+        add     next_in, next_in, 4
+        ldr     w_dist_lit_table, [next_in, -4]
+        and     code_and_extra, code_and_extra, 1023
+
+        ldrh    w_dist_extra, [next_in, -2]
+        // +31: skip 31 four-byte entries so the index lands in the table
+        // that follows the first 31 entries of hufftables
+        add     code_and_extra, code_and_extra, 31
+        ubfx    x_dist_lit_table, x_dist_lit_table, 10, 9
+        add     x_tmp0, hufftables, code_and_extra, lsl 2
+        ubfx    x_dist_extra, x_dist_extra, 3, 13
+        lsl     x_dist_lit_table, x_dist_lit_table, 2
+
+        // Table entries are 4 bytes. For the literal/length entry the low
+        // 24 bits are kept as the code and byte 3 is its bit length. For the
+        // distance entry the low 16 bits are the code, byte 2 the number of
+        // extra bits and byte 3 the code bit length.
+        ldr     w_code_and_extra, [hufftables, code_and_extra, lsl 2]
+        add     x_d_extra_bit_count, hufftables, x_dist_lit_table
+        ldrb    w_l_length, [x_tmp0, 3]
+        and     code_and_extra, code_and_extra, 0xffffff
+        ldrh    w_code_sum, [hufftables, x_dist_lit_table]
+        ldrb    w_d_length, [x_d_extra_bit_count, 3]
+        add     w_l_length, w_l_length, bb_bit_count       // bit position after the lit/len code
+        ldrb    w_d_extra_bit_count, [x_d_extra_bit_count, 2]
+
+        // Pack lit/len code, distance code and distance extra bits above
+        // the bits already pending in bb_bits.
+        lsl     x_tmp0, code_and_extra, x_bb_bit_count
+        add     bb_bit_count, w_d_length, w_l_length      // bit position after the distance code
+        lsl     x_code_sum, x_code_sum, x_l_length
+        orr     x_code_sum, x_code_sum, x_tmp0
+        add     w_count_sum, w_d_extra_bit_count, bb_bit_count     // new total bit count
+        lsl     x_bb_bit_count, x_dist_extra, x_bb_bit_count
+
+        orr     x_bb_bit_count, x_bb_bit_count, bb_bits
+        orr     x_tmp0, x_code_sum, x_bb_bit_count      // me->m_bits => x_tmp0
+        str     x_tmp0, [bb, offset_m_bits]             // me->m_bits => x_tmp0
+        str     w_count_sum, [bb, offset_m_bit_count]
+
+        // Flush: store all 8 bytes at the output pointer, then consume only
+        // the whole bytes (m_bit_count / 8) and keep the remainder pending.
+        str     x_tmp0, [bb_out_buf]                    // me->m_bits => x_tmp0
+        ldr     bb_bit_count, [bb, offset_m_bit_count]
+        ldr     bb_bits, [bb, offset_m_bits]
+        and     w_tmp0, bb_bit_count, -8                // bits => w_tmp0
+        ldr     bb_out_buf, [bb, offset_m_out_buf]
+        lsr     w_tmp1, bb_bit_count, 3                 // bits/8 => w_tmp1
+        lsr     bb_bits, bb_bits, x_tmp0                // bits => x_tmp0
+        sub     bb_bit_count, bb_bit_count, w_tmp0      // bits => w_tmp0
+        add     bb_out_buf, bb_out_buf, x_tmp1          // bits/8 => x_tmp1
+        str     bb_bits, [bb,offset_m_bits]
+        str     bb_bit_count, [bb, offset_m_bit_count]
+        str     bb_out_buf, [bb, offset_m_out_buf]
+
+        // continue while end_in > next_in (unsigned)
+        cmp     end_in, next_in
+        bhi     .loop
+
+.done:
+        ret
+        .size encode_deflate_icf_aarch64, .-encode_deflate_icf_aarch64
diff --git a/src/isa-l/igzip/aarch64/gen_icf_map.S b/src/isa-l/igzip/aarch64/gen_icf_map.S new file mode 100644 index
000000000..fe04ee4c3 --- /dev/null +++ b/src/isa-l/igzip/aarch64/gen_icf_map.S @@ -0,0 +1,266 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+        .arch armv8-a+crc+crypto
+        .text
+        .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+
+// declare_generic_reg name, reg, default
+// Creates the aliases <name> (= <default><reg>), w_<name> and x_<name>.
+.macro declare_generic_reg name:req,reg:req,default:req
+        \name .req \default\reg
+        w_\name .req w\reg
+        x_\name .req x\reg
+.endm
+
+// tzbytecnt param0, param1
+// On exit w_<param0> = number of zero bytes at the low end of the 64-bit
+// input x_<param0> (trailing-zero bit count / 8), or 8 when the input is 0.
+// Clobbers x_<param1> and the condition flags.
+.macro tzbytecnt param0:req,param1:req
+        rbit    x_\param1, x_\param0
+        cmp     x_\param0, 0
+        clz     x_\param1, x_\param1
+        mov     w_\param0, 8
+        lsr     w_\param1, w_\param1, 3
+        csel    w_\param0, w_\param1, w_\param0, ne
+.endm
+
+// write_deflate_icf param0, param1, param2, param3
+// Stores the 32-bit word  param1 | (param2 << 10) | (param3 << 19)  at the
+// address in x_<param0>. w_<param1> is overwritten with the packed word.
+.macro write_deflate_icf param0:req,param1:req,param2:req,param3:req
+        orr     w_\param1, w_\param1, w_\param3, lsl 19
+        orr     w_\param1, w_\param1, w_\param2, lsl 10
+        str     w_\param1, [x_\param0]
+.endm
+
+        .align 2
+        .global gen_icf_map_h1_aarch64
+        .type gen_icf_map_h1_aarch64, %function
+
+        // Register roles. x0-x3 carry several aliases (arguments, paramN,
+        // tmpN, ret_val); which alias is meaningful depends on the point in
+        // the function, so statement order matters.
+
+        /* arguments */
+        declare_generic_reg stream_param, 0,x
+        declare_generic_reg matches_icf_lookup_param, 1,x
+        declare_generic_reg input_size_param, 2,x
+
+        declare_generic_reg param0, 0,x
+        declare_generic_reg param1, 1,x
+        declare_generic_reg param2, 2,x
+        declare_generic_reg param3, 3,x
+
+        /* return */
+        declare_generic_reg ret_val, 0,x
+
+        /* variables */
+        declare_generic_reg input_size, 3,x
+        declare_generic_reg next_in, 4,x
+        declare_generic_reg matches_icf_lookup, 6,x
+        declare_generic_reg hash_table, 7,x
+        declare_generic_reg end_in, 8,x
+        declare_generic_reg file_start, 9,x
+        declare_generic_reg hash_mask, 10,w
+        declare_generic_reg hist_size, 11,w
+        declare_generic_reg stream_saved, 12,x
+        declare_generic_reg literal_32, 13,w
+        declare_generic_reg literal_1, 14,w
+        declare_generic_reg dist, 15,w
+
+        declare_generic_reg tmp_has_hist, 0,w
+        declare_generic_reg tmp_offset_hash_table, 1,x
+        declare_generic_reg tmp0, 0,x
+        declare_generic_reg tmp1, 1,x
+        declare_generic_reg tmp2, 2,x
+        declare_generic_reg tmp3, 3,x
+        declare_generic_reg tmp5, 5,x
+
+/* constant */
+.equ ISAL_LOOK_AHEAD, 288
+.equ SHORTEST_MATCH, 4
+.equ LEN_OFFSET, 254
+
+/* mask */
+.equ mask_10bit, 1023
+.equ mask_lit_dist, 0x7800              // equals 0x1e << 10
+
+/* offset of struct isal_zstream */
+// NOTE(review): hard-coded byte offsets; presumably these must match the
+// isal_zstream / isal_zstate layout -- confirm against the struct definitions.
+.equ offset_next_in, 0
+.equ offset_avail_in, 8
+.equ offset_total_in, 12
+.equ offset_next_out, 16
+.equ offset_avail_out, 24
+.equ offset_total_out, 28
+.equ offset_hufftables, 32
+.equ offset_level, 40
+.equ offset_level_buf_size, 44
+.equ offset_level_buf, 48
+.equ offset_end_of_stream, 56
+.equ offset_flush, 58
+.equ offset_gzip_flag, 60
+.equ offset_hist_bits, 62
+.equ offset_state, 64
+.equ offset_state_block_end, 72
+.equ offset_state_dist_mask, 76
+.equ offset_state_has_hist, 135
+
+/* offset of struct level_buf */
+// NOTE(review): hard-coded as well; verify against the level_buf layout.
+.equ offset_hash_map_hash_table, 4712
+
+/*
+uint64_t gen_icf_map_h1_base(struct isal_zstream *stream,
+        struct deflate_icf *matches_icf_lookup, uint64_t input_size)
+*/
+// In:  x0 = stream, x1 = matches_icf_lookup, x2 = input_size
+// Out: x0 = number of input bytes for which an entry was written
+// Returns 0 immediately when input_size <= ISAL_LOOK_AHEAD - 1.
+// Uses x0-x15 only; x29/x30 are saved and restored on the stack.
+
+gen_icf_map_h1_aarch64:
+        cmp     input_size_param, (ISAL_LOOK_AHEAD-1) // 287
+        bls     .fast_exit
+        stp     x29, x30, [sp, -16]!
+
+        // x0 and x1 are about to be reused as temporaries, so move the
+        // arguments out of the way first.
+        mov     stream_saved, stream_param
+        mov     matches_icf_lookup, matches_icf_lookup_param
+        mov     x29, sp
+
+        ldrb    tmp_has_hist, [stream_saved, offset_state_has_hist]
+        mov     tmp_offset_hash_table, offset_hash_map_hash_table
+        ldr     end_in, [stream_saved, offset_next_in]  // end_in temporarily holds stream->next_in
+        mov     input_size, input_size_param
+        ldr     hash_table, [stream_saved, offset_level_buf]
+        ldr     w_file_start, [stream_saved, offset_total_in]
+        // loads two consecutive 32-bit words: hist_size <- [+76], hash_mask <- [+80]
+        ldp     hist_size, hash_mask, [stream_saved, offset_state_dist_mask]
+        add     hash_table, hash_table, tmp_offset_hash_table
+        sub     file_start, end_in, file_start          // file_start = next_in - total_in
+        cbz     tmp_has_hist, .igzip_no_hist
+        b       .while_check1
+
+        .align 3
+.igzip_no_hist:
+        // No history yet: emit the first input byte as a literal entry
+        // (low 10 bits = byte, 0x1e in the field at bit 10, upper bits 0),
+        // record its position in the hash table and set has_hist.
+        ldrb    w_tmp1, [end_in]
+        add     next_in, end_in, 1
+        ldrh    w_tmp0, [matches_icf_lookup]
+        bfi     w_tmp0, w_tmp1, 0, 10
+        strh    w_tmp0, [matches_icf_lookup]
+        ldr     w_tmp0, [matches_icf_lookup]
+        and     w_tmp0, w_tmp0, mask_10bit
+        orr     w_tmp0, w_tmp0, mask_lit_dist
+        str     w_tmp0, [matches_icf_lookup], 4         // post-increment to the next entry
+        ldr     w_tmp0, [end_in]
+        crc32cw w_tmp0, wzr, w_tmp0                     // hash of the first 4 input bytes
+
+        and     w_tmp5, w_tmp0, hash_mask
+        sub     x_tmp1, end_in, file_start
+        mov     w_tmp2, 1
+        mov     x_tmp0, 1                               // return value if the loop is skipped
+        strh    w_tmp1, [hash_table, x_tmp5, lsl 1]
+        strb    w_tmp2, [stream_saved, offset_state_has_hist]
+        b       .while_check2
+
+.while_check1:
+        mov     next_in, end_in
+        mov     x_tmp0, 0                               // return value if the loop is skipped
+
+.while_check2:
+        // end_in = stream->next_in + input_size - 288
+        sub     input_size, input_size, #288
+        add     end_in, end_in, input_size
+        cmp     next_in, end_in
+        bcs     .exit                                   // x0 still holds 0 or 1 from above
+        mov     literal_32, 32
+        mov     literal_1, 1
+        b       .while_loop
+
+        .align 3
+.new_match_found:
+        // Here w_tmp0 = matched byte count (4..8), w_tmp2 = dist - 1, w_tmp3 = 0.
+        // w_tmp1 = length symbol = match length + LEN_OFFSET.
+        // w_tmp5 = (32 - clz(dist - 1)) - 2 = number of distance extra bits.
+        clz     w_tmp5, w_tmp2
+        add     w_tmp1, w_tmp0, LEN_OFFSET
+        sub     w_tmp5, literal_32, w_tmp5
+        cmp     dist, 2
+        sub     w_tmp5, w_tmp5, #2                      // sub does not alter the flags set by cmp
+        bls     .skip_compute_dist_icf_code             // dist <= 2: symbol = dist - 1, no extra bits
+
+        // w_tmp3 = low w_tmp5 bits of (dist - 1)          -> extra bits
+        // w_tmp2 = ((dist - 1) >> w_tmp5) + 2 * w_tmp5    -> distance symbol
+        lsl     w_tmp3, literal_1, w_tmp5
+        sub     w_tmp3, w_tmp3, #1
+        lsr     w_tmp0, w_tmp2, w_tmp5
+        and     w_tmp3, w_tmp3, w_tmp2
+        add     w_tmp2, w_tmp0, w_tmp5, lsl 1
+
+.skip_compute_dist_icf_code:
+        mov     param0, matches_icf_lookup
+        write_deflate_icf param0,param1,param2,param3
+
+        add     next_in, next_in, 1
+        add     matches_icf_lookup, matches_icf_lookup, 4
+        cmp     next_in, end_in
+        beq     .save_with_exit
+
+.while_loop:
+        // hash = crc32c(0, 4 bytes at next_in) & hash_mask
+        ldr     w_tmp0, [next_in]
+        crc32cw w_tmp0, wzr, w_tmp0
+
+        and     w_tmp0, w_tmp0, hash_mask
+        sub     x_tmp1, next_in, file_start             // current position relative to file_start
+        lsl     x_tmp0, x_tmp0, 1                       // byte offset into the 16-bit hash table
+        sub     w_tmp2, w_tmp1, #1
+        ldrh    w_tmp3, [hash_table, x_tmp0]            // previous position stored for this hash
+        strh    w_tmp1, [hash_table, x_tmp0]            // replace it with the current position
+        sub     w_tmp2, w_tmp2, w_tmp3
+        and     w_tmp2, w_tmp2, hist_size
+        add     dist, w_tmp2, 1                         // dist = ((pos - 1 - prev) & hist_size) + 1
+        // compare 8 bytes at next_in with 8 bytes at next_in - dist
+        ldr     x_tmp0, [next_in]
+        sub     x_tmp1, next_in, w_dist, uxtw
+        ldr     x_tmp1, [x_tmp1]
+        eor     x_tmp0, x_tmp1, x_tmp0
+        tzbytecnt param0,param1                         // w_tmp0 = number of equal leading bytes (0..8)
+
+        cmp     w_tmp0, (SHORTEST_MATCH-1)
+        mov     w_tmp3, 0
+        bhi     .new_match_found
+
+        // Fewer than SHORTEST_MATCH bytes matched: emit a literal entry.
+        ldrb    w_param1, [next_in]
+        mov     x_param0, matches_icf_lookup
+        mov     w_param3, 0
+        mov     w_param2, 0x1e
+        write_deflate_icf param0,param1,param2,param3
+
+        add     next_in, next_in, 1
+        add     matches_icf_lookup, matches_icf_lookup, 4
+        cmp     next_in, end_in
+        bne     .while_loop
+
+.save_with_exit:
+        // return next_in - stream->next_in
+        ldr     ret_val, [stream_saved, offset_next_in]
+        sub     ret_val, next_in, ret_val
+
+.exit:
+        ldp     x29, x30, [sp], 16
+        ret
+
+        .align 3
+.fast_exit:
+        mov     ret_val, 0
+        ret
+        .size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64
diff --git a/src/isa-l/igzip/aarch64/huffman_aarch64.h b/src/isa-l/igzip/aarch64/huffman_aarch64.h new file mode 100644 index 000000000..4ceae23f4 --- /dev/null +++ b/src/isa-l/igzip/aarch64/huffman_aarch64.h @@ -0,0 +1,173 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __HUFFMAN_AARCH64_H__
+#define __HUFFMAN_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+#ifdef LONGER_HUFFTABLE
+        #if (D > 8192)
+                #error History D is larger than 8K
+        #else
+                #define DIST_TABLE_SIZE 8192
+                #define DECODE_OFFSET 26
+        #endif
+#else
+        #define DIST_TABLE_SIZE 2
+        #define DECODE_OFFSET 0
+#endif
+
+#define LEN_TABLE_SIZE 256
+#define LIT_TABLE_SIZE 257
+
+// Byte offsets of the lookup tables inside the hufftables object.
+// The numeric notes assume ISAL_DEF_MAX_HDR_SIZE == 328 and DIST_TABLE_SIZE == 2.
+#define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8) //328+8
+#define DIST_TABLE_OFFSET (DIST_TABLE_START + - 4 * 1) //336-4
+#define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3) //336 + 2*4 - 4*3 = 332
+#define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
+#define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
+#define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
+#define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)
+
+#define IGZIP_DECODE_OFFSET 0
+#define IGZIP_DIST_TABLE_SIZE 2
+
+// All macros below take register *names*; unless stated otherwise they use
+// the x_<name> / w_<name> aliases of each argument. `hufftables` is used as
+// given and must name a 64-bit base register. Branch labels carry the
+// assembler's per-expansion counter so that a macro can be expanded more
+// than once in a file without "symbol already defined" errors.
+
+// get_len_code: look up the 32-bit length-table entry for `length` and split
+// it into code (entry >> 5) and code_len (entry & 0x1f). Clobbers tmp0.
+.macro get_len_code hufftables:req,length:req,code:req,code_len:req,tmp0:req
+        add     x_\tmp0,\hufftables,LEN_TABLE_OFFSET
+        ldr     w_\code_len,[x_\tmp0,x_\length,lsl 2]
+        lsr     w_\code, w_\code_len , 5
+        and     x_\code_len,x_\code_len,0x1f
+.endm
+
+// get_lit_code: code = 16-bit entry `lit` of the literal code table,
+// code_len = 8-bit entry `lit` of the literal size table.
+.macro get_lit_code hufftables:req,lit:req,code:req,code_len:req
+        add     x_\code,\hufftables,LIT_TABLE_OFFSET
+        ldrh    w_\code,[x_\code,x_\lit,lsl 1]
+        add     x_\code_len,\hufftables,LIT_TABLE_SIZES_OFFSET
+        ldrb    w_\code_len,[x_\code_len,x_\lit]
+.endm
+
+// get_dist_code: produce the code and code length for distance `dist`.
+//   dist <= DIST_TABLE_SIZE: direct lookup of a 32-bit entry, split as in
+//                            get_len_code.
+//   otherwise:               derive the distance symbol and its extra bits
+//                            from (dist & 0xffff) - 1, look up the symbol's
+//                            code/length and append the extra bits above it.
+// `dist` is compared through the bare alias given by the caller (this used
+// to be a hard-coded `dist` symbol, so only callers whose argument was
+// literally named `dist` behaved as intended). Clobbers dist, tmp0, tmp1,
+// tmp2 and the condition flags.
+.macro get_dist_code hufftables:req,dist:req,code:req,code_len:req,tmp0:req,tmp1:req,tmp2:req
+        cmp     \dist,DIST_TABLE_SIZE
+        bhi     _compute_dist_code_\@
+        add     x_\tmp0,\hufftables,DIST_TABLE_OFFSET
+        ldr     w_\code_len,[x_\tmp0,x_\dist,lsl 2]
+        lsr     w_\code, w_\code_len , 5
+        and     x_\code_len,x_\code_len,0x1f
+        b       _end_get_dist_code_\@
+_compute_dist_code_\@:
+        and     w_\dist,w_\dist,0xffff
+        sub     w_\dist,w_\dist,1
+        clz     w_\tmp0,w_\dist
+        mov     w_\tmp1,30
+        sub     w_\tmp0,w_\tmp1,w_\tmp0         //tmp0 == num_extra_bits
+        mov     w_\tmp1,1
+        lsl     w_\tmp1,w_\tmp1,w_\tmp0
+        sub     w_\tmp1,w_\tmp1,1
+        and     w_\tmp1,w_\tmp1,w_\dist         //tmp1=extra_bits
+        asr     w_\dist,w_\dist,w_\tmp0
+        lsl     w_\tmp2,w_\tmp0,1
+        add     w_\tmp2,w_\dist,w_\tmp2         //tmp2=sym
+
+        add     x_\code,\hufftables,DCODE_TABLE_OFFSET - IGZIP_DECODE_OFFSET*2
+        add     x_\code_len,\hufftables,DCODE_TABLE_SIZE_OFFSET - IGZIP_DECODE_OFFSET
+        ldrh    w_\code,[x_\code,x_\tmp2,lsl 1]
+        ldrb    w_\code_len,[x_\code_len,x_\tmp2]
+        lsl     w_\tmp1,w_\tmp1,w_\code_len     // place the extra bits above the code
+        orr     w_\code,w_\code,w_\tmp1
+        add     w_\code_len,w_\code_len,w_\tmp0
+
+        //compute_dist_code
+_end_get_dist_code_\@:
+.endm
+
+
+// compare_258_bytes: match_length = number of equal leading bytes of str0
+// and str1, capped at 258. Compares 8 bytes per iteration with 64-bit loads,
+// so up to 8 bytes starting at offset match_length are read from both
+// buffers on each pass. Clobbers tmp0, tmp1 and the condition flags.
+.macro compare_258_bytes str0:req,str1:req,match_length:req,tmp0:req,tmp1:req
+        mov     x_\match_length,0
+_compare_258_loop_\@:
+        ldr     x_\tmp0,[x_\str0,x_\match_length]
+        ldr     x_\tmp1,[x_\str1,x_\match_length]
+        eor     x_\tmp0,x_\tmp1,x_\tmp0
+        rbit    x_\tmp0,x_\tmp0
+        clz     x_\tmp0,x_\tmp0
+        lsr     x_\tmp0,x_\tmp0,3               // equal low-order bytes in this word (0..8)
+        add     x_\match_length,x_\match_length,x_\tmp0
+
+
+        // loop while match_length <= 257 and all 8 bytes matched
+        cmp     x_\match_length,257
+        ccmp    x_\tmp0,8,0,ls
+        beq     _compare_258_loop_\@
+
+        // clamp to 258
+        cmp     x_\match_length,258
+        mov     x_\tmp1,258
+        csel    x_\match_length,x_\match_length,x_\tmp1,ls
+.endm
+
+// compare_max_258_bytes: as compare_258_bytes, but the cap is
+// min(max_length, 258). max_length is overwritten with that cap.
+// Clobbers tmp0, tmp1 and the condition flags.
+.macro compare_max_258_bytes str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
+        mov     x_\match_length,0
+        mov     x_\tmp0,258
+        cmp     x_\max_length,x_\tmp0
+        csel    x_\max_length,x_\max_length,x_\tmp0,ls
+_compare_258_loop_\@:
+        ldr     x_\tmp0,[x_\str0,x_\match_length]
+        ldr     x_\tmp1,[x_\str1,x_\match_length]
+        eor     x_\tmp0,x_\tmp1,x_\tmp0
+        rbit    x_\tmp0,x_\tmp0
+        clz     x_\tmp0,x_\tmp0
+        lsr     x_\tmp0,x_\tmp0,3
+        add     x_\match_length,x_\match_length,x_\tmp0
+
+
+        // loop while max_length > match_length and all 8 bytes matched
+        cmp     x_\max_length,x_\match_length
+        ccmp    x_\tmp0,8,0,hi
+        beq     _compare_258_loop_\@
+
+        // clamp to max_length
+        cmp     x_\match_length,x_\max_length
+        csel    x_\match_length,x_\match_length,x_\max_length,ls
+.endm
+
+// compare_aarch64: match_length = number of equal leading bytes of str0 and
+// str1, capped at max_length (max_length itself is left unchanged).
+// Clobbers tmp0, tmp1 and the condition flags.
+.macro compare_aarch64 str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
+        mov     x_\match_length,0
+_compare_loop_\@:
+        ldr     x_\tmp0,[x_\str0,x_\match_length]
+        ldr     x_\tmp1,[x_\str1,x_\match_length]
+        eor     x_\tmp0,x_\tmp1,x_\tmp0
+        rbit    x_\tmp0,x_\tmp0
+        clz     x_\tmp0,x_\tmp0
+        lsr     x_\tmp0,x_\tmp0,3
+        add     x_\match_length,x_\match_length,x_\tmp0
+
+        // loop while max_length > match_length and all 8 bytes matched
+        cmp     x_\max_length,x_\match_length
+        ccmp    x_\tmp0,8,0,hi
+        beq     _compare_loop_\@
+
+        // clamp to max_length
+        cmp     x_\match_length,x_\max_length
+        csel    x_\match_length,x_\match_length,x_\max_length,ls
+.endm
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S b/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S new file mode 100644 index 000000000..3255ba4c7 --- /dev/null +++ b/src/isa-l/igzip/aarch64/igzip_decode_huffman_code_block_aarch64.S @@ -0,0 +1,689 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + .text + .align 2 +#include "lz0a_const_aarch64.h" +#include "huffman_aarch64.h" +#include "bitbuf2_aarch64.h" +#include "stdmac_aarch64.h" + +#define ENABLE_TBL_INSTRUCTION 1 + +.macro start_struct name:req + .set _FIELD_OFFSET,0 + .set _STRUCT_ALIGN,0 +.endm +.macro end_struct name:req + .set _\name\()_size,_FIELD_OFFSET + .set _\name\()_align,_STRUCT_ALIGN +.endm +.macro field name:req, size:req, align:req + .set _FIELD_OFFSET,(_FIELD_OFFSET + (\align) - 1) & (~ ((\align)-1)) + .set \name,_FIELD_OFFSET + .set _FIELD_OFFSET,_FIELD_OFFSET + \size + .if \align > _STRUCT_ALIGN + .set _STRUCT_ALIGN, \align + .endif +.endm + +#define ISAL_DECODE_LONG_BITS 12 +#define ISAL_DECODE_SHORT_BITS 10 + +#define L_REM (21 - ISAL_DECODE_LONG_BITS) +#define S_REM (15 - ISAL_DECODE_SHORT_BITS) +#define L_DUP ((1 << L_REM) - (L_REM + 1)) +#define S_DUP ((1 << S_REM) - (S_REM + 1)) +#define L_UNUSED ((1 << L_REM) - (1 << ((L_REM)/2)) - (1 << ((L_REM + 1)/2)) + 1) +#define S_UNUSED ((1 << S_REM) - (1 << ((S_REM)/2)) - (1 << ((S_REM + 1)/2)) + 1) +#define L_SIZE (286 + L_DUP + L_UNUSED) +#define S_SIZE (30 + S_DUP + S_UNUSED) +#define HUFF_CODE_LARGE_LONG_ALIGNED (L_SIZE + (-L_SIZE & 0xf)) +#define HUFF_CODE_SMALL_LONG_ALIGNED (S_SIZE + (-S_SIZE & 0xf)) +#define MAX_LONG_CODE_LARGE (L_SIZE + (-L_SIZE & 0xf)) +#define MAX_LONG_CODE_SMALL (S_SIZE + (-S_SIZE & 0xf)) +#define LARGE_SHORT_CODE_SIZE 4 +#define LARGE_LONG_CODE_SIZE 2 +#define SMALL_SHORT_CODE_SIZE 2 +#define SMALL_LONG_CODE_SIZE 2 + + +// inflate_huff_code +start_struct inflate_huff_code_large + // name size align + field _short_code_lookup_large, LARGE_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_LONG_BITS)), LARGE_LONG_CODE_SIZE + field _long_code_lookup_large, LARGE_LONG_CODE_SIZE*MAX_LONG_CODE_LARGE, LARGE_SHORT_CODE_SIZE +end_struct inflate_huff_code_large + +// inflate_huff_code +start_struct inflate_huff_code_small + // name size align + 
field	_short_code_lookup_small, SMALL_SHORT_CODE_SIZE*(1<<(ISAL_DECODE_SHORT_BITS)), SMALL_LONG_CODE_SIZE
+	field	_long_code_lookup_small, SMALL_LONG_CODE_SIZE*MAX_LONG_CODE_SMALL, SMALL_SHORT_CODE_SIZE
+end_struct inflate_huff_code_small
+
+// inflate_state: each field line defines a symbol holding the byte offset of that member
+start_struct inflate_state
+	//	name	size (bytes)	alignment (bytes)
+	field	_next_out, 8, 8
+	field	_avail_out, 4, 4
+	field	_total_out, 4, 4
+	field	_next_in, 8, 8
+	field	_read_in, 8, 8
+	field	_avail_in, 4, 4
+	field	_read_in_length, 4, 4
+	field	_lit_huff_code, _inflate_huff_code_large_size, _inflate_huff_code_large_align
+	field	_dist_huff_code, _inflate_huff_code_small_size, _inflate_huff_code_small_align
+	field	_block_state, 4, 4
+	field	_dict_length, 4, 4
+	field	_bfinal, 4, 4
+	field	_crc_flag, 4, 4
+	field	_crc, 4, 4
+	field	_hist_bits, 4, 4
+	field	_type0_block_len, 4, 4
+	field	_write_overflow_lits, 4, 4
+	field	_write_overflow_len, 4, 4
+	field	_copy_overflow_len, 4, 4
+	field	_copy_overflow_dist, 4, 4
+end_struct inflate_state
+// Offsets of the four lookup tables measured from the start of inflate_state
+.set _lit_huff_code_short_code_lookup , _lit_huff_code+_short_code_lookup_large
+.set _lit_huff_code_long_code_lookup , _lit_huff_code+_long_code_lookup_large
+.set _dist_huff_code_short_code_lookup , _dist_huff_code+_short_code_lookup_small
+.set _dist_huff_code_long_code_lookup , _dist_huff_code+_long_code_lookup_small
+.set ISAL_BLOCK_NEW_HDR , 0	// values stored in the _block_state field
+.set ISAL_BLOCK_HDR , 1
+.set ISAL_BLOCK_TYPE0 , 2
+.set ISAL_BLOCK_CODED , 3
+.set ISAL_BLOCK_INPUT_DONE , 4
+.set ISAL_BLOCK_FINISH , 5
+
+/* Inflate return values */
+#define ISAL_DECOMP_OK 0 /* No errors encountered while decompressing */
+#define ISAL_END_INPUT 1 /* End of input reached */
+#define ISAL_OUT_OVERFLOW 2 /* End of output reached */
+#define ISAL_NAME_OVERFLOW 3 /* End of gzip name buffer reached */
+#define ISAL_COMMENT_OVERFLOW 4 /* End of gzip comment buffer reached */
+#define ISAL_EXTRA_OVERFLOW 5 /* End of extra buffer reached */
+#define ISAL_NEED_DICT 6 /* Stream needs a dictionary to continue */
+#define
ISAL_INVALID_BLOCK -1 /* Invalid deflate block found */ +#define ISAL_INVALID_SYMBOL -2 /* Invalid deflate symbol found */ +#define ISAL_INVALID_LOOKBACK -3 /* Invalid lookback distance found */ +#define ISAL_INVALID_WRAPPER -4 /* Invalid gzip/zlib wrapper found */ +#define ISAL_UNSUPPORTED_METHOD -5 /* Gzip/zlib wrapper specifies unsupported compress method */ +#define ISAL_INCORRECT_CHECKSUM -6 /* Incorrect checksum found */ + + +#define ISAL_DEF_MAX_CODE_LEN 15 +#define LARGE_SHORT_SYM_LEN 25 +#define LARGE_SHORT_SYM_MASK ((1 << LARGE_SHORT_SYM_LEN) - 1) +#define LARGE_LONG_SYM_LEN 10 +#define LARGE_LONG_SYM_MASK ((1 << LARGE_LONG_SYM_LEN) - 1) +#define LARGE_SHORT_CODE_LEN_OFFSET 28 +#define LARGE_LONG_CODE_LEN_OFFSET 10 +#define LARGE_FLAG_BIT_OFFSET 25 +#define LARGE_FLAG_BIT (1 << LARGE_FLAG_BIT_OFFSET) +#define LARGE_SYM_COUNT_OFFSET 26 +#define LARGE_SYM_COUNT_LEN 2 +#define LARGE_SYM_COUNT_MASK ((1 << LARGE_SYM_COUNT_LEN) - 1) +#define LARGE_SHORT_MAX_LEN_OFFSET 26 + +#define SMALL_SHORT_SYM_LEN 9 +#define SMALL_SHORT_SYM_MASK ((1 << SMALL_SHORT_SYM_LEN) - 1) +#define SMALL_LONG_SYM_LEN 9 +#define SMALL_LONG_SYM_MASK ((1 << SMALL_LONG_SYM_LEN) - 1) +#define SMALL_SHORT_CODE_LEN_OFFSET 11 +#define SMALL_LONG_CODE_LEN_OFFSET 10 +#define SMALL_FLAG_BIT_OFFSET 10 +#define SMALL_FLAG_BIT (1 << SMALL_FLAG_BIT_OFFSET) + +#define DIST_SYM_OFFSET 0 +#define DIST_SYM_LEN 5 +#define DIST_SYM_MASK ((1 << DIST_SYM_LEN) - 1) +#define DIST_SYM_EXTRA_OFFSET 5 +#define DIST_SYM_EXTRA_LEN 4 +#define DIST_SYM_EXTRA_MASK ((1 << DIST_SYM_EXTRA_LEN) - 1) + +#define MAX_LIT_LEN_CODE_LEN 21 +#define MAX_LIT_LEN_COUNT (MAX_LIT_LEN_CODE_LEN + 2) +#define MAX_LIT_LEN_SYM 512 +#define LIT_LEN_ELEMS 514 + +#define INVALID_SYMBOL 0x1FFF +#define INVALID_CODE 0xFFFFFF + +#define MIN_DEF_MATCH 3 + +#define TRIPLE_SYM_FLAG 0 +#define DOUBLE_SYM_FLAG TRIPLE_SYM_FLAG + 1 +#define SINGLE_SYM_FLAG DOUBLE_SYM_FLAG + 1 +#define DEFAULT_SYM_FLAG TRIPLE_SYM_FLAG + +#define SINGLE_SYM_THRESH (2 * 
1024) +#define DOUBLE_SYM_THRESH (4 * 1024) + + +/* +declare Macros +*/ + +.macro declare_generic_reg name:req,reg:req,default:req + \name .req \default\reg + w_\name .req w\reg + x_\name .req x\reg +.endm + + +.macro inflate_in_load_read_byte + cmp read_in_length,56 + bgt 1f + cbz avail_in,1f + ldrb w_temp,[next_in],1 + sub avail_in,avail_in,1 + lsl temp,temp,x_read_in_length + orr read_in,read_in,temp + add read_in_length,read_in_length,8 + uxtw x_read_in_length,read_in_length + +.endm + +.macro inflate_in_load + + cmp read_in_length, 63 + bgt 1f + + /*if (state->avail_in >= 8) */ + cmp avail_in, 7 + bhi 2f + + // loop max 7 times + // while (state->read_in_length < 57 && state->avail_in > 0) + inflate_in_load_read_byte + inflate_in_load_read_byte + inflate_in_load_read_byte + inflate_in_load_read_byte + inflate_in_load_read_byte + inflate_in_load_read_byte + inflate_in_load_read_byte + b 1f +2: + add new_bytes,read_in_length,7 + mov w_temp,8 + lsr new_bytes,new_bytes,3 + sub new_bytes,w_temp,new_bytes + ldr temp,[next_in] + lsl temp,temp,x_read_in_length + orr read_in,read_in,temp + add next_in,next_in,new_bytes,uxtb + add read_in_length,read_in_length,new_bytes,lsl 3 + sub avail_in,avail_in,new_bytes + +1: +.endm + +.macro copy_word + sub repeat_length,repeat_length,#4 + ldr w_arg0, [arg1],4 + cmp repeat_length, 3 + str w_arg0, [next_out],4 + bls load_byte_less_than_4 +.endm + + + .global decode_huffman_code_block_stateless_aarch64 + .type decode_huffman_code_block_stateless_aarch64, %function +/* + void decode_huffman_code_block_stateless_aarch64( + struct inflate_state *state, + uint8_t * start_out) +*/ + declare_generic_reg arg0, 0, x + declare_generic_reg arg1, 1, x + declare_generic_reg arg2, 2, x + + declare_generic_reg state, 11,x + declare_generic_reg start_out, 18,x + + declare_generic_reg read_in, 3,x + declare_generic_reg read_in_length, 4,w + declare_generic_reg sym_count, 5,w + declare_generic_reg next_bits, 6,w + declare_generic_reg next_lits, 6,w 
+ declare_generic_reg avail_in, 20,w + declare_generic_reg next_in, 23,x + + declare_generic_reg temp, 16,x //local temp variable + declare_generic_reg new_bytes, 7,w //temp variable + declare_generic_reg copy_overflow_length, 28,w + + + + declare_generic_reg block_state, 8,w + declare_generic_reg block_state_adr,9,x + declare_generic_reg look_back_dist, 10,w + declare_generic_reg bfinal, 22,x + + declare_generic_reg next_out, 12,x + declare_generic_reg avail_out, 13,w + declare_generic_reg total_out, 14,w + + declare_generic_reg rfc_table, 15,x + declare_generic_reg next_sym, 17,w + declare_generic_reg next_dist, 17,w + declare_generic_reg bit_count, 19,w + + declare_generic_reg bit_mask, 21,w + declare_generic_reg next_lit, 24,w + declare_generic_reg write_overflow_len,25,w + declare_generic_reg write_overflow_lits,26,w + declare_generic_reg repeat_length,27,w + +decode_huffman_code_block_stateless_aarch64: + //save registers + push_stack + + //load variables + mov state,arg0 + mov block_state,_block_state + mov start_out,arg1 + add block_state_adr,state,block_state,uxtw + ldr block_state, [block_state_adr] + ldr bfinal, [block_state_adr,_bfinal-_block_state] + + ldr next_out, [state] + ldp avail_out,total_out,[state,_avail_out] + ldp next_in, read_in, [state,_next_in] + ldp avail_in, read_in_length, [state,_avail_in] + ldp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state] + + //init rfc_table + adrp rfc_table,rfc_lookup_table + add rfc_table,rfc_table,:lo12:rfc_lookup_table +#if ENABLE_TBL_INSTRUCTION + ld1 {v1.16b,v2.16b,v3.16b},[rfc_table] + add rfc_table,rfc_table,48 + ld1 {v4.16b-v7.16b},[rfc_table] +#endif + + /* + state->copy_overflow_length = 0; + state->copy_overflow_distance = 0; + */ + mov x_copy_overflow_length,xzr + str xzr,[block_state_adr,_copy_overflow_len-_block_state] + + /* while (state->block_state == ISAL_BLOCK_CODED) */ +block_state_loop: + cmp block_state ,ISAL_BLOCK_CODED + bne exit_func_success + + 
inflate_in_load + + /* save state here */ + str next_out, [state] + stp avail_out,total_out,[state,_avail_out] + stp next_in, read_in, [state,_next_in] + stp avail_in, read_in_length, [state,_avail_in] + stp write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state] + + /* + decode_next_lit_len(&next_lits, &sym_count, + state, &state->lit_huff_code, + &temp_dat, &temp_bytes); + */ + cmp read_in_length,ISAL_DEF_MAX_CODE_LEN + ble inflate_in_load_decode +decode_next_lit_len_start: + and x_next_bits,read_in,((1 << ISAL_DECODE_LONG_BITS) - 1) + /*next_sym = huff_code->short_code_lookup[next_bits];*/ + add next_bits,next_bits,_lit_huff_code>>2 + ldr next_sym,[state,x_next_bits,lsl 2] + /*if ((next_sym & LARGE_FLAG_BIT) == 0) {*/ + tbnz next_sym,LARGE_FLAG_BIT_OFFSET,long_code_lookup_routine + lsr bit_count,next_sym,LARGE_SHORT_CODE_LEN_OFFSET + sub read_in_length,read_in_length,bit_count + lsr read_in,read_in,x_bit_count + mov temp,0x1fff + cmp bit_count,0 + csel next_sym,next_sym,w_temp,ne + ubfx sym_count,next_sym,LARGE_SYM_COUNT_OFFSET,LARGE_SYM_COUNT_LEN + and next_lits,next_sym,LARGE_SHORT_SYM_MASK + b decode_next_lit_len_end +long_code_lookup_routine: + lsr bit_mask,next_sym,LARGE_SHORT_MAX_LEN_OFFSET + mov sym_count,1 + and next_sym,next_sym,LARGE_SHORT_SYM_MASK + mov temp,1023 + lsl bit_mask,sym_count,bit_mask + sub bit_mask,bit_mask,1 + and x_next_bits,read_in,x_bit_mask + add next_bits,next_sym,next_bits,lsr ISAL_DECODE_LONG_BITS + mov next_sym,(_lit_huff_code+_long_code_lookup_large)>>1 + add next_bits,next_bits,next_sym + ldrh next_sym,[state,x_next_bits,lsl 1] + lsr bit_count,next_sym,10 + sub read_in_length,read_in_length,bit_count + and next_lits,next_sym,w_temp + lsr read_in,read_in,x_bit_count + cmp bit_count,0 + csel next_lits,next_lits,w_temp,ne +decode_next_lit_len_end: + + /* if (sym_count == 0) */ + cbz sym_count,invalid_symbol + tbnz read_in_length,31, end_input + + /* while (sym_count > 0) start */ +sym_count_loop: 
+ and next_lit,next_lits , 0xffff + + /*if (next_lit < 256 || sym_count > 1) {*/ + cmp next_lit,255 + ccmp sym_count,1,0,hi + beq next_lit_256 + + /* if (state->avail_out < 1) { */ + cbnz avail_out,sym_count_adjust + + mov write_overflow_len,sym_count + lsl sym_count,sym_count,3 + mov write_overflow_lits,next_lits + sub sym_count,sym_count,8 + lsr next_lits,next_lits,sym_count + mov sym_count,1 + cmp next_lits,255 + bls isal_out_overflow + cmp next_lits,256 + sub write_overflow_len,write_overflow_len,1 + beq isal_out_overflow_1 + b sym_count_loop + +sym_count_adjust: + /* + while (sym_count > 0) end + next_lits >>= 8; + sym_count--; + */ + subs sym_count,sym_count,1 + lsr next_lits,next_lits,8 + strb next_lit,[next_out],1 + sub avail_out,avail_out,1 + add total_out,total_out,1 + bne sym_count_loop + b block_state_loop + +next_lit_256: + /* if (next_lit == 256) { */ + cmp next_lit,256 + beq next_lit_eq_256 + + + /* + if (next_lit <= MAX_LIT_LEN_SYM) + sym_count must be 1 + */ + cmp next_lit,MAX_LIT_LEN_SYM + bhi invalid_symbol + sub repeat_length,next_lit,254 + /* + next_dist = + decode_next_dist(state, &state->dist_huff_code, &temp_dat, + &temp_bytes); + */ + cmp read_in_length,ISAL_DEF_MAX_CODE_LEN + ble inflate_in_load_decode_next_dist +decode_next_dist_start: + and x_next_bits,read_in,((1 << ISAL_DECODE_SHORT_BITS) - 1) + mov next_sym,_dist_huff_code>>1 + add next_bits,next_bits,next_sym + ldrh next_sym, [state,x_next_bits,lsl 1] + tbz next_sym,SMALL_FLAG_BIT_OFFSET,decode_next_dist_flag + sub bit_mask,next_sym,SMALL_FLAG_BIT + mov temp,1 + asr bit_mask,bit_mask,SMALL_SHORT_CODE_LEN_OFFSET + and next_sym,next_sym,SMALL_SHORT_SYM_MASK + lsl bit_mask,w_temp,bit_mask + sub bit_mask,bit_mask,1 + and x_next_bits,read_in,x_bit_mask + add next_bits,next_sym,next_bits,lsr ISAL_DECODE_SHORT_BITS + mov next_sym,(_dist_huff_code + _long_code_lookup_small)>>1 + add next_bits,next_bits,next_sym + ldrh next_sym,[state,x_next_bits,lsl 1] + lsr 
bit_count,next_sym,SMALL_LONG_CODE_LEN_OFFSET + b decode_next_dist_adjust +decode_next_dist_flag: + lsr bit_count,next_sym,SMALL_SHORT_CODE_LEN_OFFSET +decode_next_dist_adjust: + sub read_in_length,read_in_length,bit_count + lsr read_in,read_in,x_bit_count + cbnz bit_count,decode_next_dist_end + sub read_in_length,read_in_length,next_sym + mov next_sym,INVALID_SYMBOL +decode_next_dist_end: + and next_sym,next_sym,DIST_SYM_MASK + + tbnz read_in_length,31,end_input_1 + cmp next_dist,29 + bhi invalid_symbol + + +#if ENABLE_TBL_INSTRUCTION + ins v0.b[0],next_dist + tbl v0.8b,{v2.16b,v3.16b},v0.8b + umov bit_count,v0.b[0] +#else + ldrb bit_count,[rfc_table,next_dist,sxtw] +#endif + + /*inflate_in_read_bits(state, + dist_extra_bit_count, &temp_dat, + &temp_bytes); + */ + inflate_in_load + mov temp,1 + lsl temp,temp,x_bit_count + sub read_in_length,read_in_length,bit_count + sub temp,temp,1 + and x_look_back_dist,temp,read_in + lsr read_in,read_in,x_bit_count +#if ENABLE_TBL_INSTRUCTION + dup v0.8b,next_dist + add v0.8b,v1.8b,v0.8b + tbl v0.8b,{v4.16b-v7.16b},v0.8b + umov next_dist,v0.h[0] +#else + add next_dist,next_dist,16 + ldrh next_dist,[rfc_table,x_next_dist,lsl 1] +#endif + add look_back_dist,look_back_dist,next_dist + + /* + if (state->read_in_length < 0) { + */ + tbnz read_in_length,31,end_input_1 + + /* + if (state->next_out - look_back_dist < start_out) { + */ + sub temp,next_out,x_look_back_dist + cmp temp,start_out + bcc isal_invalid_lookback + /* + if (state->avail_out < repeat_length) { + */ + cmp avail_out , repeat_length + bcs decompress_data_start + sub copy_overflow_length,repeat_length,avail_out + stp copy_overflow_length,look_back_dist,[block_state_adr,_copy_overflow_len-_block_state] + mov repeat_length,avail_out + +decompress_data_start: + add total_out,total_out,repeat_length + sub avail_out,avail_out,repeat_length + sub arg1,next_out,x_look_back_dist + #if 1 + cmp look_back_dist,repeat_length + bls byte_copy_start + #else + b byte_copy_start + 
#endif
+	// Word copy path: reached when look_back_dist > repeat_length (cmp/bls above).
+	// Copies 4 bytes per step from arg1 to next_out, then a 0..3 byte tail.
+	cbz	repeat_length,decompress_data_end
+	cmp	repeat_length, 3
+	bls	load_byte_less_than_4 //0.5% will jump
+load_byte_4:
+	sub	repeat_length, repeat_length, #4
+	ldr	w_arg0, [arg1],4
+	cmp	repeat_length, 3
+	str	w_arg0, [next_out],4
+	bls	load_byte_less_than_4
+	.rept 62	// 62 further unrolled copies of the step above (copy_word macro)
+	copy_word
+	.endr
+	sub	repeat_length, repeat_length, #4
+	ldr	w_arg0, [arg1],4
+	cmp	repeat_length, 4
+	str	w_arg0, [next_out],4
+	bge	load_byte_4
+load_byte_less_than_4:
+	tbz	repeat_length,0,load_byte_2	// bit 0 set: copy one byte
+	ldrb	w_arg0, [arg1],1
+	sub	repeat_length, repeat_length, #1
+	strb	w_arg0, [next_out],1
+load_byte_2:
+	tbz	repeat_length,1,decompress_data_end	// bit 1 set: copy two bytes
+	ldrh	w_arg0, [arg1],2
+	strh	w_arg0, [next_out],2
+decompress_data_end:
+
+
+
+	/*
+	if (state->copy_overflow_length > 0)
+	*/
+	cmp	copy_overflow_length,0
+	bgt	isal_out_overflow
+	b	block_state_loop
+next_lit_eq_256:
+	/*
+	state->block_state = state->bfinal ?
+	ISAL_BLOCK_INPUT_DONE : ISAL_BLOCK_NEW_HDR;
+	*/
+	mov	block_state, ISAL_BLOCK_INPUT_DONE
+	cmp	w_bfinal,0
+	csel	block_state, block_state, w_bfinal, ne	// bfinal == 0 selects 0 == ISAL_BLOCK_NEW_HDR
+	str	block_state, [block_state_adr]
+
+	b	block_state_loop
+exit_func_success:
+	mov	w0 , 0
+exit_func:	// common exit: write the register copies back to *state; w0 already holds the return code
+	str	next_out, [state]
+	stp	avail_out,total_out,[state,_avail_out]
+	stp	next_in, read_in, [state,_next_in]
+	stp	avail_in, read_in_length, [state,_avail_in]
+	stp	write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+
+	pop_stack
+	ret
+end_input_1:
+end_input:	// no write-back: memory still holds the state stored at the top of block_state_loop
+	mov	w0,ISAL_END_INPUT
+	pop_stack
+	ret
+
+invalid_symbol:
+	/*
+	write back the modified register state (exit_func repeats these stores)
+	*/
+	str	next_out, [state]
+	stp	avail_out,total_out,[state,_avail_out]
+	stp	next_in, read_in, [state,_next_in]
+	stp	avail_in, read_in_length, [state,_avail_in]
+	stp	write_overflow_lits,write_overflow_len,[block_state_adr,_write_overflow_lits-_block_state]
+	mov	w0, ISAL_INVALID_SYMBOL
+	b	exit_func
+isal_out_overflow_1:
+	// bfinal was loaded with a 64-bit ldr, so its upper half holds crc_flag: test only the 32-bit view
+	cmp	w_bfinal,0
+	mov	block_state, ISAL_BLOCK_INPUT_DONE
+	csel	block_state, block_state, wzr, ne	// bfinal ? INPUT_DONE : NEW_HDR (0)
+	str	block_state, [block_state_adr]
+isal_out_overflow:
+	mov	w0, ISAL_OUT_OVERFLOW
+
+	b	exit_func
+isal_invalid_lookback:
+	mov	w0, ISAL_INVALID_LOOKBACK
+	b	exit_func
+inflate_in_load_decode:	// out-of-line refill of read_in before decoding a literal/length symbol
+	inflate_in_load
+	b	decode_next_lit_len_start
+inflate_in_load_decode_next_dist:	// out-of-line refill of read_in before decoding a distance symbol
+	inflate_in_load
+	b	decode_next_dist_start
+byte_copy_start:	// byte-wise copy, taken when look_back_dist <= repeat_length
+	add	arg2,next_out,x_repeat_length
+	cmp	arg2, next_out
+	beq	decompress_data_end
+	sub	arg2,arg2,1	// arg2 = address of the last byte to write
+byte_copy_loop:
+	ldrb	w_arg0, [arg1] , 1
+	cmp	arg2, next_out	// compared before next_out is post-incremented
+	strb	w_arg0, [next_out],1
+	bne	byte_copy_loop
+	b	decompress_data_end
+	.size	decode_huffman_code_block_stateless_aarch64, .-decode_huffman_code_block_stateless_aarch64
+	.type	rfc_lookup_table, %object
+
+rfc_lookup_table:
+#if ENABLE_TBL_INSTRUCTION
+	.byte 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00	// index offsets added to next_dist for the dist_start tbl lookup
+	.zero 8
+#endif
+	//dist_extra_bit_count
+	.byte 0x00, 0x00, 0x00, 0x00, 0x01, 0x01, 0x02, 0x02
+	.byte 0x03, 0x03, 0x04, 0x04, 0x05, 0x05, 0x06, 0x06
+	.byte 0x07, 0x07, 0x08, 0x08, 0x09, 0x09, 0x0a, 0x0a
+	.byte 0x0b, 0x0b, 0x0c, 0x0c, 0x0d, 0x0d, 0x00, 0x00
+	//dist_start (tbl variant: 32 low bytes followed by 32 high bytes of the .short table below)
+#if ENABLE_TBL_INSTRUCTION
+	.byte 0x01,0x02,0x03,0x04,0x05,0x07,0x09,0x0d,0x11,0x19,0x21,0x31,0x41,0x61,0x81,0xc1
+	.byte 0x01,0x81,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x00,0x00
+	.byte 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
+	.byte 0x01,0x01,0x02,0x03,0x04,0x06,0x08,0x0c,0x10,0x18,0x20,0x30,0x40,0x60,0x00,0x00
+#else
+	.short 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
+	.short 0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
+	.short 0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
+	.short 0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
+#endif
+	.size	rfc_lookup_table, .
- rfc_lookup_table diff --git a/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S new file mode 100644 index 000000000..254f74c61 --- /dev/null +++ b/src/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S @@ -0,0 +1,261 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+	.arch armv8-a+crc
+	.text
+	.align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req
+	\name .req \default\reg
+	w_\name .req w\reg
+	x_\name .req x\reg
+.endm
+// update_state: write the loop registers back to the stream (has_hist, next_in, avail_in, total_in, next_out, avail_out, total_out).
+.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
+	m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
+	// Overwrites m_out_buf, m_out_start, start_in, end_in, next_in and tmp0; tmp1 is not used.
+	//m_out_buf=bytes_written
+	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
+	cmp	x_\next_in,x_\start_in	// use the macro arguments, not the caller aliases
+	bls	.Lskip_has_hist_\@	// nothing consumed: leave has_hist untouched
+	mov	w_\tmp0,1
+	strb	w_\tmp0,[x_\stream,_internal_state_has_hist]
+.Lskip_has_hist_\@:	// unique label per expansion
+	ldr	w_\tmp0,[x_\stream,_total_in]
+	ldr	x_\m_out_start,[x_\stream,_next_out] //m_out_start = next_out
+
+	str	x_\next_in,[x_\stream,_next_in]
+	sub	x_\start_in,x_\next_in,x_\start_in	// start_in = bytes consumed
+	sub	x_\end_in,x_\end_in,x_\next_in	// end_in = bytes still available
+	add	w_\tmp0,w_\tmp0,w_\start_in	// total_in += bytes consumed
+	stp	w_\end_in,w_\tmp0,[x_\stream,_avail_in]	// pair store: presumes total_in directly follows avail_in
+	//next_in=avail_out,start_in=total_out
+	ldp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
+	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
+	str	x_\m_out_start,[x_\stream,_next_out]	// next_out += bytes_written
+	add	w_\start_in,w_\start_in,w_\m_out_buf	// total_out += bytes_written
+	sub	w_\next_in,w_\next_in,w_\m_out_buf	// avail_out -= bytes_written
+	stp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
+.endm
+
+
+	.global isal_deflate_body_aarch64
+	.type isal_deflate_body_aarch64, %function
+/*
+	void isal_deflate_body_aarch64(struct isal_zstream *stream)
+*/
+	declare_generic_reg	stream,		0,x	//struct isal_zstream *stream
+	declare_generic_reg	state,		8,x	//&stream->state
+	declare_generic_reg	avail_in,	9,w
+	declare_generic_reg	end_of_stream,	10,w	//can be used in loop
+
+	declare_generic_reg	hash_mask,	11,w
+	declare_generic_reg	match_length,	12,w
+	declare_generic_reg	hufftables,	13,x
+
+	declare_generic_reg	m_out_buf,	14,x
+	declare_generic_reg	m_out_start,	15,x
+	declare_generic_reg	m_out_end,	16,x
+	declare_generic_reg
m_bits, 17,x + declare_generic_reg m_bit_count, 18,w + + declare_generic_reg start_in, 19,x + declare_generic_reg end_in, 20,x + declare_generic_reg next_in, 21,x + declare_generic_reg loop_end_cnt, 22,x + + declare_generic_reg literal, 23,w + declare_generic_reg hash, 24,w + declare_generic_reg dist, 25,w + + declare_generic_reg last_seen, 26,x + declare_generic_reg file_start, 27,x + declare_generic_reg hist_size, 28,w + + declare_generic_reg tmp0, 5 ,w + declare_generic_reg tmp1, 6 ,w + declare_generic_reg tmp2, 7 ,w + + declare_generic_reg code, 3,x + declare_generic_reg code_len, 24,x + declare_generic_reg code2, 10,x + declare_generic_reg code_len2, 4,x + + +isal_deflate_body_aarch64: + //save registers + push_stack + ldr avail_in, [stream, _avail_in] + cbz avail_in, exit_save_state + + // set_buf(&state->bitbuf, stream->next_out, stream->avail_out); + ldr w_m_out_end,[stream,_avail_out] + ldr m_out_buf,[stream,_next_out] + add m_out_end,m_out_buf,w_m_out_end,uxtw + sub m_out_end,m_out_end , 8 + mov m_out_start,m_out_buf + stp m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf] + str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start] + ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count] + ldr m_bits ,[stream,_internal_state_bitbuf_m_bits] + + + //init variables + //last_seen=&stream.internal_state.head = _internal_state+_head + add last_seen,stream,65536 + add last_seen,last_seen,_internal_state+_head -65536 + + + //start_in=stream->next_in;next_in=start_in + ldr start_in,[stream,_next_in] + mov next_in,start_in + add end_in,start_in,avail_in,uxtw //avail_in reg is free now + sub loop_end_cnt,end_in,289 //loop end + cmp next_in,loop_end_cnt + + + //file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in); + ldr w_file_start,[stream,_total_in] + sub file_start,next_in,w_file_start,uxtw + + //uint32_t hist_size = state->dist_mask; + ldr hist_size,[stream,_internal_state + _dist_mask] + + //uint32_t 
hash_mask = state->hash_mask; + ldr hash_mask,[stream,_internal_state + _hash_mask] + + ldr hufftables,[stream,_hufftables] + + bhi main_loop_end +main_loop_start: + //is_full(&state->bitbuf) + cmp m_out_buf,m_out_end + bhi update_state_exit + + ldr literal,[next_in] + crc32cw hash,wzr,literal + and hash,hash,hash_mask + + ///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash] + sub x_dist,next_in,file_start + //last_seen[hash] = (uint64_t) (next_in - file_start); + strh dist,[last_seen,x_hash,lsl 1] + sub dist,dist,w_tmp0 + and dist,dist,0xffff + + sub w_tmp0,dist,1 + cmp hist_size,w_tmp0 + bls get_lit_code + + ///match_length = compare258(next_in - dist, next_in, 258); + sub x_tmp2,next_in,x_dist + compare_258_bytes tmp2,next_in,match_length,tmp0,tmp1 + cmp match_length,3 + bls get_lit_code + + sub x_tmp0,next_in,file_start + ldr literal,[next_in,1] + crc32cw hash,wzr,literal + and hash,hash,hash_mask + add tmp0,tmp0,1 + strh tmp0,[last_seen,x_hash,lsl 1] + //call_print_b hash,dist,last_seen + + ldr literal,[next_in,2] + crc32cw hash,wzr,literal + and hash,hash,hash_mask + add tmp0,tmp0,1 + strh tmp0,[last_seen,x_hash,lsl 1] + + //get_len_code(stream->hufftables, match_length, &code, + // &code_len); + get_len_code hufftables,match_length,code,code_len,tmp0 + + //get_dist_code(stream->hufftables, dist, &code2, &code_len2); + get_dist_code hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2 + + //code |= code2 << code_len; + //code_len += code_len2; + lsl code2,code2,code_len + orr code,code,code2 + add code_len,code_len,code_len2 + + //next_in += match_length; + add next_in,next_in,match_length,uxtw + + //write_bits(&state->bitbuf, code, code_len); + update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf + + + + cmp next_in,loop_end_cnt + bls main_loop_start + b main_loop_end +get_lit_code: + //get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + and literal,literal,0xff + 
get_lit_code hufftables,literal,code,code_len + + //next_in++; + add next_in,next_in,1 + + //write_bits(&state->bitbuf, code, code_len); + update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf + cmp next_in,loop_end_cnt + bls main_loop_start + +main_loop_end: + //update state here + + //load end_of_stream and flush together + ldr w_end_of_stream, [stream, _end_of_stream] + //(stream->end_of_stream || stream->flush != 0) + cbz w_end_of_stream, update_state_exit + mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER + str w_tmp0, [stream, _internal_state+_state] +update_state_exit: + update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1 +exit_ret: + pop_stack + ret +exit_save_state: + ldr w_end_of_stream, [stream, _end_of_stream] + cbz w_end_of_stream, exit_ret //(stream->end_of_stream || stream->flush != 0) + mov w_tmp0 , ZSTATE_FLUSH_READ_BUFFER + str w_tmp0, [stream, _internal_state+_state] + b exit_ret + .size isal_deflate_body_aarch64, .-isal_deflate_body_aarch64 diff --git a/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S new file mode 100644 index 000000000..e5842b5bc --- /dev/null +++ b/src/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S @@ -0,0 +1,264 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+	.arch armv8-a+crc
+	.text
+	.align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+
+/*
+declare Macros
+*/
+
+.macro declare_generic_reg name:req,reg:req,default:req
+	\name .req \default\reg
+	w_\name .req w\reg
+	x_\name .req x\reg
+.endm
+// update_state: write the loop registers back to the stream (has_hist, next_in, avail_in, total_in, next_out, avail_out, total_out).
+.macro update_state stream:req,start_in:req,next_in:req,end_in:req, \
+	m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req
+	// Overwrites m_out_buf, m_out_start, start_in, end_in, next_in and tmp0; tmp1 is not used.
+	//m_out_buf=bytes_written
+	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
+	cmp	x_\next_in,x_\start_in	// use the macro arguments, not the caller aliases
+	bls	.Lskip_has_hist_\@	// nothing consumed: leave has_hist untouched
+	mov	w_\tmp0,1
+	strb	w_\tmp0,[x_\stream,_internal_state_has_hist]
+.Lskip_has_hist_\@:	// unique label per expansion
+	ldr	w_\tmp0,[x_\stream,_total_in]
+	ldr	x_\m_out_start,[x_\stream,_next_out] //m_out_start = next_out
+
+	str	x_\next_in,[x_\stream,_next_in]
+	sub	x_\start_in,x_\next_in,x_\start_in	// start_in = bytes consumed
+	sub	x_\end_in,x_\end_in,x_\next_in	// end_in = bytes still available
+	add	w_\tmp0,w_\tmp0,w_\start_in	// total_in += bytes consumed
+	stp	w_\end_in,w_\tmp0,[x_\stream,_avail_in]	// pair store: presumes total_in directly follows avail_in
+	//next_in=avail_out,start_in=total_out
+	ldp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
+	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
+	str	x_\m_out_start,[x_\stream,_next_out]	// next_out += bytes_written
+	add	w_\start_in,w_\start_in,w_\m_out_buf	// total_out += bytes_written
+	sub	w_\next_in,w_\next_in,w_\m_out_buf	// avail_out -= bytes_written
+	stp	w_\next_in,w_\start_in,[x_\stream,_avail_out]
+.endm
+	.global isal_deflate_finish_aarch64
+	.arch armv8-a+crc
+	.type isal_deflate_finish_aarch64, %function
+/*
+	void isal_deflate_finish_aarch64(struct isal_zstream *stream)
+*/
+	declare_generic_reg	stream,		0,x	//struct isal_zstream *stream
+	declare_generic_reg	state,		8,x	//&stream->state
+	declare_generic_reg	avail_in,	9,w
+	declare_generic_reg	end_of_stream,	10,w	//can be used in loop
+
+	declare_generic_reg	hash_mask,	11,w
+	declare_generic_reg	match_length,	12,w
+	declare_generic_reg	hufftables,	13,x
+
+	declare_generic_reg	m_out_buf,	14,x
+	declare_generic_reg	m_out_start,	15,x
+	declare_generic_reg	m_out_end,	16,x
+	declare_generic_reg	m_bits,		17,x
+	declare_generic_reg	m_bit_count,	18,w
+
+	declare_generic_reg	start_in,	19,x
+	declare_generic_reg	end_in,		20,x
+	declare_generic_reg	next_in,	21,x
+	declare_generic_reg	loop_end_cnt,	22,x
+
+	declare_generic_reg	literal,	23,w
+	declare_generic_reg	hash,		24,w
+	declare_generic_reg	dist,		25,w
+
+	declare_generic_reg	last_seen,	26,x
+	declare_generic_reg	file_start,	27,x
+	declare_generic_reg	hist_size,	28,w
+
+	declare_generic_reg	tmp0,		5 ,w
+	declare_generic_reg	tmp1,		6 ,w
+	declare_generic_reg	tmp2,		7 ,w
+
+	declare_generic_reg	code,		3,x
+	declare_generic_reg	code_len,	24,x
+	declare_generic_reg	code2,		10,x
+	declare_generic_reg	code_len2,	4,x
+
+
+isal_deflate_finish_aarch64:
+	//save registers
+	push_stack
+
+	// set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
+	ldr	w_m_out_end,[stream,_avail_out]
+	ldr	m_out_buf,[stream,_next_out]
+	add	m_out_end,m_out_buf,w_m_out_end,uxtw
+	sub	m_out_end,m_out_end , 8
+	mov	m_out_start,m_out_buf
+	stp
m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf] + str m_out_start,[stream, _bitbuf + _internal_state + _m_out_start] + ldr m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count] + ldr m_bits ,[stream,_internal_state_bitbuf_m_bits] + + //init variables + //last_seen=&stream.internal_state.head = _internal_state+_head + add last_seen,stream,65536 + add last_seen,last_seen,_internal_state+_head -65536 + + + //start_in=stream->next_in;next_in=start_in + ldr avail_in, [stream, _avail_in] + ldr start_in,[stream,_next_in] + mov next_in,start_in + add end_in,start_in,avail_in,uxtw //avail_in reg is free now + ldr hufftables,[stream,_hufftables] + cbz avail_in, update_not_full + + + sub loop_end_cnt,end_in,4 //loop end + cmp next_in,loop_end_cnt + + + //file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in); + ldr w_file_start,[stream,_total_in] + sub file_start, next_in, w_file_start, uxtw + + //uint32_t hist_size = state->dist_mask; + ldr hist_size,[stream,_internal_state + _dist_mask] + + //uint32_t hash_mask = state->hash_mask; + ldr hash_mask,[stream,_internal_state + _hash_mask] + + bhi main_loop_end +main_loop_start: + //is_full(&state->bitbuf) + cmp m_out_buf,m_out_end + bhi update_state_exit + + ldr literal,[next_in] + crc32cw hash,wzr,literal + and hash,hash,hash_mask + + ///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF; + ldrh w_tmp0,[last_seen,x_hash,lsl 1] //tmp_w last_seen[hash] + sub x_dist,next_in,file_start + //last_seen[hash] = (uint64_t) (next_in - file_start); + strh dist,[last_seen,x_hash,lsl 1] + sub dist,dist,w_tmp0 + and dist,dist,0xffff + + sub w_tmp0,dist,1 + cmp hist_size,w_tmp0 + bls get_lit_code + + /// match_length = compare258(next_in - dist, next_in, 258); + sub x_tmp2,next_in,x_dist + sub x_hash,end_in,next_in + compare_max_258_bytes tmp2,next_in,hash,match_length,tmp0,tmp1 + cmp match_length,3 + bls get_lit_code + + get_len_code hufftables,match_length,code,code_len,tmp0 + get_dist_code 
hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2 + + //code |= code2 << code_len; + //code_len += code_len2; + lsl code2,code2,code_len + orr code,code,code2 + add code_len,code_len,code_len2 + + //next_in += match_length; + add next_in,next_in,match_length,uxtw + + //write_bits(&state->bitbuf, code, code_len); + update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf + + cmp next_in,loop_end_cnt + bls main_loop_start + b main_loop_end +get_lit_code: + //get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len); + and literal,literal,0xff + get_lit_code hufftables,literal,code,code_len + + //next_in++; + add next_in,next_in,1 + + //write_bits(&state->bitbuf, code, code_len); + update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf + cmp next_in,loop_end_cnt + bls main_loop_start +main_loop_end: + sub loop_end_cnt,end_in,1 + cmp next_in,loop_end_cnt + bhi update_not_full +second_loop_start: + cmp m_out_buf,m_out_end + bhi update_state_exit + ldr literal,[next_in] + and literal,literal,0xff + get_lit_code hufftables,literal,code,code_len + //next_in++; + add next_in,next_in,1 + + //write_bits(&state->bitbuf, code, code_len); + update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf + cmp next_in,loop_end_cnt + bls second_loop_start + +update_not_full: + cmp m_out_buf,m_out_end + bhi update_state_exit + + mov literal,256 + get_lit_code hufftables,literal,code,code_len + + //write_bits(&state->bitbuf, code, code_len); + update_bits stream,code,code_len,m_bits,m_bit_count,m_out_buf + ldrh w_end_of_stream, [stream, _end_of_stream] + mov w_tmp0,1 + strb w_tmp0,[stream,_internal_state_has_eob] + cmp w_end_of_stream,w_tmp0 + mov w_tmp0, ZSTATE_TRL + mov w_tmp1, ZSTATE_SYNC_FLUSH + csel w_tmp0,w_tmp0,w_tmp1,eq + str w_tmp0, [stream, _internal_state+_state] + +update_state_exit: + update_state stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1 + pop_stack + ret + + .size isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64 diff 
--git a/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S b/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S new file mode 100644 index 000000000..40251dab4 --- /dev/null +++ b/src/isa-l/igzip/aarch64/igzip_deflate_hash_aarch64.S @@ -0,0 +1,95 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+	.arch armv8-a+crc
+	.text
+	.align 2
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+/*
+declare Macros
+*/
+
+/*
+ * declare_generic_reg name, reg, default
+ * Defines three aliases for register number "reg": name (width given by
+ * "default", w or x), w_name (32-bit view) and x_name (64-bit view).
+ */
+.macro	declare_generic_reg	name:req,reg:req,default:req
+	\name		.req	\default\reg
+	w_\name		.req	w\reg
+	x_\name		.req	x\reg
+.endm
+
+
+
+	.global	isal_deflate_hash_aarch64
+	.type	isal_deflate_hash_aarch64, %function
+/*
+	void isal_deflate_hash_aarch64(uint16_t * hash_table, uint32_t hash_mask,
+			uint32_t current_index, uint8_t * dict, uint32_t dict_len)
+
+	For every position p in [dict, dict + dict_len - 4) stores a 16-bit
+	index at hash_table[crc32c(0, 4 bytes at p) & hash_mask].  The index
+	starts at (current_index - dict_len) & 0xffff and grows by one per
+	position.  Nothing is written when dict + dict_len - 4 <= dict
+	(unsigned pointer compare).
+
+	In:       x0 = hash_table, w1 = hash_mask, w2 = current_index,
+	          x3 = dict, w4 = dict_len
+	Clobbers: x2-x7 and the flags; leaf function, no stack use.
+*/
+	declare_generic_reg	hash_table,	0,x
+	declare_generic_reg	hash_mask,	1,w
+	declare_generic_reg	current_index,	2,w
+	declare_generic_reg	dict,		3,x
+	declare_generic_reg	dict_len,	4,w
+
+	/* next_in reuses the dict register; hash and literal reuse the
+	   current_index register, which is consumed by the first instruction. */
+	declare_generic_reg	next_in		3,x
+	declare_generic_reg	end_in		6,x
+	declare_generic_reg	ind		5,w
+	declare_generic_reg	hash		2,w
+	declare_generic_reg	literal		2,w
+#define SHORTEST_MATCH #4
+
+isal_deflate_hash_aarch64:
+	sub	ind, current_index, dict_len
+	and	ind,ind,0xffff
+
+
+	/* end_in = dict + dict_len - SHORTEST_MATCH */
+	uxtw	x_dict_len, dict_len
+	sub	x_dict_len, x_dict_len, SHORTEST_MATCH
+	add	end_in, dict, x_dict_len
+
+
+
+	cmp	next_in, end_in
+	bcs	exit_func
+
+	mov	w7, 0			/* w7 = zero CRC seed */
+loop_start:
+	ldr	literal, [next_in]
+	add	next_in, next_in, 1
+	/* flags from this cmp are consumed by the bne at the bottom of the
+	   loop; none of the instructions in between modify them */
+	cmp	next_in, end_in
+	crc32cw	hash, w7, literal
+	and	hash, hash, hash_mask
+	strh	ind, [hash_table, x_hash, lsl 1]
+	add	ind,ind,1
+	bne	loop_start
+exit_func:
+
+	ret
+	.size	isal_deflate_hash_aarch64, .-isal_deflate_hash_aarch64
diff --git a/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S b/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S
new file mode 100644
index 000000000..4f2fe22aa
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S
@@ -0,0 +1,32 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include "aarch64_multibinary.h"
+
+/*
+ * Multibinary entry point for the inflate path.
+ * NOTE(review): mbin_interface is defined in aarch64_multibinary.h (not
+ * visible here); presumably it emits the public symbol that resolves
+ * through the DEFINE_INTERFACE_DISPATCHER of the same name in
+ * igzip_multibinary_aarch64_dispatcher.c - confirm.
+ */
+mbin_interface decode_huffman_code_block_stateless
diff --git a/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S b/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S
new file mode 100644
index 000000000..78d23940d
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S
@@ -0,0 +1,178 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+	.arch armv8-a+crypto
+	.text
+	.align 3
+
+/*
+Macros
+*/
+
+/*
+ * declare_var_vector_reg name, reg
+ * Defines name_q / name_v / name_s / name_d as aliases of SIMD register "reg".
+ */
+.macro	declare_var_vector_reg name:req,reg:req
+	\name\()_q	.req	q\reg
+	\name\()_v	.req	v\reg
+	\name\()_s	.req	s\reg
+	\name\()_d	.req	d\reg
+.endm
+
+/*
+ * mod_adler dest, tmp
+ * dest = dest mod 65521, computed as
+ *   q = (dest * const_div1) >> 47;  dest = dest - q * const_div2
+ * with const_div1 = 0x80078071 and const_div2 = 65521.
+ * Clobbers the 64-bit register aliased as tmp_x.
+ */
+.macro	mod_adler	dest:req,tmp:req
+	umull	\tmp\()_x,\dest,const_div1
+	lsr	\tmp\()_x,\tmp\()_x,47
+	msub	\dest,\tmp,const_div2,\dest
+.endm
+
+/*
+	uint32_t adler32_neon(uint32_t adler32, uint8_t * start, uint32_t length);
+
+	Returns the Adler-32 checksum of the "length" bytes at "start", seeded
+	with "adler32" (low half-word = running byte sum, high half-word =
+	running sum of sums).
+
+	In:       w0 = adler32, x1 = start, x2 = length
+	Out:      w0 = updated checksum
+	Clobbers: x0-x11, v2-v7, v16, v17, v20, flags.  Leaf function, no stack.
+
+	NOTE(review): the prototype above declares length as uint32_t, but the
+	code consumes all 64 bits of x2.  AAPCS64 leaves the upper 32 bits of a
+	32-bit argument unspecified - confirm every caller passes a 64-bit
+	length (or zero-extend w2 on entry).
+*/
+/*
+Arguments list
+*/
+	adler32	.req	w0
+	start	.req	x1
+	length	.req	x2
+	.global	adler32_neon
+	.type	adler32_neon, %function
+adler32_neon:
+/*
+local variables
+*/
+	declare_var_vector_reg	factor0 , 6
+	declare_var_vector_reg	factor1 , 7
+	declare_var_vector_reg	d0 , 4
+	declare_var_vector_reg	d1 , 5
+	declare_var_vector_reg	adacc , 2
+	declare_var_vector_reg	s2acc , 3
+	declare_var_vector_reg	zero , 16
+	declare_var_vector_reg	adler , 17
+	declare_var_vector_reg	sum2 , 20
+	declare_var_vector_reg	tmp2 , 20
+
+	adler0		.req	w4
+	adler1		.req	w5
+	adler0_x	.req	x4
+	adler1_x	.req	x5
+	end		.req	x0
+	tmp		.req	w8
+	tmp_x		.req	x8
+	tmp1_x		.req	x9
+	loop_cnt	.req	x10
+	loop_const	.req	x11
+	const_div1	.req	w6
+	const_div2	.req	w7
+	mov	const_div1, 32881
+	movk	const_div1, 0x8007, lsl 16
+	mov	const_div2, 65521
+	and	adler0, adler32, 0xffff
+	lsr	adler1, adler32, 16
+
+	// loop_cnt = number of whole 32-byte blocks
+	lsr	loop_cnt,length,5
+	adrp	x3,factors
+	add	x3,x3,:lo12:factors
+	ld1	{factor0_v.16b-factor1_v.16b},[x3]
+
+	// x0 is reused as "end"; the adler32 argument was consumed above
+	add	end,start,length
+	cbz	loop_cnt,final_accum32
+	mov	loop_const,173
+
+	movi	zero_v.4s,0
+
+
+
+
+	// Outer loop: at most 173 blocks are accumulated between modulo
+	// reductions.
+	// NOTE(review): 173 is presumably the largest block count for which the
+	// 32-bit lane accumulators cannot overflow - confirm.
+great_than_32:
+	cmp	loop_cnt,173
+	csel	loop_const,loop_cnt,loop_const,le
+	mov	adacc_v.16b,zero_v.16b
+	mov	s2acc_v.16b,zero_v.16b
+	ins	adacc_v.s[0],adler0
+	ins	s2acc_v.s[0],adler1
+	add	tmp_x,start,loop_const,lsl 5	// end of this chunk
+
+accum32_neon:
+	// Load exactly the 32 bytes consumed by this iteration and advance.
+	// The earlier software-pipelined pre-load of the *next* block read 32
+	// bytes beyond the last full block on the final iteration, i.e. past
+	// the end of the caller's buffer.
+	ld1	{d0_v.16b-d1_v.16b},[start],32
+
+	// s2 += 32 * (byte sum before this block)
+	shl	tmp2_v.4s,adacc_v.4s,5
+	add	s2acc_v.4s,s2acc_v.4s,tmp2_v.4s
+
+	// byte sum += sum of the 32 bytes
+	uaddlp	adler_v.8h,d0_v.16b
+	uadalp	adler_v.8h,d1_v.16b
+	uadalp	adacc_v.4s,adler_v.8h
+
+	// s2 += sum(byte[i] * (32 - i)), weights taken from "factors"
+	umull	sum2_v.8h,factor0_v.8b ,d0_v.8b
+	umlal2	sum2_v.8h,factor0_v.16b,d0_v.16b
+	umlal	sum2_v.8h,factor1_v.8b ,d1_v.8b
+	umlal2	sum2_v.8h,factor1_v.16b,d1_v.16b
+	uadalp	s2acc_v.4s,sum2_v.8h
+
+	cmp	start,tmp_x
+	bne	accum32_neon
+
+	// horizontal add of the lanes, then reduce modulo 65521
+	uaddlv	adacc_d,adacc_v.4s
+	uaddlv	s2acc_d,s2acc_v.4s
+	fmov	adler0_x,adacc_d
+	fmov	adler1_x,s2acc_d
+
+	mod_adler	adler0,tmp
+	mod_adler	adler1,tmp
+	sub	loop_cnt,loop_cnt,loop_const
+	cbnz	loop_cnt,great_than_32
+
+final_accum32:
+	and	length,length,31
+	cbz	length,end_func
+
+	// scalar tail: fewer than 32 bytes remain
+accum32_body:
+	cmp	start,end
+	beq	end_func
+	ldrb	tmp,[start],1
+	add	adler0,adler0,tmp
+	add	adler1,adler1,adler0
+	b	accum32_body
+
+end_func:
+	mod_adler	adler0,tmp
+	mod_adler	adler1,tmp
+	orr	w0,adler0,adler1,lsl 16
+	ret
+
+	.size	adler32_neon, .-adler32_neon
+	.section	.rodata.cst16,"aM",@progbits,16
+	.align	4
+factors:
+	.quad	0x191a1b1c1d1e1f20
+	.quad	0x1112131415161718
+	.quad	0x090a0b0c0d0e0f10
+	.quad	0x0102030405060708
+
diff --git a/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c b/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c
new file mode 100644
index 000000000..183010c22
--- /dev/null
+++
b/src/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c @@ -0,0 +1,188 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(isal_adler32) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(adler32_neon); + + return PROVIDER_BASIC(adler32); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_body) +{ + unsigned long auxval = getauxval(AT_HWCAP); + + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_body_aarch64); + + return PROVIDER_BASIC(isal_deflate_body); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_finish_aarch64); + + return PROVIDER_BASIC(isal_deflate_finish); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return 
PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy); + + return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map); +} + +DEFINE_INTERFACE_DISPATCHER(set_long_icf_fg) +{ + return PROVIDER_INFO(set_long_icf_fg_aarch64); +} + +DEFINE_INTERFACE_DISPATCHER(encode_deflate_icf) +{ + return PROVIDER_INFO(encode_deflate_icf_aarch64); +} + +DEFINE_INTERFACE_DISPATCHER(isal_update_histogram) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_update_histogram_aarch64); + + return PROVIDER_BASIC(isal_update_histogram); +} + +DEFINE_INTERFACE_DISPATCHER(gen_icf_map_lh1) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) { + return PROVIDER_INFO(gen_icf_map_h1_aarch64); + } + + return PROVIDER_BASIC(gen_icf_map_h1); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl0) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_hash_aarch64); + + return PROVIDER_BASIC(isal_deflate_hash); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl1) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_hash_aarch64); + + return PROVIDER_BASIC(isal_deflate_hash); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl2) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_hash_aarch64); + + return PROVIDER_BASIC(isal_deflate_hash); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_hash_lvl3) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_hash_aarch64); + + return PROVIDER_BASIC(isal_deflate_hash); +} + 
+DEFINE_INTERFACE_DISPATCHER(decode_huffman_code_block_stateless) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(decode_huffman_code_block_stateless_aarch64); + + return PROVIDER_BASIC(decode_huffman_code_block_stateless); +} diff --git a/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S b/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S new file mode 100644 index 000000000..57d5230a5 --- /dev/null +++ b/src/isa-l/igzip/aarch64/igzip_multibinary_arm64.S @@ -0,0 +1,50 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "aarch64_multibinary.h"
+
+
+/*
+ * Multibinary entry points for the deflate side of igzip.  Every name below
+ * has a DEFINE_INTERFACE_DISPATCHER of the same name in
+ * igzip_multibinary_aarch64_dispatcher.c.
+ * NOTE(review): mbin_interface is defined in aarch64_multibinary.h (not
+ * visible here); presumably it emits the public symbol that resolves through
+ * that dispatcher - confirm.
+ */
+mbin_interface isal_deflate_icf_body_lvl1
+mbin_interface isal_deflate_icf_body_lvl2
+mbin_interface isal_deflate_icf_body_lvl3
+mbin_interface isal_deflate_icf_finish_lvl1
+mbin_interface isal_deflate_icf_finish_lvl2
+mbin_interface isal_deflate_icf_finish_lvl3
+mbin_interface isal_update_histogram
+mbin_interface encode_deflate_icf
+mbin_interface set_long_icf_fg
+mbin_interface gen_icf_map_lh1
+mbin_interface isal_deflate_hash_lvl0
+mbin_interface isal_deflate_hash_lvl1
+mbin_interface isal_deflate_hash_lvl2
+mbin_interface isal_deflate_hash_lvl3
+
+mbin_interface isal_deflate_body
+mbin_interface isal_deflate_finish
+mbin_interface isal_adler32
diff --git a/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S b/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
new file mode 100644
index 000000000..13f9b087d
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
@@ -0,0 +1,194 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+	.arch armv8-a
+	.text
+	.align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros
+*/
+
+/*
+ * declare_generic_reg name, reg, default
+ * Defines three aliases for register number "reg": name (width given by
+ * "default", w or x), w_name (32-bit view) and x_name (64-bit view).
+ */
+.macro	declare_generic_reg	name:req,reg:req,default:req
+	\name		.req	\default\reg
+	w_\name		.req	w\reg
+	x_\name		.req	x\reg
+.endm
+
+	.text
+	.align 2
+	.global	set_long_icf_fg_aarch64
+	.type	set_long_icf_fg_aarch64, %function
+
+/*
+void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
+			struct deflate_icf *match_lookup)
+
+Walks the 4-byte match_lookup entries for input positions
+[next_in, next_in + processed).  For an entry whose length field (low 10
+bits) is above LEN_OFFSET + 7, the match is re-measured with compare_aarch64
+starting 8 bytes in; if the result is longer than the recorded length, this
+entry and the following ones are rewritten with the extended length (capped
+at LEN_MAX_CONST, decreasing by one per entry).
+
+In:    x0 = next_in, x1 = processed, x2 = input_size, x3 = match_lookup
+Stack: 192-byte frame: x29/x30 at 0, x19/x20 at 16, x21/x22 at 32, x23 at 48,
+       and a 128-byte copy of the dist_start table at offset 64.
+Calls: memcpy.
+NOTE(review): register usage of the compare_aarch64 macro is defined in an
+included header and is not visible here.
+*/
+
+	/* arguments */
+	declare_generic_reg	next_in_param,		0,x
+	declare_generic_reg	processed_param,	1,x
+	declare_generic_reg	input_size_param,	2,x
+	declare_generic_reg	match_lookup_param,	3,x
+
+	declare_generic_reg	param0,			0,x
+	declare_generic_reg	param1,			1,x
+	declare_generic_reg	param2,			2,x
+
+	/* local variable */
+	declare_generic_reg	len,			7,w
+	declare_generic_reg	dist_code,		8,w
+	declare_generic_reg	shortest_match_len	9,w
+	declare_generic_reg	len_max,		10,w
+	declare_generic_reg	dist_extra,		11,w
+	declare_generic_reg	const_8,		13,x
+	declare_generic_reg	next_in,		20,x
+	declare_generic_reg	dist_start,		21,x
+	declare_generic_reg	end_processed,		22,x
+	declare_generic_reg	end_in,			23,x
+	declare_generic_reg	match_lookup,		19,x
+
+	declare_generic_reg	match_length,		4,w
+	declare_generic_reg	tmp0,			5,w
+	declare_generic_reg	tmp1,			6,w
+
+/* constant */
+.equ	DIST_START_SIZE, 128
+.equ	ISAL_LOOK_AHEAD, 288
+.equ	LEN_OFFSET, 254
+.equ	SHORTEST_MATCH, 4
+.equ	LEN_MAX_CONST, 512
+
+set_long_icf_fg_aarch64:
+	stp	x29, x30, [sp, -192]!
+	add	x29, sp, 0
+	stp	x21, x22, [sp, 32]
+	add	x21, x29, 64		// dist_start = local table inside the frame
+	stp	x19, x20, [sp, 16]
+	str	x23, [sp, 48]
+
+	// move the arguments to callee-saved registers before calling memcpy
+	add	end_processed, next_in_param, processed_param
+	mov	next_in, next_in_param
+	add	end_in, next_in_param, input_size_param
+	mov	match_lookup, match_lookup_param
+
+	// copy the constant distance table from .rodata to the stack buffer
+	adrp	x1, .data_dist_start
+	mov	x2, DIST_START_SIZE	// 128
+	add	x1, x1, :lo12:.data_dist_start
+	mov	x0, dist_start
+	bl	memcpy
+
+	// end_in = min(end_in, end_processed + ISAL_LOOK_AHEAD), unsigned
+	add	x_tmp0, end_processed, ISAL_LOOK_AHEAD	// 288
+	cmp	end_in, x_tmp0
+	csel	end_in, end_in, x_tmp0, cc
+	cmp	next_in, end_processed
+	bcs	.done
+
+	mov	const_8, 8
+	mov	len_max, LEN_MAX_CONST	// 512
+	mov	shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
+	b	.while_outer_loop
+
+	.align 2
+	// advance to the next input byte / lookup entry
+.while_outer_check:
+	add	next_in, next_in, 1
+	add	match_lookup, match_lookup, 4
+	cmp	end_processed, next_in
+	bls	.done
+
+.while_outer_loop:
+	// skip entries whose length field is <= LEN_OFFSET + 7
+	ldrh	len, [match_lookup]
+	and	len, len, LIT_LEN_MASK	// 1023
+	cmp	len, (LEN_OFFSET + 8 - 1)	// 261
+	bls	.while_outer_check
+
+	// dist_code = bits 10..18 of the entry, dist_extra = bits 19..31
+	ldr	dist_code, [match_lookup]
+	add	x1, next_in, 8
+	ldrh	dist_extra, [match_lookup, 2]
+	sub	w2, w_end_in, w1
+	ubfx	x_dist_code, x_dist_code, 10, 9
+	ubfx	x_dist_extra, x_dist_extra, 3, 13
+	// x0 = next_in + 8 - (dist_start[dist_code] + dist_extra)
+	uxtw	x0, dist_code
+	ldr	w0, [dist_start, x0, lsl 2]
+	add	w0, dist_extra, w0
+	sub	x0, const_8, x0
+	add	x0, next_in, x0
+
+	// x0 = match source, x1 = next_in + 8, w2 = bytes left before end_in
+	compare_aarch64	param0,param1,param2,match_length,tmp0,tmp1
+	mov	w0, w_match_length
+
+	add	w0, w0, (LEN_OFFSET + 8)	// 262
+	cmp	w0, len
+	bls	.while_outer_check
+
+	// w2 = distance part of the entry, reused for every rewritten entry
+	lsl	w2, dist_extra, 19
+	orr	w2, w2, dist_code, lsl 10
+
+	.align 3
+.while_inner_loop:
+	// store min(w0, LEN_MAX_CONST) | distance bits, then shorten by one
+	cmp	w0, LEN_MAX_CONST	// 512
+	add	next_in, next_in, 1
+	csel	w1, w0, len_max, ls
+	sub	w0, w0, #1
+	orr	w1, w1, w2
+	str	w1, [match_lookup]
+	ldrh	w1, [match_lookup, 4]!
+
+	// continue while max(next entry length, LEN_OFFSET + 3) < w0
+	and	w1, w1, LIT_LEN_MASK	// 1023
+	cmp	w1, (LEN_OFFSET + SHORTEST_MATCH - 1)	// 257
+	csel	w1, w1, shortest_match_len, cs
+	cmp	w1, w0
+	bcc	.while_inner_loop
+
+	add	next_in, next_in, 1
+	add	match_lookup, match_lookup, 4
+	cmp	end_processed, next_in
+	bhi	.while_outer_loop
+
+.done:
+	ldp	x19, x20, [sp, 16]
+	ldp	x21, x22, [sp, 32]
+	ldr	x23, [sp, 48]
+	ldp	x29, x30, [sp], 192
+	ret
+	.size	set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
+
+	// 32 words (DIST_START_SIZE bytes), indexed by dist_code
+	.section	.rodata
+	.align	3
+	.set	.data_dist_start,. + 0
+.real_data_dist_start:
+	.word	0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0007, 0x0009, 0x000d
+	.word	0x0011, 0x0019, 0x0021, 0x0031, 0x0041, 0x0061, 0x0081, 0x00c1
+	.word	0x0101, 0x0181, 0x0201, 0x0301, 0x0401, 0x0601, 0x0801, 0x0c01
+	.word	0x1001, 0x1801, 0x2001, 0x3001, 0x4001, 0x6001, 0x0000, 0x0000
diff --git a/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S b/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S
new file mode 100644
index 000000000..3daaa1ba3
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S
@@ -0,0 +1,364 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+	.arch armv8-a+crc
+	.text
+	.align	2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros: declare_generic_reg NAME, N, w|x binds NAME to register wN or xN and also creates the aliases w_NAME and x_NAME
+*/
+.macro	declare_generic_reg name:req,reg:req,default:req
+	\name		.req	\default\reg
+	w_\name		.req	w\reg
+	x_\name		.req	x\reg
+.endm
+
+	.global	isal_deflate_icf_body_hash_hist_aarch64
+	.type	isal_deflate_icf_body_hash_hist_aarch64, %function
+/*
+void isal_deflate_icf_body_hash_hist_aarch64(struct isal_zstream *stream); In: x0 = stream. No return value. Uses an 80-byte stack frame.
+*/
+
+/* constant */
+
+/* offset of struct isal_zstream */
+.equ	offset_next_in, 0
+.equ	offset_avail_in, 8
+.equ	offset_total_in, 12
+.equ	offset_next_out, 16
+.equ	offset_avail_out, 24
+.equ	offset_total_out, 28
+.equ	offset_hufftables, 32
+.equ	offset_level, 40
+.equ	offset_level_buf_size, 44
+.equ	offset_level_buf, 48
+.equ	offset_end_of_stream, 56
+.equ	offset_flush, 58
+.equ	offset_gzip_flag, 60
+.equ	offset_hist_bits, 62
+.equ	offset_state, 64
+.equ	offset_state_block_end, 72
+.equ	offset_state_has_hist, 135
+
+/* offset of struct level_buf */
+.equ	offset_encode_tables, 0
+.equ	offset_hist, 2176
+.equ	offset_hist_d_hist, 2176
+.equ	offset_hist_ll_hist, 2296
+.equ	offset_deflate_hdr_count, 4348
+.equ	offset_deflate_hdr_extra_bits, 4352
+.equ	offset_deflate_hdr, 4356
+.equ	offset_icf_buf_next, 4688
+.equ	offset_icf_buf_avail_out, 4696
+.equ	offset_icf_buf_start, 4704
+.equ	offset_hash8k, 4712
+.equ	offset_hash_hist, 4712
+
+/* offset of struct isal_zstate */
+.equ	offset_dist_mask, 12
+.equ	offset_hash_mask, 16
+
+/* macros*/
+.equ	ISAL_LOOK_AHEAD, 288
+
+	/* arguments */
+	declare_generic_reg	stream,		0,x
+	declare_generic_reg	stream_saved,	11,x
+
+	declare_generic_reg	param0,		0,x
+	declare_generic_reg	param1,		1,x
+	declare_generic_reg	param2,		2,x
+
+	/* local variable */
+	declare_generic_reg	level_buf,	6,x	// was x18: x18 is the AAPCS64 platform register (reserved on Apple/Windows); x6 is not used elsewhere in this file. NOTE(review): confirm compare_258_bytes only touches the registers passed to it
+	declare_generic_reg	avail_in,	13,w
+	declare_generic_reg	end_in,		13,x	// shares register 13 with avail_in
+	declare_generic_reg	start_in,	19,x
+	declare_generic_reg	next_in,	9,x
+	declare_generic_reg	next_in_iter,	14,x
+	declare_generic_reg	state,		24,x
+	declare_generic_reg	hist_size,	22,w
+	declare_generic_reg	hash_mask,	21,w
+	declare_generic_reg	start_out,	12,x
+	declare_generic_reg	end_out,	12,x	// shares register 12 with start_out
+	declare_generic_reg	next_out,	8,x
+	declare_generic_reg	file_start,	20,x
+	declare_generic_reg	last_seen,	15,x
+	declare_generic_reg	total_in,	25,x
+	declare_generic_reg	NULL_DIST_SYM,	23,w
+	declare_generic_reg	match_length,	3,x
+	declare_generic_reg	dist,		7,x
+	declare_generic_reg	dist_inc,	26,w	// dist - 1
+	declare_generic_reg	literal,	10,x
+
+	declare_generic_reg	tmp0,		4,x
+	declare_generic_reg	tmp1,		5,x
+
+isal_deflate_icf_body_hash_hist_aarch64:
+	stp	x29, x30, [sp, -80]!	// 80-byte frame: x29/x30 at 0, x19-x26 at 16..79
+	add	x29, sp, 0
+	str	x24, [sp, 56]	// x24 (state) is written on every path, so it is always saved
+
+	ldr	avail_in, [stream, offset_avail_in]
+	cbnz	avail_in, .stream_available
+
+	ldr	w1, [stream, offset_end_of_stream]	// w1 keeps two values of end_of_stream and flush
+	cbz	w1, .done
+
+	add	state, stream, offset_state
+	b	.state_flush_read_buffer
+
+	.align	2
+.stream_available:
+	stp	x19, x20, [x29, 16]	// remaining callee-saved registers are only saved on this path
+	stp	x21, x22, [x29, 32]
+	str	x23, [x29, 48]
+	stp	x25, x26, [x29, 64]
+
+	ldr	level_buf, [stream, offset_level_buf]
+	add	state, stream, offset_state	// 64
+	mov	stream_saved, stream
+	ldr	start_in, [stream, offset_next_in]	// 0
+	ldr	w_total_in, [stream, offset_total_in]
+
+	mov	x0, offset_hash_hist
+	add	last_seen, level_buf, x0	// last_seen = table of 16-bit positions at level_buf + offset_hash_hist
+
+	ldr	x0, [level_buf, offset_icf_buf_avail_out]	// 4696
+	ldr	start_out, [level_buf, offset_icf_buf_next]	// 4688
+
+	mov	next_in, start_in
+	and	x0, x0, -4	// round the available output down to a multiple of 4 bytes
+	ldp	hist_size, hash_mask, [state, offset_dist_mask]	// 12
+	add	end_in, start_in, avail_in, uxtw
+	mov	next_out, start_out
+	add	end_out, start_out, x0
+
+	add	x0, next_in, ISAL_LOOK_AHEAD	// 288
+	sub	file_start, start_in, w_total_in, uxtw
+	mov	NULL_DIST_SYM, 30
+	add	next_in_iter, next_in, 1
+	cmp	end_in, x0
+	bls	.while_loop_end	// fewer than ISAL_LOOK_AHEAD + 1 input bytes: nothing to do here
+
+	.align	3
+.while_loop:
+	cmp	next_out, end_out
+	bcs	.state_create_hdr	// output buffer full
+
+	ldr	w_literal, [next_in]
+	mov	w0, w_literal
+	crc32cw	w0, wzr, w0	// hash = crc32c(0, 4 input bytes)
+
+	and	w0, w0, hash_mask
+	sub	x1, next_in, file_start
+	lsl	x0, x0, 1
+
+	ldrh	w_dist, [last_seen, x0]	// previous position stored for this hash
+	strh	w1, [last_seen, x0]	// record the current position (low 16 bits)
+	sub	w1, w1, w_dist
+	and	w_dist, w1, 65535	// dist = (current - previous) & 0xffff
+
+	sub	dist_inc, w_dist, #1
+	cmp	dist_inc, hist_size
+	bcc	.dist_vs_hist_size	// unsigned (dist - 1) < hist_size: try a match
+
+.while_latter_part:
+	and	w_literal, w_literal, 255	// emit one literal: first byte of the loaded word
+	mov	next_in, next_in_iter
+	add	next_out, next_out, 4
+	add	x1, level_buf, w_literal, uxtb 2
+	ldr	w0, [x1, offset_hist_ll_hist]	// 2296, ll_hist[literal]++
+	add	w0, w0, 1
+	str	w0, [x1, offset_hist_ll_hist]	// 2296
+	ldrh	w0, [next_out, -4]
+	bfi	w0, w_literal, 0, 10	// bits 0..9 = literal
+	strh	w0, [next_out, -4]
+	ldr	w0, [next_out, -4]
+	bfi	w0, NULL_DIST_SYM, 10, 9	// bits 10..18 = 30
+	str	w0, [next_out, -4]
+	ubfx	x0, x0, 16, 3	// keep bits 16..18, clear the bits above them
+	strh	w0, [next_out, -2]
+
+.while_loop_check:
+	add	x0, next_in, ISAL_LOOK_AHEAD	// 288
+	add	next_in_iter, next_in, 1
+	cmp	end_in, x0
+	bhi	.while_loop
+	b	.while_loop_end
+
+	.align	2
+.dist_vs_hist_size:
+	mov	x1, next_in
+	mov	w2, 258
+	sub	x0, next_in, w_dist, uxth	// x0 = candidate match start
+	compare_258_bytes	param0,param1,match_length,tmp0,tmp1
+
+	and	w1, w_match_length, 65535	// 0xffff
+	cmp	w1, 3
+	bls	.while_latter_part	// match length <= 3: emit a literal instead
+
+	ldr	w0, [next_in, 1]
+	mov	x4, next_in
+	add	next_in, next_in, w1, uxth	// skip over the matched bytes
+	crc32cw	w0, wzr, w0
+
+	and	w0, hash_mask, w0
+	sub	next_in_iter, next_in_iter, file_start
+	strh	w_next_in_iter, [last_seen, x0, lsl 1]	// record the hash of match position + 1
+	ldr	w0, [x4, 2]!
+	crc32cw	w0, wzr, w0
+
+	and	w0, hash_mask, w0
+	and	w_match_length, w_match_length, 65535	// 0xffff
+	sub	x4, x4, file_start
+
+	// get_len_icf_code
+	add	w_match_length, w_match_length, 254
+	// get_dist_icf_code, first part
+	mov	w1, 0	// w1 => dist_extra
+	strh	w4, [last_seen, x0, lsl 1]	// record the hash of match position + 2
+	cmp	w_dist, 2
+	ubfiz	x0, match_length, 2, 17
+	add	x0, level_buf, x0
+	bhi	.compute_dist_icf_code
+
+.match_length_end:
+	// handle level_buf->hist
+	ldr	w2, [x0, offset_hist_ll_hist]	// 2296, ll_hist
+	add	x4, level_buf, dist_inc, uxtw 2	// d_hist
+	add	next_out, next_out, 4
+	add	w2, w2, 1	// ll_hist
+	str	w2, [x0, offset_hist_ll_hist]	// 2296, ll_hist
+	ldr	w0, [x4, offset_hist_d_hist]	// 2176, d_hist
+	add	w0, w0, 1	// d_hist
+	str	w0, [x4, offset_hist_d_hist]	// 2176, d_hist
+
+	// write_deflate_icf
+	ldrh	w0, [next_out, -4]
+	bfi	w0, w3, 0, 10
+	strh	w0, [next_out, -4]
+	ldr	w0, [next_out, -4]
+	bfi	w0, dist_inc, 10, 9
+	str	w0, [next_out, -4]
+	lsr	w0, w0, 16
+	bfi	w0, w1, 3, 13	// w1 => dist_extra
+	strh	w0, [next_out, -2]
+	b	.while_loop_check
+
+	.align	2
+// get_dist_icf_code, 2nd part
+.compute_dist_icf_code:
+	clz	w1, dist_inc
+	mov	w2, 30
+	sub	w2, w2, w1	// w2 = 30 - clz(dist - 1)
+	mov	w1, 1
+	lsl	w1, w1, w2
+	sub	w1, w1, #1
+	and	w1, w1, dist_inc	// dist_extra = low w2 bits of (dist - 1)
+	lsr	dist_inc, dist_inc, w2
+	add	dist_inc, dist_inc, w2, lsl 1	// symbol = ((dist - 1) >> w2) + 2 * w2
+	and	w1, w1, 8191
+	b	.match_length_end
+
+.while_loop_end:
+	sub	x19, next_in, x19	// x19 = bytes consumed (next_in - start_in)
+	cmp	x19, 0
+	ble	.skip_igzip_hist2
+
+	mov	w0, 1
+	strb	w0, [stream_saved, offset_state_has_hist]	// 135
+
+.skip_igzip_hist2:
+	add	w19, w_total_in, w19
+	ldr	w0, [stream_saved, offset_end_of_stream]	// 56
+	sub	x12, end_out, next_out
+	asr	x12, x12, 2	// x12 => end_out - next_out
+	str	next_in, [stream_saved]
+	str	w19, [stream_saved, offset_total_in]	// 12
+	sub	next_in, end_in, next_in	// x9 now holds the remaining input byte count
+	str	w19, [stream_saved, offset_state_block_end]	// 72
+
+	ldp	x25, x26, [x29, 64]
+	ldr	x23, [x29, 48]
+	ldp	x21, x22, [x29, 32]
+	ldp	x19, x20, [x29, 16]
+
+	str	w9, [stream_saved, offset_avail_in]	// 8
+	str	next_out, [level_buf, offset_icf_buf_next]	// 4688
+	str	x12, [level_buf, offset_icf_buf_avail_out]	// 4696, x12 => end_out - next_out
+	cbnz	w0, .state_flush_read_buffer
+	b	.done
+
+	.align	2
+.state_create_hdr:
+	mov	w0, 2
+	str	w0, [x24, 20]	// x24 = state; writes the 32-bit field at offset 20 of it
+	sub	start_in, next_in, start_in
+	cmp	start_in, 0
+	ble	.skip_igzip_hist
+
+	mov	w0, 1
+	strb	w0, [stream_saved, offset_state_has_hist]	// 135
+
+.skip_igzip_hist:
+	add	w_total_in, w_total_in, w19
+	sub	x12, end_out, next_out
+	asr	x12, x12, 2	// x12 => end_out - next_out
+	str	next_in, [stream_saved]
+	sub	next_in, end_in, next_in
+	str	w_total_in, [stream_saved, offset_total_in]	// 12
+	str	w_total_in, [stream_saved, offset_state_block_end]	// 72
+
+	ldp	x25, x26, [x29, 64]
+	ldr	x23, [x29, 48]
+	ldp	x21, x22, [x29, 32]
+	ldp	x19, x20, [x29, 16]
+
+	str	w9, [stream_saved, offset_avail_in]	// 8
+	str	next_out, [level_buf, offset_icf_buf_next]	// 4688
+	str	x12, [level_buf, offset_icf_buf_avail_out]	// 4696, x12 => end_out - next_out
+	b	.done
+
+.state_flush_read_buffer:
+	mov	w0, 4
+	str	w0, [x24, 20]	// x24 = state; writes the 32-bit field at offset 20 of it
+
+.done:
+	ldr	x24, [sp, 56]
+	ldp	x29, x30, [sp], 80
+	ret
+
+	.size	isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
diff --git a/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S b/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
new file mode 100644
index 000000000..bb2baa22f
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
@@ -0,0 +1,397 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+	.arch armv8-a+crc
+	.text
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros: declare_generic_reg NAME, N, w|x binds NAME to register wN or xN and also creates the aliases w_NAME and x_NAME
+*/
+.macro	declare_generic_reg name:req,reg:req,default:req
+	\name		.req	\default\reg
+	w_\name		.req	w\reg
+	x_\name		.req	x\reg
+.endm
+
+/*
+void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream); In: x0 = stream. No return value.
+*/
+
+/* constant */
+
+/* offset of struct isal_zstream */
+.equ	offset_next_in, 0
+.equ	offset_avail_in, 8
+.equ	offset_total_in, 12
+.equ	offset_next_out, 16
+.equ	offset_avail_out, 24
+.equ	offset_total_out, 28
+.equ	offset_hufftables, 32
+.equ	offset_level, 40
+.equ	offset_level_buf_size, 44
+.equ	offset_level_buf, 48
+.equ	offset_end_of_stream, 56
+.equ	offset_flush, 58
+.equ	offset_gzip_flag, 60
+.equ	offset_hist_bits, 62
+.equ	offset_state, 64
+.equ	offset_state_block_end, 72
+.equ	offset_state_state, 84	// = offset_state + offset_state_of_zstate
+.equ	offset_state_has_hist, 135
+
+/* offset of struct level_buf */
+.equ	offset_encode_tables, 0
+.equ	offset_hist, 2176
+.equ	offset_hist_d_hist, 2176
+.equ	offset_hist_ll_hist, 2296
+.equ	offset_deflate_hdr_count, 4348
+.equ	offset_deflate_hdr_extra_bits, 4352
+.equ	offset_deflate_hdr, 4356
+.equ	offset_icf_buf_next, 4688
+.equ	offset_icf_buf_avail_out, 4696
+.equ	offset_icf_buf_start, 4704
+.equ	offset_hash8k, 4712
+.equ	offset_hash_hist, 4712
+
+/* offset of struct isal_zstate */
+.equ	offset_dist_mask, 12
+.equ	offset_hash_mask, 16
+.equ	offset_state_of_zstate, 20
+
+/* macros*/
+.equ	ISAL_LOOK_AHEAD, 288
+
+	/* arguments */
+	declare_generic_reg	stream,		0,x
+
+	declare_generic_reg	param0,		0,x
+	declare_generic_reg	param1,		1,x
+	declare_generic_reg	param2,		2,x
+	declare_generic_reg	param3,		3,x
+	declare_generic_reg	param4,		4,x
+	declare_generic_reg	param5,		5,x
+	declare_generic_reg	param6,		6,x
+
+	/* local variable */
+	declare_generic_reg	stream_saved,	15,x
+	declare_generic_reg	level_buf,	13,x
+	declare_generic_reg	start_in,	21,x
+	declare_generic_reg	start_out,	22,x
+	declare_generic_reg	state,		23,x
+	declare_generic_reg	end_out,	12,x
+	declare_generic_reg	end_in,		11,x
+	declare_generic_reg	next_in,	8,x
+	declare_generic_reg	next_out,	10,x
+	declare_generic_reg	next_out_iter,	5,x
+	declare_generic_reg	file_start,	28,x	// was x18: x18 is the AAPCS64 platform register (reserved on Apple/Windows); x28 is already saved/restored by this function's prologue/epilogue
+	declare_generic_reg	last_seen,	14,x
+
+	declare_generic_reg	literal_code,	9,w
+	declare_generic_reg	hash_mask,	19,w
+	declare_generic_reg	hist_size,	20,w
+	declare_generic_reg	dist,		7,w
+	declare_generic_reg	dist_inc,	24,w
+
+	declare_generic_reg	tmp0,		25,x
+	declare_generic_reg	tmp1,		26,x
+	declare_generic_reg	tmp2,		27,x
+	/* the alias tmp3 (x28) was never referenced in this file; x28 now holds file_start */
+
+	.align	2
+	.type	write_deflate_icf_constprop, %function
+write_deflate_icf_constprop:	// In: x0 = 4-byte output slot, w1 = literal code. Clobbers w1-w3. Writes the distance field as 30
+	ldrh	w2, [x0]
+	mov	w3, 30
+	bfi	w2, w1, 0, 10	// bits 0..9 = w1
+	strh	w2, [x0]
+	ldr	w1, [x0]
+	bfi	w1, w3, 10, 9	// bits 10..18 = 30
+	str	w1, [x0]
+	ubfx	x1, x1, 16, 3	// keep bits 16..18, clear the bits above them
+	strh	w1, [x0, 2]
+	ret
+	.size	write_deflate_icf_constprop, .-write_deflate_icf_constprop
+
+	.align	2
+	.type	write_deflate_icf, %function
+write_deflate_icf:	// In: x0 = 4-byte output slot, w1 = bits 0..9, w2 = bits 10..18, w3 = bits 19 and up. Clobbers w1, w4
+	ldrh	w4, [x0]
+	bfi	w4, w1, 0, 10
+	strh	w4, [x0]
+	ldr	w1, [x0]
+	bfi	w1, w2, 10, 9
+	str	w1, [x0]
+	lsr	w1, w1, 16
+	bfi	w1, w3, 3, 13
+	strh	w1, [x0, 2]
+	ret
+	.size	write_deflate_icf, .-write_deflate_icf
+
+	.align	2
+	.type	update_state, %function
+update_state:	// In: x0 = stream, x1 = start_in, x2 = next_in, x3 = end_in, x5 = next_out, x6 = end_out (x4 is overwritten below)
+	sub	x7, x2, x1	// bytes consumed
+	ldr	x4, [x0, 48]	// level_buf (offset_level_buf)
+	cmp	x7, 0
+	ble	.L48
+	mov	w1, 1
+	strb	w1, [x0, 135]	// offset_state_has_hist
+.L48:
+	ldr	w1, [x0, 12]	// offset_total_in
+	sub	x6, x6, x5
+	str	x2, [x0]	// offset_next_in
+	sub	x3, x3, x2
+	add	w1, w1, w7
+	stp	w3, w1, [x0, 8]	// offset_avail_in, offset_total_in
+	str	w1, [x0, 72]	// offset_state_block_end
+	asr	x6, x6, 2
+	str	x5, [x4, 4688]	// offset_icf_buf_next
+	str	x6, [x4, 4696]	// offset_icf_buf_avail_out
+	ret
+	.size	update_state, .-update_state
+
+	.align	2
+	.global	isal_deflate_icf_finish_hash_hist_aarch64
+	.type	isal_deflate_icf_finish_hash_hist_aarch64, %function
+isal_deflate_icf_finish_hash_hist_aarch64:
+	ldr	w_end_in, [stream, 8]	// stream->avail_in
+	cbz	w_end_in, .stream_not_available
+
+	stp	x29, x30, [sp, -96]!	// 96-byte frame: x29/x30 at 0, x19-x28 at 16..95
+	add	x29, sp, 0
+	stp	x19, x20, [sp, 16]
+	stp	x21, x22, [sp, 32]
+	stp	x23, x24, [sp, 48]
+	stp	x25, x26, [sp, 64]
+	stp	x27, x28, [sp, 80]
+
+	mov	stream_saved, stream
+	ldr	level_buf, [stream, offset_level_buf]	// 48
+	ldr	start_in, [stream, offset_next_in]	// 0
+	ldr	start_out, [level_buf, offset_icf_buf_next]	// 4688
+	add	state, stream, offset_state	// 64
+	ldr	end_out, [level_buf, offset_icf_buf_avail_out]	// 4696
+	mov	next_in, start_in
+	ldr	w_file_start, [stream, offset_total_in]	// 12
+	mov	tmp0, offset_hash_hist	// 4712
+	add	last_seen, level_buf, tmp0
+	add	end_in, start_in, w_end_in, uxtw
+	and	end_out, end_out, -4
+	mov	next_out, start_out
+	ldp	hist_size, hash_mask, [state, offset_dist_mask]	// 12
+	sub	file_start, start_in, file_start	// file_start = start_in - total_in
+	add	end_out, start_out, end_out
+	mov	next_out_iter, next_out
+
+	add	x0, next_in, 3
+	cmp	end_in, x0	// x0 <= next_in + 3
+	bls	.while_first_end
+
+	.p2align 3
+.while_first:
+	cmp	next_out, end_out
+	bcs	.save_and_update_state	// output buffer full
+	ldr	literal_code, [next_in]
+	mov	w0, literal_code
+	crc32cw	w0, wzr, w0	// hash = crc32c(0, 4 input bytes)
+	and	w0, w0, hash_mask
+	sub	x2, next_in, file_start
+	lsl	x0, x0, 1
+	ldrh	dist, [last_seen, x0]	// previous position stored for this hash
+	strh	w2, [last_seen, x0]	// record the current position (low 16 bits)
+	sub	w2, w2, dist
+	and	w_dist, w2, 65535	// dist = (current - previous) & 0xffff
+	sub	dist_inc, dist, #1
+	cmp	dist_inc, hist_size
+	bcs	.skip_compare258	// unsigned (dist - 1) >= hist_size: emit a literal
+
+	mov	x2, 0
+	sub	w2, w_end_in, w8	// w2 = end_in - next_in (remaining input bytes)
+	mov	x1, next_in
+	sub	x0, next_in, w_dist, uxth	// x0 = candidate match start
+
+	compare_max_258_bytes	param0,param1,param2,tmp2,tmp0,tmp1
+	mov	w0, w_tmp2
+	and	w2, w0, 65535
+
+	cmp	w2, 3
+	bhi	.while_first_match_length
+
+.skip_compare258:
+	and	literal_code, literal_code, 255	// get_lit_icf_code
+	add	next_in, next_in, 1
+	mov	w1, literal_code
+	mov	x0, next_out
+	add	x_literal_code, level_buf, literal_code, uxtb 2	// level_buf->hist.ll_hist
+
+	ldr	w_tmp0, [x_literal_code, offset_hist_ll_hist]	// 2296
+	add	w_tmp0, w_tmp0, 1
+	str	w_tmp0, [x_literal_code, offset_hist_ll_hist]	// 2296
+
+	bl	write_deflate_icf_constprop	// write_deflate_icf
+
+	add	next_out, next_out, 4
+.while_first_check:
+	add	x0, next_in, 3
+	mov	next_out_iter, next_out
+	cmp	end_in, x0
+	bhi	.while_first
+
+.while_first_end:
+	cmp	next_in, end_in
+	bcs	.while_2nd_end	// flags from this compare are consumed at .while_2nd_end
+
+	cmp	next_out, end_out
+	bcc	.while_2nd_handle
+	b	.save_and_update_state_2nd
+
+	.p2align 2
+.while_2nd:
+	cmp	end_out, next_out_iter
+	bls	.save_and_update_state_2nd
+
+.while_2nd_handle:
+	ldrb	w2, [next_in], 1	// remaining bytes are emitted one literal at a time
+	mov	x0, next_out_iter
+	add	next_out_iter, next_out_iter, 4
+	mov	w1, w2
+	add	x2, level_buf, w2, uxtb 2
+
+	ldr	w_tmp0, [x2, offset_hist_ll_hist]	// 2296
+	add	w_tmp0, w_tmp0, 1
+	str	w_tmp0, [x2, offset_hist_ll_hist]	// 2296
+
+	bl	write_deflate_icf_constprop
+	cmp	end_in, next_in
+	bne	.while_2nd
+
+	mov	next_in, end_in
+	b	.end_of_stream_check_and_exit
+
+	.p2align 2
+.while_first_match_length:
+	and	w0, w0, 65535
+	mov	w3, 0	// w3 => dist_extra
+	add	w1, w0, 254	// get_len_icf_code
+	cmp	dist, 2
+	bhi	.compute_dist_icf_code
+
+.while_first_match_length_end:
+	ubfiz	x_tmp2, x1, 2, 17
+	add	x_tmp1, level_buf, dist_inc, uxtw 2
+	add	x_tmp2, level_buf, x_tmp2
+
+	add	next_in, next_in, w2, uxth	// skip over the matched bytes
+	mov	w2, dist_inc
+
+	ldr	w_tmp0, [x_tmp2, offset_hist_ll_hist]	// 2296
+	add	w_tmp0, w_tmp0, 1
+	str	w_tmp0, [x_tmp2, offset_hist_ll_hist]	// 2296
+
+	mov	x0, next_out
+	ldr	w_tmp0, [x_tmp1, offset_hist_d_hist]	// 2176
+	add	w_tmp0, w_tmp0, 1
+	str	w_tmp0, [x_tmp1, offset_hist_d_hist]	// 2176
+
+	bl	write_deflate_icf
+	add	next_out, next_out, 4
+	b	.while_first_check
+
+// compute_dist_icf_code
+	.p2align 2
+.compute_dist_icf_code:
+	clz	w3, dist_inc
+	mov	w0, 30
+	sub	w0, w0, w3	// w0 = 30 - clz(dist - 1)
+
+	mov	w3, 1
+	lsl	w3, w3, w0
+	sub	w3, w3, #1
+	and	w3, w3, dist_inc	// dist_extra = low w0 bits of (dist - 1)
+	lsl	w4, w0, 1
+	lsr	dist_inc, dist_inc, w0
+	add	dist_inc, dist_inc, w4	// symbol = ((dist - 1) >> w0) + 2 * w0
+	b	.while_first_match_length_end
+
+.while_2nd_end:
+	beq	.end_of_stream_check_and_exit	// next_in == end_in
+	mov	param6, end_out
+	b	.update_state
+
+.end_of_stream_check_and_exit:
+	ldr	w_tmp0, [stream_saved, offset_end_of_stream]	// 56
+	cbz	w_tmp0, .update_state_2nd
+	b	.save_and_update_state_2nd
+
+	.p2align 3
+.save_and_update_state_2nd:
+	mov	w_tmp0, 2
+	str	w_tmp0, [state, offset_state_of_zstate]	// 20
+.update_state_2nd:
+	mov	param6, end_out
+	b	.update_state
+
+	.p2align 2
+.save_and_update_state:
+	mov	param6, end_out
+	mov	param5, next_out
+	mov	w_tmp0, 2
+	str	w_tmp0, [state, offset_state_of_zstate]	// 20
+.update_state:
+	mov	param4, start_out
+	mov	param1, start_in
+	mov	param3, end_in
+	mov	param2, next_in
+	mov	param0, stream_saved
+
+	ldp	x19, x20, [sp, 16]
+	ldp	x21, x22, [sp, 32]
+	ldp	x23, x24, [sp, 48]
+	ldp	x25, x26, [sp, 64]
+	ldp	x27, x28, [sp, 80]
+	ldp	x29, x30, [sp], 96
+
+	b	update_state	// tail call: frame already released, update_state returns to our caller
+
+	.p2align 2
+.stream_not_available:
+	ldr	w1, [stream, offset_end_of_stream]	// 56
+	cbz	w1, .done
+
+	mov	w1, 2
+	str	w1, [stream, offset_state_state]	// 84
+.done:
+	ret
+
+	.size	isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64
diff --git a/src/isa-l/igzip/aarch64/isal_update_histogram.S b/src/isa-l/igzip/aarch64/isal_update_histogram.S
new file mode 100644
index 000000000..abcec0f14
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_update_histogram.S
@@ -0,0 +1,311 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+	.arch armv8-a+crc
+	.text
+	.align	2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+declare Macros: declare_generic_reg NAME, N, w|x binds NAME to register wN or xN and also creates the aliases w_NAME and x_NAME
+*/
+
+.macro	declare_generic_reg name:req,reg:req,default:req
+	\name		.req	\default\reg
+	w_\name		.req	w\reg
+	x_\name		.req	x\reg
+.endm
+
+.macro	convert_dist_to_dist_sym dist:req,tmp0:req,tmp1:req
+	mov	w_\tmp0, w_\dist	// in: w_dist = distance; out: w_dist = symbol; clobbers w_tmp0, w_tmp1, flags
+	mov	w_\dist, -1	// result for dist > 32768
+	cmp	w_\tmp0, 32768
+	bhi	.dist2code_done_\@	// \@ makes the label unique, so the macro can be expanded more than once per file
+	sub	w_\dist, w_\tmp0, #1
+	cmp	w_\tmp0, 4
+	bls	.dist2code_done_\@	// dist <= 4: symbol = dist - 1
+	clz	w_\tmp1, w_\dist
+	mov	w_\tmp0, 30
+	sub	w_\tmp0, w_\tmp0, w_\tmp1	// n = 30 - clz(dist - 1)
+	lsr	w_\dist, w_\dist, w_\tmp0
+	add	w_\dist, w_\dist, w_\tmp0, lsl 1	// symbol = ((dist - 1) >> n) + 2 * n
+.dist2code_done_\@:
+.endm
+
+.macro	convert_length_to_len_sym length:req,length_out:req,tmp0:req
+	adrp	x_\tmp0, .len_to_code_tab_lanchor	// out: w_length_out = len_to_code_tab[length] + 256; clobbers x_tmp0
+	add	x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
+	ldr	w_\length_out, [x_\tmp0, w_\length, uxtw 2]
+	add	w_\length_out, w_\length_out, 256
+.endm
+
+	.section	.rodata
+	.align	4
+.len_to_code_tab_lanchor = . + 0
+	.type	len_to_code_tab, %object
+	.size	len_to_code_tab, 1056
+len_to_code_tab:
+	.word	0x00, 0x00, 0x00
+	.word	0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
+	.word	0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
+	.word	0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
+	.word	0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
+	.word	0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
+	.word	0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
+	.word	0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
+	.word	0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
+	.word	0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+	.word	0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+	.word	0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+	.word	0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+	.word	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+	.word	0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+	.word	0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+	.word	0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+	.word	0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+	.word	0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+	.word	0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+	.word	0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+	.word	0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+	.word	0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+	.word	0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+	.word	0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+	.word	0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+	.word	0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+	.word	0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+	.word	0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+	.word	0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+	.word	0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+	.word	0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+	.word	0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
+	.word	0x00, 0x00, 0x00, 0x00, 0x00
+
+	.text
+	.global	isal_update_histogram_aarch64
+	.arch armv8-a+crc
+	.type	isal_update_histogram_aarch64, %function
+
+/*
+void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
+	struct isal_huff_histogram *histogram); In: x0 = start_stream, w1 = length, x2 = histogram. Calls memset.
+*/
+
+	/* arguments */
+	declare_generic_reg	start_stream,	0,x
+	declare_generic_reg	length,		1,x
+	declare_generic_reg	histogram,	2,x
+
+	declare_generic_reg	param0,		0,x
+	declare_generic_reg	param1,		1,x
+	declare_generic_reg	param2,		2,x
+
+	/* local variable */
+	declare_generic_reg	start_stream_saved,	10,x	// caller-saved; only assigned after the memset call
+	declare_generic_reg	histogram_saved,	23,x
+	declare_generic_reg	current,		19,x
+	declare_generic_reg	last_seen,		20,x
+	declare_generic_reg	end_stream,		21,x
+	declare_generic_reg	loop_end_iter,		22,x
+	declare_generic_reg	dist_histogram,		12,x	// caller-saved; only assigned after the memset call
+	declare_generic_reg	lit_len_histogram,	23,x	// same register as histogram_saved (lit_len_offset is 0)
+	declare_generic_reg	literal,		8,x
+	declare_generic_reg	next_hash,		9,x
+	declare_generic_reg	end,			4,x
+	declare_generic_reg	dist,			7,x
+	declare_generic_reg	D,			11,w
+	declare_generic_reg	match_length,		3,w
+
+	declare_generic_reg	tmp0,			5,w
+	declare_generic_reg	tmp1,			6,w
+
+/* constant */
+.equ	LIT_LEN, 286
+.equ	DIST_LEN, 30
+
+.equ	lit_len_offset, 0
+.equ	dist_offset, (8*LIT_LEN)	// 2288
+.equ	hash_offset, (dist_offset + 8*DIST_LEN)	// 2528
+.equ	hash_table_size, (8*1024*2)	// 16384
+
+isal_update_histogram_aarch64:
+	cmp	w_length, 0
+	ble	.done	// length <= 0: return without touching the histogram
+
+	stp	x29, x30, [sp, -64]!	// 64-byte frame: x29/x30 at 0, x19-x23 at 16..55
+	add	x29, sp, 0
+	stp	x19, x20, [sp, 16]
+	stp	x21, x22, [sp, 32]
+	str	x23, [sp, 48]
+
+	add	last_seen, histogram, hash_offset
+	add	end_stream, start_stream, w_length, sxtw
+	mov	current, start_stream
+	sub	loop_end_iter, end_stream, #3
+	mov	histogram_saved, histogram
+
+	mov	x0, last_seen
+	mov	w1, 0
+	mov	x2, hash_table_size
+	bl	memset	// zero the hash table at histogram + hash_offset
+
+	cmp	current, loop_end_iter
+	bcs	.loop_end
+
+	mov	start_stream_saved, current
+	add	dist_histogram, histogram_saved, dist_offset
+	mov	D, 32766
+	b	.loop
+
+	.align	2
+.loop_2nd_stream:
+	and	literal, literal, 0xff	// count one literal: first byte of the loaded word
+	mov	current, next_hash
+	cmp	loop_end_iter, current	// flags are consumed by the bls below; ldr/add/str do not change them
+
+	ldr	x0, [lit_len_histogram, literal, lsl 3]
+	add	x0, x0, 1
+	str	x0, [lit_len_histogram, literal, lsl 3]
+	bls	.loop_end
+
+.loop:
+	ldr	w_literal, [current]
+	add	next_hash, current, 1
+
+	mov	w0, w_literal
+	crc32cw	w0, wzr, w0	// hash = crc32c(0, 4 input bytes)
+
+	ubfiz	x0, x0, 1, 13	// byte offset = (hash & 0x1fff) * 2
+	sub	x2, current, start_stream_saved
+	ldrh	w_dist, [last_seen, x0]	// previous position stored for this hash
+	strh	w2, [last_seen, x0]	// record the current position (low 16 bits)
+	sub	w2, w2, w_dist
+	and	w_dist, w2, 65535	// dist = (current - previous) & 0xffff
+
+	sub	w0, w_dist, #1
+	cmp	w0, D
+	bhi	.loop_2nd_stream	// unsigned (dist - 1) > 32766: count a literal
+
+	sub	w2, w_end_stream, w_current	// w2 = remaining input bytes
+	mov	x1, current
+	sub	x0, current, w_dist, uxth	// x0 = candidate match start
+	compare_max_258_bytes	param0,param1,param2,match_length,tmp0,tmp1
+
+	cmp	match_length, 3
+	bls	.loop_2nd_stream	// match length <= 3: count a literal
+
+	add	end, current, 3
+	cmp	end, loop_end_iter
+	csel	end, end, loop_end_iter, ls	// end = min(current + 3, loop_end_iter)
+	cmp	end, next_hash
+	bls	.skip_inner_loop
+
+	.align	3
+.inner_loop:
+	ldr	w0, [next_hash]
+	crc32cw	w0, wzr, w0
+
+	ubfiz	x0, x0, 1, 13
+	sub	x1, next_hash, start_stream_saved
+	add	next_hash, next_hash, 1
+	cmp	next_hash, end
+	strh	w1, [last_seen, x0]	// record positions current + 1 up to end - 1
+	bne	.inner_loop
+
+.skip_inner_loop:
+	convert_dist_to_dist_sym	dist, tmp0, tmp1
+	uxtw	x2, w_dist
+	ldr	x1, [dist_histogram, x2, lsl 3]
+	add	x1, x1, 1
+	str	x1, [dist_histogram, x2, lsl 3]
+
+	convert_length_to_len_sym	match_length,tmp1,tmp0
+	uxtw	x0, w_tmp1
+	ldr	x1, [lit_len_histogram, x0, lsl 3]
+	add	x1, x1, 1
+	str	x1, [lit_len_histogram, x0, lsl 3]
+
+	sub	match_length, match_length, #1
+	add	x3, x3, 1	// x3 is match_length again, now with the upper 32 bits cleared
+	add	current, current, x3	// advance past the match
+	cmp	loop_end_iter, current
+	bhi	.loop
+
+	.align	3
+// fold the last for loop
+.loop_end:
+	cmp	end_stream, current
+	bls	.loop_fold_end
+
+	mov	x0, current
+	ldrb	w1, [x0], 1
+	cmp	end_stream, x0
+	ldr	x0, [lit_len_histogram, x1, lsl 3]
+	add	x0, x0, 1
+	str	x0, [lit_len_histogram, x1, lsl 3]
+	bls	.loop_fold_end
+
+	ldrb	w1, [current, 1]
+	add	x0, current, 2
+	cmp	end_stream, x0
+	ldr	x0, [lit_len_histogram, x1, lsl 3]
+	add	x0, x0, 1
+	str	x0, [lit_len_histogram, x1, lsl 3]
+	bls	.loop_fold_end
+
+	ldrb	w1, [current, 2]
+	add	x0, current, 3
+	cmp	end_stream, x0
+	ldr	x0, [lit_len_histogram, x1, lsl 3]
+	add	x0, x0, 1
+	str	x0, [lit_len_histogram, x1, lsl 3]
+	bls	.loop_fold_end
+
+	ldrb	w1, [current, 3]
+	ldr	x0, [lit_len_histogram, x1, lsl 3]
+	add	x0, x0, 1
+	str	x0, [lit_len_histogram, x1, lsl 3]
+
+.loop_fold_end:
+	ldr	x0, [lit_len_histogram, (256*8)]	// count entry 256 once per call
+	add	x0, x0, 1
+	str	x0, [lit_len_histogram, (256*8)]
+
+	ldr	x23, [sp, 48]
+	ldp	x19, x20, [sp, 16]
+	ldp	x21, x22, [sp, 32]
+	ldp	x29, x30, [sp], 64
+	ret
+	.align	2
+.done:
+	ret
+	.size	isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
diff --git a/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h b/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h
new file mode 100644
index 000000000..d55ec09dc
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/lz0a_const_aarch64.h
@@ -0,0 +1,72 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __LZ0A_CONST_AARCH64_H__
+#define __LZ0A_CONST_AARCH64_H__
+#include "options_aarch64.h"
+
+#ifdef __ASSEMBLY__
+.set K , 1024	// unit used by the hash-size constants below
+.set D , IGZIP_HIST_SIZE // Amount of history
+.set LA , 18 * 16 // Max look-ahead, rounded up to 32 byte boundary (= 288)
+.set BSIZE , 2*IGZIP_HIST_SIZE + LA // Nominal buffer size
+
+/// Constants for stateless compression
+#define LAST_BYTES_COUNT 3 // Bytes to prevent reading out of array bounds
+#define LA_STATELESS 258 // No round up since no data is copied to a buffer
+
+.set IGZIP_LVL0_HASH_SIZE , (8 * K)
+.set IGZIP_HASH8K_HASH_SIZE , (8 * K)
+.set IGZIP_HASH_HIST_HASH_SIZE , IGZIP_HIST_SIZE
+.set IGZIP_HASH_MAP_HASH_SIZE , IGZIP_HIST_SIZE
+
+#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1) /* each *_HASH_MASK is its table size minus one */
+#define HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1)
+#define HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_HASH_SIZE - 1)
+#define HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1)
+
+.set MIN_DEF_MATCH , 3 // Minimum length of a match in deflate
+.set SHORTEST_MATCH , 4
+
+.set SLOP , 8
+
+#define ICF_CODE_BYTES 4
+#define LIT_LEN_BIT_COUNT 10
+#define DIST_LIT_BIT_COUNT 9
+
+#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1) /* 0x3ff */
+#define LIT_DIST_MASK ((1 << DIST_LIT_BIT_COUNT) - 1) /* 0x1ff */
+
+#define DIST_OFFSET LIT_LEN_BIT_COUNT /* second field starts at bit 10 */
+#define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT) /* third field starts at bit 19 */
+#define LIT (0x1E << DIST_OFFSET) /* 30 placed in the field at DIST_OFFSET */
+
+
+#endif
+#endif
diff --git a/src/isa-l/igzip/aarch64/options_aarch64.h b/src/isa-l/igzip/aarch64/options_aarch64.h
new file mode 100644
index 000000000..32db918f3
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/options_aarch64.h
@@ -0,0 +1,71 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#ifndef __OPTIONS_AARCH64_H__
+#define __OPTIONS_AARCH64_H__
+
+
+#ifdef __ASSEMBLY__
+
+/// Options:dir
+///   m      - reschedule mem reads
+///   e b    - bitbuff style
+///   t s x  - compare style
+///   h      - limit hash updates
+///   l      - use longer huffman table
+///   f      - fix cache read
+///
+/// This header switches on (h) and (f) unconditionally, see below.
+
+/// History size: default to 32 KiB when the build does not supply a
+/// value, and clamp any larger supplied value down to 32 KiB.
+#if !defined(IGZIP_HIST_SIZE)
+#define IGZIP_HIST_SIZE (32 * 1024)
+#elif (IGZIP_HIST_SIZE > (32 * 1024))
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (32 * 1024)
+#endif
+
+/// With LONGER_HUFFTABLE defined the history size is capped at 8 KiB.
+#if defined(LONGER_HUFFTABLE) && (IGZIP_HIST_SIZE > 8 * 1024)
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (8 * 1024)
+#endif
+
+/// (h) limit hash update
+#define LIMIT_HASH_UPDATE
+
+/// (f) fix cache read problem
+#define FIX_CACHE_READ
+
+/// NOTE(review): presumably mirrors the constant of the same name used by
+/// the C sources - confirm the two stay in sync.
+#define ISAL_DEF_MAX_HDR_SIZE 328
+
+
+
+#endif /* __ASSEMBLY__ */
+#endif /* __OPTIONS_AARCH64_H__ */
diff --git a/src/isa-l/igzip/aarch64/stdmac_aarch64.h b/src/isa-l/igzip/aarch64/stdmac_aarch64.h
new file mode 100644
index 000000000..39afbc640
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/stdmac_aarch64.h
@@ -0,0 +1,57 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __STDMAC_AARCH64_H__
+#define __STDMAC_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+
+// Size in bytes of the stack frame opened by push_stack and closed by
+// pop_stack. 144 is a multiple of 16, so an sp that is 16-byte aligned
+// before push_stack is still 16-byte aligned after it. The two macros
+// only access offsets 0-95 of the frame.
+#define DEBUG_STACK 144
+
+// push_stack: open the frame and save x29, x30 and x19-x28 in it.
+//   [sp, #0]   x29, x30
+//   [sp, #16]  x19, x20
+//   [sp, #32]  x21, x22
+//   [sp, #48]  x23, x24
+//   [sp, #64]  x25, x26
+//   [sp, #80]  x27, x28
+// x29 is left equal to the new sp. SIMD registers are not saved here, so
+// code that modifies v8-v15 has to preserve them itself.
+.macro push_stack
+    stp     x29, x30, [sp, #-DEBUG_STACK]!  // pre-index: sp -= DEBUG_STACK, then store
+    mov     x29, sp
+    stp     x19, x20, [sp, #16]
+    stp     x21, x22, [sp, #32]
+    stp     x23, x24, [sp, #48]
+    stp     x25, x26, [sp, #64]
+    stp     x27, x28, [sp, #80]
+.endm
+
+// pop_stack: reload x19-x28, x29 and x30 from the slots written by
+// push_stack and release the frame. sp must hold the same value it had
+// right after push_stack.
+.macro pop_stack
+    ldp     x19, x20, [sp, #16]
+    ldp     x21, x22, [sp, #32]
+    ldp     x23, x24, [sp, #48]
+    ldp     x25, x26, [sp, #64]
+    ldp     x27, x28, [sp, #80]
+
+    ldp     x29, x30, [sp], #DEBUG_STACK    // post-index: load, then sp += DEBUG_STACK
+.endm
+
+#endif /* __ASSEMBLY__ */
+#endif /* __STDMAC_AARCH64_H__ */
|