diff options
Diffstat (limited to 'src/spdk/isa-l/igzip/aarch64')
15 files changed, 2617 insertions, 0 deletions
diff --git a/src/spdk/isa-l/igzip/aarch64/bitbuf2_aarch64.h b/src/spdk/isa-l/igzip/aarch64/bitbuf2_aarch64.h new file mode 100644 index 000000000..88eb18dfd --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/bitbuf2_aarch64.h @@ -0,0 +1,57 @@
/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.
* Neither the name of Arm Corporation nor the names of its
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#ifndef __BITBUF2_AARCH64_H__
#define __BITBUF2_AARCH64_H__
#include "options_aarch64.h"

#ifdef __ASSEMBLY__
/*
 * update_bits - append a code to the bit buffer and flush whole bytes.
 *
 * Arguments:
 *   stream       full register name holding the struct isal_zstream pointer;
 *                used as the base for the three state stores at the end
 *   code         alias stem (x_<code>/w_<code>): bits to append      [clobbered]
 *   code_len     alias stem: number of bits in <code>               [clobbered]
 *   m_bits       alias stem: 64-bit accumulator of pending bits     [updated]
 *   m_bit_count  alias stem: number of valid bits in m_bits         [updated]
 *   m_out_buf    alias stem: current output write pointer           [updated]
 *
 * The whole 64-bit accumulator is stored at m_out_buf, then the pointer is
 * advanced only by the number of complete bytes and those bytes are shifted
 * out of the accumulator, leaving bit_count % 8 bits pending.
 * The 64-bit add on m_bit_count requires the upper halves of x_<code_len>
 * and x_<m_bit_count> to be zero on entry.
 * Does not modify the condition flags.
 */
.macro update_bits stream:req,code:req,code_len:req,m_bits:req,m_bit_count:req,m_out_buf:req

	// m_bits |= code << m_bit_count; m_bit_count += code_len
	lsl	x_\code,x_\code,x_\m_bit_count
	orr	x_\m_bits,x_\code,x_\m_bits
	add	x_\m_bit_count,x_\code_len,x_\m_bit_count

	// unconditionally write all 8 accumulator bytes at the write pointer
	str	x_\m_bits,[x_\m_out_buf]

	// code = bits that form whole bytes, code_len = number of whole bytes
	and	w_\code,w_\m_bit_count,-8
	lsr	w_\code_len,w_\m_bit_count,3
	add	x_\m_out_buf,x_\m_out_buf,w_\code_len,uxtw
	// keep only the left-over (bit_count % 8) bits in the accumulator
	sub	w_\m_bit_count,w_\m_bit_count,w_\code
	lsr	x_\m_bits,x_\m_bits,x_\code

	// mirror the bit-buffer state back into stream->internal_state.bitbuf
	str	x_\m_bits,[\stream,_internal_state_bitbuf_m_bits]
	str	w_\m_bit_count,[\stream,_internal_state_bitbuf_m_bit_count]
	str	x_\m_out_buf,[\stream,_internal_state_bitbuf_m_out_buf]

.endm
#endif
#endif
diff --git a/src/spdk/isa-l/igzip/aarch64/data_struct_aarch64.h b/src/spdk/isa-l/igzip/aarch64/data_struct_aarch64.h new file mode 100644 index 000000000..71160fe1b --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/data_struct_aarch64.h @@ -0,0 +1,226 @@
/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.
* Neither the name of Arm Corporation nor the names of its
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
#ifndef __AARCH64_DATA_STRUCT_H__
#define __AARCH64_DATA_STRUCT_H__
#ifdef __ASSEMBLY__

/*
 * Structure-layout helpers.
 *
 * START_STRUCT resets the running offset and the running maximum alignment.
 * FIELD rounds the running offset up to <align>, binds <name> to that offset,
 * advances the offset by <size> and records the largest alignment seen.
 * END_STRUCT publishes _<name>_size (the running offset, NOT rounded up to
 * the struct alignment) and _<name>_align.
 * Field order below therefore defines the offsets; do not reorder.
 * Macro arguments are parenthesised so that compound expressions passed as
 * <size> or <align> cannot change the meaning of the surrounding expression.
 */
#define FIELD(name,size,align) \
	.set _FIELD_OFFSET,(_FIELD_OFFSET + (align) - 1) & (~ ((align)-1));	\
	.equ name,_FIELD_OFFSET ;	\
	.set _FIELD_OFFSET,_FIELD_OFFSET + (size);	\
	.if (align) > _STRUCT_ALIGN;	\
	.set _STRUCT_ALIGN, (align);	\
	.endif;

#define START_STRUCT(name) .set _FIELD_OFFSET,0;.set _STRUCT_ALIGN,0;

#define END_STRUCT(name) .set _##name##_size,_FIELD_OFFSET;\
	.set _##name##_align,_STRUCT_ALIGN

/* Bind <name> to a constant expression. */
#define CONST(name,value) .equ name,value


/// BitBuf2
START_STRUCT(BitBuf2)
	///	name		size	align
	FIELD (	_m_bits,	8,	8 )
	FIELD (	_m_bit_count,	4,	4 )
	FIELD (	_m_out_buf,	8,	8 )
	FIELD (	_m_out_end,	8,	8 )
	FIELD (	_m_out_start,	8,	8 )
END_STRUCT(BitBuf2)


/// isal_mod_hist
#define HIST_ELEM_SIZE 4
START_STRUCT(isal_mod_hist)
	///	name		size			align
	FIELD (	_d_hist,	30*HIST_ELEM_SIZE,	HIST_ELEM_SIZE )
	FIELD (	_ll_hist,	513*HIST_ELEM_SIZE,	HIST_ELEM_SIZE )
END_STRUCT(isal_mod_hist)


/// hufftables_icf
#define HUFF_CODE_SIZE 4
START_STRUCT(hufftables_icf)
	///	name		size			align
	FIELD (	_dist_table,	31 * HUFF_CODE_SIZE,	HUFF_CODE_SIZE )
	FIELD (	_lit_len_table,	513 * HUFF_CODE_SIZE,	HUFF_CODE_SIZE )
END_STRUCT(hufftables_icf)


/// hash8k_buf
START_STRUCT(hash8k_buf)
	///	name		size				align
	FIELD (	_hash8k_table,	2 * IGZIP_HASH8K_HASH_SIZE,	2 )
END_STRUCT(hash8k_buf)


/// hash_map_buf
START_STRUCT(hash_map_buf)
	///	name		size				align
	FIELD (	_hash_table,	2 * IGZIP_HASH_MAP_HASH_SIZE,	2 )
	FIELD (	_matches_next,	8,				8 )
	FIELD (	_matches_end,	8,				8 )
	FIELD (	_matches,	4*4*1024,			4 )
	FIELD (	_overflow,	4*LA,				4 )
END_STRUCT(hash_map_buf)


/// level_buf
#define DEF_MAX_HDR_SIZE 328
START_STRUCT(level_buf)
	///	name			size			align
	FIELD (	_encode_tables,		_hufftables_icf_size,	_hufftables_icf_align )
	FIELD (	_hist,			_isal_mod_hist_size,	_isal_mod_hist_align )
	FIELD (	_deflate_hdr_count,	4,			4 )
	FIELD (	_deflate_hdr_extra_bits,4,			4 )
	FIELD (	_deflate_hdr,		DEF_MAX_HDR_SIZE,	1 )
	FIELD (	_icf_buf_next,		8,			8 )
	FIELD (	_icf_buf_avail_out,	8,			8 )
	FIELD (	_icf_buf_start,		8,			8 )
	FIELD (	_lvl_extra,		_hash_map_buf_size,	_hash_map_buf_align )
END_STRUCT(level_buf)


/* Offsets of nested members, relative to the start of level_buf. */
CONST( _hash8k_hash_table	, _lvl_extra + _hash8k_table )
CONST( _hash_map_hash_table	, _lvl_extra + _hash_table )
CONST( _hash_map_matches_next	, _lvl_extra + _matches_next )
CONST( _hash_map_matches_end	, _lvl_extra + _matches_end )
CONST( _hash_map_matches	, _lvl_extra + _matches )
CONST( _hist_lit_len		, _hist+_ll_hist )
CONST( _hist_dist		, _hist+_d_hist )


/// isal_zstate
START_STRUCT(isal_zstate)
	///	name			size	align
	FIELD (	_total_in_start,	4,	4 )
	FIELD (	_block_next,		4,	4 )
	FIELD (	_block_end,		4,	4 )
	FIELD (	_dist_mask,		4,	4 )
	FIELD (	_hash_mask,		4,	4 )
	FIELD (	_state,			4,	4 )
	FIELD (	_bitbuf,		_BitBuf2_size,	_BitBuf2_align )
	FIELD (	_crc,			4,	4 )
	FIELD (	_has_wrap_hdr,		1,	1 )
	FIELD (	_has_eob_hdr,		1,	1 )
	FIELD (	_has_eob,		1,	1 )
	FIELD (	_has_hist,		1,	1 )
	FIELD (	_has_level_buf_init,	2,	2 )
	FIELD (	_count,			4,	4 )
	FIELD (	_tmp_out_buff,		16,	1 )
	FIELD (	_tmp_out_start,		4,	4 )
	FIELD (	_tmp_out_end,		4,	4 )
	FIELD (	_b_bytes_valid,		4,	4 )
	FIELD (	_b_bytes_processed,	4,	4 )
	FIELD (	_buffer,		BSIZE,	1 )
	FIELD (	_head,			IGZIP_LVL0_HASH_SIZE*2,	2 )
END_STRUCT(isal_zstate)



/* Offsets of the BitBuf2 members, relative to the start of isal_zstate. */
CONST( _bitbuf_m_bits		, _bitbuf+_m_bits )
CONST( _bitbuf_m_bit_count	, _bitbuf+_m_bit_count )
CONST( _bitbuf_m_out_buf	, _bitbuf+_m_out_buf )
CONST( _bitbuf_m_out_end	, _bitbuf+_m_out_end )
CONST( _bitbuf_m_out_start	, _bitbuf+_m_out_start )


/// isal_zstream
START_STRUCT(isal_zstream)
	///	name			size	align
	FIELD (	_next_in,		8,	8 )
	FIELD (	_avail_in,		4,	4 )
	FIELD (	_total_in,		4,	4 )
	FIELD (	_next_out,		8,	8 )
	FIELD (	_avail_out,		4,	4 )
	FIELD (	_total_out,		4,	4 )
	FIELD (	_hufftables,		8,	8 )
	FIELD (	_level,			4,	4 )
	FIELD (	_level_buf_size,	4,	4 )
	FIELD (	_level_buf,		8,	8 )
	FIELD (	_end_of_stream,		2,	2 )
	FIELD (	_flush,			2,	2 )
	FIELD (	_gzip_flag,		2,	2 )
	FIELD (	_hist_bits,		2,	2 )
	FIELD (	_internal_state,	_isal_zstate_size,	_isal_zstate_align )
END_STRUCT(isal_zstream)



/* Offsets of isal_zstate members, relative to the start of isal_zstream. */
CONST( _internal_state_total_in_start		, _internal_state+_total_in_start )
CONST( _internal_state_block_next		, _internal_state+_block_next )
CONST( _internal_state_block_end		, _internal_state+_block_end )
CONST( _internal_state_b_bytes_valid		, _internal_state+_b_bytes_valid )
CONST( _internal_state_b_bytes_processed	, _internal_state+_b_bytes_processed )
CONST( _internal_state_crc			, _internal_state+_crc )
CONST( _internal_state_dist_mask		, _internal_state+_dist_mask )
CONST( _internal_state_hash_mask		, _internal_state+_hash_mask )
CONST( _internal_state_bitbuf			, _internal_state+_bitbuf )
CONST( _internal_state_state			, _internal_state+_state )
CONST( _internal_state_count			, _internal_state+_count )
CONST( _internal_state_tmp_out_buff		, _internal_state+_tmp_out_buff )
CONST( _internal_state_tmp_out_start		, _internal_state+_tmp_out_start )
CONST( _internal_state_tmp_out_end		, _internal_state+_tmp_out_end )
CONST( _internal_state_has_wrap_hdr		, _internal_state+_has_wrap_hdr )
CONST( _internal_state_has_eob			, _internal_state+_has_eob )
CONST( _internal_state_has_eob_hdr		, _internal_state+_has_eob_hdr )
CONST( _internal_state_has_hist			, _internal_state+_has_hist )
CONST( _internal_state_has_level_buf_init	, _internal_state+_has_level_buf_init )
CONST( _internal_state_buffer			, _internal_state+_buffer )
CONST( _internal_state_head			, _internal_state+_head )
CONST( _internal_state_bitbuf_m_bits		, _internal_state+_bitbuf_m_bits )
CONST( _internal_state_bitbuf_m_bit_count	, _internal_state+_bitbuf_m_bit_count )
CONST( _internal_state_bitbuf_m_out_buf		, _internal_state+_bitbuf_m_out_buf )
CONST( _internal_state_bitbuf_m_out_end		, _internal_state+_bitbuf_m_out_end )
CONST( _internal_state_bitbuf_m_out_start	, _internal_state+_bitbuf_m_out_start )

/// Internal States
CONST( ZSTATE_NEW_HDR			, 0 )
CONST( ZSTATE_HDR			, (ZSTATE_NEW_HDR + 1) )
CONST( ZSTATE_CREATE_HDR		, (ZSTATE_HDR + 1) )
CONST( ZSTATE_BODY			, (ZSTATE_CREATE_HDR + 1) )
CONST( ZSTATE_FLUSH_READ_BUFFER		, (ZSTATE_BODY + 1) )
CONST( ZSTATE_FLUSH_ICF_BUFFER		, (ZSTATE_FLUSH_READ_BUFFER + 1) )
CONST( ZSTATE_TYPE0_HDR			, (ZSTATE_FLUSH_ICF_BUFFER + 1) )
CONST( ZSTATE_TYPE0_BODY		, (ZSTATE_TYPE0_HDR + 1) )
CONST( ZSTATE_SYNC_FLUSH		, (ZSTATE_TYPE0_BODY + 1) )
CONST( ZSTATE_FLUSH_WRITE_BUFFER	, (ZSTATE_SYNC_FLUSH + 1) )
CONST( ZSTATE_TRL			, (ZSTATE_FLUSH_WRITE_BUFFER + 1) )

CONST( _NO_FLUSH	, 0 )
CONST( _SYNC_FLUSH	, 1 )
CONST( _FULL_FLUSH	, 2 )
CONST( _STORED_BLK	, 0 )
CONST( IGZIP_NO_HIST	, 0 )
CONST( IGZIP_HIST	, 1 )
CONST( IGZIP_DICT_HIST	, 2 )
#endif
#endif
diff --git a/src/spdk/isa-l/igzip/aarch64/huffman_aarch64.h b/src/spdk/isa-l/igzip/aarch64/huffman_aarch64.h new file mode 100644 index 000000000..c5ef2555c --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/huffman_aarch64.h @@ -0,0 +1,154 @@
/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.
* Neither the name of Arm Corporation nor the names of its
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#ifndef __HUFFMAN_AARCH64_H__
#define __HUFFMAN_AARCH64_H__

#ifdef __ASSEMBLY__
#ifdef LONGER_HUFFTABLE
	#if (D > 8192)
		#error History D is larger than 8K
	#else
		#define DIST_TABLE_SIZE 8192
		#define DECODE_OFFSET 26
	#endif
#else
	#define DIST_TABLE_SIZE 2
	#define DECODE_OFFSET 0
#endif

#define LEN_TABLE_SIZE 256
#define LIT_TABLE_SIZE 257

/*
 * Byte offsets of the code tables from the hufftables base pointer.
 * The numeric examples assume ISAL_DEF_MAX_HDR_SIZE == 328 and the default
 * DIST_TABLE_SIZE of 2.  DIST_TABLE_OFFSET and LEN_TABLE_OFFSET are biased by
 * one and three entries respectively so the macros below can index with the
 * raw distance / match length.
 */
#define DIST_TABLE_START (ISAL_DEF_MAX_HDR_SIZE + 8)	//328+8 = 336
#define DIST_TABLE_OFFSET (DIST_TABLE_START - 4 * 1)	//336-4 = 332
#define LEN_TABLE_OFFSET (DIST_TABLE_START + DIST_TABLE_SIZE * 4 - 4*3)	//336 + 2*4 - 4*3 = 332
#define LIT_TABLE_OFFSET (DIST_TABLE_START + 4 * DIST_TABLE_SIZE + 4 * LEN_TABLE_SIZE)
#define LIT_TABLE_SIZES_OFFSET (LIT_TABLE_OFFSET + 2 * LIT_TABLE_SIZE)
#define DCODE_TABLE_OFFSET (LIT_TABLE_SIZES_OFFSET + LIT_TABLE_SIZE + 1 - DECODE_OFFSET * 2)
#define DCODE_TABLE_SIZE_OFFSET (DCODE_TABLE_OFFSET + 2 * 30 - DECODE_OFFSET)

#define IGZIP_DECODE_OFFSET 0
#define IGZIP_DIST_TABLE_SIZE 2

/*
 * Conventions for every macro in this file:
 *   - <hufftables> is a full register name (used as written);
 *   - all other arguments are alias stems: the macro accesses x_<arg> and
 *     w_<arg>, as created by declare_generic_reg in the including file;
 *   - branch targets are assembler-local and made unique per expansion
 *     with \@, so a macro may be expanded any number of times per file.
 */

/*
 * get_len_code - look up the length code for <length>.
 * Loads a 32-bit entry at LEN_TABLE_OFFSET + 4*length and splits it:
 * code = entry >> 5, code_len = entry & 0x1f.
 * x_<length> is used as the index, so its upper 32 bits must be zero.
 * Clobbers: tmp0.  Flags are not modified.
 */
.macro get_len_code hufftables:req,length:req,code:req,code_len:req,tmp0:req
	add	x_\tmp0,\hufftables,LEN_TABLE_OFFSET
	ldr	w_\code_len,[x_\tmp0,x_\length,lsl 2]
	lsr	w_\code, w_\code_len , 5
	and	x_\code_len,x_\code_len,0x1f
.endm

/*
 * get_lit_code - look up the literal code for <lit>.
 * code     = 16-bit entry at LIT_TABLE_OFFSET + 2*lit
 * code_len =  8-bit entry at LIT_TABLE_SIZES_OFFSET + lit
 * x_<lit> is used as the index, so its upper 32 bits must be zero.
 * No temporaries; flags are not modified.
 */
.macro get_lit_code hufftables:req,lit:req,code:req,code_len:req
	add	x_\code,\hufftables,LIT_TABLE_OFFSET
	ldrh	w_\code,[x_\code,x_\lit,lsl 1]
	add	x_\code_len,\hufftables,LIT_TABLE_SIZES_OFFSET
	ldrb	w_\code_len,[x_\code_len,x_\lit]
.endm

/*
 * get_dist_code - produce the distance code (with extra bits) for <dist>.
 *
 * dist <= DIST_TABLE_SIZE: direct lookup of a packed 32-bit entry
 *   (code = entry >> 5, code_len = entry & 0x1f).
 * Otherwise the distance symbol and extra bits are computed and the symbol
 *   is looked up in the dcode tables; the extra bits are appended above the
 *   symbol's code.
 *
 * Clobbers: dist (on the computed path), tmp0, tmp1, tmp2, condition flags.
 */
.macro get_dist_code hufftables:req,dist:req,code:req,code_len:req,tmp0:req,tmp1:req,tmp2:req
	cmp	w_\dist,DIST_TABLE_SIZE
	bhi	.Lcompute_dist_code_\@
	add	x_\tmp0,\hufftables,DIST_TABLE_OFFSET
	ldr	w_\code_len,[x_\tmp0,x_\dist,lsl 2]
	lsr	w_\code, w_\code_len , 5
	and	x_\code_len,x_\code_len,0x1f
	b	.Lend_get_dist_code_\@
.Lcompute_dist_code_\@:
	and	w_\dist,w_\dist,0xffff
	sub	w_\dist,w_\dist,1
	clz	w_\tmp0,w_\dist
	mov	w_\tmp1,30
	sub	w_\tmp0,w_\tmp1,w_\tmp0		//tmp0 = num_extra_bits = 30 - clz(dist - 1)
	mov	w_\tmp1,1
	lsl	w_\tmp1,w_\tmp1,w_\tmp0
	sub	w_\tmp1,w_\tmp1,1
	and	w_\tmp1,w_\tmp1,w_\dist		//tmp1 = extra_bits = (dist - 1) & ((1 << num_extra_bits) - 1)
	asr	w_\dist,w_\dist,w_\tmp0
	lsl	w_\tmp2,w_\tmp0,1
	add	w_\tmp2,w_\dist,w_\tmp2		//tmp2 = sym = ((dist - 1) >> num_extra_bits) + 2 * num_extra_bits

	add	x_\code,\hufftables,DCODE_TABLE_OFFSET - IGZIP_DECODE_OFFSET*2
	add	x_\code_len,\hufftables,DCODE_TABLE_SIZE_OFFSET - IGZIP_DECODE_OFFSET
	ldrh	w_\code,[x_\code,x_\tmp2,lsl 1]
	ldrb	w_\code_len,[x_\code_len,x_\tmp2]
	lsl	w_\tmp1,w_\tmp1,w_\code_len	//place extra bits above the symbol code
	orr	w_\code,w_\code,w_\tmp1
	add	w_\code_len,w_\code_len,w_\tmp0

.Lend_get_dist_code_\@:
.endm


/*
 * compare_258_bytes - length of the common prefix of str0 and str1,
 * capped at 258.
 *
 * Compares 8 bytes per iteration: the XOR of the two words is bit-reversed
 * and clz'd, so (result >> 3) is the number of leading equal bytes
 * (8 when the words are identical).  The loop continues while the last
 * step matched all 8 bytes and match_length <= 257.
 * Each iteration loads 8 bytes from both strings, so both must be readable
 * a few bytes beyond offset 258.
 * Clobbers: tmp0, tmp1, condition flags.
 */
.macro compare_258_bytes str0:req,str1:req,match_length:req,tmp0:req,tmp1:req
	mov	x_\match_length,0
.Lcompare_258_loop_\@:
	ldr	x_\tmp0,[x_\str0,x_\match_length]
	ldr	x_\tmp1,[x_\str1,x_\match_length]
	eor	x_\tmp0,x_\tmp1,x_\tmp0
	rbit	x_\tmp0,x_\tmp0
	clz	x_\tmp0,x_\tmp0
	lsr	x_\tmp0,x_\tmp0,3		//tmp0 = equal bytes in this 8-byte step
	add	x_\match_length,x_\match_length,x_\tmp0


	// loop while (match_length <= 257 && tmp0 == 8)
	cmp	x_\match_length,257
	ccmp	x_\tmp0,8,0,ls
	beq	.Lcompare_258_loop_\@

	// match_length = min(match_length, 258)
	cmp	x_\match_length,258
	mov	x_\tmp1,258
	csel	x_\match_length,x_\match_length,x_\tmp1,ls
.endm

/*
 * compare_max_258_bytes - like compare_258_bytes, but the result is capped
 * at min(max_length, 258).
 *
 * max_length is overwritten with min(max_length, 258).
 * NOTE(review): the loop still loads 8 bytes per step and only checks
 * match_length < max_length afterwards, so it can read up to 7 bytes past
 * str + max_length; callers must make sure that memory is readable.
 * Clobbers: max_length, tmp0, tmp1, condition flags.
 */
.macro compare_max_258_bytes str0:req,str1:req,max_length:req,match_length:req,tmp0:req,tmp1:req
	mov	x_\match_length,0
	mov	x_\tmp0,258
	cmp	x_\max_length,x_\tmp0
	csel	x_\max_length,x_\max_length,x_\tmp0,ls
.Lcompare_max_258_loop_\@:
	ldr	x_\tmp0,[x_\str0,x_\match_length]
	ldr	x_\tmp1,[x_\str1,x_\match_length]
	eor	x_\tmp0,x_\tmp1,x_\tmp0
	rbit	x_\tmp0,x_\tmp0
	clz	x_\tmp0,x_\tmp0
	lsr	x_\tmp0,x_\tmp0,3		//tmp0 = equal bytes in this 8-byte step
	add	x_\match_length,x_\match_length,x_\tmp0


	// loop while (max_length > match_length && tmp0 == 8)
	cmp	x_\max_length,x_\match_length
	ccmp	x_\tmp0,8,0,hi
	beq	.Lcompare_max_258_loop_\@

	// match_length = min(match_length, max_length)
	cmp	x_\match_length,x_\max_length
	csel	x_\match_length,x_\match_length,x_\max_length,ls
.endm

#endif
#endif
diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S b/src/spdk/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S new file mode 100644 index
000000000..9f0e8cd73 --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_deflate_body_aarch64.S @@ -0,0 +1,261 @@
/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.
* Neither the name of Arm Corporation nor the names of its
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
	.arch armv8-a+crc
	.text
	.align 2
#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"
/*
declare Macros
*/

/*
 * declare_generic_reg - create three aliases for register number <reg>:
 *   <name>    -> <default><reg>   (natural width, 'x' or 'w')
 *   w_<name>  -> w<reg>
 *   x_<name>  -> x<reg>
 */
.macro declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
 * update_state - write the input/output progress back into the stream.
 *
 *   if (next_in > start_in) state->has_hist = IGZIP_HIST;
 *   stream->next_in    = next_in;
 *   stream->total_in  += next_in - start_in;
 *   stream->avail_in   = end_in - next_in;
 *   bytes_written      = m_out_buf - m_out_start;
 *   stream->next_out  += bytes_written;
 *   stream->total_out += bytes_written;
 *   stream->avail_out -= bytes_written;
 *
 * <stream> is a full register name; the other arguments are alias stems
 * (x_<arg>/w_<arg>).  tmp1 is accepted for interface compatibility but unused.
 * Clobbers: start_in, next_in, end_in, m_out_buf, m_out_start, tmp0, flags.
 */
.macro update_state stream:req,start_in:req,next_in:req,end_in:req,m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req

	//m_out_buf=bytes_written
	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
	cmp	x_\next_in,x_\start_in
	bls	.Lskip_has_hist_\@
	mov	w_\tmp0,IGZIP_HIST
	strb	w_\tmp0,[\stream,_internal_state_has_hist]
.Lskip_has_hist_\@:
	ldr	w_\tmp0,[\stream,_total_in]
	ldr	x_\m_out_start,[\stream,_next_out]	//m_out_start = next_out

	str	x_\next_in,[\stream,_next_in]
	sub	x_\start_in,x_\next_in,x_\start_in	//start_in = bytes consumed
	sub	x_\end_in,x_\end_in,x_\next_in		//end_in = bytes still available
	add	w_\tmp0,w_\tmp0,w_\start_in
	stp	w_\end_in,w_\tmp0,[\stream,_avail_in]	//avail_in and total_in are adjacent
	//next_in=avail_out,start_in=total_out
	ldp	w_\next_in,w_\start_in,[\stream,_avail_out]
	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
	str	x_\m_out_start,[\stream,_next_out]
	add	w_\start_in,w_\start_in,w_\m_out_buf
	sub	w_\next_in,w_\next_in,w_\m_out_buf
	stp	w_\next_in,w_\start_in,[\stream,_avail_out]
.endm


	.global	isal_deflate_body_aarch64
	.type	isal_deflate_body_aarch64, %function
/*
	void isal_deflate_body_aarch64(struct isal_zstream *stream)

	In:  x0 = stream.
	Compresses input while more than 289 bytes remain (the margin keeps the
	wide loads of the match search inside the input; presumably LA + 1 -
	TODO confirm against lz0a_const_aarch64.h), writing literal and
	length/distance codes through the bit buffer, then stores progress back
	into the stream.  If end_of_stream or flush is set, the state becomes
	ZSTATE_FLUSH_READ_BUFFER.

	Registers x19-x28 are used; saving and restoring them is delegated to
	push_stack/pop_stack from stdmac_aarch64.h (not visible here).
	Several aliases intentionally share a register whose previous value is
	dead at that point (hash/code_len -> 24, end_of_stream/code2 -> 10).
*/
	declare_generic_reg	stream,		0,x	//struct isal_zstream *stream
	declare_generic_reg	state,		8,x	//&stream->state
	declare_generic_reg	avail_in,	9,w
	declare_generic_reg	end_of_stream,	10,w	//can be used in loop

	declare_generic_reg	hash_mask,	11,w
	declare_generic_reg	match_length,	12,w
	declare_generic_reg	hufftables,	13,x

	declare_generic_reg	m_out_buf,	14,x
	declare_generic_reg	m_out_start,	15,x
	declare_generic_reg	m_out_end,	16,x
	declare_generic_reg	m_bits,		17,x
	//x18 is the AAPCS64 platform register and must not be used in portable
	//code; x1 is caller-saved and otherwise unused in this function.
	declare_generic_reg	m_bit_count,	1,w

	declare_generic_reg	start_in,	19,x
	declare_generic_reg	end_in,		20,x
	declare_generic_reg	next_in,	21,x
	declare_generic_reg	loop_end_cnt,	22,x

	declare_generic_reg	literal,	23,w
	declare_generic_reg	hash,		24,w
	declare_generic_reg	dist,		25,w

	declare_generic_reg	last_seen,	26,x
	declare_generic_reg	file_start,	27,x
	declare_generic_reg	hist_size,	28,w

	declare_generic_reg	tmp0,		5,w
	declare_generic_reg	tmp1,		6,w
	declare_generic_reg	tmp2,		7,w

	declare_generic_reg	code,		3,x
	declare_generic_reg	code_len,	24,x
	declare_generic_reg	code2,		10,x
	declare_generic_reg	code_len2,	4,x


isal_deflate_body_aarch64:
	//save registers
	push_stack
	ldr	avail_in, [stream, _avail_in]
	cbz	avail_in, exit_save_state

	// set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	ldr	w_m_out_end,[stream,_avail_out]
	ldr	m_out_buf,[stream,_next_out]
	add	m_out_end,m_out_buf,w_m_out_end,uxtw
	sub	m_out_end,m_out_end , 8		//keep room for one 8-byte store
	mov	m_out_start,m_out_buf
	//m_out_buf and m_out_end are adjacent 8-byte fields
	stp	m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
	str	m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
	ldr	m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
	ldr	m_bits ,[stream,_internal_state_bitbuf_m_bits]


	//init variables
	//last_seen=&stream.internal_state.head = _internal_state+_head
	//(added in two steps because the offset does not fit one immediate)
	add	last_seen,stream,65536
	add	last_seen,last_seen,_internal_state+_head -65536


	//start_in=stream->next_in;next_in=start_in
	ldr	start_in,[stream,_next_in]
	mov	next_in,start_in
	add	end_in,start_in,avail_in,uxtw	//avail_in reg is free now
	sub	loop_end_cnt,end_in,289		//loop end


	//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
	ldr	w_file_start,[stream,_total_in]
	sub	file_start,next_in,file_start,uxtw

	//uint32_t hist_size = state->dist_mask;
	ldr	hist_size,[stream,_internal_state + _dist_mask]

	//uint32_t hash_mask = state->hash_mask;
	ldr	hash_mask,[stream,_internal_state + _hash_mask]

	ldr	hufftables,[stream,_hufftables]

	//skip the main loop when fewer than 289 bytes are available
	cmp	next_in,loop_end_cnt
	bhi	main_loop_end
main_loop_start:
	//is_full(&state->bitbuf)
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//hash the next 4 input bytes
	ldr	literal,[next_in]
	crc32cw	hash,wzr,literal
	and	hash,hash,hash_mask

	///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
	ldrh	w_tmp0,[last_seen,x_hash,lsl 1]	//tmp_w last_seen[hash]
	sub	x_dist,next_in,file_start
	//last_seen[hash] = (uint64_t) (next_in - file_start);
	strh	dist,[last_seen,x_hash,lsl 1]
	sub	dist,dist,w_tmp0
	and	dist,dist,0xffff

	//if (dist - 1 >= hist_size) emit a literal (also catches dist == 0)
	sub	w_tmp0,dist,1
	cmp	hist_size,w_tmp0
	bls	get_lit_code

	///match_length = compare258(next_in - dist, next_in, 258);
	sub	x_tmp2,next_in,x_dist
	compare_258_bytes	tmp2,next_in,match_length,tmp0,tmp1
	//matches of 3 bytes or fewer are emitted as a literal
	cmp	match_length,3
	bls	get_lit_code

	//record the hashes of the next two positions as well
	sub	x_tmp0,next_in,file_start
	ldr	literal,[next_in,1]
	crc32cw	hash,wzr,literal
	and	hash,hash,hash_mask
	add	tmp0,tmp0,1
	strh	tmp0,[last_seen,x_hash,lsl 1]

	ldr	literal,[next_in,2]
	crc32cw	hash,wzr,literal
	and	hash,hash,hash_mask
	add	tmp0,tmp0,1
	strh	tmp0,[last_seen,x_hash,lsl 1]

	//get_len_code(stream->hufftables, match_length, &code,
	//	&code_len);
	get_len_code	hufftables,match_length,code,code_len,tmp0

	//get_dist_code(stream->hufftables, dist, &code2, &code_len2);
	get_dist_code	hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2

	//code |= code2 << code_len;
	//code_len += code_len2;
	lsl	code2,code2,code_len
	orr	code,code,code2
	add	code_len,code_len,code_len2

	//next_in += match_length;
	add	next_in,next_in,match_length,uxtw

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf



	cmp	next_in,loop_end_cnt
	bls	main_loop_start
	b	main_loop_end
get_lit_code:
	//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
	and	literal,literal,0xff
	get_lit_code	hufftables,literal,code,code_len

	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	main_loop_start

main_loop_end:
	//update state here

	//load end_of_stream and flush together (two adjacent 16-bit fields)
	ldr	w_end_of_stream, [stream, _end_of_stream]
	//(stream->end_of_stream || stream->flush != 0)
	cbz	w_end_of_stream, update_state_exit
	mov	w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
	str	w_tmp0, [stream, _internal_state+_state]
update_state_exit:
	update_state	stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
exit_ret:
	pop_stack
	ret
exit_save_state:
	//no input: only the state transition is needed
	ldr	w_end_of_stream, [stream, _end_of_stream]
	cbz	w_end_of_stream, exit_ret	//(stream->end_of_stream || stream->flush != 0)
	mov	w_tmp0 , ZSTATE_FLUSH_READ_BUFFER
	str	w_tmp0, [stream, _internal_state+_state]
	b	exit_ret
	.size	isal_deflate_body_aarch64, .-isal_deflate_body_aarch64
diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S b/src/spdk/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S new file mode 100644 index 000000000..671091346 --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_deflate_finish_aarch64.S @@ -0,0 +1,264 @@
/**********************************************************************
Copyright(c) 2019 Arm Corporation All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
  notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
  notice, this list of conditions and the following disclaimer in
  the documentation and/or other materials provided with the
  distribution.
* Neither the name of Arm Corporation nor the names of its
  contributors may be used to endorse or promote products derived
  from this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/
	.arch armv8-a+crc
	.text
	.align 2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"


/*
declare Macros
*/

/*
 * declare_generic_reg - create three aliases for register number <reg>:
 *   <name>    -> <default><reg>   (natural width, 'x' or 'w')
 *   w_<name>  -> w<reg>
 *   x_<name>  -> x<reg>
 */
.macro declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

/*
 * update_state - write the input/output progress back into the stream.
 *
 *   if (next_in > start_in) state->has_hist = IGZIP_HIST;
 *   stream->next_in    = next_in;
 *   stream->total_in  += next_in - start_in;
 *   stream->avail_in   = end_in - next_in;
 *   bytes_written      = m_out_buf - m_out_start;
 *   stream->next_out  += bytes_written;
 *   stream->total_out += bytes_written;
 *   stream->avail_out -= bytes_written;
 *
 * <stream> is a full register name; the other arguments are alias stems
 * (x_<arg>/w_<arg>).  tmp1 is accepted for interface compatibility but unused.
 * Clobbers: start_in, next_in, end_in, m_out_buf, m_out_start, tmp0, flags.
 */
.macro update_state stream:req,start_in:req,next_in:req,end_in:req,m_out_buf:req,m_out_start:req,tmp0:req,tmp1:req

	//m_out_buf=bytes_written
	sub	x_\m_out_buf,x_\m_out_buf,x_\m_out_start
	cmp	x_\next_in,x_\start_in
	bls	.Lskip_has_hist_\@
	mov	w_\tmp0,IGZIP_HIST
	strb	w_\tmp0,[\stream,_internal_state_has_hist]
.Lskip_has_hist_\@:
	ldr	w_\tmp0,[\stream,_total_in]
	ldr	x_\m_out_start,[\stream,_next_out]	//m_out_start = next_out

	str	x_\next_in,[\stream,_next_in]
	sub	x_\start_in,x_\next_in,x_\start_in	//start_in = bytes consumed
	sub	x_\end_in,x_\end_in,x_\next_in		//end_in = bytes still available
	add	w_\tmp0,w_\tmp0,w_\start_in
	stp	w_\end_in,w_\tmp0,[\stream,_avail_in]	//avail_in and total_in are adjacent
	//next_in=avail_out,start_in=total_out
	ldp	w_\next_in,w_\start_in,[\stream,_avail_out]
	add	x_\m_out_start,x_\m_out_start,x_\m_out_buf
	str	x_\m_out_start,[\stream,_next_out]
	add	w_\start_in,w_\start_in,w_\m_out_buf
	sub	w_\next_in,w_\next_in,w_\m_out_buf
	stp	w_\next_in,w_\start_in,[\stream,_avail_out]
.endm
	.global	isal_deflate_finish_aarch64
	.type	isal_deflate_finish_aarch64, %function
/*
	void isal_deflate_finish_aarch64(struct isal_zstream *stream)

	In:  x0 = stream.
	Compresses all remaining input in two phases:
	  1. while at least 4 bytes remain: hash-based match search, emitting
	     literal or length/distance codes;
	  2. the last (up to 3) bytes are emitted as plain literals.
	If the output buffer is not full afterwards, the end-of-block symbol
	(256) is written, has_eob is set and the state becomes ZSTATE_TRL when
	end_of_stream == 1, otherwise ZSTATE_SYNC_FLUSH.  Progress is then
	stored back into the stream.

	Registers x19-x28 are used; saving and restoring them is delegated to
	push_stack/pop_stack from stdmac_aarch64.h (not visible here).
	Several aliases intentionally share a register whose previous value is
	dead at that point (hash/code_len -> 24, end_of_stream/code2 -> 10).
*/
	declare_generic_reg	stream,		0,x	//struct isal_zstream *stream
	declare_generic_reg	state,		8,x	//&stream->state
	declare_generic_reg	avail_in,	9,w
	declare_generic_reg	end_of_stream,	10,w	//can be used in loop

	declare_generic_reg	hash_mask,	11,w
	declare_generic_reg	match_length,	12,w
	declare_generic_reg	hufftables,	13,x

	declare_generic_reg	m_out_buf,	14,x
	declare_generic_reg	m_out_start,	15,x
	declare_generic_reg	m_out_end,	16,x
	declare_generic_reg	m_bits,		17,x
	//x18 is the AAPCS64 platform register and must not be used in portable
	//code; x1 is caller-saved and otherwise unused in this function.
	declare_generic_reg	m_bit_count,	1,w

	declare_generic_reg	start_in,	19,x
	declare_generic_reg	end_in,		20,x
	declare_generic_reg	next_in,	21,x
	declare_generic_reg	loop_end_cnt,	22,x

	declare_generic_reg	literal,	23,w
	declare_generic_reg	hash,		24,w
	declare_generic_reg	dist,		25,w

	declare_generic_reg	last_seen,	26,x
	declare_generic_reg	file_start,	27,x
	declare_generic_reg	hist_size,	28,w

	declare_generic_reg	tmp0,		5,w
	declare_generic_reg	tmp1,		6,w
	declare_generic_reg	tmp2,		7,w

	declare_generic_reg	code,		3,x
	declare_generic_reg	code_len,	24,x
	declare_generic_reg	code2,		10,x
	declare_generic_reg	code_len2,	4,x


isal_deflate_finish_aarch64:
	//save registers
	push_stack

	// set_buf(&state->bitbuf, stream->next_out, stream->avail_out);
	ldr	w_m_out_end,[stream,_avail_out]
	ldr	m_out_buf,[stream,_next_out]
	add	m_out_end,m_out_buf,w_m_out_end,uxtw
	sub	m_out_end,m_out_end , 8		//keep room for one 8-byte store
	mov	m_out_start,m_out_buf
	//m_out_buf and m_out_end are adjacent 8-byte fields
	stp	m_out_buf,m_out_end,[stream, _bitbuf + _internal_state + _m_out_buf]
	str	m_out_start,[stream, _bitbuf + _internal_state + _m_out_start]
	ldr	m_bit_count ,[stream,_internal_state_bitbuf_m_bit_count]
	ldr	m_bits ,[stream,_internal_state_bitbuf_m_bits]

	//init variables
	//last_seen=&stream.internal_state.head = _internal_state+_head
	//(added in two steps because the offset does not fit one immediate)
	add	last_seen,stream,65536
	add	last_seen,last_seen,_internal_state+_head -65536


	//start_in=stream->next_in;next_in=start_in
	ldr	avail_in, [stream, _avail_in]
	ldr	start_in,[stream,_next_in]
	mov	next_in,start_in
	add	end_in,start_in,avail_in,uxtw	//avail_in reg is free now
	ldr	hufftables,[stream,_hufftables]
	//no input at all: go straight to writing the end-of-block symbol
	cbz	avail_in, update_not_full


	sub	loop_end_cnt,end_in,4		//loop end


	//file_start = (uint8_t *) ((uintptr_t) stream->next_in - stream->total_in);
	ldr	w_file_start,[stream,_total_in]
	sub	file_start,next_in,file_start,uxtw

	//uint32_t hist_size = state->dist_mask;
	ldr	hist_size,[stream,_internal_state + _dist_mask]

	//uint32_t hash_mask = state->hash_mask;
	ldr	hash_mask,[stream,_internal_state + _hash_mask]

	//skip the main loop when fewer than 4 bytes are available
	cmp	next_in,loop_end_cnt
	bhi	main_loop_end
main_loop_start:
	//is_full(&state->bitbuf)
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//hash the next 4 input bytes (in bounds: next_in <= end_in - 4)
	ldr	literal,[next_in]
	crc32cw	hash,wzr,literal
	and	hash,hash,hash_mask

	///dist = (next_in - file_start - last_seen[hash]) & 0xFFFF;
	ldrh	w_tmp0,[last_seen,x_hash,lsl 1]	//tmp_w last_seen[hash]
	sub	x_dist,next_in,file_start
	//last_seen[hash] = (uint64_t) (next_in - file_start);
	strh	dist,[last_seen,x_hash,lsl 1]
	sub	dist,dist,w_tmp0
	and	dist,dist,0xffff

	//if (dist - 1 >= hist_size) emit a literal (also catches dist == 0)
	sub	w_tmp0,dist,1
	cmp	hist_size,w_tmp0
	bls	get_lit_code

	/// match_length = compare258(next_in - dist, next_in, end_in - next_in);
	sub	x_tmp2,next_in,x_dist
	sub	x_hash,end_in,next_in		//hash register reused as max_length
	compare_max_258_bytes	tmp2,next_in,hash,match_length,tmp0,tmp1
	//matches of 3 bytes or fewer are emitted as a literal
	cmp	match_length,3
	bls	get_lit_code

	get_len_code	hufftables,match_length,code,code_len,tmp0
	get_dist_code	hufftables,dist,code2,code_len2,tmp0,tmp1,tmp2

	//code |= code2 << code_len;
	//code_len += code_len2;
	lsl	code2,code2,code_len
	orr	code,code,code2
	add	code_len,code_len,code_len2

	//next_in += match_length;
	add	next_in,next_in,match_length,uxtw

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf

	cmp	next_in,loop_end_cnt
	bls	main_loop_start
	b	main_loop_end
get_lit_code:
	//get_lit_code(stream->hufftables, literal & 0xFF, &code, &code_len);
	and	literal,literal,0xff
	get_lit_code	hufftables,literal,code,code_len

	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	main_loop_start
main_loop_end:
	//tail: emit the remaining bytes (next_in <= end_in - 1) as literals
	sub	loop_end_cnt,end_in,1
	cmp	next_in,loop_end_cnt
	bhi	update_not_full
second_loop_start:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit
	//literal = *next_in; a byte load, because fewer than 4 bytes may remain
	//and a 32-bit load would read past the end of the input buffer
	ldrb	literal,[next_in]
	get_lit_code	hufftables,literal,code,code_len
	//next_in++;
	add	next_in,next_in,1

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	cmp	next_in,loop_end_cnt
	bls	second_loop_start

update_not_full:
	cmp	m_out_buf,m_out_end
	bhi	update_state_exit

	//write the end-of-block symbol (256)
	mov	literal,256
	get_lit_code	hufftables,literal,code,code_len

	//write_bits(&state->bitbuf, code, code_len);
	update_bits	stream,code,code_len,m_bits,m_bit_count,m_out_buf
	ldrh	w_end_of_stream, [stream, _end_of_stream]
	//state->has_eob = 1
	mov	w_tmp0,1
	strb	w_tmp0,[stream,_internal_state_has_eob]
	//state->state = (end_of_stream == 1) ? ZSTATE_TRL : ZSTATE_SYNC_FLUSH
	cmp	w_end_of_stream,w_tmp0
	mov	w_tmp0, ZSTATE_TRL
	mov	w_tmp1, ZSTATE_SYNC_FLUSH
	csel	w_tmp0,w_tmp0,w_tmp1,eq
	str	w_tmp0, [stream, _internal_state+_state]

update_state_exit:
	update_state	stream,start_in,next_in,end_in,m_out_buf,m_out_start,tmp0,tmp1
	pop_stack
	ret

	.size	isal_deflate_finish_aarch64, .-isal_deflate_finish_aarch64
diff
--git a/src/spdk/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S b/src/spdk/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S new file mode 100644 index 000000000..441fd4d5e --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_inflate_multibinary_arm64.S @@ -0,0 +1,32 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include "aarch64_multibinary.h" + +mbin_interface_base decode_huffman_code_block_stateless,decode_huffman_code_block_stateless_base diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S b/src/spdk/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S new file mode 100644 index 000000000..78d23940d --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_isal_adler32_neon.S @@ -0,0 +1,178 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/
	.arch armv8-a+crypto
	.text
	.align 3

/*
Macros
*/

/* Bind \name\()_q, _v, _s and _d to the q/v/s/d views of SIMD register \reg. */
.macro declare_var_vector_reg name:req,reg:req
	\name\()_q	.req	q\reg
	\name\()_v	.req	v\reg
	\name\()_s	.req	s\reg
	\name\()_d	.req	d\reg
.endm

/*
 * Reduce the 32-bit register \dest modulo const_div2 (65521):
 *   q    = (dest * const_div1) >> 47      (const_div1 = 0x80078071)
 *   dest = dest - q * const_div2
 * \tmp and \tmp\()_x must name the w and x views of one scratch register;
 * that register is clobbered.
 */
.macro mod_adler dest:req,tmp:req
	umull	\tmp\()_x,\dest,const_div1
	lsr	\tmp\()_x,\tmp\()_x,47
	msub	\dest,\tmp,const_div2,\dest
.endm

/*
	uint32_t adler32_neon(uint32_t adler32, uint8_t * start, uint32_t length);

	In:   w0 = running checksum (low half in bits 0..15, high half in 16..31)
	      x1 = first input byte
	      x2 = byte count
	Out:  w0 = updated checksum
	Uses: x0, x1, x3-x11, v2-v7, v16, v17, v20, flags

	NOTE(review): length is consumed as the full 64-bit x2 although the
	prototype above says uint32_t - confirm the caller passes a 64-bit
	(or zero-extended) value.
*/
/*
Arguments list
*/
	adler32	.req	w0
	start	.req	x1
	length	.req	x2
	.global	adler32_neon
	.type	adler32_neon, %function
adler32_neon:
/*
local variables
*/
	declare_var_vector_reg	factor0 , 6
	declare_var_vector_reg	factor1 , 7
	declare_var_vector_reg	d0 , 4
	declare_var_vector_reg	d1 , 5
	declare_var_vector_reg	adacc , 2
	declare_var_vector_reg	s2acc , 3
	declare_var_vector_reg	zero , 16
	declare_var_vector_reg	adler , 17
	/* sum2 and tmp2 intentionally share v20; their live ranges do not overlap */
	declare_var_vector_reg	sum2 , 20
	declare_var_vector_reg	tmp2 , 20

	adler0		.req	w4
	adler1		.req	w5
	adler0_x	.req	x4
	adler1_x	.req	x5
	end		.req	x0
	tmp		.req	w8
	tmp_x		.req	x8
	tmp1_x		.req	x9
	loop_cnt	.req	x10
	loop_const	.req	x11
	const_div1	.req	w6
	const_div2	.req	w7
	mov	const_div1, 32881
	movk	const_div1, 0x8007, lsl 16	/* const_div1 = 0x80078071 */
	mov	const_div2, 65521
	and	adler0, adler32, 0xffff
	lsr	adler1, adler32, 16

	lsr	loop_cnt,length,5		/* number of whole 32-byte blocks */
	adrp	x3,factors
	add	x3,x3,:lo12:factors
	ld1	{factor0_v.16b-factor1_v.16b},[x3]

	add	end,start,length
	cbz	loop_cnt,final_accum32
	/*
	 * At most 173 blocks are accumulated between two modulo reductions;
	 * presumably this keeps the 32-bit lanes from overflowing - TODO confirm.
	 */
	mov	loop_const,173

	movi	v16.4s,0			/* zero_v */

great_than_32:
	cmp	loop_cnt,173
	csel	loop_const,loop_cnt,loop_const,le
	mov	adacc_v.16b,zero_v.16b
	mov	s2acc_v.16b,zero_v.16b
	ins	adacc_v.s[0],adler0
	ins	s2acc_v.s[0],adler1
	add	tmp_x,start,loop_const,lsl 5	/* end of this batch */

accum32_neon:
	/*
	 * Load exactly the 32 bytes consumed by this iteration and advance
	 * start. The previous version pre-loaded the following block, which
	 * made the last iteration read up to 32 bytes past the input buffer.
	 */
	ld1	{d0_v.16b-d1_v.16b},[start],32

	/* s2acc += 32 * adacc (adacc as it was before this block) */
	shl	tmp2_v.4s,adacc_v.4s,5
	add	s2acc_v.4s,s2acc_v.4s,tmp2_v.4s

	/* adacc += sum of the 32 bytes */
	uaddlp	adler_v.8h,d0_v.16b
	uadalp	adler_v.8h,d1_v.16b
	uadalp	adacc_v.4s,adler_v.8h

	/* s2acc += sum of byte[i] * (32 - i), weights taken from factors */
	umull	sum2_v.8h,factor0_v.8b ,d0_v.8b
	umlal2	sum2_v.8h,factor0_v.16b,d0_v.16b
	umlal	sum2_v.8h,factor1_v.8b ,d1_v.8b
	umlal2	sum2_v.8h,factor1_v.16b,d1_v.16b
	uadalp	s2acc_v.4s,sum2_v.8h

	cmp	start,tmp_x
	bne	accum32_neon

	/* fold the vector lanes and reduce both halves modulo 65521 */
	uaddlv	adacc_d,adacc_v.4s
	uaddlv	s2acc_d,s2acc_v.4s
	fmov	adler0_x,adacc_d
	fmov	adler1_x,s2acc_d

	mod_adler	adler0,tmp
	mod_adler	adler1,tmp
	sub	loop_cnt,loop_cnt,loop_const
	cbnz	loop_cnt,great_than_32

final_accum32:
	and	length,length,31
	cbz	length,end_func

	/* scalar tail: the remaining (length % 32) bytes */
accum32_body:
	cmp	start,end
	beq	end_func
	ldrb	tmp,[start],1
	add	adler0,adler0,tmp
	add	adler1,adler1,adler0
	b	accum32_body

end_func:
	mod_adler	adler0,tmp
	mod_adler	adler1,tmp
	orr	w0,adler0,adler1,lsl 16
	ret

	.size	adler32_neon, .-adler32_neon
	.section	.rodata.cst16,"aM",@progbits,16
	.align	4
/* per-byte weights 32, 31, ..., 1 (little-endian byte order) */
factors:
	.quad	0x191a1b1c1d1e1f20
	.quad	0x1112131415161718
	.quad	0x090a0b0c0d0e0f10
	.quad	0x0102030405060708

diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c b/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c new file mode 100644 index 000000000..968b013a8 --- /dev/null +++
b/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_aarch64_dispatcher.c @@ -0,0 +1,123 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(isal_adler32) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(adler32_neon); + + return PROVIDER_BASIC(adler32); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_body) +{ + unsigned long auxval = getauxval(AT_HWCAP); + + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_body_aarch64); + + return PROVIDER_BASIC(isal_deflate_body); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_finish) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_finish_aarch64); + + return PROVIDER_BASIC(isal_deflate_finish); + +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl1) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl1) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl2) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_body_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_body_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl2) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_hist_aarch64); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_hist); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_body_lvl3) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return 
PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy); + + return PROVIDER_INFO(icf_body_hash1_fillgreedy_lazy); +} + +DEFINE_INTERFACE_DISPATCHER(isal_deflate_icf_finish_lvl3) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_deflate_icf_finish_hash_map_base); + + return PROVIDER_BASIC(isal_deflate_icf_finish_hash_map); +} + +DEFINE_INTERFACE_DISPATCHER(isal_update_histogram) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_CRC32) + return PROVIDER_INFO(isal_update_histogram_aarch64); + + return PROVIDER_BASIC(isal_update_histogram); +} diff --git a/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_arm64.S b/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_arm64.S new file mode 100644 index 000000000..3d96c731c --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/igzip_multibinary_arm64.S @@ -0,0 +1,50 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "aarch64_multibinary.h" + + +mbin_interface isal_deflate_icf_body_lvl1 +mbin_interface isal_deflate_icf_body_lvl2 +mbin_interface isal_deflate_icf_body_lvl3 +mbin_interface isal_deflate_icf_finish_lvl1 +mbin_interface isal_deflate_icf_finish_lvl2 +mbin_interface isal_deflate_icf_finish_lvl3 +mbin_interface isal_update_histogram +mbin_interface_base encode_deflate_icf , encode_deflate_icf_base +mbin_interface_base set_long_icf_fg , set_long_icf_fg_base +mbin_interface_base gen_icf_map_lh1 , gen_icf_map_h1_base +mbin_interface_base isal_deflate_hash_lvl0 , isal_deflate_hash_base +mbin_interface_base isal_deflate_hash_lvl1 , isal_deflate_hash_base +mbin_interface_base isal_deflate_hash_lvl2 , isal_deflate_hash_base +mbin_interface_base isal_deflate_hash_lvl3 , isal_deflate_hash_base + +mbin_interface isal_deflate_body +mbin_interface isal_deflate_finish +mbin_interface isal_adler32 diff --git a/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S b/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S new file mode 100644 index 000000000..217cc5b73 --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_body_hash_hist.S @@ -0,0 +1,364 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
	.arch	armv8-a+crc
	.text
	.align	2

#include "lz0a_const_aarch64.h"
#include "data_struct_aarch64.h"
#include "huffman_aarch64.h"
#include "bitbuf2_aarch64.h"
#include "stdmac_aarch64.h"

/*
 * declare_generic_reg name, reg, default
 * Creates three aliases for register number \reg:
 *   \name   -> \default\reg   (w or x view, as chosen by \default)
 *   w_\name -> w\reg
 *   x_\name -> x\reg
 */
.macro declare_generic_reg name:req,reg:req,default:req
	\name		.req	\default\reg
	w_\name		.req	w\reg
	x_\name		.req	x\reg
.endm

	.global	isal_deflate_icf_body_hash_hist_aarch64
	.type	isal_deflate_icf_body_hash_hist_aarch64, %function
/*
 * void isal_deflate_icf_body_hash_hist_aarch64(struct isal_zstream *stream);
 *
 * In:    x0 = stream
 * Frame: 80 bytes. x29/x30 and x24 are always saved; x19-x23, x25 and x26
 *        are saved only on the .stream_available path and restored before
 *        every exit from that path.
 * NOTE(review): level_buf is kept in x18, which some AArch64 platform ABIs
 *        reserve - confirm this is acceptable for every target.
 *
 * Flow:
 *   - avail_in == 0: when the 32-bit word at offset_end_of_stream (it covers
 *     both end_of_stream and flush) is non-zero, 4 is stored to state + 20;
 *     return.
 *   - otherwise, while next_in + ISAL_LOOK_AHEAD < end_in: hash 4 input
 *     bytes with crc32cw, update the 16-bit last_seen table, and append one
 *     4-byte ICF record (literal, or length/distance when a match longer
 *     than 3 bytes lies within hist_size). If the ICF buffer is full, 2 is
 *     stored to state + 20 and the loop is left.
 *   - on exit the stream and level_buf bookkeeping fields are written back.
 */

/* struct isal_zstream field offsets */
.equ	offset_next_in,			0
.equ	offset_avail_in,		8
.equ	offset_total_in,		12
.equ	offset_next_out,		16
.equ	offset_avail_out,		24
.equ	offset_total_out,		28
.equ	offset_hufftables,		32
.equ	offset_level,			40
.equ	offset_level_buf_size,		44
.equ	offset_level_buf,		48
.equ	offset_end_of_stream,		56
.equ	offset_flush,			58
.equ	offset_gzip_flag,		60
.equ	offset_hist_bits,		62
.equ	offset_state,			64
.equ	offset_state_block_end,		72
.equ	offset_state_has_hist,		135

/* struct level_buf field offsets */
.equ	offset_encode_tables,		0
.equ	offset_hist,			2176
.equ	offset_hist_d_hist,		2176
.equ	offset_hist_ll_hist,		2296
.equ	offset_deflate_hdr_count,	4348
.equ	offset_deflate_hdr_extra_bits,	4352
.equ	offset_deflate_hdr,		4356
.equ	offset_icf_buf_next,		4688
.equ	offset_icf_buf_avail_out,	4696
.equ	offset_icf_buf_start,		4704
.equ	offset_hash8k,			4712
.equ	offset_hash_hist,		4712

/* struct isal_zstate field offsets */
.equ	offset_dist_mask,		12
.equ	offset_hash_mask,		16

/* look-ahead margin kept in front of end_in by the main loop */
.equ	ISAL_LOOK_AHEAD,		288

	/* arguments */
	declare_generic_reg	stream,		0,x
	declare_generic_reg	stream_saved,	11,x

	declare_generic_reg	param0,		0,x
	declare_generic_reg	param1,		1,x
	declare_generic_reg	param2,		2,x

	/* local variables (several aliases deliberately share one register) */
	declare_generic_reg	level_buf,	18,x
	declare_generic_reg	avail_in,	13,w
	declare_generic_reg	end_in,		13,x
	declare_generic_reg	start_in,	19,x
	declare_generic_reg	next_in,	9,x
	declare_generic_reg	next_in_iter,	14,x
	declare_generic_reg	state,		24,x
	declare_generic_reg	hist_size,	22,w
	declare_generic_reg	hash_mask,	21,w
	declare_generic_reg	start_out,	12,x
	declare_generic_reg	end_out,	12,x
	declare_generic_reg	next_out,	8,x
	declare_generic_reg	file_start,	20,x
	declare_generic_reg	last_seen,	15,x
	declare_generic_reg	total_in,	25,x
	declare_generic_reg	NULL_DIST_SYM,	23,w
	declare_generic_reg	match_length,	3,x
	declare_generic_reg	dist,		7,x
	declare_generic_reg	dist_inc,	26,w	// dist - 1
	declare_generic_reg	literal,	10,x

	declare_generic_reg	tmp0,		4,x
	declare_generic_reg	tmp1,		5,x

isal_deflate_icf_body_hash_hist_aarch64:
	stp	x29, x30, [sp, -80]!
	mov	x29, sp
	str	x_state, [sp, 56]

	ldr	avail_in, [stream, offset_avail_in]
	cbnz	avail_in, .stream_available

	// no input: one 32-bit load fetches end_of_stream and flush together
	ldr	w1, [stream, offset_end_of_stream]
	cbz	w1, .done

	add	state, stream, offset_state
	b	.state_flush_read_buffer

	.align	2
.stream_available:
	stp	x19, x20, [x29, 16]
	stp	x21, x22, [x29, 32]
	str	x23, [x29, 48]
	stp	x25, x26, [x29, 64]

	ldr	level_buf, [stream, offset_level_buf]
	add	state, stream, offset_state
	mov	stream_saved, stream		// x0 is reused as scratch below
	ldr	start_in, [stream, offset_next_in]
	ldr	w_total_in, [stream, offset_total_in]

	// last_seen = (address of) level_buf->hash_hist
	mov	x0, offset_hash_hist
	add	last_seen, level_buf, x0

	ldr	x0, [level_buf, offset_icf_buf_avail_out]
	ldr	start_out, [level_buf, offset_icf_buf_next]

	mov	next_in, start_in
	and	x0, x0, -4			// round down to whole 4-byte records
	ldp	hist_size, hash_mask, [state, offset_dist_mask]
	add	end_in, start_in, x_avail_in, uxtw
	mov	next_out, start_out
	add	end_out, start_out, x0

	add	x0, next_in, ISAL_LOOK_AHEAD
	sub	file_start, start_in, total_in, uxtw
	mov	NULL_DIST_SYM, 30
	add	next_in_iter, next_in, 1
	cmp	end_in, x0
	bls	.while_loop_end

	.align	3
.while_loop:
	// ICF output buffer exhausted?
	cmp	next_out, end_out
	bcs	.state_create_hdr

	// w0 = crc32c(4 input bytes) & hash_mask
	ldr	w_literal, [next_in]
	mov	w0, w_literal
	crc32cw	w0, wzr, w0

	and	w0, w0, hash_mask
	sub	x1, next_in, file_start
	lsl	x0, x0, 1			// byte offset into the 16-bit table

	// dist = (position - last_seen[hash]) & 0xffff; last_seen[hash] = position
	ldrh	w_dist, [last_seen, x0]
	strh	w1, [last_seen, x0]
	sub	w1, w1, w_dist
	and	w_dist, w1, 0xffff

	sub	dist_inc, w_dist, 1
	cmp	dist_inc, hist_size
	bcc	.dist_vs_hist_size

.while_latter_part:
	// literal: bump ll_hist[byte] and append a literal ICF record
	and	w_literal, w_literal, 0xff
	mov	next_in, next_in_iter
	add	next_out, next_out, 4
	add	x1, level_buf, literal, uxtb 2
	ldr	w0, [x1, offset_hist_ll_hist]
	add	w0, w0, 1
	str	w0, [x1, offset_hist_ll_hist]
	ldrh	w0, [next_out, -4]
	bfi	w0, w_literal, 0, 10
	strh	w0, [next_out, -4]
	ldr	w0, [next_out, -4]
	bfi	w0, NULL_DIST_SYM, 10, 9
	str	w0, [next_out, -4]
	ubfx	x0, x0, 16, 3
	strh	w0, [next_out, -2]

.while_loop_check:
	add	x0, next_in, ISAL_LOOK_AHEAD
	add	next_in_iter, next_in, 1
	cmp	end_in, x0
	bhi	.while_loop
	b	.while_loop_end

	.align	2
.dist_vs_hist_size:
	// candidate inside the history window: measure the match
	mov	x1, next_in
	mov	w2, 258
	sub	x0, next_in, dist, uxth
	compare_258_bytes param0,param1,match_length,tmp0,tmp1

	and	w1, w_match_length, 0xffff
	cmp	w1, 3
	bls	.while_latter_part

	// record the hash of the 4 bytes at next_in + 1 ...
	ldr	w0, [next_in, 1]
	mov	x4, next_in
	add	next_in, next_in, x1, uxth
	crc32cw	w0, wzr, w0

	and	w0, hash_mask, w0
	sub	next_in_iter, next_in_iter, file_start
	strh	w_next_in_iter, [last_seen, x0, lsl 1]
	// ... and of the 4 bytes at next_in + 2 (x4 is pre-incremented)
	ldr	w0, [x4, 2]!
	crc32cw	w0, wzr, w0

	and	w0, hash_mask, w0
	and	w_match_length, w_match_length, 0xffff
	sub	x4, x4, file_start

	// get_len_icf_code
	add	w_match_length, w_match_length, 254
	// get_dist_icf_code, first part
	mov	w1, 0				// w1 => dist_extra
	strh	w4, [last_seen, x0, lsl 1]
	cmp	w_dist, 2
	ubfiz	x0, match_length, 2, 17
	add	x0, level_buf, x0
	bhi	.compute_dist_icf_code

.match_length_end:
	// update level_buf->hist: ll_hist[len code]++ and d_hist[dist code]++
	ldr	w2, [x0, offset_hist_ll_hist]
	add	x4, level_buf, dist_inc, uxtw 2
	add	next_out, next_out, 4
	add	w2, w2, 1
	str	w2, [x0, offset_hist_ll_hist]
	ldr	w0, [x4, offset_hist_d_hist]
	add	w0, w0, 1
	str	w0, [x4, offset_hist_d_hist]

	// write_deflate_icf
	ldrh	w0, [next_out, -4]
	bfi	w0, w_match_length, 0, 10
	strh	w0, [next_out, -4]
	ldr	w0, [next_out, -4]
	bfi	w0, dist_inc, 10, 9
	str	w0, [next_out, -4]
	lsr	w0, w0, 16
	bfi	w0, w1, 3, 13			// w1 => dist_extra
	strh	w0, [next_out, -2]
	b	.while_loop_check

	.align	2
// get_dist_icf_code, 2nd part
.compute_dist_icf_code:
	clz	w1, dist_inc
	mov	w2, 30
	sub	w2, w2, w1
	mov	w1, 1
	lsl	w1, w1, w2
	sub	w1, w1, 1
	and	w1, w1, dist_inc
	lsr	dist_inc, dist_inc, w2
	add	dist_inc, dist_inc, w2, lsl 1
	and	w1, w1, 0x1fff
	b	.match_length_end

.while_loop_end:
	// start_in register now holds the number of bytes consumed
	sub	start_in, next_in, start_in
	cmp	start_in, 0
	ble	.skip_igzip_hist2

	mov	w0, 1
	strb	w0, [stream_saved, offset_state_has_hist]

.skip_igzip_hist2:
	add	w_start_in, w_total_in, w_start_in
	ldr	w0, [stream_saved, offset_end_of_stream]
	sub	end_out, end_out, next_out
	asr	end_out, end_out, 2		// (end_out - next_out) >> 2
	str	next_in, [stream_saved, offset_next_in]
	str	w_start_in, [stream_saved, offset_total_in]
	sub	next_in, end_in, next_in	// bytes left in the input
	str	w_start_in, [stream_saved, offset_state_block_end]

	ldp	x25, x26, [x29, 64]
	ldr	x23, [x29, 48]
	ldp	x21, x22, [x29, 32]
	ldp	x19, x20, [x29, 16]

	str	w_next_in, [stream_saved, offset_avail_in]
	str	next_out, [level_buf, offset_icf_buf_next]
	str	end_out, [level_buf, offset_icf_buf_avail_out]
	cbnz	w0, .state_flush_read_buffer
	b	.done

	.align	2
.state_create_hdr:
	// presumably 2 == ZSTATE_CREATE_HDR, written to isal_zstate.state at
	// offset 20 - TODO confirm against the shared headers
	mov	w0, 2
	str	w0, [state, 20]
	sub	start_in, next_in, start_in
	cmp	start_in, 0
	ble	.skip_igzip_hist

	mov	w0, 1
	strb	w0, [stream_saved, offset_state_has_hist]

.skip_igzip_hist:
	add	w_total_in, w_total_in, w_start_in
	sub	end_out, end_out, next_out
	asr	end_out, end_out, 2		// (end_out - next_out) >> 2
	str	next_in, [stream_saved, offset_next_in]
	sub	next_in, end_in, next_in	// bytes left in the input
	str	w_total_in, [stream_saved, offset_total_in]
	str	w_total_in, [stream_saved, offset_state_block_end]

	ldp	x25, x26, [x29, 64]
	ldr	x23, [x29, 48]
	ldp	x21, x22, [x29, 32]
	ldp	x19, x20, [x29, 16]

	str	w_next_in, [stream_saved, offset_avail_in]
	str	next_out, [level_buf, offset_icf_buf_next]
	str	end_out, [level_buf, offset_icf_buf_avail_out]
	b	.done

.state_flush_read_buffer:
	// presumably 4 == ZSTATE_FLUSH_READ_BUFFER - TODO confirm
	mov	w0, 4
	str	w0, [state, 20]

.done:
	ldr	x_state, [sp, 56]
	ldp	x29, x30, [sp], 80
	ret

	.size	isal_deflate_icf_body_hash_hist_aarch64, .-isal_deflate_icf_body_hash_hist_aarch64
diff --git a/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S b/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S new file mode 100644 index 000000000..3e72c8c78 --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S @@ -0,0 +1,397 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + .arch armv8-a+crc + .text + +#include "lz0a_const_aarch64.h" +#include "data_struct_aarch64.h" +#include "huffman_aarch64.h" +#include "bitbuf2_aarch64.h" +#include "stdmac_aarch64.h" + +/* +declare Macros +*/ +.macro declare_generic_reg name:req,reg:req,default:req + \name .req \default\reg + w_\name .req w\reg + x_\name .req x\reg +.endm + +/* +void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream); +*/ + +/* constant */ + +/* offset of struct isal_zstream */ +.equ offset_next_in, 0 +.equ offset_avail_in, 8 +.equ offset_total_in, 12 +.equ offset_next_out, 16 +.equ offset_avail_out, 24 +.equ offset_total_out, 28 +.equ offset_hufftables, 32 +.equ offset_level, 40 +.equ offset_level_buf_size, 44 +.equ offset_level_buf, 48 +.equ offset_end_of_stream, 56 +.equ offset_flush, 58 +.equ offset_gzip_flag, 60 +.equ offset_hist_bits, 62 +.equ offset_state, 64 +.equ offset_state_block_end, 72 +.equ offset_state_state, 84 +.equ offset_state_has_hist, 135 + +/* offset of struct level_buf */ +.equ offset_encode_tables, 0 +.equ offset_hist, 2176 +.equ offset_hist_d_hist, 2176 +.equ offset_hist_ll_hist, 2296 +.equ offset_deflate_hdr_count, 4348 +.equ offset_deflate_hdr_extra_bits, 4352 +.equ offset_deflate_hdr, 4356 +.equ offset_icf_buf_next, 4688 +.equ offset_icf_buf_avail_out, 4696 +.equ offset_icf_buf_start, 4704 +.equ offset_hash8k, 4712 +.equ offset_hash_hist, 4712 + +/* offset of struct isal_zstate */ +.equ offset_dist_mask, 12 +.equ offset_hash_mask, 16 +.equ offset_state_of_zstate, 20 + +/* macros*/ +.equ ISAL_LOOK_AHEAD, 288 + + /* arguments */ + declare_generic_reg stream, 0,x + + declare_generic_reg param0, 0,x + declare_generic_reg param1, 1,x + declare_generic_reg param2, 2,x + declare_generic_reg param3, 3,x + declare_generic_reg param4, 4,x + declare_generic_reg param5, 5,x + declare_generic_reg param6, 6,x + + /* local variable */ + declare_generic_reg stream_saved, 15,x 
declare_generic_reg	level_buf,	13,x
declare_generic_reg	start_in,	21,x
declare_generic_reg	start_out,	22,x
declare_generic_reg	state,		23,x
declare_generic_reg	end_out,	12,x
declare_generic_reg	end_in,		11,x
declare_generic_reg	next_in,	8,x
declare_generic_reg	next_out,	10,x
declare_generic_reg	next_out_iter,	5,x
declare_generic_reg	file_start,	18,x
declare_generic_reg	last_seen,	14,x

declare_generic_reg	literal_code,	9,w
declare_generic_reg	hash_mask,	19,w
declare_generic_reg	hist_size,	20,w
declare_generic_reg	dist,		7,w
declare_generic_reg	dist_inc,	24,w

declare_generic_reg	tmp0,		25,x
declare_generic_reg	tmp1,		26,x
declare_generic_reg	tmp2,		27,x
declare_generic_reg	tmp3,		28,x

/*
 * write_deflate_icf_constprop - store a literal ICF record
 * In:    x0 = address of the 4-byte record, w1 = literal/length code
 * Out:   record bits 0..9 = w1, bits 10..18 = 30, bits 19..31 = 0
 *        (30 is the value the sibling body routine names NULL_DIST_SYM)
 * Clobb: w1, w2, w3
 * Local helper (no .global); callers in this file keep live values in
 * other caller-saved registers across the call, so do not widen the
 * clobber set.
 */
	.align	2
	.type	write_deflate_icf_constprop, %function
write_deflate_icf_constprop:
	ldrh	w2, [x0]
	mov	w3, 30
	bfi	w2, w1, 0, 10
	strh	w2, [x0]
	ldr	w1, [x0]
	bfi	w1, w3, 10, 9
	str	w1, [x0]
	ubfx	x1, x1, 16, 3
	strh	w1, [x0, 2]
	ret
	.size	write_deflate_icf_constprop, .-write_deflate_icf_constprop

/*
 * write_deflate_icf - store a length/distance ICF record
 * In:    x0 = address of the 4-byte record
 *        w1 = literal/length code   -> bits 0..9
 *        w2 = distance code         -> bits 10..18
 *        w3 = extra distance bits   -> bits 19..31
 * Clobb: w1, w4
 */
	.align	2
	.type	write_deflate_icf, %function
write_deflate_icf:
	ldrh	w4, [x0]
	bfi	w4, w1, 0, 10
	strh	w4, [x0]
	ldr	w1, [x0]
	bfi	w1, w2, 10, 9
	str	w1, [x0]
	lsr	w1, w1, 16
	bfi	w1, w3, 3, 13
	strh	w1, [x0, 2]
	ret
	.size	write_deflate_icf, .-write_deflate_icf

/*
 * update_state - write the progress of the finish routine back to memory
 * In:    x0 = stream, x1 = start_in, x2 = next_in, x3 = end_in,
 *        x5 = next_out, x6 = end_out
 * Effect:
 *   stream->state.has_hist    = 1 when next_in - start_in > 0
 *   stream->next_in           = next_in
 *   stream->avail_in          = end_in - next_in
 *   stream->total_in         += next_in - start_in
 *   stream->state.block_end   = the new total_in
 *   level_buf->icf_buf_next      = next_out
 *   level_buf->icf_buf_avail_out = (end_out - next_out) >> 2
 * Clobb: x1, x3, x4, x6, x7, flags
 */
	.align	2
	.type	update_state, %function
update_state:
	sub	x7, x2, x1			// bytes consumed
	ldr	x4, [x0, offset_level_buf]
	cmp	x7, 0
	ble	.L48
	mov	w1, 1
	strb	w1, [x0, offset_state_has_hist]
.L48:
	ldr	w1, [x0, offset_total_in]
	sub	x6, x6, x5
	str	x2, [x0, offset_next_in]
	sub	x3, x3, x2
	add	w1, w1, w7
	// one store pair covers avail_in (offset 8) and total_in (offset 12)
	stp	w3, w1, [x0, offset_avail_in]
	str	w1, [x0, offset_state_block_end]
	asr	x6, x6, 2
	str	x5, [x4, offset_icf_buf_next]
	str	x6, [x4, offset_icf_buf_avail_out]
	ret
	.size	update_state, .-update_state

	.align	2
	.global	isal_deflate_icf_finish_hash_hist_aarch64
	.type	isal_deflate_icf_finish_hash_hist_aarch64, %function
isal_deflate_icf_finish_hash_hist_aarch64:
	ldr	w_end_in, [stream, 8]		// stream->avail_in
	cbz	w_end_in, .stream_not_available

	stp	x29, x30, [sp, -96]!
+ add x29, sp, 0 + stp x19, x20, [sp, 16] + stp x21, x22, [sp, 32] + stp x23, x24, [sp, 48] + stp x25, x26, [sp, 64] + stp x27, x28, [sp, 80] + + mov stream_saved, stream + ldr level_buf, [stream, offset_level_buf] // 48 + ldr start_in, [stream, offset_next_in] // 0 + ldr start_out, [level_buf, offset_icf_buf_next] // 4688 + add state, stream, offset_state // 64 + ldr end_out, [level_buf, offset_icf_buf_avail_out] // 4696 + mov next_in, start_in + ldr w_file_start, [stream, offset_total_in] // 12 + mov tmp0, offset_hash_hist // 4712 + add last_seen, level_buf, tmp0 + add end_in, start_in, end_in, uxtw + and end_out, end_out, -4 + mov next_out, start_out + ldp hist_size, hash_mask, [state, offset_dist_mask] // 12 + sub file_start, start_in, file_start + add end_out, start_out, end_out + mov next_out_iter, next_out + + add x0, next_in, 3 + cmp end_in, x0 // x0 <= next_in + 3 + bls .while_first_end + + .p2align 3 +.while_first: + cmp next_out, end_out + bcs .save_and_update_state + ldr literal_code, [next_in] + mov w0, literal_code + crc32cw w0, wzr, w0 + and w0, w0, hash_mask + sub x2, next_in, file_start + lsl x0, x0, 1 + ldrh dist, [last_seen, x0] + strh w2, [last_seen, x0] + sub w2, w2, dist + and dist, w2, 65535 + sub dist_inc, dist, #1 + cmp dist_inc, hist_size + bcs .skip_compare258 + + mov x2, 0 + sub w2, w_end_in, w8 + mov x1, next_in + sub x0, next_in, x7, uxth + + compare_max_258_bytes param0,param1,param2,tmp2,tmp0,tmp1 + mov w0, w_tmp2 + and w2, w0, 65535 + + cmp w2, 3 + bhi .while_first_match_length + +.skip_compare258: + and literal_code, literal_code, 255 // get_lit_icf_code + add next_in, next_in, 1 + mov w1, literal_code + mov x0, next_out + add x_literal_code, level_buf, x_literal_code, uxtb 2 // level_buf->hist.ll_hist + + ldr w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296 + add w_tmp0, w_tmp0, 1 + str w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296 + + bl write_deflate_icf_constprop // write_deflate_icf + + add next_out, next_out, 4 
+.while_first_check: + add x0, next_in, 3 + mov next_out_iter, next_out + cmp end_in, x0 + bhi .while_first + +.while_first_end: + cmp next_in, end_in + bcs .while_2nd_end + + cmp next_out, end_out + bcc .while_2nd_handle + b .save_and_update_state_2nd + + .p2align 2 +.while_2nd: + cmp end_out, next_out_iter + bls .save_and_update_state_2nd + +.while_2nd_handle: + ldrb w2, [next_in], 1 + mov x0, next_out_iter + add next_out_iter, next_out_iter, 4 + mov w1, w2 + add x2, level_buf, x2, uxtb 2 + + ldr w_tmp0, [x2, offset_hist_ll_hist] // 2296 + add w_tmp0, w_tmp0, 1 + str w_tmp0, [x2, offset_hist_ll_hist] // 2296 + + bl write_deflate_icf_constprop + cmp end_in, next_in + bne .while_2nd + + mov next_in, end_in + b .end_of_stream_check_and_exit + + .p2align 2 +.while_first_match_length: + and w0, w0, 65535 + mov w3, 0 + add w1, w0, 254 // get_len_icf_code + cmp dist, 2 + bhi .compute_dist_icf_code + +.while_first_match_length_end: + ubfiz x_tmp2, x1, 2, 17 + add x_tmp1, level_buf, x24, uxtw 2 + add x_tmp2, level_buf, x_tmp2 + + add next_in, next_in, x2, uxth + mov w2, dist_inc + + ldr w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296 + add w_tmp0, w_tmp0, 1 + str w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296 + + mov x0, next_out + ldr w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176 + add w_tmp0, w_tmp0, 1 + str w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176 + + bl write_deflate_icf + add next_out, next_out, 4 + b .while_first_check + +// compute_dist_icf_code + .p2align 2 +.compute_dist_icf_code: + clz w3, dist_inc + mov w0, 30 + sub w0, w0, w3 + + mov w3, 1 + lsl w3, w3, w0 + sub w3, w3, #1 + and w3, w3, dist_inc + lsl w4, w0, 1 + lsr dist_inc, dist_inc, w0 + add dist_inc, dist_inc, w4 + b .while_first_match_length_end + +.while_2nd_end: + beq .end_of_stream_check_and_exit + mov param6, end_out + b .update_state + +.end_of_stream_check_and_exit: + ldr w_tmp0, [stream_saved, offset_end_of_stream] // 56 + cbz w_tmp0, .update_state_2nd + b .save_and_update_state_2nd + + .p2align 3 
+.save_and_update_state_2nd: + mov w_tmp0, 2 + str w_tmp0, [state, offset_state_of_zstate] // 20 +.update_state_2nd: + mov param6, end_out + b .update_state + + .p2align 2 +.save_and_update_state: + mov param6, end_out + mov param5, next_out + mov w_tmp0, 2 + str w_tmp0, [state, offset_state_of_zstate] // 20 +.update_state: + mov param4, start_out + mov param1, start_in + mov param3, end_in + mov param2, next_in + mov param0, stream_saved + + ldp x19, x20, [sp, 16] + ldp x21, x22, [sp, 32] + ldp x23, x24, [sp, 48] + ldp x25, x26, [sp, 64] + ldp x27, x28, [sp, 80] + ldp x29, x30, [sp], 96 + + b update_state + + .p2align 2 +.stream_not_available: + ldr w1, [stream, offset_end_of_stream] // 56 + cbz w1, .done + + mov w1, 2 + str w1, [stream, offset_state_state] // 84 +.done: + ret + + .size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64 diff --git a/src/spdk/isa-l/igzip/aarch64/isal_update_histogram.S b/src/spdk/isa-l/igzip/aarch64/isal_update_histogram.S new file mode 100644 index 000000000..43b916f8e --- /dev/null +++ b/src/spdk/isa-l/igzip/aarch64/isal_update_histogram.S @@ -0,0 +1,311 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+        .arch armv8-a+crc
+        .text
+        .align 2
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+ * Macros
+ */
+
+/*
+ * declare_generic_reg name, reg, default
+ *   Defines three register aliases for register number reg:
+ *   name (width given by default, x or w), w_name and x_name.
+ */
+.macro  declare_generic_reg     name:req,reg:req,default:req
+        \name   .req    \default\reg
+        w_\name .req    w\reg
+        x_\name .req    x\reg
+.endm
+
+/*
+ * convert_dist_to_dist_sym dist, tmp0, tmp1
+ *   In/out: w_dist = match distance on entry, distance symbol on exit.
+ *   dist > 32768 gives -1; dist <= 4 gives dist - 1; otherwise, with
+ *   n = 30 - clz(dist - 1), the result is ((dist - 1) >> n) + 2 * n.
+ *   Clobb: w_tmp0, w_tmp1, flags.
+ *   The exit label carries the assembler macro invocation counter so the
+ *   macro can be expanded more than once in the same file.
+ */
+.macro  convert_dist_to_dist_sym        dist:req,tmp0:req,tmp1:req
+        mov     w_\tmp0, w_\dist
+        mov     w_\dist, -1
+        cmp     w_\tmp0, 32768
+        bhi     .dist2code_done\@
+        sub     w_\dist, w_\tmp0, #1
+        cmp     w_\tmp0, 4
+        bls     .dist2code_done\@
+        clz     w_\tmp1, w_\dist
+        mov     w_\tmp0, 30
+        sub     w_\tmp0, w_\tmp0, w_\tmp1
+        lsr     w_\dist, w_\dist, w_\tmp0
+        add     w_\dist, w_\dist, w_\tmp0, lsl 1
+.dist2code_done\@:
+.endm
+
+/*
+ * convert_length_to_len_sym length, length_out, tmp0
+ *   w_length_out = len_to_code_tab[w_length] + 256.
+ *   Clobb: x_tmp0.
+ */
+.macro  convert_length_to_len_sym       length:req,length_out:req,tmp0:req
+        adrp    x_\tmp0, .len_to_code_tab_lanchor
+        add     x_\tmp0, x_\tmp0, :lo12:.len_to_code_tab_lanchor
+        ldr     w_\length_out, [x_\tmp0, w_\length, uxtw 2]
+        add     w_\length_out, w_\length_out, 256
+.endm
+
+        /* Length code table: 264 words (1056 bytes), indexed by match length. */
+        .section .rodata
+        .align 4
+.len_to_code_tab_lanchor = . + 0
+        .type   len_to_code_tab, %object
+        .size   len_to_code_tab, 1056
+len_to_code_tab:
+        .word   0x00, 0x00, 0x00
+        .word   0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08
+        .word   0x09, 0x09, 0x0a, 0x0a, 0x0b, 0x0b, 0x0c, 0x0c
+        .word   0x0d, 0x0d, 0x0d, 0x0d, 0x0e, 0x0e, 0x0e, 0x0e
+        .word   0x0f, 0x0f, 0x0f, 0x0f, 0x10, 0x10, 0x10, 0x10
+        .word   0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11
+        .word   0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12, 0x12
+        .word   0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13, 0x13
+        .word   0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14, 0x14
+        .word   0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+        .word   0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15, 0x15
+        .word   0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+        .word   0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16, 0x16
+        .word   0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+        .word   0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17
+        .word   0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+        .word   0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18
+        .word   0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+        .word   0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+        .word   0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+        .word   0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19
+        .word   0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+        .word   0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+        .word   0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+        .word   0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a, 0x1a
+        .word   0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+        .word   0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+        .word   0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+        .word   0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b, 0x1b
+        .word   0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+        .word   0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+        .word   0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c
+        .word   0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1c, 0x1d
+        .word   0x00, 0x00, 0x00, 0x00, 0x00
+
+        .text
+        .global isal_update_histogram_aarch64
+        .arch armv8-a+crc
+        .type   isal_update_histogram_aarch64, %function
+
+/*
+void isal_update_histogram_aarch64(uint8_t * start_stream, int length,
+                                   struct isal_huff_histogram *histogram);
+
+  In:   x0 = start_stream, w1 = length, x2 = histogram
+  Adds the literal/length and distance symbol counts of the input to the
+  64-bit counters in histogram, after clearing the hash table stored at
+  histogram + hash_offset with memset. Returns at once when length <= 0.
+  Callee-saved x19..x23 and x29/x30 are spilled to a 64-byte frame; the
+  caller-saved working registers are first written after the memset call.
+*/
+
+        /* arguments */
+        declare_generic_reg     start_stream,           0,x
+        declare_generic_reg     length,                 1,x
+        declare_generic_reg     histogram,              2,x
+
+        declare_generic_reg     param0,                 0,x
+        declare_generic_reg     param1,                 1,x
+        declare_generic_reg     param2,                 2,x
+
+        /* local variables */
+        declare_generic_reg     start_stream_saved,     10,x
+        declare_generic_reg     histogram_saved,        23,x
+        declare_generic_reg     current,                19,x
+        declare_generic_reg     last_seen,              20,x
+        declare_generic_reg     end_stream,             21,x
+        declare_generic_reg     loop_end_iter,          22,x
+        declare_generic_reg     dist_histogram,         12,x
+        /* shares x23 with histogram_saved: lit_len_offset is 0 */
+        declare_generic_reg     lit_len_histogram,      23,x
+        declare_generic_reg     literal,                8,x
+        declare_generic_reg     next_hash,              9,x
+        declare_generic_reg     end,                    4,x
+        declare_generic_reg     dist,                   7,x
+        declare_generic_reg     D,                      11,w
+        declare_generic_reg     match_length,           3,w
+
+        declare_generic_reg     tmp0,                   5,w
+        declare_generic_reg     tmp1,                   6,w
+
+/* constants */
+.equ    LIT_LEN,                286
+.equ    DIST_LEN,               30
+
+.equ    lit_len_offset,         0
+.equ    dist_offset,            (8*LIT_LEN)                     // 2288
+.equ    hash_offset,            (dist_offset + 8*DIST_LEN)      // 2528
+.equ    hash_table_size,        (8*1024*2)                      // 16384
+
+isal_update_histogram_aarch64:
+        cmp     w_length, 0
+        ble     .done
+
+        stp     x29, x30, [sp, -64]!
+        add     x29, sp, 0
+        stp     x19, x20, [sp, 16]
+        stp     x21, x22, [sp, 32]
+        str     x23, [sp, 48]
+
+        add     last_seen, histogram, hash_offset
+        // The extend operand is written as a W register, the architectural
+        // form for sxtw/uxth; the encoding is unchanged.
+        add     end_stream, start_stream, w_length, sxtw
+        mov     current, start_stream
+        sub     loop_end_iter, end_stream, #3
+        mov     histogram_saved, histogram
+
+        mov     x0, last_seen
+        mov     w1, 0
+        mov     x2, hash_table_size
+        bl      memset                          // clear the hash table
+
+        cmp     current, loop_end_iter
+        bcs     .loop_end                       // too short for the hash loop
+
+        mov     start_stream_saved, current
+        add     dist_histogram, histogram_saved, dist_offset
+        mov     D, 32766                        // largest accepted value of dist - 1
+        b       .loop
+
+        .align 2
+.loop_2nd_stream:
+        // No usable match: count the byte at current as a literal.
+        and     literal, literal, 0xff
+        mov     current, next_hash              // current + 1
+        cmp     loop_end_iter, current          // flags consumed by bls below
+
+        ldr     x0, [lit_len_histogram, literal, lsl 3]
+        add     x0, x0, 1
+        str     x0, [lit_len_histogram, literal, lsl 3]
+        bls     .loop_end
+
+.loop:
+        ldr     w_literal, [current]            // next 4 input bytes
+        add     next_hash, current, 1
+
+        mov     w0, w_literal
+        crc32cw w0, wzr, w0
+
+        ubfiz   x0, x0, 1, 13                   // (hash & 0x1fff) * 2
+        sub     x2, current, start_stream_saved
+        ldrh    w_dist, [last_seen, x0]
+        strh    w2, [last_seen, x0]
+        sub     w2, w2, w_dist
+        and     w_dist, w2, 65535               // dist = (pos - last_seen[hash]) mod 2^16
+
+        sub     w0, w_dist, #1
+        cmp     w0, D
+        bhi     .loop_2nd_stream                // dist is 0 or too large
+
+        sub     w2, w_end_stream, w_current     // low 32 bits of remaining byte count
+        mov     x1, current
+        sub     x0, current, w_dist, uxth       // candidate = current - dist
+        compare_max_258_bytes param0,param1,param2,match_length,tmp0,tmp1
+
+        cmp     match_length, 3
+        bls     .loop_2nd_stream                // a match needs length >= 4
+
+        // Hash the positions after current, up to min(current + 3, loop_end_iter).
+        add     end, current, 3
+        cmp     end, loop_end_iter
+        csel    end, end, loop_end_iter, ls
+        cmp     end, next_hash
+        bls     .skip_inner_loop
+
+        .align 3
+.inner_loop:
+        ldr     w0, [next_hash]
+        crc32cw w0, wzr, w0
+
+        ubfiz   x0, x0, 1, 13
+        sub     x1, next_hash, start_stream_saved
+        add     next_hash, next_hash, 1
+        cmp     next_hash, end
+        strh    w1, [last_seen, x0]
+        bne     .inner_loop
+
+.skip_inner_loop:
+        convert_dist_to_dist_sym dist, tmp0, tmp1
+        uxtw    x2, w_dist
+        ldr     x1, [dist_histogram, x2, lsl 3]
+        add     x1, x1, 1
+        str     x1, [dist_histogram, x2, lsl 3]
+
+        convert_length_to_len_sym match_length,tmp1,tmp0
+        uxtw    x0, w_tmp1
+        ldr     x1, [lit_len_histogram, x0, lsl 3]
+        add     x1, x1, 1
+        str     x1, [lit_len_histogram, x0, lsl 3]
+
+        // The 32-bit sub zero-extends into x_match_length; the two steps
+        // together advance current by match_length.
+        sub     match_length, match_length, #1
+        add     x_match_length, x_match_length, 1
+        add     current, current, x_match_length
+        cmp     loop_end_iter, current
+        bhi     .loop
+
+        .align 3
+// Unrolled tail: count up to four remaining bytes as literals.
+.loop_end:
+        cmp     end_stream, current
+        bls     .loop_fold_end
+
+        mov     x0, current
+        ldrb    w1, [x0], 1
+        cmp     end_stream, x0
+        ldr     x0, [lit_len_histogram, x1, lsl 3]
+        add     x0, x0, 1
+        str     x0, [lit_len_histogram, x1, lsl 3]
+        bls     .loop_fold_end
+
+        ldrb    w1, [current, 1]
+        add     x0, current, 2
+        cmp     end_stream, x0
+        ldr     x0, [lit_len_histogram, x1, lsl 3]
+        add     x0, x0, 1
+        str     x0, [lit_len_histogram, x1, lsl 3]
+        bls     .loop_fold_end
+
+        ldrb    w1, [current, 2]
+        add     x0, current, 3
+        cmp     end_stream, x0
+        ldr     x0, [lit_len_histogram, x1, lsl 3]
+        add     x0, x0, 1
+        str     x0, [lit_len_histogram, x1, lsl 3]
+        bls     .loop_fold_end
+
+        ldrb    w1, [current, 3]
+        ldr     x0, [lit_len_histogram, x1, lsl 3]
+        add     x0, x0, 1
+        str     x0, [lit_len_histogram, x1, lsl 3]
+
+.loop_fold_end:
+        // Count symbol 256 once per call.
+        ldr     x0, [lit_len_histogram, (256*8)]
+        add     x0, x0, 1
+        str     x0, [lit_len_histogram, (256*8)]
+
+        ldr     x23, [sp, 48]
+        ldp     x19, x20, [sp, 16]
+        ldp     x21, x22, [sp, 32]
+        ldp     x29, x30, [sp], 64
+        ret
+        .align 2
+.done:
+        ret
+        .size   isal_update_histogram_aarch64, .-isal_update_histogram_aarch64
diff --git a/src/spdk/isa-l/igzip/aarch64/lz0a_const_aarch64.h b/src/spdk/isa-l/igzip/aarch64/lz0a_const_aarch64.h
new file mode 100644
index 000000000..d55ec09dc
--- /dev/null
+++ b/src/spdk/isa-l/igzip/aarch64/lz0a_const_aarch64.h
@@ -0,0 +1,72 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __LZ0A_CONST_AARCH64_H__
+#define __LZ0A_CONST_AARCH64_H__
+#include "options_aarch64.h"
+
+/* Everything below is visible to assembly sources only. */
+#ifdef __ASSEMBLY__
+
+/* Window and buffer geometry (assembler symbols). */
+.equ    K,      1024
+.equ    D,      IGZIP_HIST_SIZE                 /* amount of history */
+.equ    LA,     18 * 16                         /* max look-ahead, rounded up to a 32-byte boundary */
+.equ    BSIZE,  2*IGZIP_HIST_SIZE + LA          /* nominal buffer size */
+
+/* Constants for stateless compression. */
+#define LAST_BYTES_COUNT 3 // Bytes to prevent reading out of array bounds
+#define LA_STATELESS 258 // No round up since no data is copied to a buffer
+
+/* Hash table sizes, one per hashing scheme. */
+.equ    IGZIP_LVL0_HASH_SIZE,           (8 * K)
+.equ    IGZIP_HASH8K_HASH_SIZE,         (8 * K)
+.equ    IGZIP_HASH_HIST_HASH_SIZE,      IGZIP_HIST_SIZE
+.equ    IGZIP_HASH_MAP_HASH_SIZE,       IGZIP_HIST_SIZE
+
+/* Matching index masks: table size minus one. */
+#define LVL0_HASH_MASK (IGZIP_LVL0_HASH_SIZE - 1)
+#define HASH8K_HASH_MASK (IGZIP_HASH8K_HASH_SIZE - 1)
+#define HASH_HIST_HASH_MASK (IGZIP_HASH_HIST_HASH_SIZE - 1)
+#define HASH_MAP_HASH_MASK (IGZIP_HASH_MAP_HASH_SIZE - 1)
+
+/* Match length limits. */
+.equ    MIN_DEF_MATCH,  3                       /* minimum length of a match in deflate */
+.equ    SHORTEST_MATCH, 4
+
+.equ    SLOP,   8
+
+/*
+ * Intermediate compression format (ICF) word layout: a 10-bit literal/length
+ * field at bit 0, a 9-bit distance field at bit DIST_OFFSET, and extra bits
+ * starting at bit EXTRA_BITS_OFFSET.
+ */
+#define ICF_CODE_BYTES 4
+#define LIT_LEN_BIT_COUNT 10
+#define DIST_LIT_BIT_COUNT 9
+
+#define LIT_LEN_MASK ((1 << LIT_LEN_BIT_COUNT) - 1)
+#define LIT_DIST_MASK ((1 << DIST_LIT_BIT_COUNT) - 1)
+
+#define DIST_OFFSET LIT_LEN_BIT_COUNT
+#define EXTRA_BITS_OFFSET (DIST_OFFSET + DIST_LIT_BIT_COUNT)
+#define LIT (0x1E << DIST_OFFSET)
+
+
+#endif
+#endif
diff --git a/src/spdk/isa-l/igzip/aarch64/options_aarch64.h b/src/spdk/isa-l/igzip/aarch64/options_aarch64.h
new file mode 100644
index 000000000..32db918f3
--- /dev/null
+++ b/src/spdk/isa-l/igzip/aarch64/options_aarch64.h
@@ -0,0 +1,71 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __OPTIONS_AARCH64_H__
+#define __OPTIONS_AARCH64_H__
+
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Option letters:
+ *   m      - reschedule mem reads
+ *   e b    - bitbuff style
+ *   t s x  - compare style
+ *   h      - limit hash updates
+ *   l      - use longer huffman table
+ *   f      - fix cache read
+ */
+
+/* History size: defaults to 32 KiB and is clamped to at most 32 KiB. */
+#if !defined(IGZIP_HIST_SIZE)
+#define IGZIP_HIST_SIZE (32 * 1024)
+#elif (IGZIP_HIST_SIZE > (32 * 1024))
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (32 * 1024)
+#endif
+
+/* With LONGER_HUFFTABLE the history size is clamped to at most 8 KiB. */
+#if defined(LONGER_HUFFTABLE) && (IGZIP_HIST_SIZE > 8 * 1024)
+#undef IGZIP_HIST_SIZE
+#define IGZIP_HIST_SIZE (8 * 1024)
+#endif
+
+/* (h) limit hash update */
+#define LIMIT_HASH_UPDATE
+
+/* (f) fix cache read problem */
+#define FIX_CACHE_READ
+
+#define ISAL_DEF_MAX_HDR_SIZE 328
+
+
+
+#endif
+#endif
diff --git a/src/spdk/isa-l/igzip/aarch64/stdmac_aarch64.h b/src/spdk/isa-l/igzip/aarch64/stdmac_aarch64.h
new file mode 100644
index 000000000..39afbc640
--- /dev/null
+++ b/src/spdk/isa-l/igzip/aarch64/stdmac_aarch64.h
@@ -0,0 +1,57 @@
+/**********************************************************************
+  Copyright(c) 2019 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef __STDMAC_AARCH64_H__
+#define __STDMAC_AARCH64_H__
+
+#ifdef __ASSEMBLY__
+
+/*
+ * Frame size in bytes used by push_stack and pop_stack. The register save
+ * area occupies the first 96 bytes; the macros leave the rest untouched.
+ */
+#define DEBUG_STACK 144
+
+/*
+ * push_stack: allocate a DEBUG_STACK-byte frame, save x29/x30 at its base,
+ * point x29 at the frame and save callee-saved x19..x28 above them.
+ */
+.macro  push_stack
+        stp     x29, x30, [sp, 0-DEBUG_STACK]!
+        add     x29, sp, #0
+        stp     x19, x20, [sp, #16]
+        stp     x21, x22, [sp, #32]
+        stp     x23, x24, [sp, #48]
+        stp     x25, x26, [sp, #64]
+        stp     x27, x28, [sp, #80]
+.endm
+
+/*
+ * pop_stack: reload x19..x28, then x29/x30, and release the frame that
+ * push_stack allocated.
+ */
+.macro  pop_stack
+        ldp     x19, x20, [sp, #16]
+        ldp     x21, x22, [sp, #32]
+        ldp     x23, x24, [sp, #48]
+        ldp     x25, x26, [sp, #64]
+        ldp     x27, x28, [sp, #80]
+
+        ldp     x29, x30, [sp], DEBUG_STACK
+.endm
+
+#endif
+#endif |