diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/isa-l/igzip/aarch64/gen_icf_map.S | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/isa-l/igzip/aarch64/gen_icf_map.S')
-rw-r--r-- | src/isa-l/igzip/aarch64/gen_icf_map.S | 266 |
1 files changed, 266 insertions, 0 deletions
diff --git a/src/isa-l/igzip/aarch64/gen_icf_map.S b/src/isa-l/igzip/aarch64/gen_icf_map.S new file mode 100644 index 000000000..fe04ee4c3 --- /dev/null +++ b/src/isa-l/igzip/aarch64/gen_icf_map.S @@ -0,0 +1,266 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crc+crypto + .text + .align 2 + +#include "lz0a_const_aarch64.h" +#include "data_struct_aarch64.h" +#include "huffman_aarch64.h" +#include "bitbuf2_aarch64.h" +#include "stdmac_aarch64.h" + +/* +declare Macros +*/ + +.macro declare_generic_reg name:req,reg:req,default:req + \name .req \default\reg + w_\name .req w\reg + x_\name .req x\reg +.endm + +.macro tzbytecnt param0:req,param1:req + rbit x_\param1, x_\param0 + cmp x_\param0, 0 + clz x_\param1, x_\param1 + mov w_\param0, 8 + lsr w_\param1, w_\param1, 3 + csel w_\param0, w_\param1, w_\param0, ne +.endm + +.macro write_deflate_icf param0:req,param1:req,param2:req,param3:req + orr w_\param1, w_\param1, w_\param3, lsl 19 + orr w_\param1, w_\param1, w_\param2, lsl 10 + str w_\param1, [x_\param0] +.endm + + .align 2 + .global gen_icf_map_h1_aarch64 + .type gen_icf_map_h1_aarch64, %function + + /* arguments */ + declare_generic_reg stream_param, 0,x + declare_generic_reg matches_icf_lookup_param, 1,x + declare_generic_reg input_size_param, 2,x + + declare_generic_reg param0, 0,x + declare_generic_reg param1, 1,x + declare_generic_reg param2, 2,x + declare_generic_reg param3, 3,x + + /* return */ + declare_generic_reg ret_val, 0,x + + /* variables */ + declare_generic_reg input_size, 3,x + declare_generic_reg next_in, 4,x + declare_generic_reg matches_icf_lookup, 6,x + declare_generic_reg hash_table, 7,x + declare_generic_reg end_in, 8,x + declare_generic_reg file_start, 9,x + declare_generic_reg hash_mask, 10,w + declare_generic_reg hist_size, 11,w + declare_generic_reg stream_saved, 12,x + declare_generic_reg literal_32, 13,w + declare_generic_reg literal_1, 14,w + declare_generic_reg dist, 15,w + + declare_generic_reg tmp_has_hist, 0,w + declare_generic_reg tmp_offset_hash_table, 1,x + declare_generic_reg tmp0, 0,x + declare_generic_reg tmp1, 1,x + declare_generic_reg tmp2, 2,x + declare_generic_reg tmp3, 3,x + declare_generic_reg tmp5, 5,x + +/* constant */ +.equ ISAL_LOOK_AHEAD, 288 +.equ SHORTEST_MATCH, 4 +.equ LEN_OFFSET, 254 + +/* mask */ +.equ mask_10bit, 1023 +.equ mask_lit_dist, 0x7800 + +/* offset of struct isal_zstream */ +.equ offset_next_in, 0 +.equ offset_avail_in, 8 +.equ offset_total_in, 12 +.equ offset_next_out, 16 +.equ offset_avail_out, 24 +.equ offset_total_out, 28 +.equ offset_hufftables, 32 +.equ offset_level, 40 +.equ offset_level_buf_size, 44 +.equ offset_level_buf, 48 +.equ offset_end_of_stream, 56 +.equ offset_flush, 58 +.equ offset_gzip_flag, 60 +.equ offset_hist_bits, 62 +.equ offset_state, 64 +.equ offset_state_block_end, 72 +.equ offset_state_dist_mask, 76 +.equ offset_state_has_hist, 135 + +/* offset of struct level_buf */ +.equ offset_hash_map_hash_table, 4712 + +/* +uint64_t gen_icf_map_h1_base(struct isal_zstream *stream, + struct deflate_icf *matches_icf_lookup, uint64_t input_size) +*/ + +gen_icf_map_h1_aarch64: + cmp input_size_param, (ISAL_LOOK_AHEAD-1) // 287 + bls .fast_exit + stp x29, x30, [sp, -16]! + + mov stream_saved, stream_param + mov matches_icf_lookup, matches_icf_lookup_param + mov x29, sp + + ldrb tmp_has_hist, [stream_saved, offset_state_has_hist] + mov tmp_offset_hash_table, offset_hash_map_hash_table + ldr end_in, [stream_saved, offset_next_in] + mov input_size, input_size_param + ldr hash_table, [stream_saved, offset_level_buf] + ldr w_file_start, [stream_saved, offset_total_in] + ldp hist_size, hash_mask, [stream_saved, offset_state_dist_mask] + add hash_table, hash_table, tmp_offset_hash_table + sub file_start, end_in, file_start + cbz tmp_has_hist, .igzip_no_hist + b .while_check1 + + .align 3 +.igzip_no_hist: + ldrb w_tmp1, [end_in] + add next_in, end_in, 1 + ldrh w_tmp0, [matches_icf_lookup] + bfi w_tmp0, w_tmp1, 0, 10 + strh w_tmp0, [matches_icf_lookup] + ldr w_tmp0, [matches_icf_lookup] + and w_tmp0, w_tmp0, mask_10bit + orr w_tmp0, w_tmp0, mask_lit_dist + str w_tmp0, [matches_icf_lookup], 4 + ldr w_tmp0, [end_in] + crc32cw w_tmp0, wzr, w_tmp0 + + and w_tmp5, w_tmp0, hash_mask + sub x_tmp1, end_in, file_start + mov w_tmp2, 1 + mov x_tmp0, 1 + strh w_tmp1, [hash_table, x_tmp5, lsl 1] + strb w_tmp2, [stream_saved, offset_state_has_hist] + b .while_check2 + +.while_check1: + mov next_in, end_in + mov x_tmp0, 0 + +.while_check2: + sub input_size, input_size, #288 + add end_in, end_in, input_size + cmp next_in, end_in + bcs .exit + mov literal_32, 32 + mov literal_1, 1 + b .while_loop + + .align 3 +.new_match_found: + clz w_tmp5, w_tmp2 + add w_tmp1, w_tmp0, LEN_OFFSET + sub w_tmp5, literal_32, w_tmp5 + cmp dist, 2 + sub w_tmp5, w_tmp5, #2 + bls .skip_compute_dist_icf_code + + lsl w_tmp3, literal_1, w_tmp5 + sub w_tmp3, w_tmp3, #1 + lsr w_tmp0, w_tmp2, w_tmp5 + and w_tmp3, w_tmp3, w_tmp2 + add w_tmp2, w_tmp0, w_tmp5, lsl 1 + +.skip_compute_dist_icf_code: + mov param0, matches_icf_lookup + write_deflate_icf param0,param1,param2,param3 + + add next_in, next_in, 1 + add matches_icf_lookup, matches_icf_lookup, 4 + cmp next_in, end_in + beq .save_with_exit + +.while_loop: + ldr w_tmp0, [next_in] + crc32cw w_tmp0, wzr, w_tmp0 + + and w_tmp0, w_tmp0, hash_mask + sub x_tmp1, next_in, file_start + lsl x_tmp0, x_tmp0, 1 + sub w_tmp2, w_tmp1, #1 + ldrh w_tmp3, [hash_table, x_tmp0] + strh w_tmp1, [hash_table, x_tmp0] + sub w_tmp2, w_tmp2, w_tmp3 + and w_tmp2, w_tmp2, hist_size + add dist, w_tmp2, 1 + ldr x_tmp0, [next_in] + sub x_tmp1, next_in, w_dist, uxtw + ldr x_tmp1, [x_tmp1] + eor x_tmp0, x_tmp1, x_tmp0 + tzbytecnt param0,param1 + + cmp w_tmp0, (SHORTEST_MATCH-1) + mov w_tmp3, 0 + bhi .new_match_found + + ldrb w_param1, [next_in] + mov x_param0, matches_icf_lookup + mov w_param3, 0 + mov w_param2, 0x1e + write_deflate_icf param0,param1,param2,param3 + + add next_in, next_in, 1 + add matches_icf_lookup, matches_icf_lookup, 4 + cmp next_in, end_in + bne .while_loop + +.save_with_exit: + ldr ret_val, [stream_saved, offset_next_in] + sub ret_val, next_in, ret_val + +.exit: + ldp x29, x30, [sp], 16 + ret + + .align 3 +.fast_exit: + mov ret_val, 0 + ret + .size gen_icf_map_h1_aarch64, .-gen_icf_map_h1_aarch64 |