summaryrefslogtreecommitdiffstats
path: root/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
diff options
context:
space:
mode:
Diffstat (limited to 'src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S')
-rw-r--r--src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S397
1 files changed, 397 insertions, 0 deletions
diff --git a/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S b/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
new file mode 100644
index 000000000..bb2baa22f
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/isal_deflate_icf_finish_hash_hist.S
@@ -0,0 +1,397 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crc
+ .text
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+ * declare_generic_reg name, reg, default
+ *
+ * Defines three register aliases for register number <reg>:
+ *   x_<name>  ->  x<reg>           (64-bit view)
+ *   w_<name>  ->  w<reg>           (32-bit view)
+ *   <name>    ->  <default><reg>   (<default> is the width prefix, x or w,
+ *                                   that the bare name resolves to)
+ * All three arguments are required.
+ */
+.macro declare_generic_reg name:req,reg:req,default:req
+ x_\name .req x\reg
+ w_\name .req w\reg
+ \name .req \default\reg
+.endm
+
+/*
+void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream);
+*/
+
+/*
+ * Layout constants. All values are byte offsets written out by hand; they
+ * have to agree with the layout of the corresponding C structures.
+ */
+
+/* struct isal_zstream */
+.set offset_next_in, 0
+.set offset_avail_in, 8
+.set offset_total_in, 12
+.set offset_next_out, 16
+.set offset_avail_out, 24
+.set offset_total_out, 28
+.set offset_hufftables, 32
+.set offset_level, 40
+.set offset_level_buf_size, 44
+.set offset_level_buf, 48
+.set offset_end_of_stream, 56
+.set offset_flush, 58
+.set offset_gzip_flag, 60
+.set offset_hist_bits, 62
+.set offset_state, 64
+.set offset_state_block_end, 72
+.set offset_state_state, 84 /* = offset_state + offset_state_of_zstate */
+.set offset_state_has_hist, 135
+
+/* struct level_buf */
+.set offset_encode_tables, 0
+.set offset_hist, 2176
+.set offset_hist_d_hist, 2176
+.set offset_hist_ll_hist, 2296
+.set offset_deflate_hdr_count, 4348
+.set offset_deflate_hdr_extra_bits, 4352
+.set offset_deflate_hdr, 4356
+.set offset_icf_buf_next, 4688
+.set offset_icf_buf_avail_out, 4696
+.set offset_icf_buf_start, 4704
+.set offset_hash8k, 4712
+.set offset_hash_hist, 4712
+
+/* struct isal_zstate: offsets relative to (stream + offset_state) */
+.set offset_dist_mask, 12
+.set offset_hash_mask, 16
+.set offset_state_of_zstate, 20
+
+/* other constants */
+.set ISAL_LOOK_AHEAD, 288
+
+ /*
+  * Register aliases. Every declare_generic_reg line defines <name>,
+  * w_<name> and x_<name>; the last argument selects the width that the
+  * bare <name> refers to.
+  */
+
+ /* incoming argument */
+ declare_generic_reg stream, 0, x
+
+ /*
+  * x0-x6: scratch, operands of compare_max_258_bytes and arguments of the
+  * local helper routines. param0 shares x0 with stream.
+  */
+ declare_generic_reg param0, 0, x
+ declare_generic_reg param1, 1, x
+ declare_generic_reg param2, 2, x
+ declare_generic_reg param3, 3, x
+ declare_generic_reg param4, 4, x
+ declare_generic_reg param5, 5, x
+ declare_generic_reg param6, 6, x
+
+ /*
+  * Locals kept in caller-saved registers. They stay live across the bl
+  * calls to the write_deflate_icf* helpers, so those helpers must not
+  * touch them. next_out_iter shares x5 with param5.
+  * NOTE(review): file_start lives in x18, which some platforms reserve;
+  * confirm this is acceptable for every supported target.
+  */
+ declare_generic_reg next_out_iter, 5, x
+ declare_generic_reg dist, 7, w
+ declare_generic_reg next_in, 8, x
+ declare_generic_reg literal_code, 9, w
+ declare_generic_reg next_out, 10, x
+ declare_generic_reg end_in, 11, x
+ declare_generic_reg end_out, 12, x
+ declare_generic_reg level_buf, 13, x
+ declare_generic_reg last_seen, 14, x
+ declare_generic_reg stream_saved, 15, x
+ declare_generic_reg file_start, 18, x
+
+ /* Locals kept in callee-saved registers (x19-x28, spilled by the prologue) */
+ declare_generic_reg hash_mask, 19, w
+ declare_generic_reg hist_size, 20, w
+ declare_generic_reg start_in, 21, x
+ declare_generic_reg start_out, 22, x
+ declare_generic_reg state, 23, x
+ declare_generic_reg dist_inc, 24, w
+
+ declare_generic_reg tmp0, 25, x
+ declare_generic_reg tmp1, 26, x
+ declare_generic_reg tmp2, 27, x
+ declare_generic_reg tmp3, 28, x
+
+/*
+ * write_deflate_icf_constprop
+ *
+ * In:    x0 = address of the 32-bit record to write
+ *        w1 = value for the low field (only bits 9:0 are used)
+ * Out:   [x0] = w1[9:0] | (30 << 10); bits 31:19 are zero
+ * Clobb: w1
+ *
+ * Every bit of the record is determined by the inputs, so the word is
+ * assembled in a register and written with a single store instead of a
+ * sequence of partial loads and stores on the destination.
+ * NOTE(review): 30 is presumably the "no distance" marker used for
+ * literal records; confirm against the C definition.
+ */
+ .align 2
+ .type write_deflate_icf_constprop, %function
+write_deflate_icf_constprop:
+ and w1, w1, 0x3ff // keep the 10-bit low field
+ orr w1, w1, 0x7800 // 9-bit middle field = 30 (30 << 10)
+ str w1, [x0]
+ ret
+ .size write_deflate_icf_constprop, .-write_deflate_icf_constprop
+
+/*
+ * write_deflate_icf
+ *
+ * In:    x0 = address of the 32-bit record to write
+ *        w1 = low field    (bits 9:0 used)   -> record bits  9:0
+ *        w2 = middle field (bits 8:0 used)   -> record bits 18:10
+ *        w3 = high field   (bits 12:0 used)  -> record bits 31:19
+ * Out:   [x0] = packed record
+ * Clobb: w1
+ *
+ * The three fields cover all 32 bits, so the previous contents of the
+ * record never survive; the word is packed in w1 and stored once.
+ */
+ .align 2
+ .type write_deflate_icf, %function
+write_deflate_icf:
+ bfi w1, w2, 10, 9 // w1[18:10] = w2[8:0]; w1[9:0] is kept
+ bfi w1, w3, 19, 13 // w1[31:19] = w3[12:0]
+ str w1, [x0]
+ ret
+ .size write_deflate_icf, .-write_deflate_icf
+
+/*
+ * update_state
+ *
+ * Writes the current input/output positions back to the stream and to its
+ * level buffer. Reached by a tail call (b) from
+ * isal_deflate_icf_finish_hash_hist_aarch64.
+ *
+ * In:    x0 = stream
+ *        x1 = start_in   (input position when processing started)
+ *        x2 = next_in    (current input position)
+ *        x3 = end_in     (end of input)
+ *        x4 = ignored    (overwritten with the level buffer pointer)
+ *        x5 = next_out   (current output position)
+ *        x6 = end_out    (end of output)
+ * Out:   nothing in registers
+ * Clobb: x1, x3, x4, x6, x7, flags
+ *
+ * Stores performed:
+ *   byte  [stream + offset_state_has_hist]  = 1, only if next_in - start_in > 0
+ *   [stream + offset_next_in]               = next_in
+ *   [stream + offset_avail_in]              = end_in - next_in (low 32 bits)
+ *   [stream + offset_total_in]             += next_in - start_in (low 32 bits)
+ *   [stream + offset_state_block_end]       = the new total_in value
+ *   [level_buf + offset_icf_buf_next]       = next_out
+ *   [level_buf + offset_icf_buf_avail_out]  = (end_out - next_out) >> 2
+ */
+ .align 2
+ .type update_state, %function
+update_state:
+ ldr x4, [x0, offset_level_buf] // x4 = stream->level_buf
+ sub x7, x2, x1 // x7 = bytes consumed in this call
+ cmp x7, 0
+ ble .Lupdate_state_no_progress // signed test: skip unless consumed > 0
+ mov w1, 1
+ strb w1, [x0, offset_state_has_hist]
+.Lupdate_state_no_progress:
+ ldr w1, [x0, offset_total_in]
+ str x2, [x0, offset_next_in]
+ sub x3, x3, x2 // remaining input
+ add w1, w1, w7 // total_in + consumed
+ stp w3, w1, [x0, offset_avail_in] // avail_in and total_in are adjacent words
+ str w1, [x0, offset_state_block_end]
+ sub x6, x6, x5 // remaining output, in bytes
+ asr x6, x6, 2 // ... divided by 4 (arithmetic shift)
+ str x5, [x4, offset_icf_buf_next]
+ str x6, [x4, offset_icf_buf_avail_out]
+ ret
+ .size update_state, .-update_state
+
+/*
+ * void isal_deflate_icf_finish_hash_hist_aarch64(struct isal_zstream *stream)
+ *
+ * In:    x0 = stream
+ * Out:   no return value; results are written through stream and its
+ *        level buffer.
+ * Stack: 96-byte frame (x29/x30 and x19-x28); not allocated on the
+ *        avail_in == 0 fast path.
+ *
+ * Outline:
+ *   - avail_in == 0: if the 32-bit word at stream + offset_end_of_stream is
+ *     non-zero, store 2 at stream + offset_state_state; return.
+ *   - First loop, while more than 3 input bytes remain: hash the next four
+ *     bytes (crc32cw, masked with hash_mask), swap the 16-bit entry of the
+ *     hash table for the current position and derive a distance from the old
+ *     entry. If distance - 1 is below hist_size, compare_max_258_bytes
+ *     measures the match; a result above 3 emits a length/distance record,
+ *     anything else emits a literal record.
+ *   - Second loop: each remaining input byte is emitted as a literal.
+ *   - Either loop stops early when the output area is full; in that case 2
+ *     is stored at state + offset_state_of_zstate.
+ *   - Exit: the cursors are handed to update_state with a tail call.
+ *
+ * Each record is 4 bytes; the ll_hist / d_hist counters in the level buffer
+ * are incremented for every record written.
+ *
+ * The helpers reached with bl must leave x5 and x7-x15, x18 untouched,
+ * because live values are kept there across the calls.
+ * NOTE(review): the state value 2 is presumably the "create header" state
+ * of the C enum; confirm.
+ */
+ .align 2
+ .global isal_deflate_icf_finish_hash_hist_aarch64
+ .type isal_deflate_icf_finish_hash_hist_aarch64, %function
+isal_deflate_icf_finish_hash_hist_aarch64:
+ ldr w_end_in, [stream, offset_avail_in] // 8; turned into the end pointer below
+ cbz w_end_in, .stream_not_available
+
+ // Prologue: save frame record and the callee-saved registers used as locals.
+ stp x29, x30, [sp, -96]!
+ add x29, sp, 0
+ stp x19, x20, [sp, 16]
+ stp x21, x22, [sp, 32]
+ stp x23, x24, [sp, 48]
+ stp x25, x26, [sp, 64]
+ stp x27, x28, [sp, 80]
+
+ mov stream_saved, stream // x0 is reused as scratch from here on
+ ldr level_buf, [stream, offset_level_buf] // 48
+ ldr start_in, [stream, offset_next_in] // 0
+ ldr start_out, [level_buf, offset_icf_buf_next] // 4688
+ add state, stream, offset_state // 64
+ ldr end_out, [level_buf, offset_icf_buf_avail_out] // 4696
+ mov next_in, start_in
+ ldr w_file_start, [stream, offset_total_in] // 12
+ mov tmp0, offset_hash_hist // 4712
+ add last_seen, level_buf, tmp0 // base of the 16-bit hash table
+ add end_in, start_in, w_end_in, uxtw // end_in = start_in + avail_in
+ and end_out, end_out, -4 // whole 4-byte records only
+ mov next_out, start_out
+ ldp hist_size, hash_mask, [state, offset_dist_mask] // 12 and 16
+ sub file_start, start_in, file_start // start_in - total_in
+ add end_out, start_out, end_out
+ mov next_out_iter, next_out
+
+ add x0, next_in, 3
+ cmp end_in, x0 // end_in <= next_in + 3: fewer than 4 bytes left
+ bls .while_first_end
+
+ .p2align 3
+.while_first:
+ cmp next_out, end_out
+ bcs .save_and_update_state // output area full
+ ldr literal_code, [next_in] // next four input bytes
+ mov w0, literal_code
+ crc32cw w0, wzr, w0
+ and w0, w0, hash_mask
+ sub x2, next_in, file_start // current position relative to file_start
+ lsl x0, x0, 1 // table entries are 2 bytes wide
+ ldrh dist, [last_seen, x0] // previous position stored for this hash
+ strh w2, [last_seen, x0] // replace it with the current position
+ sub w2, w2, dist
+ and w_dist, w2, 65535 // distance, modulo 2^16
+ sub dist_inc, dist, #1
+ cmp dist_inc, hist_size
+ bcs .skip_compare258 // unsigned: also rejects dist == 0
+
+ sub w2, w_end_in, w_next_in // param2 = bytes left; the w write clears x2[63:32]
+ mov x1, next_in // param1 = current data
+ sub x0, next_in, w_dist, uxth // param0 = candidate match
+
+ compare_max_258_bytes param0,param1,param2,tmp2,tmp0,tmp1
+ mov w0, w_tmp2 // match length
+ and w2, w0, 65535
+
+ cmp w2, 3
+ bhi .while_first_match_length // length >= 4: emit a match
+
+.skip_compare258:
+ and literal_code, literal_code, 255 // get_lit_icf_code
+ add next_in, next_in, 1
+ mov w1, literal_code
+ mov x0, next_out
+ add x_literal_code, level_buf, literal_code, uxtb 2 // level_buf->hist.ll_hist
+
+ ldr w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_literal_code, offset_hist_ll_hist] // 2296
+
+ bl write_deflate_icf_constprop // write_deflate_icf
+
+ add next_out, next_out, 4
+.while_first_check:
+ add x0, next_in, 3
+ mov next_out_iter, next_out // keep x5 in step with next_out
+ cmp end_in, x0
+ bhi .while_first
+
+.while_first_end:
+ cmp next_in, end_in
+ bcs .while_2nd_end // no input left; flags are reused at the target
+
+ cmp next_out, end_out
+ bcc .while_2nd_handle
+ b .save_and_update_state_2nd
+
+ // Second loop: the output cursor is next_out_iter (x5).
+ .p2align 2
+.while_2nd:
+ cmp end_out, next_out_iter
+ bls .save_and_update_state_2nd // output area full
+
+.while_2nd_handle:
+ ldrb w2, [next_in], 1
+ mov x0, next_out_iter
+ add next_out_iter, next_out_iter, 4
+ mov w1, w2
+ add x2, level_buf, w2, uxtb 2
+
+ ldr w_tmp0, [x2, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x2, offset_hist_ll_hist] // 2296
+
+ bl write_deflate_icf_constprop
+ cmp end_in, next_in
+ bne .while_2nd
+
+ // next_in == end_in here
+ b .end_of_stream_check_and_exit
+
+ // In: w0 = match length, w2 = w0 & 0xffff, dist and dist_inc from the loop.
+ .p2align 2
+.while_first_match_length:
+ and w0, w0, 65535
+ mov w3, 0 // extra bits default to 0
+ add w1, w0, 254 // get_len_icf_code
+ cmp dist, 2
+ bhi .compute_dist_icf_code // for dist <= 2 the code is dist - 1
+
+.while_first_match_length_end:
+ ubfiz x_tmp2, x1, 2, 17 // length code * 4
+ add x_tmp1, level_buf, dist_inc, uxtw 2 // level_buf + distance code * 4
+ add x_tmp2, level_buf, x_tmp2
+
+ add next_in, next_in, w2, uxth // skip the matched bytes
+ mov w2, dist_inc
+
+ ldr w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_tmp2, offset_hist_ll_hist] // 2296
+
+ mov x0, next_out
+ ldr w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176
+ add w_tmp0, w_tmp0, 1
+ str w_tmp0, [x_tmp1, offset_hist_d_hist] // 2176
+
+ bl write_deflate_icf // (next_out, length code, distance code, extra bits)
+ add next_out, next_out, 4
+ b .while_first_check
+
+// compute_dist_icf_code
+// In:  dist_inc = dist - 1
+// Out: dist_inc = distance code, w3 = extra bits; w0 and w4 are clobbered
+ .p2align 2
+.compute_dist_icf_code:
+ clz w3, dist_inc
+ mov w0, 30
+ sub w0, w0, w3 // number of extra bits = 30 - clz(dist - 1)
+
+ mov w3, 1
+ lsl w3, w3, w0
+ sub w3, w3, #1
+ and w3, w3, dist_inc // extra bits = low bits of dist - 1
+ lsl w4, w0, 1
+ lsr dist_inc, dist_inc, w0
+ add dist_inc, dist_inc, w4 // code = remaining high bits + 2 * number of extra bits
+ b .while_first_match_length_end
+
+ // Reached from .while_first_end with the flags of cmp next_in, end_in.
+.while_2nd_end:
+ beq .end_of_stream_check_and_exit
+ mov param6, end_out // next_in is past end_in: leave the state field alone
+ b .update_state
+
+.end_of_stream_check_and_exit:
+ // The 32-bit load covers offsets 56..59, i.e. offset_end_of_stream and offset_flush.
+ ldr w_tmp0, [stream_saved, offset_end_of_stream] // 56
+ cbz w_tmp0, .update_state_2nd
+ b .save_and_update_state_2nd
+
+ .p2align 3
+.save_and_update_state_2nd:
+ mov w_tmp0, 2
+ str w_tmp0, [state, offset_state_of_zstate] // 20
+.update_state_2nd:
+ mov param6, end_out // param5 (x5) already holds next_out_iter
+ b .update_state
+
+ .p2align 2
+.save_and_update_state:
+ mov param6, end_out
+ mov param5, next_out
+ mov w_tmp0, 2
+ str w_tmp0, [state, offset_state_of_zstate] // 20
+.update_state:
+ // start_out and start_in live in callee-saved registers, so the
+ // arguments are copied out before those registers are restored.
+ mov param4, start_out
+ mov param1, start_in
+ mov param3, end_in
+ mov param2, next_in
+ mov param0, stream_saved
+
+ ldp x19, x20, [sp, 16]
+ ldp x21, x22, [sp, 32]
+ ldp x23, x24, [sp, 48]
+ ldp x25, x26, [sp, 64]
+ ldp x27, x28, [sp, 80]
+ ldp x29, x30, [sp], 96
+
+ b update_state // tail call; update_state returns to our caller
+
+ // avail_in == 0: nothing to process and no frame was set up.
+ .p2align 2
+.stream_not_available:
+ ldr w1, [stream, offset_end_of_stream] // 56; also covers offset_flush (58)
+ cbz w1, .done
+
+ mov w1, 2
+ str w1, [stream, offset_state_state] // 84
+.done:
+ ret
+
+ .size isal_deflate_icf_finish_hash_hist_aarch64, .-isal_deflate_icf_finish_hash_hist_aarch64