summaryrefslogtreecommitdiffstats
path: root/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
parentInitial commit. (diff)
downloadceph-upstream/16.2.11+ds.tar.xz
ceph-upstream/16.2.11+ds.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S')
-rw-r--r--src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S194
1 files changed, 194 insertions, 0 deletions
diff --git a/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S b/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
new file mode 100644
index 000000000..13f9b087d
--- /dev/null
+++ b/src/isa-l/igzip/aarch64/igzip_set_long_icf_fg.S
@@ -0,0 +1,194 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a            /* assemble for the ARMv8-A architecture */
+ .text                    /* following statements go into the code section */
+ .align 2                 /* power-of-two alignment: 2^2 = 4 bytes */
+
+#include "lz0a_const_aarch64.h"
+#include "data_struct_aarch64.h"
+#include "huffman_aarch64.h"
+#include "bitbuf2_aarch64.h"
+#include "stdmac_aarch64.h"
+
+/*
+ * declare_generic_reg name, reg, default
+ *
+ * Creates three assembler aliases (.req) for general-purpose register
+ * number <reg>:
+ *   x_<name>  ->  x<reg>            64-bit view
+ *   w_<name>  ->  w<reg>            32-bit view
+ *   <name>    ->  <default><reg>    <default> is the width prefix, w or x
+ */
+
+.macro declare_generic_reg name:req, reg:req, default:req
+	x_\name	.req	x\reg
+	w_\name	.req	w\reg
+	\name	.req	\default\()\reg
+.endm
+
+ .text
+ .align 2
+ .global set_long_icf_fg_aarch64
+ .type set_long_icf_fg_aarch64, %function
+
+/*
+void set_long_icf_fg_aarch64(uint8_t * next_in, uint64_t processed, uint64_t input_size,
+ struct deflate_icf *match_lookup)
+
+ Arguments arrive in x0 (next_in), x1 (processed), x2 (input_size) and
+ x3 (match_lookup). Nothing is returned; the 32-bit entries that
+ match_lookup points to are rewritten in place.
+
+ The routine steps through one 4-byte match_lookup entry per input byte
+ while next_in is below next_in + processed. An entry whose low 10-bit
+ length field is below LEN_OFFSET + 8 (262) is skipped. Otherwise the
+ distance is rebuilt from the entry (table[bits 18:10] + bits 31:19) and
+ compare_aarch64 is run on next_in + 8 - distance versus next_in + 8,
+ limited by end_in, where end_in = min(next_in + input_size,
+ next_in + processed + ISAL_LOOK_AHEAD).
+ NOTE(review): compare_aarch64 comes from stdmac_aarch64.h and is
+ presumed to leave the number of equal bytes in match_length - confirm.
+
+ If that count + 262 exceeds the stored length field, the entry and the
+ entries after it are overwritten with the same distance fields and a
+ length that drops by one per entry (clamped to LEN_MAX_CONST when
+ written). This continues while the running length is greater than
+ max(length field of the next entry, LEN_OFFSET + SHORTEST_MATCH - 1).
+*/
+
+ /*
+  * Register aliases. Each declare_generic_reg line defines <name>,
+  * w_<name> and x_<name> for one register number; the last column selects
+  * whether the bare <name> is the 32-bit (w) or the 64-bit (x) view.
+  * x19-x23 are saved and restored by the function prologue and epilogue.
+  */
+
+ /* arguments, as they arrive in x0-x3 */
+ declare_generic_reg next_in_param,      0,x
+ declare_generic_reg processed_param,    1,x
+ declare_generic_reg input_size_param,   2,x
+ declare_generic_reg match_lookup_param, 3,x
+
+ /* x0-x2 again, named as the operands handed to compare_aarch64 */
+ declare_generic_reg param0,             0,x
+ declare_generic_reg param1,             1,x
+ declare_generic_reg param2,             2,x
+
+ /* local variables */
+ declare_generic_reg len,                7,w   /* length field (low 10 bits) of the current entry */
+ declare_generic_reg dist_code,          8,w   /* bits 18:10 of the current entry */
+ declare_generic_reg shortest_match_len, 9,w   /* holds LEN_OFFSET + SHORTEST_MATCH - 1 */
+ declare_generic_reg len_max,            10,w  /* holds LEN_MAX_CONST */
+ declare_generic_reg dist_extra,         11,w  /* bits 31:19 of the current entry */
+ declare_generic_reg const_8,            13,x  /* holds the constant 8 */
+ declare_generic_reg next_in,            20,x  /* current input position */
+ declare_generic_reg dist_start,         21,x  /* base address of the distance table */
+ declare_generic_reg end_processed,      22,x  /* next_in + processed */
+ declare_generic_reg end_in,             23,x  /* capped end of readable input */
+ declare_generic_reg match_lookup,       19,x  /* current match_lookup entry */
+
+ /* result and scratch registers passed to compare_aarch64 */
+ declare_generic_reg match_length,       4,w
+ declare_generic_reg tmp0,               5,w
+ declare_generic_reg tmp1,               6,w
+
+/* constants */
+
+/* length-field thresholds */
+.equ	LEN_OFFSET,		254	/* added to a byte count before it is compared with a length field */
+.equ	SHORTEST_MATCH,		4	/* LEN_OFFSET + SHORTEST_MATCH - 1 is the floor used by the inner loop */
+.equ	LEN_MAX_CONST,		512	/* upper clamp for a length field written back */
+
+/* input window and table size */
+.equ	ISAL_LOOK_AHEAD,	288	/* end_in is capped at end_processed + this many bytes */
+.equ	DIST_START_SIZE,	128	/* size in bytes of the 32-word distance table */
+
+/*
+ * set_long_icf_fg_aarch64 (AAPCS64)
+ *
+ * In:   x0 = next_in, x1 = processed, x2 = input_size, x3 = match_lookup
+ * Out:  nothing; 32-bit entries at match_lookup are rewritten in place
+ * Regs: x0-x2, w4-w11 and x13 are used as scratch; x19-x23 and x29/x30
+ *       are saved in a 64-byte frame and restored before returning.
+ *       NOTE(review): compare_aarch64 is defined in stdmac_aarch64.h; it is
+ *       assumed to touch only the registers passed to it - confirm.
+ *
+ * The distance table is only read, so it is indexed in place in .rodata
+ * through .data_dist_start; no stack copy and no library call are needed.
+ */
+set_long_icf_fg_aarch64:
+	/* frame: x29/x30 at [sp], x19-x23 at [sp+16] .. [sp+55] */
+	stp	x29, x30, [sp, -64]!
+	add	x29, sp, 0
+	stp	x19, x20, [sp, 16]
+	stp	x21, x22, [sp, 32]
+	str	x23, [sp, 48]
+
+	/* move the arguments out of x0-x3, which are reused as scratch below */
+	add	end_processed, next_in_param, processed_param
+	mov	next_in, next_in_param
+	add	end_in, next_in_param, input_size_param
+	mov	match_lookup, match_lookup_param
+
+	/* dist_start = address of the read-only distance table */
+	adrp	dist_start, .data_dist_start
+	add	dist_start, dist_start, :lo12:.data_dist_start
+
+	/* end_in = min(end_in, end_processed + ISAL_LOOK_AHEAD), unsigned */
+	add	x_tmp0, end_processed, ISAL_LOOK_AHEAD	// 288
+	cmp	end_in, x_tmp0
+	csel	end_in, end_in, x_tmp0, cc
+	cmp	next_in, end_processed
+	bcs	.done				// processed == 0: nothing to do
+
+	/* loop-invariant constants */
+	mov	const_8, 8
+	mov	len_max, LEN_MAX_CONST		// 512
+	mov	shortest_match_len, (LEN_OFFSET + SHORTEST_MATCH - 1)
+	b	.while_outer_loop
+
+	.align 2
+.while_outer_check:
+	/* advance one input byte and one 4-byte entry */
+	add	next_in, next_in, 1
+	add	match_lookup, match_lookup, 4
+	cmp	end_processed, next_in
+	bls	.done
+
+.while_outer_loop:
+	ldrh	len, [match_lookup]
+	and	len, len, LIT_LEN_MASK		// 1023
+	cmp	len, (LEN_OFFSET + 8 - 1)	// 261
+	bls	.while_outer_check		// length field below 262: leave entry alone
+
+	/* unpack the distance code (bits 18:10) and the extra bits (bits 31:19) */
+	ldr	dist_code, [match_lookup]
+	add	x1, next_in, 8			// param1 = next_in + 8
+	ldrh	dist_extra, [match_lookup, 2]
+	sub	w2, w_end_in, w1		// param2 = end_in - (next_in + 8)
+	ubfx	x_dist_code, x_dist_code, 10, 9
+	ubfx	x_dist_extra, x_dist_extra, 3, 13
+	uxtw	x0, dist_code
+	ldr	w0, [dist_start, x0, lsl 2]	// table[dist_code]
+	add	w0, dist_extra, w0		// dist = table[dist_code] + dist_extra
+	sub	x0, const_8, x0
+	add	x0, next_in, x0			// param0 = next_in + 8 - dist
+
+	compare_aarch64	param0,param1,param2,match_length,tmp0,tmp1
+	mov	w0, w_match_length
+
+	add	w0, w0, (LEN_OFFSET + 8)	// 262
+	cmp	w0, len
+	bls	.while_outer_check		// not longer than the stored length
+
+	/* w2 = distance fields, positioned to be OR-ed with a new length */
+	lsl	w2, dist_extra, 19
+	orr	w2, w2, dist_code, lsl 10
+
+	.align 3
+.while_inner_loop:
+	cmp	w0, LEN_MAX_CONST		// 512
+	add	next_in, next_in, 1
+	csel	w1, w0, len_max, ls		// w1 = min(w0, 512)
+	sub	w0, w0, #1			// running length drops by one per entry
+	orr	w1, w1, w2
+	str	w1, [match_lookup]		// overwrite entry: length | distance fields
+	ldrh	w1, [match_lookup, 4]!		// step to the next entry, load its low half
+
+	and	w1, w1, LIT_LEN_MASK		// 1023
+	cmp	w1, (LEN_OFFSET + SHORTEST_MATCH - 1)	// 257
+	csel	w1, w1, shortest_match_len, cs	// w1 = max(next length field, 257)
+	cmp	w1, w0
+	bcc	.while_inner_loop		// continue while w0 > w1
+
+	/* same advance as .while_outer_check, with the branch sense inverted */
+	add	next_in, next_in, 1
+	add	match_lookup, match_lookup, 4
+	cmp	end_processed, next_in
+	bhi	.while_outer_loop
+
+.done:
+	ldp	x19, x20, [sp, 16]
+	ldp	x21, x22, [sp, 32]
+	ldr	x23, [sp, 48]
+	ldp	x29, x30, [sp], 64
+	ret
+	.size	set_long_icf_fg_aarch64, .-set_long_icf_fg_aarch64
+
+ .section .rodata
+	.align 3			/* 2^3 = 8-byte alignment */
+/*
+ * Distance table read by set_long_icf_fg_aarch64 through .data_dist_start:
+ * 32 words (128 bytes). The first 30 values equal the base distances of
+ * deflate distance codes 0-29 (RFC 1951, section 3.2.5); the last two
+ * words are zero.
+ */
+	.set .data_dist_start,. + 0
+.real_data_dist_start:
+	.word	0x0001, 0x0002, 0x0003, 0x0004	/* entries  0- 3 */
+	.word	0x0005, 0x0007, 0x0009, 0x000d	/* entries  4- 7 */
+	.word	0x0011, 0x0019, 0x0021, 0x0031	/* entries  8-11 */
+	.word	0x0041, 0x0061, 0x0081, 0x00c1	/* entries 12-15 */
+	.word	0x0101, 0x0181, 0x0201, 0x0301	/* entries 16-19 */
+	.word	0x0401, 0x0601, 0x0801, 0x0c01	/* entries 20-23 */
+	.word	0x1001, 0x1801, 0x2001, 0x3001	/* entries 24-27 */
+	.word	0x4001, 0x6001, 0x0000, 0x0000	/* entries 28-29, then two zero words */