Diffstat (limited to 'src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S')
 src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S (new file, mode -rw-r--r--) | 176
 1 file changed, 176 insertions(+), 0 deletions(-)
diff --git a/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S b/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S
new file mode 100644
index 000000000..98cf12928
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S
@@ -0,0 +1,176 @@
+########################################################################
+#  Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+#  Redistribution and use in source and binary forms, with or without
+#  modification, are permitted provided that the following conditions
+#  are met:
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer.
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in
+#      the documentation and/or other materials provided with the
+#      distribution.
+#    * Neither the name of Arm Corporation nor the names of its
+#      contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.
+#
+#  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+#  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+#  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+#  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+#  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+#  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+#  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+#  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+#  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+#  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+#  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+        .arch armv8-a+crc+crypto
+        .text
+        .align  3
+        .global crc32_gzip_refl_hw_fold
+        .type   crc32_gzip_refl_hw_fold, %function
+
+/* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */
+
+w_seed          .req    w0
+w_crc           .req    w0
+x_buf           .req    x1
+x_len           .req    x2
+
+x_buf_loop_end  .req    x10
+x_buf_iter      .req    x10
+
+x_tmp           .req    x15
+w_tmp           .req    w15
+
+d_c0            .req    d3
+d_c1            .req    d1
+v_c0            .req    v3
+v_c1            .req    v1
+crc32_gzip_refl_hw_fold:
+        mvn     w_seed, w_seed
+        cmp     x_len, 1023
+        mov     x_buf_iter, x_buf
+        bls     .loop_fold_end
+
+        sub     x_buf_loop_end, x_len, #1024
+        and     x_buf_loop_end, x_buf_loop_end, -1024
+        add     x_buf_loop_end, x_buf_loop_end, 1024
+        add     x_buf_loop_end, x_buf, x_buf_loop_end
+
+        mov     x_tmp, 0x819b
+        movk    x_tmp, 0xb486, lsl 16
+        fmov    d_c0, x_tmp
+
+        mov     x_tmp, 0x8617
+        movk    x_tmp, 0x7627, lsl 16
+        fmov    d_c1, x_tmp
+
+x_in64  .req    x3
+w_crc0  .req    w0
+w_crc1  .req    w4
+w_crc2  .req    w5
+
+d_crc0  .req    d4
+d_crc1  .req    d5
+v_crc0  .req    v4
+v_crc1  .req    v5
+        .align  3
+.loop_fold:
+        add     x9, x_buf, 336
+        mov     x_in64, x_buf
+        mov     w_crc1, 0
+        mov     w_crc2, 0
+
+        .align  3
+.loop_for:
+        ldr     x8, [x_in64]
+        ldr     x7, [x_in64, 336]
+        ldr     x6, [x_in64, 672]
+
+        add     x_in64, x_in64, 8
+        cmp     x_in64, x9
+
+        crc32x  w_crc0, w_crc0, x8
+        crc32x  w_crc1, w_crc1, x7
+        crc32x  w_crc2, w_crc2, x6
+        bne     .loop_for
+
+        uxtw    x_tmp, w_crc0
+        fmov    d_crc0, x_tmp
+        pmull   v_crc0.1q, v_crc0.1d, v_c0.1d
+
+        uxtw    x_tmp, w_crc1
+        fmov    d_crc1, x_tmp
+        pmull   v_crc1.1q, v_crc1.1d, v_c1.1d
+
+        ldr     x_tmp, [x_buf, 1008]
+        crc32x  w_crc2, w_crc2, x_tmp
+
+        fmov    x_tmp, d_crc0
+        crc32x  w_crc0, wzr, x_tmp
+
+        fmov    x_tmp, d_crc1
+        crc32x  w_crc1, wzr, x_tmp
+
+        eor     w_crc0, w_crc0, w_crc1
+        eor     w_crc0, w_crc0, w_crc2
+
+        ldr     x_tmp, [x_buf, 1016]
+        crc32x  w_crc0, w_crc0, x_tmp
+
+        add     x_buf, x_buf, 1024
+        cmp     x_buf_loop_end, x_buf
+        bne     .loop_fold
+
+        and     x_len, x_len, 1023
+
+x_buf_loop_size8_end    .req    x3
+.loop_fold_end:
+        cmp     x_len, 7
+        bls     .size_4
+
+        sub     x_buf_loop_size8_end, x_len, #8
+        and     x_buf_loop_size8_end, x_buf_loop_size8_end, -8
+        add     x_buf_loop_size8_end, x_buf_loop_size8_end, 8
+        add     x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end
+
+        .align  3
+.loop_size_8:
+        ldr     x_tmp, [x_buf_iter], 8
+        crc32x  w_crc, w_crc, x_tmp
+
+        cmp     x_buf_iter, x_buf_loop_size8_end
+        bne     .loop_size_8
+
+        and     x_len, x_len, 7
+.size_4:
+        cmp     x_len, 3
+        bls     .size_2
+
+        ldr     w_tmp, [x_buf_iter], 4
+        crc32w  w_crc, w_crc, w_tmp
+
+        sub     x_len, x_len, #4
+.size_2:
+        cmp     x_len, 1
+        bls     .size_1
+
+        ldrh    w_tmp, [x_buf_iter], 2
+        crc32h  w_crc, w_crc, w_tmp
+
+        sub     x_len, x_len, #2
+.size_1:
+        cbz     x_len, .done
+
+        ldrb    w_tmp, [x_buf_iter]
+        crc32b  w_crc, w_crc, w_tmp
+
+.done:
+        mvn     w_crc, w_crc
+        ret
+
+        .size   crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold
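
What the new routine computes: the reflected CRC32 used by gzip/zlib (polynomial 0xEDB88320), via the ARMv8 CRC32 instructions. For buffers of 1024 bytes or more it works one 1024-byte block at a time, splitting each block into three 336-byte lanes driven by independent crc32x chains to hide instruction latency. The partial CRCs of lanes 0 and 1 are then shifted over the bytes that follow them with a PMULL carry-less multiply by the precomputed constants 0xb486819b and 0x76278617 (x^k mod P factors), reduced with a crc32x against wzr, XORed with lane 2, and the block's last 16 bytes are absorbed directly. Whatever remains after the block loop is consumed in 8-, 4-, 2- and 1-byte steps.

The C sketch below is a minimal byte-serial model of that contract, useful as a test oracle; it is not part of the patch. The name crc32_gzip_refl_model is hypothetical, the code assumes a little-endian AArch64 target with the ACLE CRC32 intrinsics (<arm_acle.h>, compiled with -march=armv8-a+crc), and it deliberately mirrors only the tail path of the assembly, omitting the three-lane PMULL fold.

    /* Hypothetical scalar model of crc32_gzip_refl_hw_fold (illustration only). */
    #include <arm_acle.h>
    #include <stdint.h>
    #include <string.h>

    uint32_t crc32_gzip_refl_model(uint32_t seed, const unsigned char *buf,
                                   uint64_t len)
    {
            uint32_t crc = ~seed;           /* the asm pre-inverts the seed (mvn) */

            while (len >= 8) {              /* mirrors .loop_size_8 */
                    uint64_t v;
                    memcpy(&v, buf, 8);     /* unaligned-safe 8-byte load */
                    crc = __crc32d(crc, v);
                    buf += 8;
                    len -= 8;
            }
            if (len >= 4) {                 /* mirrors .size_4 */
                    uint32_t v;
                    memcpy(&v, buf, 4);
                    crc = __crc32w(crc, v);
                    buf += 4;
                    len -= 4;
            }
            if (len >= 2) {                 /* mirrors .size_2 */
                    uint16_t v;
                    memcpy(&v, buf, 2);
                    crc = __crc32h(crc, v);
                    buf += 2;
                    len -= 2;
            }
            if (len)                        /* mirrors .size_1 */
                    crc = __crc32b(crc, *buf);

            return ~crc;                    /* final inversion (mvn before ret) */
    }

Since the PMULL fold is only an algebraic rearrangement of the same CRC, the model and the assembly must agree on every input; comparing them over random lengths around the 1024-byte threshold exercises both the fold loop and every tail case.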