Diffstat (limited to 'src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S')
-rw-r--r--  src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S  176
1 file changed, 176 insertions(+), 0 deletions(-)
diff --git a/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S b/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S
new file mode 100644
index 000000000..98cf12928
--- /dev/null
+++ b/src/isa-l/crc/aarch64/crc32_gzip_refl_hw_fold.S
@@ -0,0 +1,176 @@
+########################################################################
+# Copyright(c) 2019 Arm Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Arm Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#########################################################################
+
+ .arch armv8-a+crc+crypto
+ .text
+ .align 3
+ .global crc32_gzip_refl_hw_fold
+ .type crc32_gzip_refl_hw_fold, %function
+
+/* uint32_t crc32_gzip_refl_hw_fold(uint32_t seed, const unsigned char *buf, uint64_t len) */
+
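+/* For len >= 1024 the buffer is consumed in 1024-byte blocks: each
+ * block is split into three 336-byte streams CRCed in parallel with
+ * crc32x (hiding the instruction's latency), the partial CRCs are
+ * folded together with pmull, and the last 16 bytes of the block
+ * finish the combine.  Whatever remains is consumed in 8/4/2/1-byte
+ * steps. */
+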
+w_seed .req w0
+w_crc .req w0
+x_buf .req x1
+x_len .req x2
+
+x_buf_loop_end .req x10
+x_buf_iter .req x10
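+/* x10 serves two roles: the end bound of the block loop while
+ * folding, then the tail iterator; when the block loop exits, both
+ * equal the advanced x_buf, so no extra setup is needed. */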
+
+x_tmp .req x15
+w_tmp .req w15
+
+d_c0 .req d3
+d_c1 .req d1
+v_c0 .req v3
+v_c1 .req v1
+crc32_gzip_refl_hw_fold:
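+/* Reflected gzip CRC works on an inverted register: invert the seed
+ * here and the result before returning.  Buffers shorter than 1024
+ * bytes skip the fold path entirely. */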
+ mvn w_seed, w_seed
+ cmp x_len, 1023
+ mov x_buf_iter, x_buf
+ bls .loop_fold_end
+
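+/* Round len down to a whole number of 1024-byte blocks and point
+ * x_buf_loop_end one past the last full block. */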
+ sub x_buf_loop_end, x_len, #1024
+ and x_buf_loop_end, x_buf_loop_end, -1024
+ add x_buf_loop_end, x_buf_loop_end, 1024
+ add x_buf_loop_end, x_buf, x_buf_loop_end
+
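+/* Pre-computed folding constants: multiplying a partial CRC by one
+ * of these via pmull shifts it forward over the bytes consumed by
+ * the later streams (powers of x modulo the CRC polynomial). */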
+ mov x_tmp, 0x819b
+ movk x_tmp, 0xb486, lsl 16
+ fmov d_c0, x_tmp
+
+ mov x_tmp, 0x8617
+ movk x_tmp, 0x7627, lsl 16
+ fmov d_c1, x_tmp
+
+x_in64 .req x3
+w_crc0 .req w0
+w_crc1 .req w4
+w_crc2 .req w5
+
+d_crc0 .req d4
+d_crc1 .req d5
+v_crc0 .req v4
+v_crc1 .req v5
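+
+/* One iteration handles a 1024-byte block laid out as three 336-byte
+ * streams at offsets 0, 336 and 672, plus 16 trailing bytes. */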
+ .align 3
+.loop_fold:
+ add x9, x_buf, 336
+ mov x_in64, x_buf
+ mov w_crc1, 0
+ mov w_crc2, 0
+
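+/* 42 iterations x 8 bytes = 336 bytes per stream; the three crc32x
+ * chains are independent, so they can issue back to back. */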
+ .align 3
+.loop_for:
+ ldr x8, [x_in64]
+ ldr x7, [x_in64, 336]
+ ldr x6, [x_in64, 672]
+
+ add x_in64, x_in64, 8
+ cmp x_in64, x9
+
+ crc32x w_crc0, w_crc0, x8
+ crc32x w_crc1, w_crc1, x7
+ crc32x w_crc2, w_crc2, x6
+ bne .loop_for
+
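+/* Fold streams 0 and 1 forward over the data processed after them:
+ * pmull multiplies each 32-bit CRC by its distance constant in
+ * GF(2)[x], then crc32x against wzr reduces the 64-bit product back
+ * to a 32-bit CRC. */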
+ uxtw x_tmp, w_crc0
+ fmov d_crc0, x_tmp
+ pmull v_crc0.1q, v_crc0.1d, v_c0.1d
+
+ uxtw x_tmp, w_crc1
+ fmov d_crc1, x_tmp
+ pmull v_crc1.1q, v_crc1.1d, v_c1.1d
+
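+/* Stream 2 already ends at the block tail, so it absorbs bytes
+ * [1008,1016) directly while the pmull results are in flight. */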
+ ldr x_tmp, [x_buf, 1008]
+ crc32x w_crc2, w_crc2, x_tmp
+
+ fmov x_tmp, d_crc0
+ crc32x w_crc0, wzr, x_tmp
+
+ fmov x_tmp, d_crc1
+ crc32x w_crc1, wzr, x_tmp
+
+ eor w_crc0, w_crc0, w_crc1
+ eor w_crc0, w_crc0, w_crc2
+
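+/* The combined CRC absorbs the last 8 bytes of the block. */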
+ ldr x_tmp, [x_buf, 1016]
+ crc32x w_crc0, w_crc0, x_tmp
+
+ add x_buf, x_buf, 1024
+ cmp x_buf_loop_end, x_buf
+ bne .loop_fold
+
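+/* Only the remainder after the full 1024-byte blocks is left. */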
+ and x_len, x_len, 1023
+
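+/* Tail: consume the remaining 0..1023 bytes in 8-, 4-, 2- and
+ * 1-byte steps with the scalar crc32 instructions. */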
+x_buf_loop_size8_end .req x3
+.loop_fold_end:
+ cmp x_len, 7
+ bls .size_4
+
+ sub x_buf_loop_size8_end, x_len, #8
+ and x_buf_loop_size8_end, x_buf_loop_size8_end, -8
+ add x_buf_loop_size8_end, x_buf_loop_size8_end, 8
+ add x_buf_loop_size8_end, x_buf_iter, x_buf_loop_size8_end
+
+ .align 3
+.loop_size_8:
+ ldr x_tmp, [x_buf_iter], 8
+ crc32x w_crc, w_crc, x_tmp
+
+ cmp x_buf_iter, x_buf_loop_size8_end
+ bne .loop_size_8
+
+ and x_len, x_len, 7
+.size_4:
+ cmp x_len, 3
+ bls .size_2
+
+ ldr w_tmp, [x_buf_iter], 4
+ crc32w w_crc, w_crc, w_tmp
+
+ sub x_len, x_len, #4
+.size_2:
+ cmp x_len, 1
+ bls .size_1
+
+ ldrh w_tmp, [x_buf_iter], 2
+ crc32h w_crc, w_crc, w_tmp
+
+ sub x_len, x_len, #2
+.size_1:
+ cbz x_len, .done
+
+ ldrb w_tmp, [x_buf_iter]
+ crc32b w_crc, w_crc, w_tmp
+
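+/* Undo the initial inversion to produce the final gzip-style CRC. */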
+.done:
+ mvn w_crc, w_crc
+ ret
+
+ .size crc32_gzip_refl_hw_fold, .-crc32_gzip_refl_hw_fold
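
Why the combine works: CRC is linear over GF(2), so the CRC of a block
is the XOR of each stream's partial CRC carried forward over the bytes
that follow that stream, and carrying a CRC over n bytes amounts to
multiplying it by x^(8n) modulo the CRC polynomial, which is what the
pmull plus reducing-crc32x pair above performs. The standalone C model
below checks that identity for one 1024-byte block. It is a sketch
only, not part of this commit or of isa-l's API: the helper names
(crc32_raw, multmodp, xpow8n, crc32_block_3way) are invented for the
example, and multmodp follows the same reflected-polynomial idea as
zlib's crc32_combine().

/* crc32_fold_model.c - illustrative model of the 3-stream fold.
 * Hypothetical helpers; not part of isa-l. */
#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define POLY 0xEDB88320u /* reflected gzip CRC-32 polynomial */

/* Bitwise CRC update with no pre/post inversion; the caller inverts,
 * mirroring the mvn instructions at entry and exit of the assembly. */
static uint32_t crc32_raw(uint32_t crc, const unsigned char *buf, size_t len)
{
    while (len--) {
        crc ^= *buf++;
        for (int k = 0; k < 8; k++)
            crc = (crc >> 1) ^ ((crc & 1) ? POLY : 0);
    }
    return crc;
}

/* Product of two polynomials modulo POLY in the reflected
 * representation, where 0x80000000 encodes the polynomial "1"
 * (same convention as zlib's multmodp). */
static uint32_t multmodp(uint32_t a, uint32_t b)
{
    uint32_t prod = 0;
    for (uint32_t m = 0x80000000u; m; m >>= 1) {
        if (a & m)
            prod ^= b;
        b = (b >> 1) ^ ((b & 1) ? POLY : 0);
    }
    return prod;
}

/* x^(8n) mod POLY: carries a CRC forward over n bytes of data.
 * 0x00800000 encodes x^8 in the reflected representation. */
static uint32_t xpow8n(uint64_t n)
{
    uint32_t r = 0x80000000u; /* x^0 */
    uint32_t s = 0x00800000u; /* x^8 */
    while (n) {
        if (n & 1)
            r = multmodp(r, s);
        s = multmodp(s, s);
        n >>= 1;
    }
    return r;
}

/* One 1024-byte block, split the way the assembly splits it. */
static uint32_t crc32_block_3way(uint32_t crc, const unsigned char *blk)
{
    uint32_t c0 = crc32_raw(crc, blk, 336);     /* bytes [0, 336)    */
    uint32_t c1 = crc32_raw(0, blk + 336, 336); /* bytes [336, 672)  */
    uint32_t c2 = crc32_raw(0, blk + 672, 344); /* bytes [672, 1016) */

    /* Carry c0 over the 680 bytes and c1 over the 344 bytes that were
     * processed after their streams, then XOR the three together. */
    uint32_t c = multmodp(xpow8n(680), c0) ^ multmodp(xpow8n(344), c1) ^ c2;

    return crc32_raw(c, blk + 1016, 8);         /* bytes [1016, 1024) */
}

int main(void)
{
    unsigned char buf[1024];
    for (int i = 0; i < 1024; i++)
        buf[i] = (unsigned char)(i * 7 + 1);

    uint32_t ref = ~crc32_raw(~0u, buf, sizeof(buf)); /* straight-line */
    uint32_t fold = ~crc32_block_3way(~0u, buf);      /* 3-stream fold */
    printf("ref=%08x fold=%08x %s\n", (unsigned)ref, (unsigned)fold,
           ref == fold ? "match" : "MISMATCH");
    return 0;
}

The model replaces the assembly's pmull-by-constant with an explicit
multmodp, and its crc32x chains with bitwise loops; the arithmetic
being verified is the same.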