diff options
Diffstat (limited to 'src/isa-l/crc/aarch64/crc32_refl_common_pmull.h')
-rw-r--r-- | src/isa-l/crc/aarch64/crc32_refl_common_pmull.h | 126 |
1 files changed, 126 insertions, 0 deletions
diff --git a/src/isa-l/crc/aarch64/crc32_refl_common_pmull.h b/src/isa-l/crc/aarch64/crc32_refl_common_pmull.h new file mode 100644 index 000000000..6418f1240 --- /dev/null +++ b/src/isa-l/crc/aarch64/crc32_refl_common_pmull.h @@ -0,0 +1,126 @@ +######################################################################## +# Copyright(c) 2019 Arm Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Arm Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################### + +#include "crc_common_pmull.h" + +.macro crc32_refl_func name:req + .arch armv8-a+crypto + .text + .align 3 + .global \name + .type \name, %function + +/* uint32_t crc32_refl_func(uint32_t seed, uint8_t * buf, uint64_t len) */ + +\name\(): + mvn w_seed, w_seed + mov x_counter, 0 + cmp x_len, (FOLD_SIZE - 1) + bhi .crc32_clmul_pre + +.crc_tab_pre: + cmp x_len, x_counter + bls .done + + adrp x_tmp, .lanchor_crc_tab + add x_buf_iter, x_buf, x_counter + add x_buf, x_buf, x_len + add x_crc_tab_addr, x_tmp, :lo12:.lanchor_crc_tab + + .align 3 +.loop_crc_tab: + ldrb w_tmp, [x_buf_iter], 1 + cmp x_buf, x_buf_iter + eor w_tmp, w_tmp, w_seed + and w_tmp, w_tmp, 255 + ldr w_tmp, [x_crc_tab_addr, w_tmp, uxtw 2] + eor w_seed, w_tmp, w_seed, lsr 8 + bhi .loop_crc_tab + +.done: + mvn w_crc_ret, w_seed + ret + + .align 2 +.crc32_clmul_pre: + fmov s_x0, w_seed // save crc to s_x0 + + crc_refl_load_first_block + + bls .clmul_loop_end + + crc32_load_p4 + +// 1024bit --> 512bit loop +// merge x0, x1, x2, x3, y0, y1, y2, y3 => x0, x1, x2, x3 (uint64x2_t) + crc_refl_loop + +.clmul_loop_end: +// folding 512bit --> 128bit + crc32_fold_512b_to_128b + +// folding 128bit --> 64bit + mov x_tmp, p0_low_b0 + movk x_tmp, p0_low_b1, lsl 16 + fmov d_p0_low2, x_tmp + + mov d_tmp_high, v_x3.d[1] + + mov d_p0_low, v_p1.d[1] + pmull v_x3.1q, v_x3.1d, v_p0.1d + + eor v_tmp_high.16b, v_tmp_high.16b, v_x3.16b + mov s_x3, v_tmp_high.s[0] + ext v_tmp_high.16b, v_tmp_high.16b, v_tmp_high.16b, #4 + pmull v_x3.1q, v_x3.1d, v_p02.1d + +// barrett reduction + mov x_tmp2, br_high_b0 + movk x_tmp2, br_high_b1, lsl 16 + movk x_tmp2, br_high_b2, lsl 32 + fmov d_br_high, x_tmp2 + + mov x_tmp, br_low_b0 + movk x_tmp, br_low_b1, lsl 16 + movk x_tmp, br_low_b2, lsl 32 + fmov d_br_low, x_tmp + + eor v_tmp_high.16b, v_tmp_high.16b, v_x3.16b + mov s_x3, v_tmp_high.s[0] + pmull v_x3.1q, v_x3.1d, v_br_high.1d + + mov s_x3, v_x3.s[0] + pmull v_x3.1q, v_x3.1d, v_br_low.1d + eor v_tmp_high.8b, v_tmp_high.8b, v_x3.8b + umov w_seed, v_tmp_high.s[1] + + b .crc_tab_pre + + .size \name, .-\name +.endm |