From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 21 Apr 2024 13:54:28 +0200
Subject: Adding upstream version 18.2.2.

Signed-off-by: Daniel Baumann
---
 .../mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c   |  55 +++
 .../isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c   |  53 +++
 .../mh_sha1/aarch64/mh_sha1_block_asimd.S          | 124 +++++++
 .../mh_sha1/aarch64/mh_sha1_block_ce.S             | 384 +++++++++++++++++++++
 .../isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c      |  53 +++
 .../mh_sha1/aarch64/mh_sha1_multibinary.S          |  35 ++
 .../mh_sha1/aarch64/sha1_asimd_common.S            | 269 +++++++++++++++
 7 files changed, 973 insertions(+)
 create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c
 create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c
 create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S
 create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S
 create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c
 create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S
 create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S

diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c
new file mode 100644
index 000000000..2ad8871fa
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c
@@ -0,0 +1,55 @@
+/**********************************************************************
+  Copyright(c) 2020 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(mh_sha1_update)
+{
+	unsigned long auxval = getauxval(AT_HWCAP);
+	if (auxval & HWCAP_SHA1)
+		return PROVIDER_INFO(mh_sha1_update_ce);
+
+	if (auxval & HWCAP_ASIMD)
+		return PROVIDER_INFO(mh_sha1_update_asimd);
+
+	return PROVIDER_BASIC(mh_sha1_update);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(mh_sha1_finalize)
+{
+	unsigned long auxval = getauxval(AT_HWCAP);
+	if (auxval & HWCAP_SHA1)
+		return PROVIDER_INFO(mh_sha1_finalize_ce);
+
+	if (auxval & HWCAP_ASIMD)
+		return PROVIDER_INFO(mh_sha1_finalize_asimd);
+
+	return PROVIDER_BASIC(mh_sha1_finalize);
+
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c
new file mode 100644
index 000000000..c913a64df
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c
@@ -0,0 +1,53 @@
+/**********************************************************************
+  Copyright(c) 2021 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+void mh_sha1_block_asimd(const uint8_t * input_data,
+			 uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+			 uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+/***************mh_sha1_update***********/
+// mh_sha1_update_asimd.c
+#define MH_SHA1_UPDATE_FUNCTION	mh_sha1_update_asimd
+#define MH_SHA1_BLOCK_FUNCTION	mh_sha1_block_asimd
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail is used to calculate the last incomplete src data block
+// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail
+// mh_sha1_finalize_asimd.c and mh_sha1_tail_asimd.c
+#define MH_SHA1_FINALIZE_FUNCTION	mh_sha1_finalize_asimd
+#define MH_SHA1_TAIL_FUNCTION	mh_sha1_tail_asimd
+#define MH_SHA1_BLOCK_FUNCTION	mh_sha1_block_asimd
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S
new file mode 100644
index 000000000..22f716f27
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S
@@ -0,0 +1,124 @@
+/**********************************************************************
+  Copyright(c) 2021 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/ + + .arch armv8-a + +#include "sha1_asimd_common.S" + +.macro load_x4_word idx:req + ld1 {WORD\idx\().16b},[segs_ptr] + add segs_ptr,segs_ptr,#64 +.endm + +/* + * void mh_sha1_block_asimd (const uint8_t * input_data, + * uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], + * uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], + * uint32_t num_blocks); + * arg 0 pointer to input data + * arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) + * arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. + * arg 3 number of 1KB blocks + */ + + input_data .req x0 + sha1_digest .req x1 + data_buf .req x2 + num_blocks .req w3 + src .req x4 + dst .req x5 + offs .req x6 + mh_segs .req x7 + tmp .req x8 + segs_ptr .req x9 + block_ctr .req w10 + + .global mh_sha1_block_asimd + .type mh_sha1_block_asimd, %function +mh_sha1_block_asimd: + cmp num_blocks, #0 + beq .return + sha1_asimd_save_stack + + mov mh_segs, #0 +.seg_loops: + add segs_ptr,input_data,mh_segs + mov offs, #64 + add src, sha1_digest, mh_segs + ld1 {VA.4S}, [src], offs + ld1 {VB.4S}, [src], offs + ld1 {VC.4S}, [src], offs + ld1 {VD.4S}, [src], offs + ld1 {VE.4S}, [src], offs + mov block_ctr,num_blocks + +.block_loop: + sha1_single + subs block_ctr, block_ctr, 1 + bne .block_loop + + mov offs, #64 + add dst, sha1_digest, mh_segs + st1 {VA.4S}, [dst], offs + st1 {VB.4S}, [dst], offs + st1 {VC.4S}, [dst], offs + st1 {VD.4S}, [dst], offs + st1 {VE.4S}, [dst], offs + + add mh_segs, mh_segs, #16 + cmp mh_segs, #64 + bne .seg_loops + + sha1_asimd_restore_stack +.return: + ret + + .size mh_sha1_block_asimd, .-mh_sha1_block_asimd + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S new file mode 100644 index 000000000..12d3c5df2 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S @@ -0,0 +1,384 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crypto + .text + .align 2 + .p2align 3,,7 + +/* +Macros +*/ + +.macro declare_var_vector_reg name:req,reg:req + \name\()_q .req q\reg + \name\()_v .req v\reg + \name\()_s .req s\reg +.endm + + + +/* +Variable list +*/ + + declare_var_vector_reg lane0_msg_0, 0 + declare_var_vector_reg lane1_msg_0, 1 + declare_var_vector_reg lane2_msg_0, 2 + declare_var_vector_reg lane3_msg_0, 3 + declare_var_vector_reg lane0_msg_1, 4 + declare_var_vector_reg lane1_msg_1, 5 + declare_var_vector_reg lane2_msg_1, 6 + declare_var_vector_reg lane3_msg_1, 7 + declare_var_vector_reg lane0_msg_2, 8 + declare_var_vector_reg lane1_msg_2, 9 + declare_var_vector_reg lane2_msg_2,10 + declare_var_vector_reg lane3_msg_2,11 + declare_var_vector_reg lane0_msg_3,12 + declare_var_vector_reg lane1_msg_3,13 + declare_var_vector_reg lane2_msg_3,14 + declare_var_vector_reg lane3_msg_3,15 + + declare_var_vector_reg lane0_abcd ,16 + declare_var_vector_reg lane1_abcd ,17 + declare_var_vector_reg lane2_abcd ,18 + declare_var_vector_reg lane3_abcd ,19 + declare_var_vector_reg lane0_tmp0 ,20 + declare_var_vector_reg lane1_tmp0 ,21 + declare_var_vector_reg lane2_tmp0 ,22 + declare_var_vector_reg lane3_tmp0 ,23 + declare_var_vector_reg lane0_tmp1 ,24 + declare_var_vector_reg lane1_tmp1 ,25 + declare_var_vector_reg lane2_tmp1 ,26 + declare_var_vector_reg lane3_tmp1 ,27 + + + declare_var_vector_reg e0 ,28 + declare_var_vector_reg e1 ,29 + declare_var_vector_reg key ,30 + declare_var_vector_reg tmp ,31 + + key_adr .req x4 + msg_adr .req x5 + block_cnt .req x6 + offs .req x7 + digest_adr .req x16 + tmp0_adr .req x17 + tmp1_adr .req x18 + +/** +maros for round 4-67 +*/ +.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req + sha1h lane0_\tmp0\()_s, lane0_\abcd\()_s + sha1h lane1_\tmp0\()_s, lane1_\abcd\()_s + sha1h lane2_\tmp0\()_s, lane2_\abcd\()_s + sha1h lane3_\tmp0\()_s, lane3_\abcd\()_s + mov \e0\()_v.S[0],lane0_\tmp0\()_v.S[0] + mov \e0\()_v.S[1],lane1_\tmp0\()_v.S[0] + mov \e0\()_v.S[2],lane2_\tmp0\()_v.S[0] + mov \e0\()_v.S[3],lane3_\tmp0\()_v.S[0] + mov lane0_\tmp0\()_v.S[0],\e1\()_v.S[0] + mov lane1_\tmp0\()_v.S[0],\e1\()_v.S[1] + mov lane2_\tmp0\()_v.S[0],\e1\()_v.S[2] + mov lane3_\tmp0\()_v.S[0],\e1\()_v.S[3] + \inst lane0_\abcd\()_q,lane0_\tmp0\()_s,lane0_\tmp1\()_v.4s + \inst lane1_\abcd\()_q,lane1_\tmp0\()_s,lane1_\tmp1\()_v.4s + \inst lane2_\abcd\()_q,lane2_\tmp0\()_s,lane2_\tmp1\()_v.4s + \inst lane3_\abcd\()_q,lane3_\tmp0\()_s,lane3_\tmp1\()_v.4s + ld1 {lane0_\tmp0\()_v.4s-lane3_\tmp0\()_v.4s},[\tmp0\()_adr] + add lane0_\tmp1\()_v.4s,lane0_\msg3\()_v.4s,key_v.4s + add lane1_\tmp1\()_v.4s,lane1_\msg3\()_v.4s,key_v.4s + add lane2_\tmp1\()_v.4s,lane2_\msg3\()_v.4s,key_v.4s + add lane3_\tmp1\()_v.4s,lane3_\msg3\()_v.4s,key_v.4s + st1 {lane0_\tmp1\()_v.4s-lane3_\tmp1\()_v.4s},[\tmp1\()_adr] + sha1su1 lane0_\msg0\()_v.4s,lane0_\msg3\()_v.4s + sha1su1 
lane1_\msg0\()_v.4s,lane1_\msg3\()_v.4s + sha1su1 lane2_\msg0\()_v.4s,lane2_\msg3\()_v.4s + sha1su1 lane3_\msg0\()_v.4s,lane3_\msg3\()_v.4s + sha1su0 lane0_\msg1\()_v.4s,lane0_\msg2\()_v.4s,lane0_\msg3\()_v.4s + sha1su0 lane1_\msg1\()_v.4s,lane1_\msg2\()_v.4s,lane1_\msg3\()_v.4s + sha1su0 lane2_\msg1\()_v.4s,lane2_\msg2\()_v.4s,lane2_\msg3\()_v.4s + sha1su0 lane3_\msg1\()_v.4s,lane3_\msg2\()_v.4s,lane3_\msg3\()_v.4s + +.endm + + +/* + void mh_sha1_block_ce(const uint8_t * input_data, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks) +*/ +/* +Arguements list +*/ + input_data .req x0 + digests .req x1 + frame_buffer .req x2 + num_blocks .req w3 + + .global mh_sha1_block_ce + .type mh_sha1_block_ce, %function +mh_sha1_block_ce: + //save temp vector registers + stp d8, d9, [sp, -128]! + + stp d10, d11, [sp, 16] + stp d12, d13, [sp, 32] + stp d14, d15, [sp, 48] + mov tmp0_adr,frame_buffer + add tmp1_adr,tmp0_adr,128 + + +start_loop: + mov block_cnt,0 + mov msg_adr,input_data +lane_loop: + mov offs,64 + adr key_adr,KEY_0 + //load msg 0 + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[3],[msg_adr],offs + + add digest_adr,digests,block_cnt + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs + ldr e0_q,[digest_adr] + + //load key_0 + ldr key_q,[key_adr] + + rev32 lane0_msg_0_v.16b,lane0_msg_0_v.16b + rev32 lane1_msg_0_v.16b,lane1_msg_0_v.16b + rev32 lane2_msg_0_v.16b,lane2_msg_0_v.16b + rev32 lane3_msg_0_v.16b,lane3_msg_0_v.16b + rev32 lane0_msg_1_v.16b,lane0_msg_1_v.16b + rev32 lane1_msg_1_v.16b,lane1_msg_1_v.16b + rev32 lane2_msg_1_v.16b,lane2_msg_1_v.16b + rev32 lane3_msg_1_v.16b,lane3_msg_1_v.16b + rev32 lane0_msg_2_v.16b,lane0_msg_2_v.16b + rev32 lane1_msg_2_v.16b,lane1_msg_2_v.16b + rev32 lane2_msg_2_v.16b,lane2_msg_2_v.16b + rev32 lane3_msg_2_v.16b,lane3_msg_2_v.16b + rev32 lane0_msg_3_v.16b,lane0_msg_3_v.16b + rev32 lane1_msg_3_v.16b,lane1_msg_3_v.16b + rev32 lane2_msg_3_v.16b,lane2_msg_3_v.16b + rev32 lane3_msg_3_v.16b,lane3_msg_3_v.16b + + add lane0_tmp1_v.4s,lane0_msg_1_v.4s,key_v.4s + add lane1_tmp1_v.4s,lane1_msg_1_v.4s,key_v.4s + add lane2_tmp1_v.4s,lane2_msg_1_v.4s,key_v.4s + add lane3_tmp1_v.4s,lane3_msg_1_v.4s,key_v.4s + st1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr] + + add lane0_tmp0_v.4s,lane0_msg_0_v.4s,key_v.4s + add lane1_tmp0_v.4s,lane1_msg_0_v.4s,key_v.4s + add lane2_tmp0_v.4s,lane2_msg_0_v.4s,key_v.4s + add lane3_tmp0_v.4s,lane3_msg_0_v.4s,key_v.4s + + /* rounds 0-3 */ + sha1h 
lane0_tmp1_s,lane0_abcd_s + sha1h lane1_tmp1_s,lane1_abcd_s + sha1h lane2_tmp1_s,lane2_abcd_s + sha1h lane3_tmp1_s,lane3_abcd_s + mov e1_v.S[0],lane0_tmp1_v.S[0] + mov e1_v.S[1],lane1_tmp1_v.S[0] + mov e1_v.S[2],lane2_tmp1_v.S[0] + mov e1_v.S[3],lane3_tmp1_v.S[0] + mov lane0_tmp1_v.S[0],e0_v.S[0] + mov lane1_tmp1_v.S[0],e0_v.S[1] + mov lane2_tmp1_v.S[0],e0_v.S[2] + mov lane3_tmp1_v.S[0],e0_v.S[3] + sha1c lane0_abcd_q,lane0_tmp1_s,lane0_tmp0_v.4s + sha1c lane1_abcd_q,lane1_tmp1_s,lane1_tmp0_v.4s + sha1c lane2_abcd_q,lane2_tmp1_s,lane2_tmp0_v.4s + sha1c lane3_abcd_q,lane3_tmp1_s,lane3_tmp0_v.4s + ld1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr] + add lane0_tmp0_v.4s,lane0_msg_2_v.4s,key_v.4s + sha1su0 lane0_msg_0_v.4s,lane0_msg_1_v.4s,lane0_msg_2_v.4s + add lane1_tmp0_v.4s,lane1_msg_2_v.4s,key_v.4s + sha1su0 lane1_msg_0_v.4s,lane1_msg_1_v.4s,lane1_msg_2_v.4s + add lane2_tmp0_v.4s,lane2_msg_2_v.4s,key_v.4s + sha1su0 lane2_msg_0_v.4s,lane2_msg_1_v.4s,lane2_msg_2_v.4s + add lane3_tmp0_v.4s,lane3_msg_2_v.4s,key_v.4s + sha1su0 lane3_msg_0_v.4s,lane3_msg_1_v.4s,lane3_msg_2_v.4s + st1 {lane0_tmp0_v.4s-lane3_tmp0_v.4s},[tmp0_adr] + + sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 4-7 */ + sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + + + adr key_adr,KEY_1 + ldr key_q,[key_adr] + sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 /* rounds 12-15 */ + sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 20-23 */ + sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + + adr key_adr,KEY_2 + ldr key_q,[key_adr] + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 36-39 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + + adr key_adr,KEY_3 + ldr key_q,[key_adr] + sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 52-55 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + + //msg2 and msg1 are free + mov lane0_msg_2_v.S[0],e1_v.S[0] + mov lane1_msg_2_v.S[0],e1_v.S[1] + mov lane2_msg_2_v.S[0],e1_v.S[2] + mov lane3_msg_2_v.S[0],e1_v.S[3] + + /* rounds 68-71 */ + sha1h lane0_msg_1_s,lane0_abcd_s + sha1h lane1_msg_1_s,lane1_abcd_s + sha1h lane2_msg_1_s,lane2_abcd_s + sha1h lane3_msg_1_s,lane3_abcd_s + sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s + sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s + sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s + sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s + add lane0_tmp1_v.4s,lane0_msg_3_v.4s,key_v.4s + add lane1_tmp1_v.4s,lane1_msg_3_v.4s,key_v.4s + add lane2_tmp1_v.4s,lane2_msg_3_v.4s,key_v.4s + add lane3_tmp1_v.4s,lane3_msg_3_v.4s,key_v.4s + sha1su1 lane0_msg_0_v.4s,lane0_msg_3_v.4s + sha1su1 lane1_msg_0_v.4s,lane1_msg_3_v.4s + sha1su1 lane2_msg_0_v.4s,lane2_msg_3_v.4s + sha1su1 lane3_msg_0_v.4s,lane3_msg_3_v.4s + + /* rounds 72-75 */ + sha1h lane0_msg_2_s,lane0_abcd_s + sha1h lane1_msg_2_s,lane1_abcd_s + sha1h lane2_msg_2_s,lane2_abcd_s + sha1h lane3_msg_2_s,lane3_abcd_s + sha1p 
lane0_abcd_q,lane0_msg_1_s,lane0_tmp0_v.4s + sha1p lane1_abcd_q,lane1_msg_1_s,lane1_tmp0_v.4s + sha1p lane2_abcd_q,lane2_msg_1_s,lane2_tmp0_v.4s + sha1p lane3_abcd_q,lane3_msg_1_s,lane3_tmp0_v.4s + + /* rounds 76-79 */ + sha1h lane0_msg_1_s,lane0_abcd_s + sha1h lane1_msg_1_s,lane1_abcd_s + sha1h lane2_msg_1_s,lane2_abcd_s + sha1h lane3_msg_1_s,lane3_abcd_s + sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s + sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s + sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s + sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s + add digest_adr,digests,block_cnt + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[digest_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[digest_adr] + + add lane0_abcd_v.4S,lane0_abcd_v.4S,lane0_msg_0_v.4S + add lane1_abcd_v.4S,lane1_abcd_v.4S,lane1_msg_0_v.4S + add lane2_abcd_v.4S,lane2_abcd_v.4S,lane2_msg_0_v.4S + add lane3_abcd_v.4S,lane3_abcd_v.4S,lane3_msg_0_v.4S + + add lane0_msg_1_v.4S,lane0_msg_1_v.4S,lane0_msg_3_v.4S + add lane1_msg_1_v.4S,lane1_msg_1_v.4S,lane1_msg_3_v.4S + add lane2_msg_1_v.4S,lane2_msg_1_v.4S,lane2_msg_3_v.4S + add lane3_msg_1_v.4S,lane3_msg_1_v.4S,lane3_msg_3_v.4S + + add digest_adr,digests,block_cnt + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs + st4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[digest_adr] + + add block_cnt,block_cnt,16 + cmp block_cnt,64 + add msg_adr,input_data,block_cnt + add digest_adr,digests,block_cnt + bcc lane_loop + + subs num_blocks,num_blocks,1 + add input_data,input_data,1024 + bhi start_loop +exit_func: + //restore temp register + ldp d10, d11, [sp, 16] + ldp d12, d13, [sp, 32] + ldp d14, d15, [sp, 48] + ldp d8, d9, [sp], 128 + ret + + .size mh_sha1_block_ce, .-mh_sha1_block_ce + .section .rodata.cst16,"aM",@progbits,16 + .align 4 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c new file mode 100644 index 000000000..c35daeab0 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c @@ -0,0 +1,53 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+void mh_sha1_block_ce(const uint8_t * input_data,
+		      uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+		      uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+/***************mh_sha1_update***********/
+// mh_sha1_update_ce.c
+#define MH_SHA1_UPDATE_FUNCTION	mh_sha1_update_ce
+#define MH_SHA1_BLOCK_FUNCTION	mh_sha1_block_ce
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail is used to calculate the last incomplete src data block
+// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail
+// mh_sha1_finalize_ce.c and mh_sha1_tail_ce.c
+#define MH_SHA1_FINALIZE_FUNCTION	mh_sha1_finalize_ce
+#define MH_SHA1_TAIL_FUNCTION	mh_sha1_tail_ce
+#define MH_SHA1_BLOCK_FUNCTION	mh_sha1_block_ce
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S
new file mode 100644
index 000000000..9a6d0caea
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S
@@ -0,0 +1,35 @@
+/**********************************************************************
+  Copyright(c) 2020 Arm Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Arm Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +#include "aarch64_multibinary.h" + + +mbin_interface mh_sha1_update +mbin_interface mh_sha1_finalize diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S new file mode 100644 index 000000000..c8b8dd982 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S @@ -0,0 +1,269 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + +// macro F = (D ^ (B & (C ^ D))) +.macro FUNC_F0 + eor VF.16b, VC.16b, VD.16b + and VF.16b, VB.16b, VF.16b + eor VF.16b, VD.16b, VF.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F1 + eor VF.16b, VB.16b, VC.16b + eor VF.16b, VF.16b, VD.16b +.endm + +// F = ((B & C) | (B & D) | (C & D)) +.macro FUNC_F2 + and vT0.16b, VB.16b, VC.16b + and vT1.16b, VB.16b, VD.16b + and vT2.16b, VC.16b, VD.16b + orr VF.16b, vT0.16b, vT1.16b + orr VF.16b, VF.16b, vT2.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F3 + FUNC_F1 +.endm + +.altmacro +.macro load_next_word windex + .if \windex < 16 + load_x4_word \windex + .endif +.endm + +// FUNC_F0 is merged into STEP_00_15 for efficiency +.macro SHA1_STEP_00_15_F0 windex:req + rev32 WORD\windex\().16b,WORD\windex\().16b + next_word=\windex+1 + load_next_word %next_word + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, VA.4s, 32 - 5 + add VE.4s, VE.4s, VK.4s + sli VT.4s, VA.4s, 5 + eor VF.16b, VC.16b, VD.16b + add VE.4s, VE.4s, WORD\windex\().4s + and VF.16b, VB.16b, VF.16b + add VE.4s, VE.4s, VT.4s + eor VF.16b, VD.16b, VF.16b + ushr VT.4s, VB.4s, 32 - 30 + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + +.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req + eor vT0.16b,\reg_3\().16b,\reg_8\().16b + eor VT.16b,\reg_14\().16b,\reg_16\().16b + eor vT0.16b,vT0.16b,VT.16b + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, vT0.4s, 32 - 1 + add VE.4s, VE.4s, VK.4s + ushr vT1.4s, VA.4s, 32 - 5 + sli VT.4s, vT0.4s, 1 + add VE.4s, VE.4s, VT.4s + sli vT1.4s, VA.4s, 5 + mov \reg_16\().16b,VT.16b + add VE.4s, VE.4s, vT1.4s + ushr VT.4s, VB.4s, 32 - 30 + \func_f + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + + VA .req v0 + VB .req v1 + VC .req v2 + VD .req v3 + VE .req v4 + VT .req v5 + VF .req v6 + VK .req v7 + WORD0 .req v8 + WORD1 .req v9 + WORD2 .req v10 + WORD3 .req v11 + WORD4 .req v12 + WORD5 .req v13 + WORD6 .req v14 + WORD7 .req v15 + WORD8 .req v16 + WORD9 .req v17 + WORD10 .req v18 + WORD11 .req v19 + WORD12 .req v20 + WORD13 .req v21 + WORD14 .req v22 + WORD15 .req v23 + vT0 .req v24 + vT1 .req v25 + vT2 .req v26 + vAA .req v27 + vBB .req v28 + vCC .req v29 + vDD .req v30 + vEE .req v31 + TT .req v0 + sha1key_adr .req x15 + +.macro SWAP_STATES + // shifted VB is held in VT after each step + .unreq TT + TT .req VE + .unreq VE + VE .req VD + .unreq VD + VD .req VC + .unreq VC + VC .req VT + .unreq VT + VT .req VB + .unreq VB + VB .req VA + .unreq VA + VA .req TT +.endm + +.altmacro +.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req + SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\() +.endm + +.macro exec_step windex:req + .if \windex <= 15 + SHA1_STEP_00_15_F0 windex + .else + idx14=((\windex - 14) & 15) + idx8=((\windex - 8) & 15) + idx3=((\windex - 3) & 15) + idx16=(\windex & 15) + .if \windex <= 19 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 20 && \windex <= 39 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 40 && \windex <= 59 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 60 && \windex <= 79 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16 + .endif + .endif + + SWAP_STATES + + .if \windex == 79 + // after 80 steps, the registers ABCDET has shifted from + // its 
original order of 012345 to 341520
+	// have to swap back for both compile- and run-time correctness
+	mov	v0.16b,v3.16b
+	.unreq	VA
+	VA	.req	v0
+
+	mov	vT0.16b,v2.16b
+	mov	v2.16b,v1.16b
+	mov	v1.16b,v4.16b
+	.unreq	VB
+	VB	.req	v1
+	.unreq	VC
+	VC	.req	v2
+
+	mov	v3.16b,v5.16b
+	.unreq	VD
+	VD	.req	v3
+
+	mov	v4.16b,vT0.16b
+	.unreq	VE
+	VE	.req	v4
+
+	.unreq	VT
+	VT	.req	v5
+	.endif
+.endm
+
+.macro exec_steps idx:req,more:vararg
+	exec_step	\idx
+	.ifnb	\more
+	exec_steps	\more
+	.endif
+.endm
+
+.macro sha1_single
+	load_x4_word	0
+
+	mov	vAA.16B, VA.16B
+	mov	vBB.16B, VB.16B
+	mov	vCC.16B, VC.16B
+	mov	vDD.16B, VD.16B
+	mov	vEE.16B, VE.16B
+
+	adr	sha1key_adr, KEY_0
+	ld1	{VK.4s}, [sha1key_adr]
+	exec_steps	0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
+
+	// 20 ~ 39
+	adr	sha1key_adr, KEY_1
+	ld1	{VK.4s}, [sha1key_adr]
+	exec_steps	20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
+
+	// 40 ~ 59
+	adr	sha1key_adr, KEY_2
+	ld1	{VK.4s}, [sha1key_adr]
+	exec_steps	40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59
+
+	// 60 ~ 79
+	adr	sha1key_adr, KEY_3
+	ld1	{VK.4s}, [sha1key_adr]
+	exec_steps	60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
+
+	add	VA.4s, vAA.4s, VA.4s
+	add	VB.4s, vBB.4s, VB.4s
+	add	VC.4s, vCC.4s, VC.4s
+	add	VD.4s, vDD.4s, VD.4s
+	add	VE.4s, vEE.4s, VE.4s
+.endm
+
+.macro sha1_asimd_save_stack
+	stp	d8,d9,[sp, -64]!
+	stp	d10,d11,[sp, 16]
+	stp	d12,d13,[sp, 32]
+	stp	d14,d15,[sp, 48]
+.endm
+
+.macro sha1_asimd_restore_stack
+	ldp	d10,d11,[sp, 16]
+	ldp	d12,d13,[sp, 32]
+	ldp	d14,d15,[sp, 48]
+	ldp	d8,d9,[sp],64
+.endm
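
Usage sketch (not part of the upstream diff): the dispatchers above bind mh_sha1_update and mh_sha1_finalize to the SHA1 crypto-extension code when HWCAP_SHA1 is set, fall back to the ASIMD code when only HWCAP_ASIMD is set, and otherwise to the base implementation, so callers keep using the generic mh_sha1 API unchanged. A minimal, hypothetical caller, assuming the standard isa-l_crypto prototypes from mh_sha1.h (mh_sha1_init/mh_sha1_update/mh_sha1_finalize, each returning 0 on success) and linking against the library:

/*
 * Hypothetical caller (illustration only): hashes 1 KiB of data through
 * the dispatched mh_sha1 interface. The aarch64 backends added by this
 * patch are selected automatically at runtime; nothing here is
 * architecture-specific.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "mh_sha1.h"

int main(void)
{
	struct mh_sha1_ctx ctx;
	uint8_t data[1024];
	uint32_t digest[SHA1_DIGEST_WORDS];	/* 5 words, per mh_sha1.h */

	memset(data, 0xa5, sizeof(data));

	/* Each call returns 0 on success (assumed per the mh_sha1 API). */
	if (mh_sha1_init(&ctx) != 0 ||
	    mh_sha1_update(&ctx, data, sizeof(data)) != 0 ||
	    mh_sha1_finalize(&ctx, digest) != 0)
		return 1;

	for (int i = 0; i < SHA1_DIGEST_WORDS; i++)
		printf("%08x", digest[i]);
	putchar('\n');
	return 0;
}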