author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
commit     e6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree       64f88b554b444a49f656b6c656111a145cbbaa28 /src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64
parent     Initial commit. (diff)
Adding upstream version 18.2.2. (upstream/18.2.2)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64')
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c  |  55
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c               |  53
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S         | 124
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S            | 384
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c                  |  53
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S         |  35
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S           | 269
7 files changed, 973 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c
new file mode 100644
index 000000000..2ad8871fa
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c
@@ -0,0 +1,55 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(mh_sha1_update)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(mh_sha1_update_ce);
+
+ if (auxval & HWCAP_ASIMD)
+ return PROVIDER_INFO(mh_sha1_update_asimd);
+
+ return PROVIDER_BASIC(mh_sha1_update);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(mh_sha1_finalize)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(mh_sha1_finalize_ce);
+
+ if (auxval & HWCAP_ASIMD)
+ return PROVIDER_INFO(mh_sha1_finalize_asimd);
+
+ return PROVIDER_BASIC(mh_sha1_finalize);
+
+}
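
The two dispatchers above pick an implementation once, from the HWCAP auxiliary vector: the SHA-1 crypto extensions if present, otherwise plain Advanced SIMD, otherwise the portable base code. A standalone C sketch of the same selection logic follows; the update_* stubs are invented stand-ins for the PROVIDER_* targets, while getauxval(), AT_HWCAP, HWCAP_SHA1 and HWCAP_ASIMD are the real aarch64 Linux interfaces available via <sys/auxv.h>.

#include <sys/auxv.h>

typedef int (*mh_sha1_update_fn)(void *ctx, const void *buf, unsigned len);

/* stand-in implementations, for illustration only */
static int update_ce(void *c, const void *b, unsigned n)    { (void)c; (void)b; return (int)n; }
static int update_asimd(void *c, const void *b, unsigned n) { (void)c; (void)b; return (int)n; }
static int update_base(void *c, const void *b, unsigned n)  { (void)c; (void)b; return (int)n; }

static mh_sha1_update_fn select_mh_sha1_update(void)
{
        unsigned long hwcap = getauxval(AT_HWCAP);

        if (hwcap & HWCAP_SHA1)         /* SHA-1 crypto extensions */
                return update_ce;
        if (hwcap & HWCAP_ASIMD)        /* plain Advanced SIMD */
                return update_asimd;
        return update_base;             /* portable fallback */
}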
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c
new file mode 100644
index 000000000..c913a64df
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c
@@ -0,0 +1,53 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+void mh_sha1_block_asimd(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+/***************mh_sha1_update***********/
+// mh_sha1_update_asimd.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_asimd
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_asimd
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail is used to calculate the last incomplete block of src data
+// mh_sha1_finalize is an mh_sha1_ctx wrapper around mh_sha1_tail
+// mh_sha1_finalize_asimd.c and mh_sha1_tail_asimd.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_asimd
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_asimd
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_asimd
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
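
The #define/#include/#undef sequence above is a token-substitution template: mh_sha1_update_base.c and mh_sha1_finalize_base.c each hold one generic function body, and every variant translation unit binds that body to its own function name and block routine before including it. A self-contained sketch of the idiom, with all names invented:

#include <stddef.h>
#include <stdint.h>

/* what a *_base.c template conceptually contains, reduced to a macro */
#define DEFINE_UPDATE(name, block_fn)                                   \
        static size_t name(const uint8_t *buf, size_t len)              \
        {                                                               \
                size_t nblocks = len / 1024;   /* whole 1 KiB blocks */ \
                if (nblocks)                                            \
                        block_fn(buf, nblocks);                         \
                return len % 1024;             /* tail left for finalize */ \
        }

static void block_asimd_stub(const uint8_t *d, size_t n) { (void)d; (void)n; }
static void block_ce_stub(const uint8_t *d, size_t n)    { (void)d; (void)n; }

DEFINE_UPDATE(update_asimd_demo, block_asimd_stub)  /* emits update_asimd_demo() */
DEFINE_UPDATE(update_ce_demo, block_ce_stub)        /* emits update_ce_demo()    */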
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S
new file mode 100644
index 000000000..22f716f27
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S
@@ -0,0 +1,124 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+#include "sha1_asimd_common.S"
+
+.macro load_x4_word idx:req
+ ld1 {WORD\idx\().16b},[segs_ptr]
+ add segs_ptr,segs_ptr,#64
+.endm
+
+/*
+ * void mh_sha1_block_asimd (const uint8_t * input_data,
+ * uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ * uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ * uint32_t num_blocks);
+ * arg 0 pointer to input data
+ * arg 1 pointer to digests, including the per-segment digests (uint32_t digests[5][16])
+ * arg 2 pointer to aligned_frame_buffer, which is used to save the big-endian data.
+ * arg 3 number of 1KB blocks
+ */
+
+ input_data .req x0
+ sha1_digest .req x1
+ data_buf .req x2
+ num_blocks .req w3
+ src .req x4
+ dst .req x5
+ offs .req x6
+ mh_segs .req x7
+ tmp .req x8
+ segs_ptr .req x9
+ block_ctr .req w10
+
+ .global mh_sha1_block_asimd
+ .type mh_sha1_block_asimd, %function
+mh_sha1_block_asimd:
+ cmp num_blocks, #0
+ beq .return
+ sha1_asimd_save_stack
+
+ mov mh_segs, #0
+.seg_loops:
+ add segs_ptr,input_data,mh_segs
+ mov offs, #64
+ add src, sha1_digest, mh_segs
+ ld1 {VA.4S}, [src], offs
+ ld1 {VB.4S}, [src], offs
+ ld1 {VC.4S}, [src], offs
+ ld1 {VD.4S}, [src], offs
+ ld1 {VE.4S}, [src], offs
+ mov block_ctr,num_blocks
+
+.block_loop:
+ sha1_single
+ subs block_ctr, block_ctr, 1
+ bne .block_loop
+
+ mov offs, #64
+ add dst, sha1_digest, mh_segs
+ st1 {VA.4S}, [dst], offs
+ st1 {VB.4S}, [dst], offs
+ st1 {VC.4S}, [dst], offs
+ st1 {VD.4S}, [dst], offs
+ st1 {VE.4S}, [dst], offs
+
+ add mh_segs, mh_segs, #16
+ cmp mh_segs, #64
+ bne .seg_loops
+
+ sha1_asimd_restore_stack
+.return:
+ ret
+
+ .size mh_sha1_block_asimd, .-mh_sha1_block_asimd
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+KEY_0:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+KEY_1:
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+KEY_2:
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+KEY_3:
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
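
The kernel above processes the 16 hash segments four at a time: within each 1 KiB multi-hash block the segments are interleaved at 4-byte granularity, so 32-bit word w of segment s sits at byte offset s*4 + w*64, and the five digest rows (A..E across all 16 segments) are stored 64 bytes apart, matching the ld1/st1 strides of 64. A scalar reference sketch of that layout follows; sha1_compress_block() is a hypothetical one-block SHA-1 compression routine, not part of ISA-L.

#include <stdint.h>
#include <string.h>

#define HASH_SEGS 16
#define SHA1_DIGEST_WORDS 5

/* assumed elsewhere: plain SHA-1 compression of one 64-byte block */
void sha1_compress_block(uint32_t digest[SHA1_DIGEST_WORDS],
                         const uint8_t block[64]);

void mh_sha1_block_ref(const uint8_t *input,   /* num_blocks KiB of data */
                       uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
                       uint32_t num_blocks)
{
        uint8_t seg_block[64];
        uint32_t dg[SHA1_DIGEST_WORDS];

        for (uint32_t b = 0; b < num_blocks; b++, input += 1024) {
                for (int s = 0; s < HASH_SEGS; s++) {
                        /* gather the segment's 16 words from the strided layout */
                        for (int w = 0; w < 16; w++)
                                memcpy(seg_block + w * 4,
                                       input + s * 4 + w * 64, 4);
                        for (int i = 0; i < SHA1_DIGEST_WORDS; i++)
                                dg[i] = digests[i][s];
                        sha1_compress_block(dg, seg_block);
                        for (int i = 0; i < SHA1_DIGEST_WORDS; i++)
                                digests[i][s] = dg[i];
                }
        }
}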
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S
new file mode 100644
index 000000000..12d3c5df2
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S
@@ -0,0 +1,384 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crypto
+ .text
+ .align 2
+ .p2align 3,,7
+
+/*
+Macros
+*/
+
+.macro declare_var_vector_reg name:req,reg:req
+ \name\()_q .req q\reg
+ \name\()_v .req v\reg
+ \name\()_s .req s\reg
+.endm
+
+
+
+/*
+Variable list
+*/
+
+ declare_var_vector_reg lane0_msg_0, 0
+ declare_var_vector_reg lane1_msg_0, 1
+ declare_var_vector_reg lane2_msg_0, 2
+ declare_var_vector_reg lane3_msg_0, 3
+ declare_var_vector_reg lane0_msg_1, 4
+ declare_var_vector_reg lane1_msg_1, 5
+ declare_var_vector_reg lane2_msg_1, 6
+ declare_var_vector_reg lane3_msg_1, 7
+ declare_var_vector_reg lane0_msg_2, 8
+ declare_var_vector_reg lane1_msg_2, 9
+ declare_var_vector_reg lane2_msg_2,10
+ declare_var_vector_reg lane3_msg_2,11
+ declare_var_vector_reg lane0_msg_3,12
+ declare_var_vector_reg lane1_msg_3,13
+ declare_var_vector_reg lane2_msg_3,14
+ declare_var_vector_reg lane3_msg_3,15
+
+ declare_var_vector_reg lane0_abcd ,16
+ declare_var_vector_reg lane1_abcd ,17
+ declare_var_vector_reg lane2_abcd ,18
+ declare_var_vector_reg lane3_abcd ,19
+ declare_var_vector_reg lane0_tmp0 ,20
+ declare_var_vector_reg lane1_tmp0 ,21
+ declare_var_vector_reg lane2_tmp0 ,22
+ declare_var_vector_reg lane3_tmp0 ,23
+ declare_var_vector_reg lane0_tmp1 ,24
+ declare_var_vector_reg lane1_tmp1 ,25
+ declare_var_vector_reg lane2_tmp1 ,26
+ declare_var_vector_reg lane3_tmp1 ,27
+
+
+ declare_var_vector_reg e0 ,28
+ declare_var_vector_reg e1 ,29
+ declare_var_vector_reg key ,30
+ declare_var_vector_reg tmp ,31
+
+ key_adr .req x4
+ msg_adr .req x5
+ block_cnt .req x6
+ offs .req x7
+ digest_adr .req x16
+ tmp0_adr .req x17
+ tmp1_adr .req x18
+
+/**
+macros for rounds 4-67
+*/
+.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req
+ sha1h lane0_\tmp0\()_s, lane0_\abcd\()_s
+ sha1h lane1_\tmp0\()_s, lane1_\abcd\()_s
+ sha1h lane2_\tmp0\()_s, lane2_\abcd\()_s
+ sha1h lane3_\tmp0\()_s, lane3_\abcd\()_s
+ mov \e0\()_v.S[0],lane0_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[1],lane1_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[2],lane2_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[3],lane3_\tmp0\()_v.S[0]
+ mov lane0_\tmp0\()_v.S[0],\e1\()_v.S[0]
+ mov lane1_\tmp0\()_v.S[0],\e1\()_v.S[1]
+ mov lane2_\tmp0\()_v.S[0],\e1\()_v.S[2]
+ mov lane3_\tmp0\()_v.S[0],\e1\()_v.S[3]
+ \inst lane0_\abcd\()_q,lane0_\tmp0\()_s,lane0_\tmp1\()_v.4s
+ \inst lane1_\abcd\()_q,lane1_\tmp0\()_s,lane1_\tmp1\()_v.4s
+ \inst lane2_\abcd\()_q,lane2_\tmp0\()_s,lane2_\tmp1\()_v.4s
+ \inst lane3_\abcd\()_q,lane3_\tmp0\()_s,lane3_\tmp1\()_v.4s
+ ld1 {lane0_\tmp0\()_v.4s-lane3_\tmp0\()_v.4s},[\tmp0\()_adr]
+ add lane0_\tmp1\()_v.4s,lane0_\msg3\()_v.4s,key_v.4s
+ add lane1_\tmp1\()_v.4s,lane1_\msg3\()_v.4s,key_v.4s
+ add lane2_\tmp1\()_v.4s,lane2_\msg3\()_v.4s,key_v.4s
+ add lane3_\tmp1\()_v.4s,lane3_\msg3\()_v.4s,key_v.4s
+ st1 {lane0_\tmp1\()_v.4s-lane3_\tmp1\()_v.4s},[\tmp1\()_adr]
+ sha1su1 lane0_\msg0\()_v.4s,lane0_\msg3\()_v.4s
+ sha1su1 lane1_\msg0\()_v.4s,lane1_\msg3\()_v.4s
+ sha1su1 lane2_\msg0\()_v.4s,lane2_\msg3\()_v.4s
+ sha1su1 lane3_\msg0\()_v.4s,lane3_\msg3\()_v.4s
+ sha1su0 lane0_\msg1\()_v.4s,lane0_\msg2\()_v.4s,lane0_\msg3\()_v.4s
+ sha1su0 lane1_\msg1\()_v.4s,lane1_\msg2\()_v.4s,lane1_\msg3\()_v.4s
+ sha1su0 lane2_\msg1\()_v.4s,lane2_\msg2\()_v.4s,lane2_\msg3\()_v.4s
+ sha1su0 lane3_\msg1\()_v.4s,lane3_\msg2\()_v.4s,lane3_\msg3\()_v.4s
+
+.endm
+
+
+/*
+ void mh_sha1_block_ce(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks)
+*/
+/*
+Argument list
+*/
+ input_data .req x0
+ digests .req x1
+ frame_buffer .req x2
+ num_blocks .req w3
+
+ .global mh_sha1_block_ce
+ .type mh_sha1_block_ce, %function
+mh_sha1_block_ce:
+ //save temp vector registers
+ stp d8, d9, [sp, -128]!
+
+ stp d10, d11, [sp, 16]
+ stp d12, d13, [sp, 32]
+ stp d14, d15, [sp, 48]
+ mov tmp0_adr,frame_buffer
+ add tmp1_adr,tmp0_adr,128
+
+
+start_loop:
+ mov block_cnt,0
+ mov msg_adr,input_data
+lane_loop:
+ mov offs,64
+ adr key_adr,KEY_0
+ //load msg 0
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[3],[msg_adr],offs
+
+ add digest_adr,digests,block_cnt
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs
+ ldr e0_q,[digest_adr]
+
+ //load key_0
+ ldr key_q,[key_adr]
+
+ rev32 lane0_msg_0_v.16b,lane0_msg_0_v.16b
+ rev32 lane1_msg_0_v.16b,lane1_msg_0_v.16b
+ rev32 lane2_msg_0_v.16b,lane2_msg_0_v.16b
+ rev32 lane3_msg_0_v.16b,lane3_msg_0_v.16b
+ rev32 lane0_msg_1_v.16b,lane0_msg_1_v.16b
+ rev32 lane1_msg_1_v.16b,lane1_msg_1_v.16b
+ rev32 lane2_msg_1_v.16b,lane2_msg_1_v.16b
+ rev32 lane3_msg_1_v.16b,lane3_msg_1_v.16b
+ rev32 lane0_msg_2_v.16b,lane0_msg_2_v.16b
+ rev32 lane1_msg_2_v.16b,lane1_msg_2_v.16b
+ rev32 lane2_msg_2_v.16b,lane2_msg_2_v.16b
+ rev32 lane3_msg_2_v.16b,lane3_msg_2_v.16b
+ rev32 lane0_msg_3_v.16b,lane0_msg_3_v.16b
+ rev32 lane1_msg_3_v.16b,lane1_msg_3_v.16b
+ rev32 lane2_msg_3_v.16b,lane2_msg_3_v.16b
+ rev32 lane3_msg_3_v.16b,lane3_msg_3_v.16b
+
+ add lane0_tmp1_v.4s,lane0_msg_1_v.4s,key_v.4s
+ add lane1_tmp1_v.4s,lane1_msg_1_v.4s,key_v.4s
+ add lane2_tmp1_v.4s,lane2_msg_1_v.4s,key_v.4s
+ add lane3_tmp1_v.4s,lane3_msg_1_v.4s,key_v.4s
+ st1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr]
+
+ add lane0_tmp0_v.4s,lane0_msg_0_v.4s,key_v.4s
+ add lane1_tmp0_v.4s,lane1_msg_0_v.4s,key_v.4s
+ add lane2_tmp0_v.4s,lane2_msg_0_v.4s,key_v.4s
+ add lane3_tmp0_v.4s,lane3_msg_0_v.4s,key_v.4s
+
+ /* rounds 0-3 */
+ sha1h lane0_tmp1_s,lane0_abcd_s
+ sha1h lane1_tmp1_s,lane1_abcd_s
+ sha1h lane2_tmp1_s,lane2_abcd_s
+ sha1h lane3_tmp1_s,lane3_abcd_s
+ mov e1_v.S[0],lane0_tmp1_v.S[0]
+ mov e1_v.S[1],lane1_tmp1_v.S[0]
+ mov e1_v.S[2],lane2_tmp1_v.S[0]
+ mov e1_v.S[3],lane3_tmp1_v.S[0]
+ mov lane0_tmp1_v.S[0],e0_v.S[0]
+ mov lane1_tmp1_v.S[0],e0_v.S[1]
+ mov lane2_tmp1_v.S[0],e0_v.S[2]
+ mov lane3_tmp1_v.S[0],e0_v.S[3]
+ sha1c lane0_abcd_q,lane0_tmp1_s,lane0_tmp0_v.4s
+ sha1c lane1_abcd_q,lane1_tmp1_s,lane1_tmp0_v.4s
+ sha1c lane2_abcd_q,lane2_tmp1_s,lane2_tmp0_v.4s
+ sha1c lane3_abcd_q,lane3_tmp1_s,lane3_tmp0_v.4s
+ ld1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr]
+ add lane0_tmp0_v.4s,lane0_msg_2_v.4s,key_v.4s
+ sha1su0 lane0_msg_0_v.4s,lane0_msg_1_v.4s,lane0_msg_2_v.4s
+ add lane1_tmp0_v.4s,lane1_msg_2_v.4s,key_v.4s
+ sha1su0 lane1_msg_0_v.4s,lane1_msg_1_v.4s,lane1_msg_2_v.4s
+ add lane2_tmp0_v.4s,lane2_msg_2_v.4s,key_v.4s
+ sha1su0 lane2_msg_0_v.4s,lane2_msg_1_v.4s,lane2_msg_2_v.4s
+ add lane3_tmp0_v.4s,lane3_msg_2_v.4s,key_v.4s
+ sha1su0 lane3_msg_0_v.4s,lane3_msg_1_v.4s,lane3_msg_2_v.4s
+ st1 {lane0_tmp0_v.4s-lane3_tmp0_v.4s},[tmp0_adr]
+
+ sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 4-7 */
+ sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+
+
+ adr key_adr,KEY_1
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 /* rounds 12-15 */
+ sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 20-23 */
+ sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+
+ adr key_adr,KEY_2
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 36-39 */
+ sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+ sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+
+ adr key_adr,KEY_3
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 52-55 */
+ sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+ sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+
+ //msg2 and msg1 are free
+ mov lane0_msg_2_v.S[0],e1_v.S[0]
+ mov lane1_msg_2_v.S[0],e1_v.S[1]
+ mov lane2_msg_2_v.S[0],e1_v.S[2]
+ mov lane3_msg_2_v.S[0],e1_v.S[3]
+
+ /* rounds 68-71 */
+ sha1h lane0_msg_1_s,lane0_abcd_s
+ sha1h lane1_msg_1_s,lane1_abcd_s
+ sha1h lane2_msg_1_s,lane2_abcd_s
+ sha1h lane3_msg_1_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s
+ sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s
+ sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s
+ sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s
+ add lane0_tmp1_v.4s,lane0_msg_3_v.4s,key_v.4s
+ add lane1_tmp1_v.4s,lane1_msg_3_v.4s,key_v.4s
+ add lane2_tmp1_v.4s,lane2_msg_3_v.4s,key_v.4s
+ add lane3_tmp1_v.4s,lane3_msg_3_v.4s,key_v.4s
+ sha1su1 lane0_msg_0_v.4s,lane0_msg_3_v.4s
+ sha1su1 lane1_msg_0_v.4s,lane1_msg_3_v.4s
+ sha1su1 lane2_msg_0_v.4s,lane2_msg_3_v.4s
+ sha1su1 lane3_msg_0_v.4s,lane3_msg_3_v.4s
+
+ /* rounds 72-75 */
+ sha1h lane0_msg_2_s,lane0_abcd_s
+ sha1h lane1_msg_2_s,lane1_abcd_s
+ sha1h lane2_msg_2_s,lane2_abcd_s
+ sha1h lane3_msg_2_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_1_s,lane0_tmp0_v.4s
+ sha1p lane1_abcd_q,lane1_msg_1_s,lane1_tmp0_v.4s
+ sha1p lane2_abcd_q,lane2_msg_1_s,lane2_tmp0_v.4s
+ sha1p lane3_abcd_q,lane3_msg_1_s,lane3_tmp0_v.4s
+
+ /* rounds 76-79 */
+ sha1h lane0_msg_1_s,lane0_abcd_s
+ sha1h lane1_msg_1_s,lane1_abcd_s
+ sha1h lane2_msg_1_s,lane2_abcd_s
+ sha1h lane3_msg_1_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s
+ sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s
+ sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s
+ sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s
+ add digest_adr,digests,block_cnt
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[digest_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[digest_adr]
+
+ add lane0_abcd_v.4S,lane0_abcd_v.4S,lane0_msg_0_v.4S
+ add lane1_abcd_v.4S,lane1_abcd_v.4S,lane1_msg_0_v.4S
+ add lane2_abcd_v.4S,lane2_abcd_v.4S,lane2_msg_0_v.4S
+ add lane3_abcd_v.4S,lane3_abcd_v.4S,lane3_msg_0_v.4S
+
+ add lane0_msg_1_v.4S,lane0_msg_1_v.4S,lane0_msg_3_v.4S
+ add lane1_msg_1_v.4S,lane1_msg_1_v.4S,lane1_msg_3_v.4S
+ add lane2_msg_1_v.4S,lane2_msg_1_v.4S,lane2_msg_3_v.4S
+ add lane3_msg_1_v.4S,lane3_msg_1_v.4S,lane3_msg_3_v.4S
+
+ add digest_adr,digests,block_cnt
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs
+ st4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[digest_adr]
+
+ add block_cnt,block_cnt,16
+ cmp block_cnt,64
+ add msg_adr,input_data,block_cnt
+ add digest_adr,digests,block_cnt
+ bcc lane_loop
+
+ subs num_blocks,num_blocks,1
+ add input_data,input_data,1024
+ bhi start_loop
+exit_func:
+ //restore temp register
+ ldp d10, d11, [sp, 16]
+ ldp d12, d13, [sp, 32]
+ ldp d14, d15, [sp, 48]
+ ldp d8, d9, [sp], 128
+ ret
+
+ .size mh_sha1_block_ce, .-mh_sha1_block_ce
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+KEY_0:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+KEY_1:
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+KEY_2:
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+KEY_3:
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
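
mh_sha1_block_ce drives the FEAT_SHA1 instructions (sha1c/sha1p/sha1m for the three round functions, sha1h for the rotate, sha1su0/sha1su1 for the message schedule) across four segment lanes in parallel, spilling the pre-added w+k vectors through frame_buffer. A single-lane sketch of the same round structure using the ACLE intrinsics from <arm_neon.h> (compile with -march=armv8-a+crypto); this shows the instruction pattern only and is not the ISA-L code.

#include <arm_neon.h>
#include <stdint.h>

/* one 64-byte block; state = {a,b,c,d,e} */
void sha1_ce_block(uint32_t state[5], const uint8_t block[64])
{
        static const uint32_t K[4] = {
                0x5a827999u, 0x6ed9eba1u, 0x8f1bbcdcu, 0xca62c1d6u
        };
        uint32x4_t w[4], abcd, abcd0;
        uint32_t e, e0, e_next;

        for (int i = 0; i < 4; i++)     /* load and big-endian swap */
                w[i] = vreinterpretq_u32_u8(
                        vrev32q_u8(vld1q_u8(block + 16 * i)));

        abcd0 = abcd = vld1q_u32(state);
        e0 = e = state[4];

        for (int g = 0; g < 20; g++) {  /* 20 groups of 4 rounds */
                uint32x4_t wk = vaddq_u32(w[g & 3], vdupq_n_u32(K[g / 5]));

                e_next = vsha1h_u32(vgetq_lane_u32(abcd, 0));
                if (g < 5)
                        abcd = vsha1cq_u32(abcd, e, wk);   /* Ch, rounds 0-19   */
                else if (g < 10 || g >= 15)
                        abcd = vsha1pq_u32(abcd, e, wk);   /* parity            */
                else
                        abcd = vsha1mq_u32(abcd, e, wk);   /* Maj, rounds 40-59 */
                e = e_next;

                if (g < 16)             /* extend the message schedule */
                        w[g & 3] = vsha1su1q_u32(
                                vsha1su0q_u32(w[g & 3], w[(g + 1) & 3],
                                              w[(g + 2) & 3]),
                                w[(g + 3) & 3]);
        }

        vst1q_u32(state, vaddq_u32(abcd, abcd0));
        state[4] = e0 + e;
}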
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c
new file mode 100644
index 000000000..c35daeab0
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c
@@ -0,0 +1,53 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+void mh_sha1_block_ce(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+/***************mh_sha1_update***********/
+// mh_sha1_update_ce.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_ce
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_ce
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail is used to calculate the last incomplete block of src data
+// mh_sha1_finalize is an mh_sha1_ctx wrapper around mh_sha1_tail
+// mh_sha1_finalize_ce.c and mh_sha1_tail_ce.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_ce
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_ce
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_ce
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S
new file mode 100644
index 000000000..9a6d0caea
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S
@@ -0,0 +1,35 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#include "aarch64_multibinary.h"
+
+
+mbin_interface mh_sha1_update
+mbin_interface mh_sha1_finalize
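
mbin_interface expands to a small trampoline: the first call runs the matching DEFINE_INTERFACE_DISPATCHER resolver, caches the returned function pointer, and every later call branches straight to the chosen implementation. Callers never see this machinery; assuming the public mh_sha1.h API (struct mh_sha1_ctx, mh_sha1_init/update/finalize, SHA1_DIGEST_WORDS), usage looks roughly like this, with error handling elided:

#include <stdio.h>
#include <stdint.h>
#include "mh_sha1.h"

int main(void)
{
        struct mh_sha1_ctx ctx;
        uint32_t digest[SHA1_DIGEST_WORDS];     /* 5 x 32-bit words */
        const char msg[] = "abc";

        mh_sha1_init(&ctx);
        mh_sha1_update(&ctx, msg, sizeof(msg) - 1); /* resolves to _ce/_asimd/base */
        mh_sha1_finalize(&ctx, digest);

        for (int i = 0; i < SHA1_DIGEST_WORDS; i++)
                printf("%08x", digest[i]);
        printf("\n");
        return 0;
}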
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S
new file mode 100644
index 000000000..c8b8dd982
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S
@@ -0,0 +1,269 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+// macro F = (D ^ (B & (C ^ D)))
+.macro FUNC_F0
+ eor VF.16b, VC.16b, VD.16b
+ and VF.16b, VB.16b, VF.16b
+ eor VF.16b, VD.16b, VF.16b
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F1
+ eor VF.16b, VB.16b, VC.16b
+ eor VF.16b, VF.16b, VD.16b
+.endm
+
+// F = ((B & C) | (B & D) | (C & D))
+.macro FUNC_F2
+ and vT0.16b, VB.16b, VC.16b
+ and vT1.16b, VB.16b, VD.16b
+ and vT2.16b, VC.16b, VD.16b
+ orr VF.16b, vT0.16b, vT1.16b
+ orr VF.16b, VF.16b, vT2.16b
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F3
+ FUNC_F1
+.endm
+
+.altmacro
+.macro load_next_word windex
+ .if \windex < 16
+ load_x4_word \windex
+ .endif
+.endm
+
+// FUNC_F0 is merged into STEP_00_15 for efficiency
+.macro SHA1_STEP_00_15_F0 windex:req
+ rev32 WORD\windex\().16b,WORD\windex\().16b
+ next_word=\windex+1
+ load_next_word %next_word
+ // e = (a leftrotate 5) + f + e + k + w[i]
+ ushr VT.4s, VA.4s, 32 - 5
+ add VE.4s, VE.4s, VK.4s
+ sli VT.4s, VA.4s, 5
+ eor VF.16b, VC.16b, VD.16b
+ add VE.4s, VE.4s, WORD\windex\().4s
+ and VF.16b, VB.16b, VF.16b
+ add VE.4s, VE.4s, VT.4s
+ eor VF.16b, VD.16b, VF.16b
+ ushr VT.4s, VB.4s, 32 - 30
+ add VE.4s, VE.4s, VF.4s
+ sli VT.4s, VB.4s, 30
+.endm
+
+.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req
+ eor vT0.16b,\reg_3\().16b,\reg_8\().16b
+ eor VT.16b,\reg_14\().16b,\reg_16\().16b
+ eor vT0.16b,vT0.16b,VT.16b
+ // e = (a leftrotate 5) + f + e + k + w[i]
+ ushr VT.4s, vT0.4s, 32 - 1
+ add VE.4s, VE.4s, VK.4s
+ ushr vT1.4s, VA.4s, 32 - 5
+ sli VT.4s, vT0.4s, 1
+ add VE.4s, VE.4s, VT.4s
+ sli vT1.4s, VA.4s, 5
+ mov \reg_16\().16b,VT.16b
+ add VE.4s, VE.4s, vT1.4s
+ ushr VT.4s, VB.4s, 32 - 30
+ \func_f
+ add VE.4s, VE.4s, VF.4s
+ sli VT.4s, VB.4s, 30
+.endm
+
+ VA .req v0
+ VB .req v1
+ VC .req v2
+ VD .req v3
+ VE .req v4
+ VT .req v5
+ VF .req v6
+ VK .req v7
+ WORD0 .req v8
+ WORD1 .req v9
+ WORD2 .req v10
+ WORD3 .req v11
+ WORD4 .req v12
+ WORD5 .req v13
+ WORD6 .req v14
+ WORD7 .req v15
+ WORD8 .req v16
+ WORD9 .req v17
+ WORD10 .req v18
+ WORD11 .req v19
+ WORD12 .req v20
+ WORD13 .req v21
+ WORD14 .req v22
+ WORD15 .req v23
+ vT0 .req v24
+ vT1 .req v25
+ vT2 .req v26
+ vAA .req v27
+ vBB .req v28
+ vCC .req v29
+ vDD .req v30
+ vEE .req v31
+ TT .req v0
+ sha1key_adr .req x15
+
+.macro SWAP_STATES
+ // shifted VB is held in VT after each step
+ .unreq TT
+ TT .req VE
+ .unreq VE
+ VE .req VD
+ .unreq VD
+ VD .req VC
+ .unreq VC
+ VC .req VT
+ .unreq VT
+ VT .req VB
+ .unreq VB
+ VB .req VA
+ .unreq VA
+ VA .req TT
+.endm
+
+.altmacro
+.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req
+ SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\()
+.endm
+
+.macro exec_step windex:req
+ .if \windex <= 15
+ SHA1_STEP_00_15_F0 \windex
+ .else
+ idx14=((\windex - 14) & 15)
+ idx8=((\windex - 8) & 15)
+ idx3=((\windex - 3) & 15)
+ idx16=(\windex & 15)
+ .if \windex <= 19
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 20 && \windex <= 39
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 40 && \windex <= 59
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 60 && \windex <= 79
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .endif
+
+ SWAP_STATES
+
+ .if \windex == 79
+ // after 80 steps, the registers ABCDET have shifted from
+ // their original order of 012345 to 341520
+ // have to swap back for both compile- and run-time correctness
+ mov v0.16b,v3.16b
+ .unreq VA
+ VA .req v0
+
+ mov vT0.16b,v2.16b
+ mov v2.16b,v1.16b
+ mov v1.16b,v4.16b
+ .unreq VB
+ VB .req v1
+ .unreq VC
+ VC .req v2
+
+ mov v3.16b,v5.16b
+ .unreq VD
+ VD .req v3
+
+ mov v4.16b,vT0.16b
+ .unreq VE
+ VE .req v4
+
+ .unreq VT
+ VT .req v5
+ .endif
+.endm
+
+.macro exec_steps idx:req,more:vararg
+ exec_step \idx
+ .ifnb \more
+ exec_steps \more
+ .endif
+.endm
+
+.macro sha1_single
+ load_x4_word 0
+
+ mov vAA.16B, VA.16B
+ mov vBB.16B, VB.16B
+ mov vCC.16B, VC.16B
+ mov vDD.16B, VD.16B
+ mov vEE.16B, VE.16B
+
+ adr sha1key_adr, KEY_0
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
+
+ // 20 ~ 39
+ adr sha1key_adr, KEY_1
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
+
+ // 40 ~ 59
+ adr sha1key_adr, KEY_2
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59
+
+ // 60 ~ 79
+ adr sha1key_adr, KEY_3
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
+
+ add VA.4s, vAA.4s, VA.4s
+ add VB.4s, vBB.4s, VB.4s
+ add VC.4s, vCC.4s, VC.4s
+ add VD.4s, vDD.4s, VD.4s
+ add VE.4s, vEE.4s, VE.4s
+.endm
+
+.macro sha1_asimd_save_stack
+ stp d8,d9,[sp, -64]!
+ stp d10,d11,[sp, 16]
+ stp d12,d13,[sp, 32]
+ stp d14,d15,[sp, 48]
+.endm
+
+.macro sha1_asimd_restore_stack
+ ldp d10,d11,[sp, 16]
+ ldp d12,d13,[sp, 32]
+ ldp d14,d15,[sp, 48]
+ ldp d8,d9,[sp],64
+.endm
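
Each vector lane of the macros above carries one hash segment through the standard SHA-1 recurrence, with SWAP_STATES renaming registers instead of moving data. For reference, the scalar step that FUNC_F0..FUNC_F3 and the SHA1_STEP macros vectorize four segments at a time (a sketch of the math only, not the ISA-L code):

#include <stdint.h>

static inline uint32_t rotl32(uint32_t x, int n)
{
        return (x << n) | (x >> (32 - n));
}

static inline uint32_t sha1_f(int round, uint32_t b, uint32_t c, uint32_t d)
{
        if (round < 20) return d ^ (b & (c ^ d));            /* FUNC_F0 */
        if (round < 40) return b ^ c ^ d;                    /* FUNC_F1 */
        if (round < 60) return (b & c) | (b & d) | (c & d);  /* FUNC_F2 */
        return b ^ c ^ d;                                    /* FUNC_F3 */
}

/* one round; the state rotation mirrors the SWAP_STATES register renaming.
 * for i >= 16 the schedule word is w[i] = rotl32(w[i-3] ^ w[i-8] ^ w[i-14]
 * ^ w[i-16], 1), cf. SHA1_STEP_16_79. */
static inline void sha1_step(int i, uint32_t w, uint32_t k, uint32_t s[5])
{
        uint32_t e = s[4] + rotl32(s[0], 5) + sha1_f(i, s[1], s[2], s[3]) + k + w;
        s[4] = s[3];
        s[3] = s[2];
        s[2] = rotl32(s[1], 30);
        s[1] = s[0];
        s[0] = e;
}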