Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64')
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S             294
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S           269
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c              250
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c                 250
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c   93
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S            192
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c           217
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c              208
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S          36
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S               194
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S               253
11 files changed, 2256 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S
new file mode 100644
index 000000000..55d6f932f
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S
@@ -0,0 +1,294 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+ input_data .req x0
+ num_blocks .req w1
+ digest .req x2
+
+ // x2 is reused intentionally between digest/tmp
+ // due to running out of registers
+ TMP .req x2
+ TMPW .req w2
+ sha1key_adr .req x3
+ WK .req w3
+ WF .req w4
+ WA .req w5
+ WB .req w6
+ WC .req w7
+ WD .req w8
+ WE .req w9
+ WORD0 .req w10
+ WORD1 .req w11
+ WORD2 .req w12
+ WORD3 .req w13
+ WORD4 .req w14
+ WORD5 .req w15
+ WORD6 .req w16
+ WORD7 .req w17
+ WORD8 .req w18
+ WORD9 .req w19
+ WORD10 .req w20
+ WORD11 .req w21
+ WORD12 .req w22
+ WORD13 .req w23
+ WORD14 .req w24
+ WORD15 .req w25
+ AA .req w26
+ BB .req w27
+ CC .req w28
+ DD .req w29
+ EE .req w30
+
+ TT .req w0
+
+.macro save_stack
+ stp x16,x17,[sp, -128]!
+ stp x18,x19,[sp, 16]
+ stp x20,x21,[sp, 32]
+ stp x22,x23,[sp, 48]
+ stp x24,x25,[sp, 64]
+ stp x26,x27,[sp, 80]
+ stp x28,x29,[sp, 96]
+ str x30,[sp, 112]
+ // have to reuse x2, which holds the digest address
+ str x2,[sp, 120]
+.endm
+
+.macro restore_stack
+ ldp x18,x19,[sp, 16]
+ ldp x20,x21,[sp, 32]
+ ldp x22,x23,[sp, 48]
+ ldp x24,x25,[sp, 64]
+ ldp x26,x27,[sp, 80]
+ ldp x28,x29,[sp, 96]
+ ldr x30,[sp, 112]
+ ldr x2,[sp, 120]
+ ldp x16,x17,[sp],128
+.endm
+// macro F = (D ^ (B & (C ^ D)))
+.macro FUNC_F0
+ eor WF, WC, WD
+ and WF, WB, WF
+ eor WF, WD, WF
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F1
+ eor WF, WB, WC
+ eor WF, WF, WD
+.endm
+
+// F = ((B & C) | (B & D) | (C & D))
+.macro FUNC_F2
+ and TMPW, WB, WC
+ and WF, WB, WD
+ orr WF, WF, TMPW
+ and TMPW, WC, WD
+ orr WF, WF, TMPW
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F3
+ FUNC_F1
+.endm
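
The four macros above are the standard SHA-1 round functions: FUNC_F0 uses the equivalent form D ^ (B & (C ^ D)) of the "choose" function to save an instruction, FUNC_F2 is the majority function, and FUNC_F3 simply reuses FUNC_F1. For reference only, a scalar C sketch of the same functions (the names f_choose, f_parity, and f_majority are illustrative, not part of the patch):

#include <stdint.h>

static inline uint32_t f_choose(uint32_t b, uint32_t c, uint32_t d)
{
	return d ^ (b & (c ^ d));           /* rounds  0-19, == (b & c) | (~b & d) */
}

static inline uint32_t f_parity(uint32_t b, uint32_t c, uint32_t d)
{
	return b ^ c ^ d;                   /* rounds 20-39 and 60-79 */
}

static inline uint32_t f_majority(uint32_t b, uint32_t c, uint32_t d)
{
	return (b & c) | (b & d) | (c & d); /* rounds 40-59 */
}
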
+
+.altmacro
+.macro load_next_word windex
+ .if \windex < 16
+ load_word_at \windex
+ .endif
+.endm
+
+.macro SHA1_STEP_00_15 windex:req
+ rev WORD\windex\(),WORD\windex\()
+ next_word=\windex+1
+ load_next_word %next_word
+
+ ror TMPW,WA,#32-5
+ add WE,WE,TMPW
+ add WE,WE,WK
+ FUNC_F0
+ ror WB,WB,#32-30
+ add WE,WE,WORD\windex\()
+ add WE,WE,WF
+.endm
+
+.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req
+ eor TMPW,\reg_14,\reg_8
+ eor \reg_16,\reg_16,\reg_3
+ eor \reg_16,\reg_16,TMPW
+
+ ror TMPW,WA,#32-5
+ ror \reg_16,\reg_16, #32 - 1
+
+ add WE,WE,TMPW
+ add WE,WE,WK
+ \func_f
+ ror WB,WB,#32-30
+ add WE,WE,\reg_16
+ add WE,WE,WF
+.endm
+
+.macro SWAP_STATES
+ .unreq TT
+ TT .req WE
+ .unreq WE
+ WE .req WD
+ .unreq WD
+ WD .req WC
+ .unreq WC
+ WC .req WB
+ .unreq WB
+ WB .req WA
+ .unreq WA
+ WA .req TT
+.endm
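
SWAP_STATES performs the SHA-1 state rotation purely by re-binding the symbolic register names at assembly time, so no mov instructions are needed between rounds. Conceptually it is the same as rotating pointers instead of copying values; a hypothetical C analogue:

#include <stdint.h>

static void rotate_roles_demo(void)
{
	uint32_t r0 = 0, r1 = 0, r2 = 0, r3 = 0, r4 = 0;
	uint32_t *A = &r0, *B = &r1, *C = &r2, *D = &r3, *E = &r4, *T;

	/* each round accumulates its result into *E (the register that is
	 * about to become A); then the names rotate instead of the data */
	T = E; E = D; D = C; C = B; B = A; A = T;
	(void)A; (void)B; (void)C; (void)D; (void)E;
}
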
+
+.altmacro
+.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req
+ SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\()
+.endm
+
+.macro exec_step windex:req
+ .if \windex <= 15
+ SHA1_STEP_00_15 windex
+ .else
+ idx14=((\windex - 14) & 15)
+ idx8=((\windex - 8) & 15)
+ idx3=((\windex - 3) & 15)
+ idx16=(\windex & 15)
+ .if \windex <= 19
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 20 && \windex <= 39
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 40 && \windex <= 59
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 60 && \windex <= 79
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .endif
+
+ SWAP_STATES
+.endm
+
+.macro exec_steps idx:req,more:vararg
+ exec_step \idx
+ .ifnb \more
+ exec_steps \more
+ .endif
+.endm
+
+.altmacro
+
+.macro load_two_words_at idx0:req,idx1:req
+ ldp WORD\idx0\(),WORD\idx1\(),[input_data],8
+.endm
+
+.macro load_word_at idx:req
+ .if \idx % 2 == 0
+ idx1=\idx+1
+ load_two_words_at \idx,%idx1
+ .endif
+.endm
+
+/*
+ * void sha1_aarch64_x1(uint32_t *input_data, int num_blocks, uint32_t digest[5])
+ */
+ .global sha1_aarch64_x1
+ .type sha1_aarch64_x1, %function
+sha1_aarch64_x1:
+ cmp num_blocks, #0
+ beq .return
+
+ ldp WA,WB,[digest]
+ ldp WC,WD,[digest,8]
+ ldr WE,[digest,16]
+ save_stack
+
+.block_loop:
+ mov AA, WA
+ mov BB, WB
+ mov CC, WC
+ mov DD, WD
+ mov EE, WE
+
+ load_word_at 0
+
+ adr sha1key_adr, KEY_0
+ ldr WK, [sha1key_adr]
+ exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
+
+ // 20 ~ 39
+ adr sha1key_adr, KEY_1
+ ldr WK, [sha1key_adr]
+ exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
+
+ // 40 ~ 59
+ adr sha1key_adr, KEY_2
+ ldr WK, [sha1key_adr]
+ exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59
+
+ // 60 ~ 79
+ adr sha1key_adr, KEY_3
+ ldr WK, [sha1key_adr]
+ exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
+
+ add WA, AA, WA
+ add WB, BB, WB
+ add WC, CC, WC
+ add WD, DD, WD
+ add WE, EE, WE
+
+ subs num_blocks, num_blocks, 1
+ bne .block_loop
+
+ restore_stack
+ stp WA,WB,[digest]
+ stp WC,WD,[digest,8]
+ str WE,[digest,16]
+
+.return:
+ ret
+
+ .size sha1_aarch64_x1, .-sha1_aarch64_x1
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+KEY_0:
+ .word 0x5a827999
+KEY_1:
+ .word 0x6ed9eba1
+KEY_2:
+ .word 0x8f1bbcdc
+KEY_3:
+ .word 0xca62c1d6
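
The routine above is a direct scalar translation of the SHA-1 compression function: four groups of 20 rounds, each with its own constant (KEY_0 through KEY_3), a 16-entry message-schedule ring updated in place, and a final feed-forward add into the running digest. A hypothetical plain-C reference of the same per-block flow (the names sha1_block_ref and rotl32 are illustrative; a little-endian host is assumed, which is why the assembly needs the rev in SHA1_STEP_00_15):

#include <stdint.h>

static uint32_t rotl32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

static void sha1_block_ref(const uint8_t *p, uint32_t digest[5])
{
	static const uint32_t K[4] =
	    { 0x5a827999, 0x6ed9eba1, 0x8f1bbcdc, 0xca62c1d6 };
	uint32_t w[16], a, b, c, d, e, f, tmp;
	int i;

	a = digest[0]; b = digest[1]; c = digest[2]; d = digest[3]; e = digest[4];
	for (i = 0; i < 80; i++) {
		if (i < 16) {
			/* message words are big-endian in the input block */
			w[i] = ((uint32_t)p[4 * i] << 24) | ((uint32_t)p[4 * i + 1] << 16) |
			       ((uint32_t)p[4 * i + 2] << 8) | p[4 * i + 3];
		} else {
			/* same (i-3, i-8, i-14, i-16) recurrence used by
			 * SHA1_STEP_16_79, kept in a 16-entry ring */
			w[i & 15] = rotl32(w[(i - 3) & 15] ^ w[(i - 8) & 15] ^
					   w[(i - 14) & 15] ^ w[i & 15], 1);
		}
		if (i < 20)      f = d ^ (b & (c ^ d));
		else if (i < 40) f = b ^ c ^ d;
		else if (i < 60) f = (b & c) | (b & d) | (c & d);
		else             f = b ^ c ^ d;
		tmp = rotl32(a, 5) + f + e + K[i / 20] + w[i & 15];
		e = d; d = c; c = rotl32(b, 30); b = a; a = tmp;
	}
	/* feed-forward, matching the final add WA,AA,WA ... sequence */
	digest[0] += a; digest[1] += b; digest[2] += c; digest[3] += d; digest[4] += e;
}
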
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S
new file mode 100644
index 000000000..c8b8dd982
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S
@@ -0,0 +1,269 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+// macro F = (D ^ (B & (C ^ D)))
+.macro FUNC_F0
+ eor VF.16b, VC.16b, VD.16b
+ and VF.16b, VB.16b, VF.16b
+ eor VF.16b, VD.16b, VF.16b
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F1
+ eor VF.16b, VB.16b, VC.16b
+ eor VF.16b, VF.16b, VD.16b
+.endm
+
+// F = ((B & C) | (B & D) | (C & D))
+.macro FUNC_F2
+ and vT0.16b, VB.16b, VC.16b
+ and vT1.16b, VB.16b, VD.16b
+ and vT2.16b, VC.16b, VD.16b
+ orr VF.16b, vT0.16b, vT1.16b
+ orr VF.16b, VF.16b, vT2.16b
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F3
+ FUNC_F1
+.endm
+
+.altmacro
+.macro load_next_word windex
+ .if \windex < 16
+ load_x4_word \windex
+ .endif
+.endm
+
+// FUNC_F0 is merged into STEP_00_15 for efficiency
+.macro SHA1_STEP_00_15_F0 windex:req
+ rev32 WORD\windex\().16b,WORD\windex\().16b
+ next_word=\windex+1
+ load_next_word %next_word
+ // e = (a leftrotate 5) + f + e + k + w[i]
+ ushr VT.4s, VA.4s, 32 - 5
+ add VE.4s, VE.4s, VK.4s
+ sli VT.4s, VA.4s, 5
+ eor VF.16b, VC.16b, VD.16b
+ add VE.4s, VE.4s, WORD\windex\().4s
+ and VF.16b, VB.16b, VF.16b
+ add VE.4s, VE.4s, VT.4s
+ eor VF.16b, VD.16b, VF.16b
+ ushr VT.4s, VB.4s, 32 - 30
+ add VE.4s, VE.4s, VF.4s
+ sli VT.4s, VB.4s, 30
+.endm
+
+.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req
+ eor vT0.16b,\reg_3\().16b,\reg_8\().16b
+ eor VT.16b,\reg_14\().16b,\reg_16\().16b
+ eor vT0.16b,vT0.16b,VT.16b
+ // e = (a leftrotate 5) + f + e + k + w[i]
+ ushr VT.4s, vT0.4s, 32 - 1
+ add VE.4s, VE.4s, VK.4s
+ ushr vT1.4s, VA.4s, 32 - 5
+ sli VT.4s, vT0.4s, 1
+ add VE.4s, VE.4s, VT.4s
+ sli vT1.4s, VA.4s, 5
+ mov \reg_16\().16b,VT.16b
+ add VE.4s, VE.4s, vT1.4s
+ ushr VT.4s, VB.4s, 32 - 30
+ \func_f
+ add VE.4s, VE.4s, VF.4s
+ sli VT.4s, VB.4s, 30
+.endm
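
Both step macros synthesize rotates with the ushr + sli pair, since ASIMD has no vector rotate instruction: a logical shift right by 32-n produces the low bits, and a shift-left-and-insert by n merges in the high bits without disturbing them. A sketch of the same idiom with NEON intrinsics, for the rotate-by-5 applied to VA (rotl5_x4 is an illustrative name):

#include <arm_neon.h>

static inline uint32x4_t rotl5_x4(uint32x4_t x)
{
	uint32x4_t t = vshrq_n_u32(x, 32 - 5);  /* ushr VT.4s, VA.4s, 32 - 5 */
	return vsliq_n_u32(t, x, 5);            /* sli  VT.4s, VA.4s, 5      */
}
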
+
+ VA .req v0
+ VB .req v1
+ VC .req v2
+ VD .req v3
+ VE .req v4
+ VT .req v5
+ VF .req v6
+ VK .req v7
+ WORD0 .req v8
+ WORD1 .req v9
+ WORD2 .req v10
+ WORD3 .req v11
+ WORD4 .req v12
+ WORD5 .req v13
+ WORD6 .req v14
+ WORD7 .req v15
+ WORD8 .req v16
+ WORD9 .req v17
+ WORD10 .req v18
+ WORD11 .req v19
+ WORD12 .req v20
+ WORD13 .req v21
+ WORD14 .req v22
+ WORD15 .req v23
+ vT0 .req v24
+ vT1 .req v25
+ vT2 .req v26
+ vAA .req v27
+ vBB .req v28
+ vCC .req v29
+ vDD .req v30
+ vEE .req v31
+ TT .req v0
+ sha1key_adr .req x15
+
+.macro SWAP_STATES
+ // shifted VB is held in VT after each step
+ .unreq TT
+ TT .req VE
+ .unreq VE
+ VE .req VD
+ .unreq VD
+ VD .req VC
+ .unreq VC
+ VC .req VT
+ .unreq VT
+ VT .req VB
+ .unreq VB
+ VB .req VA
+ .unreq VA
+ VA .req TT
+.endm
+
+.altmacro
+.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req
+ SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\()
+.endm
+
+.macro exec_step windex:req
+ .if \windex <= 15
+ SHA1_STEP_00_15_F0 windex
+ .else
+ idx14=((\windex - 14) & 15)
+ idx8=((\windex - 8) & 15)
+ idx3=((\windex - 3) & 15)
+ idx16=(\windex & 15)
+ .if \windex <= 19
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 20 && \windex <= 39
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 40 && \windex <= 59
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 60 && \windex <= 79
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .endif
+
+ SWAP_STATES
+
+ .if \windex == 79
+ // after 80 steps, the registers ABCDET have shifted from
+ // their original order of 012345 to 341520
+ // have to swap back for both compile- and run-time correctness
+ mov v0.16b,v3.16b
+ .unreq VA
+ VA .req v0
+
+ mov vT0.16b,v2.16b
+ mov v2.16b,v1.16b
+ mov v1.16b,v4.16b
+ .unreq VB
+ VB .req v1
+ .unreq VC
+ VC .req v2
+
+ mov v3.16b,v5.16b
+ .unreq VD
+ VD .req v3
+
+ mov v4.16b,vT0.16b
+ .unreq VE
+ VE .req v4
+
+ .unreq VT
+ VT .req v5
+ .endif
+.endm
+
+.macro exec_steps idx:req,more:vararg
+ exec_step \idx
+ .ifnb \more
+ exec_steps \more
+ .endif
+.endm
+
+.macro sha1_single
+ load_x4_word 0
+
+ mov vAA.16B, VA.16B
+ mov vBB.16B, VB.16B
+ mov vCC.16B, VC.16B
+ mov vDD.16B, VD.16B
+ mov vEE.16B, VE.16B
+
+ adr sha1key_adr, KEY_0
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
+
+ // 20 ~ 39
+ adr sha1key_adr, KEY_1
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
+
+ // 40 ~ 59
+ adr sha1key_adr, KEY_2
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59
+
+ // 60 ~ 79
+ adr sha1key_adr, KEY_3
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
+
+ add VA.4s, vAA.4s, VA.4s
+ add VB.4s, vBB.4s, VB.4s
+ add VC.4s, vCC.4s, VC.4s
+ add VD.4s, vDD.4s, VD.4s
+ add VE.4s, vEE.4s, VE.4s
+.endm
+
+.macro sha1_asimd_save_stack
+ stp d8,d9,[sp, -64]!
+ stp d10,d11,[sp, 16]
+ stp d12,d13,[sp, 32]
+ stp d14,d15,[sp, 48]
+.endm
+
+.macro sha1_asimd_restore_stack
+ ldp d10,d11,[sp, 16]
+ ldp d12,d13,[sp, 32]
+ ldp d14,d15,[sp, 48]
+ ldp d8,d9,[sp],64
+.endm
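
The save/restore macros spill only d8-d15 because, under the AArch64 procedure call standard, the low 64 bits of v8-v15 are the only SIMD registers a callee must preserve; v0-v7 and v16-v31 may be clobbered freely, so the four stp/ldp pairs (64 bytes of stack) above are sufficient.
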
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c
new file mode 100644
index 000000000..9a9952ff6
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c
@@ -0,0 +1,250 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include <string.h>
+#include "sha1_mb.h"
+#include "memcpy_inline.h"
+#include "endian_helper.h"
+void sha1_mb_mgr_init_asimd(SHA1_MB_JOB_MGR * state);
+SHA1_JOB *sha1_mb_mgr_submit_asimd(SHA1_MB_JOB_MGR * state, SHA1_JOB * job);
+SHA1_JOB *sha1_mb_mgr_flush_asimd(SHA1_MB_JOB_MGR * state);
+static inline void hash_init_digest(SHA1_WORD_T * digest);
+static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len);
+static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx);
+
+void sha1_ctx_mgr_init_asimd(SHA1_HASH_CTX_MGR * mgr)
+{
+ sha1_mb_mgr_init_asimd(&mgr->mgr);
+}
+
+SHA1_HASH_CTX *sha1_ctx_mgr_submit_asimd(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx,
+ const void *buffer, uint32_t len, HASH_CTX_FLAG flags)
+{
+ if (flags & (~HASH_ENTIRE)) {
+ // User should not pass anything other than FIRST, UPDATE, or LAST
+ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
+ return ctx;
+ }
+
+ if (ctx->status & HASH_CTX_STS_PROCESSING) {
+ // Cannot submit to a currently processing job.
+ ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
+ return ctx;
+ }
+
+ if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+ // Cannot update a finished job.
+ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
+ return ctx;
+ }
+
+ if (flags & HASH_FIRST) {
+ // Init digest
+ hash_init_digest(ctx->job.result_digest);
+
+ // Reset byte counter
+ ctx->total_length = 0;
+
+ // Clear extra blocks
+ ctx->partial_block_buffer_length = 0;
+ }
+ // If we made it here, there were no errors during this call to submit
+ ctx->error = HASH_CTX_ERROR_NONE;
+
+ // Store buffer ptr info from user
+ ctx->incoming_buffer = buffer;
+ ctx->incoming_buffer_length = len;
+
+ // Store the user's request flags and mark this ctx as currently being processed.
+ ctx->status = (flags & HASH_LAST) ?
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
+ HASH_CTX_STS_PROCESSING;
+
+ // Advance byte counter
+ ctx->total_length += len;
+
+ // If there is anything currently buffered in the extra blocks, append to it until it contains a whole block.
+ // Or if the user's buffer contains less than a whole block, append as much as possible to the extra block.
+ if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) {
+ // Compute how many bytes to copy from user buffer into extra block
+ uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length;
+ if (len < copy_len)
+ copy_len = len;
+
+ if (copy_len) {
+ // Copy and update relevant pointers and counters
+ memcpy_fixedlen(&ctx->partial_block_buffer
+ [ctx->partial_block_buffer_length], buffer, copy_len);
+
+ ctx->partial_block_buffer_length += copy_len;
+ ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len);
+ ctx->incoming_buffer_length = len - copy_len;
+ }
+ // The extra block should never contain more than 1 block here
+ assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
+
+ // If the extra block buffer contains exactly 1 block, it can be hashed.
+ if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
+ ctx->partial_block_buffer_length = 0;
+
+ ctx->job.buffer = ctx->partial_block_buffer;
+ ctx->job.len = 1;
+
+ ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr, &ctx->job);
+ }
+ }
+
+ return sha1_ctx_mgr_resubmit(mgr, ctx);
+}
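
For context, a minimal caller sketch of this submit/flush pair (assuming the usual ISA-L multi-buffer pattern with hash_ctx_init() from multi_buffer.h; the alignment requirements of SHA1_HASH_CTX_MGR and error handling are elided, and hash_one_buffer is an illustrative name):

#include <string.h>
#include "sha1_mb.h"

static void hash_one_buffer(const void *buf, uint32_t len,
			    uint32_t digest_out[SHA1_DIGEST_NWORDS])
{
	SHA1_HASH_CTX_MGR mgr;
	SHA1_HASH_CTX ctx;
	SHA1_HASH_CTX *done;

	sha1_ctx_mgr_init_asimd(&mgr);
	hash_ctx_init(&ctx);

	/* HASH_ENTIRE = HASH_FIRST | HASH_LAST: whole message in one call */
	done = sha1_ctx_mgr_submit_asimd(&mgr, &ctx, buf, len, HASH_ENTIRE);

	/* submit may return NULL until enough lanes are filled; flush drains */
	while (done == NULL)
		done = sha1_ctx_mgr_flush_asimd(&mgr);

	memcpy(digest_out, done->job.result_digest,
	       SHA1_DIGEST_NWORDS * sizeof(uint32_t));
}
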
+
+SHA1_HASH_CTX *sha1_ctx_mgr_flush_asimd(SHA1_HASH_CTX_MGR * mgr)
+{
+ SHA1_HASH_CTX *ctx;
+
+ while (1) {
+ ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_flush_asimd(&mgr->mgr);
+
+ // If flush returned 0, there are no more jobs in flight.
+ if (!ctx)
+ return NULL;
+
+ // If flush returned a job, verify that it is safe to return to the user.
+ // If it is not ready, resubmit the job to finish processing.
+ ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
+
+ // If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
+ if (ctx)
+ return ctx;
+
+ // Otherwise, all jobs currently being managed by the SHA1_HASH_CTX_MGR still need processing. Loop.
+ }
+}
+
+static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx)
+{
+ while (ctx) {
+
+ if (ctx->status & HASH_CTX_STS_COMPLETE) {
+ ctx->status = HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit
+ return ctx;
+ }
+ // If the extra blocks are empty, begin hashing what remains in the user's buffer.
+ if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) {
+ const void *buffer = ctx->incoming_buffer;
+ uint32_t len = ctx->incoming_buffer_length;
+
+ // Only entire blocks can be hashed. Copy remainder to extra blocks buffer.
+ uint32_t copy_len = len & (SHA1_BLOCK_SIZE - 1);
+
+ if (copy_len) {
+ len -= copy_len;
+ memcpy_fixedlen(ctx->partial_block_buffer,
+ ((const char *)buffer + len), copy_len);
+ ctx->partial_block_buffer_length = copy_len;
+ }
+
+ ctx->incoming_buffer_length = 0;
+
+ // len should be a multiple of the block size now
+ assert((len % SHA1_BLOCK_SIZE) == 0);
+
+ // Set len to the number of blocks to be hashed in the user's buffer
+ len >>= SHA1_LOG2_BLOCK_SIZE;
+
+ if (len) {
+ ctx->job.buffer = (uint8_t *) buffer;
+ ctx->job.len = len;
+ ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr,
+ &ctx->job);
+ continue;
+ }
+ }
+ // If the extra blocks are not empty, then we are either on the last block(s)
+ // or we need more user input before continuing.
+ if (ctx->status & HASH_CTX_STS_LAST) {
+ uint8_t *buf = ctx->partial_block_buffer;
+ uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length);
+
+ ctx->status =
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE);
+ ctx->job.buffer = buf;
+ ctx->job.len = (uint32_t) n_extra_blocks;
+ ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr, &ctx->job);
+ continue;
+ }
+
+ if (ctx)
+ ctx->status = HASH_CTX_STS_IDLE;
+ return ctx;
+ }
+
+ return NULL;
+}
+
+static inline void hash_init_digest(SHA1_WORD_T * digest)
+{
+ static const SHA1_WORD_T hash_initial_digest[SHA1_DIGEST_NWORDS] =
+ { SHA1_INITIAL_DIGEST };
+ memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest));
+}
+
+static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len)
+{
+ uint32_t i = (uint32_t) (total_len & (SHA1_BLOCK_SIZE - 1));
+
+ memclr_fixedlen(&padblock[i], SHA1_BLOCK_SIZE);
+ padblock[i] = 0x80;
+
+ // Move i to the end of either 1st or 2nd extra block depending on length
+ i += ((SHA1_BLOCK_SIZE - 1) & (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) + 1 +
+ SHA1_PADLENGTHFIELD_SIZE;
+
+#if SHA1_PADLENGTHFIELD_SIZE == 16
+ *((uint64_t *) & padblock[i - 16]) = 0;
+#endif
+
+ *((uint64_t *) & padblock[i - 8]) = to_be64((uint64_t) total_len << 3);
+
+ return i >> SHA1_LOG2_BLOCK_SIZE; // Number of extra blocks to hash
+}
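
A worked example of the index arithmetic above (assuming SHA1_PADLENGTHFIELD_SIZE is 8, i.e. SHA-1's 64-bit length field):

/*
 * total_len = 200: i = 200 & 63 = 8, so the 0x80 byte lands at offset 8;
 * then i += (63 & (0 - (200 + 8 + 1))) + 1 + 8 = 8 + 47 + 9 = 64, the
 * big-endian bit count 200 * 8 = 1600 is written at bytes 56..63, and
 * 64 >> 6 = 1 extra block is hashed.  With a 60-byte tail the same
 * formula gives i = 128, i.e. two extra blocks, because 60 + 1 + 8 > 64.
 */
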
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+struct slver sha1_ctx_mgr_init_asimd_slver_02020142;
+struct slver sha1_ctx_mgr_init_asimd_slver = { 0x0142, 0x02, 0x02 };
+
+struct slver sha1_ctx_mgr_submit_asimd_slver_02020143;
+struct slver sha1_ctx_mgr_submit_asimd_slver = { 0x0143, 0x02, 0x02 };
+
+struct slver sha1_ctx_mgr_flush_asimd_slver_02020144;
+struct slver sha1_ctx_mgr_flush_asimd_slver = { 0x0144, 0x02, 0x02 };
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c
new file mode 100644
index 000000000..e40a344ff
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c
@@ -0,0 +1,250 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdint.h>
+#include <string.h>
+#include "sha1_mb.h"
+#include "memcpy_inline.h"
+#include "endian_helper.h"
+void sha1_mb_mgr_init_ce(SHA1_MB_JOB_MGR * state);
+SHA1_JOB *sha1_mb_mgr_submit_ce(SHA1_MB_JOB_MGR * state, SHA1_JOB * job);
+SHA1_JOB *sha1_mb_mgr_flush_ce(SHA1_MB_JOB_MGR * state);
+static inline void hash_init_digest(SHA1_WORD_T * digest);
+static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len);
+static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx);
+
+void sha1_ctx_mgr_init_ce(SHA1_HASH_CTX_MGR * mgr)
+{
+ sha1_mb_mgr_init_ce(&mgr->mgr);
+}
+
+SHA1_HASH_CTX *sha1_ctx_mgr_submit_ce(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx,
+ const void *buffer, uint32_t len, HASH_CTX_FLAG flags)
+{
+ if (flags & (~HASH_ENTIRE)) {
+ // User should not pass anything other than FIRST, UPDATE, or LAST
+ ctx->error = HASH_CTX_ERROR_INVALID_FLAGS;
+ return ctx;
+ }
+
+ if (ctx->status & HASH_CTX_STS_PROCESSING) {
+ // Cannot submit to a currently processing job.
+ ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING;
+ return ctx;
+ }
+
+ if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) {
+ // Cannot update a finished job.
+ ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED;
+ return ctx;
+ }
+
+ if (flags & HASH_FIRST) {
+ // Init digest
+ hash_init_digest(ctx->job.result_digest);
+
+ // Reset byte counter
+ ctx->total_length = 0;
+
+ // Clear extra blocks
+ ctx->partial_block_buffer_length = 0;
+ }
+ // If we made it here, there were no errors during this call to submit
+ ctx->error = HASH_CTX_ERROR_NONE;
+
+ // Store buffer ptr info from user
+ ctx->incoming_buffer = buffer;
+ ctx->incoming_buffer_length = len;
+
+ // Store the user's request flags and mark this ctx as currently being processed.
+ ctx->status = (flags & HASH_LAST) ?
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) :
+ HASH_CTX_STS_PROCESSING;
+
+ // Advance byte counter
+ ctx->total_length += len;
+
+ // If there is anything currently buffered in the extra blocks, append to it until it contains a whole block.
+ // Or if the user's buffer contains less than a whole block, append as much as possible to the extra block.
+ if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) {
+ // Compute how many bytes to copy from user buffer into extra block
+ uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length;
+ if (len < copy_len)
+ copy_len = len;
+
+ if (copy_len) {
+ // Copy and update relevant pointers and counters
+ memcpy_fixedlen(&ctx->partial_block_buffer
+ [ctx->partial_block_buffer_length], buffer, copy_len);
+
+ ctx->partial_block_buffer_length += copy_len;
+ ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len);
+ ctx->incoming_buffer_length = len - copy_len;
+ }
+ // The extra block should never contain more than 1 block here
+ assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE);
+
+ // If the extra block buffer contains exactly 1 block, it can be hashed.
+ if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) {
+ ctx->partial_block_buffer_length = 0;
+
+ ctx->job.buffer = ctx->partial_block_buffer;
+ ctx->job.len = 1;
+
+ ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr, &ctx->job);
+ }
+ }
+
+ return sha1_ctx_mgr_resubmit(mgr, ctx);
+}
+
+SHA1_HASH_CTX *sha1_ctx_mgr_flush_ce(SHA1_HASH_CTX_MGR * mgr)
+{
+ SHA1_HASH_CTX *ctx;
+
+ while (1) {
+ ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_flush_ce(&mgr->mgr);
+
+ // If flush returned 0, there are no more jobs in flight.
+ if (!ctx)
+ return NULL;
+
+ // If flush returned a job, verify that it is safe to return to the user.
+ // If it is not ready, resubmit the job to finish processing.
+ ctx = sha1_ctx_mgr_resubmit(mgr, ctx);
+
+ // If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned.
+ if (ctx)
+ return ctx;
+
+ // Otherwise, all jobs currently being managed by the SHA1_HASH_CTX_MGR still need processing. Loop.
+ }
+}
+
+static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx)
+{
+ while (ctx) {
+
+ if (ctx->status & HASH_CTX_STS_COMPLETE) {
+ ctx->status = HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit
+ return ctx;
+ }
+ // If the extra blocks are empty, begin hashing what remains in the user's buffer.
+ if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) {
+ const void *buffer = ctx->incoming_buffer;
+ uint32_t len = ctx->incoming_buffer_length;
+
+ // Only entire blocks can be hashed. Copy remainder to extra blocks buffer.
+ uint32_t copy_len = len & (SHA1_BLOCK_SIZE - 1);
+
+ if (copy_len) {
+ len -= copy_len;
+ memcpy_fixedlen(ctx->partial_block_buffer,
+ ((const char *)buffer + len), copy_len);
+ ctx->partial_block_buffer_length = copy_len;
+ }
+
+ ctx->incoming_buffer_length = 0;
+
+ // len should be a multiple of the block size now
+ assert((len % SHA1_BLOCK_SIZE) == 0);
+
+ // Set len to the number of blocks to be hashed in the user's buffer
+ len >>= SHA1_LOG2_BLOCK_SIZE;
+
+ if (len) {
+ ctx->job.buffer = (uint8_t *) buffer;
+ ctx->job.len = len;
+ ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr,
+ &ctx->job);
+ continue;
+ }
+ }
+ // If the extra blocks are not empty, then we are either on the last block(s)
+ // or we need more user input before continuing.
+ if (ctx->status & HASH_CTX_STS_LAST) {
+ uint8_t *buf = ctx->partial_block_buffer;
+ uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length);
+
+ ctx->status =
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE);
+ ctx->job.buffer = buf;
+ ctx->job.len = (uint32_t) n_extra_blocks;
+ ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr, &ctx->job);
+ continue;
+ }
+
+ if (ctx)
+ ctx->status = HASH_CTX_STS_IDLE;
+ return ctx;
+ }
+
+ return NULL;
+}
+
+static inline void hash_init_digest(SHA1_WORD_T * digest)
+{
+ static const SHA1_WORD_T hash_initial_digest[SHA1_DIGEST_NWORDS] =
+ { SHA1_INITIAL_DIGEST };
+ memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest));
+}
+
+static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len)
+{
+ uint32_t i = (uint32_t) (total_len & (SHA1_BLOCK_SIZE - 1));
+
+ memclr_fixedlen(&padblock[i], SHA1_BLOCK_SIZE);
+ padblock[i] = 0x80;
+
+ // Move i to the end of either 1st or 2nd extra block depending on length
+ i += ((SHA1_BLOCK_SIZE - 1) & (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) + 1 +
+ SHA1_PADLENGTHFIELD_SIZE;
+
+#if SHA1_PADLENGTHFIELD_SIZE == 16
+ *((uint64_t *) & padblock[i - 16]) = 0;
+#endif
+
+ *((uint64_t *) & padblock[i - 8]) = to_be64((uint64_t) total_len << 3);
+
+ return i >> SHA1_LOG2_BLOCK_SIZE; // Number of extra blocks to hash
+}
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+struct slver sha1_ctx_mgr_init_ce_slver_02020142;
+struct slver sha1_ctx_mgr_init_ce_slver = { 0x0142, 0x02, 0x02 };
+
+struct slver sha1_ctx_mgr_submit_ce_slver_02020143;
+struct slver sha1_ctx_mgr_submit_ce_slver = { 0x0143, 0x02, 0x02 };
+
+struct slver sha1_ctx_mgr_flush_ce_slver_02020144;
+struct slver sha1_ctx_mgr_flush_ce_slver = { 0x0144, 0x02, 0x02 };
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c
new file mode 100644
index 000000000..0942c1a95
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c
@@ -0,0 +1,93 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_submit)
+{
+
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(sha1_ctx_mgr_submit_ce);
+
+ if (auxval & HWCAP_ASIMD) {
+ switch (get_micro_arch_id()) {
+ case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through
+ case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through
+ case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through
+ return PROVIDER_INFO(sha1_ctx_mgr_submit_asimd);
+ default:
+ break;
+ }
+ }
+
+ return PROVIDER_BASIC(sha1_ctx_mgr_submit);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_init)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(sha1_ctx_mgr_init_ce);
+
+ if (auxval & HWCAP_ASIMD) {
+ switch (get_micro_arch_id()) {
+ case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through
+ case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through
+ case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through
+ return PROVIDER_INFO(sha1_ctx_mgr_init_asimd);
+ default:
+ break;
+ }
+ }
+
+ return PROVIDER_BASIC(sha1_ctx_mgr_init);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_flush)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(sha1_ctx_mgr_flush_ce);
+
+ if (auxval & HWCAP_ASIMD) {
+ switch (get_micro_arch_id()) {
+ case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through
+ case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through
+ case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through
+ return PROVIDER_INFO(sha1_ctx_mgr_flush_asimd);
+ default:
+ break;
+ }
+ }
+
+ return PROVIDER_BASIC(sha1_ctx_mgr_flush);
+
+}
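
The three dispatchers above share the same decision: prefer the Crypto Extensions implementation when HWCAP_SHA1 is present, fall back to ASIMD only on the listed cores, and otherwise use the generic base version. A simplified plain-C sketch of that selection (the per-core get_micro_arch_id() check is omitted here; getauxval and the HWCAP_* bits come from <sys/auxv.h> and <asm/hwcap.h> on Linux/aarch64):

#include <sys/auxv.h>
#include <asm/hwcap.h>
#include "sha1_mb.h"

extern void sha1_ctx_mgr_init_ce(SHA1_HASH_CTX_MGR *);
extern void sha1_ctx_mgr_init_asimd(SHA1_HASH_CTX_MGR *);

typedef void (*sha1_init_fn)(SHA1_HASH_CTX_MGR *);

static sha1_init_fn select_sha1_ctx_mgr_init(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);

	if (hwcap & HWCAP_SHA1)    /* SHA-1 Crypto Extensions available */
		return sha1_ctx_mgr_init_ce;
	if (hwcap & HWCAP_ASIMD)   /* ASIMD path, restricted to known cores upstream */
		return sha1_ctx_mgr_init_asimd;
	return NULL;               /* caller falls back to the portable base code */
}
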
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S
new file mode 100644
index 000000000..012b15c14
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S
@@ -0,0 +1,192 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+#include "sha1_asimd_common.S"
+
+.macro internal_load windex
+ // load 64 bytes from each address to maximize cache-line usage
+ .if \windex == 0
+ mov tmp,dataptr
+ ld1 {WORD0.4s},[data0],16
+ ld1 {WORD4.4s},[data0],16
+ ld1 {WORD8.4s},[data0],16
+ ld1 {WORD12.4s},[data0],16
+
+ ld1 {WORD1.4s},[data1],16
+ ld1 {WORD5.4s},[data1],16
+ ld1 {WORD9.4s},[data1],16
+ ld1 {WORD13.4s},[data1],16
+
+ ld1 {WORD2.4s},[data2],16
+ ld1 {WORD6.4s},[data2],16
+ ld1 {WORD10.4s},[data2],16
+ ld1 {WORD14.4s},[data2],16
+
+ ld1 {WORD3.4s},[data3],16
+ ld1 {WORD7.4s},[data3],16
+ ld1 {WORD11.4s},[data3],16
+ ld1 {WORD15.4s},[data3],16
+
+ st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[0],[tmp],16
+ st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[1],[tmp],16
+ st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[2],[tmp],16
+ st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[3],[tmp],16
+ .endif
+
+ .if \windex == 4
+ mov tmp,dataptr
+ st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[0],[tmp],16
+ st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[1],[tmp],16
+ st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[2],[tmp],16
+ st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[3],[tmp],16
+ .endif
+
+ .if \windex == 8
+ mov tmp,dataptr
+ st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[0],[tmp],16
+ st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[1],[tmp],16
+ st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[2],[tmp],16
+ st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[3],[tmp],16
+ .endif
+
+ .if \windex == 12
+ mov tmp,dataptr
+ st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[0],[tmp],16
+ st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[1],[tmp],16
+ st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[2],[tmp],16
+ st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[3],[tmp],16
+ .endif
+.endm
+
+.macro load_x4_word idx:req
+ internal_load \idx
+ ld1 {WORD\idx\().16b},[dataptr],16
+.endm
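
internal_load reads a full 64-byte block from each of the four job buffers and uses st4 to write the words lane-interleaved into the 256-byte stack area, so each later ld1 in load_x4_word picks up word i of all four lanes in a single vector. In scalar terms the store side amounts to the following hypothetical transpose (data0..data3 stand for the four job buffers and buf for the area addressed by dataptr):

#include <stdint.h>

static void interleave_x4(const uint32_t *data0, const uint32_t *data1,
			  const uint32_t *data2, const uint32_t *data3,
			  uint32_t buf[16][4])
{
	int i;

	/* buf[i] holds message word i of lane 0, 1, 2, 3, in that order;
	 * byte swapping to host order still happens later via rev32 */
	for (i = 0; i < 16; i++) {
		buf[i][0] = data0[i];
		buf[i][1] = data1[i];
		buf[i][2] = data2[i];
		buf[i][3] = data3[i];
	}
}
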
+
+/*
+ * void sha1_mb_asimd_x4(SHA1_JOB *j0, SHA1_JOB*j1, SHA1_JOB*j2, SHA1_JOB *j3, int blocks)
+ */
+ job0 .req x0
+ job1 .req x1
+ job2 .req x2
+ job3 .req x3
+ num_blocks .req w4
+ tmp .req x5
+ data0 .req x6
+ data1 .req x7
+ data2 .req x8
+ data3 .req x9
+ databuf .req x10
+ dataptr .req x11
+ savedsp .req x12
+
+ .global sha1_mb_asimd_x4
+ .type sha1_mb_asimd_x4, %function
+sha1_mb_asimd_x4:
+ cmp num_blocks, #0
+ beq .return
+ sha1_asimd_save_stack
+ mov savedsp,sp
+ sub databuf,sp,256
+ mov tmp,63
+ bic databuf,databuf,tmp
+ mov sp,databuf
+
+ add tmp,job0,64
+ ld4 {VA.s,VB.s,VC.s,VD.s}[0],[tmp],#16
+ ld1 {VE.s}[0],[tmp]
+ ldr data0,[job0]
+
+ add tmp,job1,64
+ ld4 {VA.s,VB.s,VC.s,VD.s}[1],[tmp],#16
+ ld1 {VE.s}[1],[tmp]
+ ldr data1,[job1]
+
+ add tmp,job2,64
+ ld4 {VA.s,VB.s,VC.s,VD.s}[2],[tmp],#16
+ ld1 {VE.s}[2],[tmp]
+ ldr data2,[job2]
+
+ add tmp,job3,64
+ ld4 {VA.s,VB.s,VC.s,VD.s}[3],[tmp],#16
+ ld1 {VE.s}[3],[tmp]
+ ldr data3,[job3]
+
+.block_loop:
+ mov dataptr,databuf
+ sha1_single
+ subs num_blocks, num_blocks, 1
+ bne .block_loop
+
+ add tmp,job0,64
+ st4 {VA.s,VB.s,VC.s,VD.s}[0],[tmp],#16
+ st1 {VE.s}[0],[tmp]
+
+ add tmp,job1,64
+ st4 {VA.s,VB.s,VC.s,VD.s}[1],[tmp],#16
+ st1 {VE.s}[1],[tmp]
+
+ add tmp,job2,64
+ st4 {VA.s,VB.s,VC.s,VD.s}[2],[tmp],#16
+ st1 {VE.s}[2],[tmp]
+
+ add tmp,job3,64
+ st4 {VA.s,VB.s,VC.s,VD.s}[3],[tmp],#16
+ st1 {VE.s}[3],[tmp]
+
+ mov sp,savedsp
+ sha1_asimd_restore_stack
+.return:
+ ret
+
+ .size sha1_mb_asimd_x4, .-sha1_mb_asimd_x4
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+KEY_0:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+KEY_1:
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+KEY_2:
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+KEY_3:
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c
new file mode 100644
index 000000000..4b34e7b53
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c
@@ -0,0 +1,217 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <stddef.h>
+#include <sha1_mb.h>
+#include <assert.h>
+#include "endian_helper.h"
+
+extern void sha1_aarch64_x1(const uint8_t * data, int num_blocks, uint32_t digest[]);
+static inline void sha1_job_x1(SHA1_JOB * job, int blocks)
+{
+ sha1_aarch64_x1(job->buffer, blocks, job->result_digest);
+}
+
+#ifndef min
+#define min(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#define SHA1_MB_ASIMD_MAX_LANES 4
+void sha1_mb_asimd_x4(SHA1_JOB *, SHA1_JOB *, SHA1_JOB *, SHA1_JOB *, int);
+
+#define LANE_IS_NOT_FINISHED(state,i) \
+ (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane!=NULL)
+#define LANE_IS_FINISHED(state,i) \
+ (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane!=NULL)
+#define LANE_IS_FREE(state,i) \
+ (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane==NULL)
+#define LANE_IS_INVALID(state,i) \
+ (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane==NULL)
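
These predicates rely on the encoding set up by sha1_mb_mgr_insert_job() below: lens[i] packs the remaining length in blocks into the upper bits and the lane index into the low four bits, so testing lens[i] & ~0xf asks whether any blocks are left. A small illustration (encode_len is a hypothetical helper):

#include <stdint.h>

static inline uint32_t encode_len(uint32_t blocks, uint32_t lane)
{
	return (blocks << 4) | lane;   /* same packing as sha1_mb_mgr_insert_job() */
}

/* encode_len(5, 2) == 0x52: lane 2 with 5 blocks pending.  After
 * sha1_mb_mgr_do_jobs() subtracts the common length 0x50, the value
 * becomes 0x02, (lens & ~0xf) == 0, and LANE_IS_FINISHED() holds. */
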
+
+void sha1_mb_mgr_init_asimd(SHA1_MB_JOB_MGR * state)
+{
+ unsigned int i;
+
+ state->unused_lanes = 0xf;
+ state->num_lanes_inuse = 0;
+ for (i = 0; i < SHA1_MB_ASIMD_MAX_LANES; i++) {
+ state->unused_lanes <<= 4;
+ state->unused_lanes |= SHA1_MB_ASIMD_MAX_LANES - 1 - i;
+ state->lens[i] = i;
+ state->ldata[i].job_in_lane = 0;
+ }
+
+ // lanes beyond SHA1_MB_ASIMD_MAX_LANES are marked invalid
+ for (; i < SHA1_MAX_LANES; i++) {
+ state->lens[i] = 0xf;
+ state->ldata[i].job_in_lane = 0;
+ }
+}
+
+static int sha1_mb_mgr_do_jobs(SHA1_MB_JOB_MGR * state)
+{
+ int lane_idx, len, i, lanes, blocks;
+ int lane_idx_array[SHA1_MAX_LANES];
+
+ if (state->num_lanes_inuse == 0) {
+ return -1;
+ }
+ lanes = 0, len = 0;
+ for (i = 0; i < SHA1_MAX_LANES && lanes < state->num_lanes_inuse; i++) {
+ if (LANE_IS_NOT_FINISHED(state, i)) {
+ if (lanes)
+ len = min(len, state->lens[i]);
+ else
+ len = state->lens[i];
+ lane_idx_array[lanes] = i;
+ lanes++;
+ }
+ }
+
+ if (lanes == 0)
+ return -1;
+ lane_idx = len & 0xf;
+ len = len & (~0xf);
+ blocks = len >> 4;
+
+ /* for jobs spanning fewer than 3 lanes, ASIMD has little advantage
+  * over scalar code because at least 50% of its capacity is wasted;
+  * therefore ASIMD is only used when 3 or 4 lanes carry data
+  */
+ if (lanes == SHA1_MB_ASIMD_MAX_LANES) {
+ sha1_mb_asimd_x4(state->ldata[lane_idx_array[0]].job_in_lane,
+ state->ldata[lane_idx_array[1]].job_in_lane,
+ state->ldata[lane_idx_array[2]].job_in_lane,
+ state->ldata[lane_idx_array[3]].job_in_lane, blocks);
+ } else if (lanes == 3) {
+ /* with 3 lanes, ASIMD still operates as if four lanes were being
+  * processed (wasting 25% of capacity).  In theory the ASIMD code
+  * could be told the real lane count to save some memory loads, but
+  * in practice passing lane 0 again as a dummy gives similar cache
+  * performance
+  */
+ SHA1_JOB dummy;
+ dummy.buffer = state->ldata[lane_idx_array[0]].job_in_lane->buffer;
+ dummy.len = state->ldata[lane_idx_array[0]].job_in_lane->len;
+ sha1_mb_asimd_x4(state->ldata[lane_idx_array[0]].job_in_lane,
+ &dummy,
+ state->ldata[lane_idx_array[1]].job_in_lane,
+ state->ldata[lane_idx_array[2]].job_in_lane, blocks);
+ } else {
+ sha1_job_x1(state->ldata[lane_idx_array[0]].job_in_lane, blocks);
+ if (lanes >= 2) {
+ sha1_job_x1(state->ldata[lane_idx_array[1]].job_in_lane, blocks);
+ }
+ }
+
+ // only return the min length job
+ for (i = 0; i < SHA1_MAX_LANES; i++) {
+ if (LANE_IS_NOT_FINISHED(state, i)) {
+ state->lens[i] -= len;
+ state->ldata[i].job_in_lane->len -= len;
+ state->ldata[i].job_in_lane->buffer += len << 2;
+ }
+ }
+ return lane_idx;
+
+}
+
+static SHA1_JOB *sha1_mb_mgr_free_lane(SHA1_MB_JOB_MGR * state)
+{
+ int i;
+ SHA1_JOB *ret = NULL;
+
+ for (i = 0; i < SHA1_MB_ASIMD_MAX_LANES; i++) {
+ if (LANE_IS_FINISHED(state, i)) {
+ state->unused_lanes <<= 4;
+ state->unused_lanes |= i;
+ state->num_lanes_inuse--;
+ ret = state->ldata[i].job_in_lane;
+ ret->status = STS_COMPLETED;
+ state->ldata[i].job_in_lane = NULL;
+ break;
+ }
+ }
+ return ret;
+}
+
+static void sha1_mb_mgr_insert_job(SHA1_MB_JOB_MGR * state, SHA1_JOB * job)
+{
+ int lane_idx;
+ // add job into lanes
+ lane_idx = state->unused_lanes & 0xf;
+ // fatal error
+ assert(lane_idx < SHA1_MB_ASIMD_MAX_LANES);
+ state->lens[lane_idx] = (job->len << 4) | lane_idx;
+ state->ldata[lane_idx].job_in_lane = job;
+ state->unused_lanes >>= 4;
+ state->num_lanes_inuse++;
+}
+
+SHA1_JOB *sha1_mb_mgr_submit_asimd(SHA1_MB_JOB_MGR * state, SHA1_JOB * job)
+{
+#ifndef NDEBUG
+ int lane_idx;
+#endif
+ SHA1_JOB *ret;
+
+ // add job into lanes
+ sha1_mb_mgr_insert_job(state, job);
+
+ ret = sha1_mb_mgr_free_lane(state);
+ if (ret != NULL) {
+ return ret;
+ }
+ // submit waits until all lanes have data
+ if (state->num_lanes_inuse < SHA1_MB_ASIMD_MAX_LANES)
+ return NULL;
+#ifndef NDEBUG
+ lane_idx = sha1_mb_mgr_do_jobs(state);
+ assert(lane_idx != -1);
+#else
+ sha1_mb_mgr_do_jobs(state);
+#endif
+
+ // ~ i = lane_idx;
+ ret = sha1_mb_mgr_free_lane(state);
+ return ret;
+}
+
+SHA1_JOB *sha1_mb_mgr_flush_asimd(SHA1_MB_JOB_MGR * state)
+{
+ SHA1_JOB *ret;
+ ret = sha1_mb_mgr_free_lane(state);
+ if (ret) {
+ return ret;
+ }
+
+ sha1_mb_mgr_do_jobs(state);
+ return sha1_mb_mgr_free_lane(state);
+
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c
new file mode 100644
index 000000000..1dfd67d0c
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c
@@ -0,0 +1,208 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <stddef.h>
+#include <sha1_mb.h>
+#include <assert.h>
+
+#ifndef max
+#define max(a,b) (((a) > (b)) ? (a) : (b))
+#endif
+
+#ifndef min
+#define min(a,b) (((a) < (b)) ? (a) : (b))
+#endif
+
+#define SHA1_MB_CE_MAX_LANES 2
+#if SHA1_MB_CE_MAX_LANES >=2
+void sha1_mb_ce_x2(SHA1_JOB *, SHA1_JOB *, int);
+#endif
+void sha1_mb_ce_x1(SHA1_JOB *, int);
+
+#define LANE_IS_NOT_FINISHED(state,i) \
+ (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane!=NULL)
+#define LANE_IS_FINISHED(state,i) \
+ (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane!=NULL)
+#define LANE_IS_FREE(state,i) \
+ (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane==NULL)
+#define LANE_IS_INVALID(state,i) \
+ (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane==NULL)
+void sha1_mb_mgr_init_ce(SHA1_MB_JOB_MGR * state)
+{
+ unsigned int i;
+
+ state->unused_lanes = 0xf;
+ state->num_lanes_inuse = 0;
+ for (i = 0; i < SHA1_MB_CE_MAX_LANES; i++) {
+ state->unused_lanes <<= 4;
+ state->unused_lanes |= i;
+ state->lens[i] = i;
+ state->ldata[i].job_in_lane = 0;
+ }
+
+ // lanes beyond SHA1_MB_CE_MAX_LANES are marked invalid
+ for (; i < SHA1_MAX_LANES; i++) {
+ state->lens[i] = 0xf;
+ state->ldata[i].job_in_lane = 0;
+ }
+}
+
+static int sha1_mb_mgr_do_jobs(SHA1_MB_JOB_MGR * state)
+{
+ int lane_idx, len, i, lanes;
+
+ int lane_idx_array[SHA1_MAX_LANES];
+
+ if (state->num_lanes_inuse == 0) {
+ return -1;
+ }
+#if SHA1_MB_CE_MAX_LANES == 2
+ if (state->num_lanes_inuse == 2) {
+ len = min(state->lens[0], state->lens[1]);
+ lane_idx = len & 0xf;
+ len &= ~0xf;
+
+ sha1_mb_ce_x2(state->ldata[0].job_in_lane,
+ state->ldata[1].job_in_lane, len >> 4);
+
+ } else
+#endif
+ {
+ lanes = 0, len = 0;
+ for (i = 0; i < SHA1_MAX_LANES && lanes < state->num_lanes_inuse; i++) {
+ if (LANE_IS_NOT_FINISHED(state, i)) {
+ if (lanes)
+ len = min(len, state->lens[i]);
+ else
+ len = state->lens[i];
+ lane_idx_array[lanes] = i;
+ lanes++;
+ }
+ }
+ if (lanes == 0)
+ return -1;
+ lane_idx = len & 0xf;
+ len = len & (~0xf);
+
+#if SHA1_MB_CE_MAX_LANES >=2
+ if (lanes == 2) {
+ sha1_mb_ce_x2(state->ldata[lane_idx_array[0]].job_in_lane,
+ state->ldata[lane_idx_array[1]].job_in_lane, len >> 4);
+ } else
+#endif
+ {
+ sha1_mb_ce_x1(state->ldata[lane_idx_array[0]].job_in_lane, len >> 4);
+ }
+ }
+ //only return the min length job
+ for (i = 0; i < SHA1_MAX_LANES; i++) {
+ if (LANE_IS_NOT_FINISHED(state, i)) {
+ state->lens[i] -= len;
+ state->ldata[i].job_in_lane->len -= len;
+ state->ldata[i].job_in_lane->buffer += len << 2;
+ }
+ }
+
+ return lane_idx;
+
+}
+
+static SHA1_JOB *sha1_mb_mgr_free_lane(SHA1_MB_JOB_MGR * state)
+{
+ int i;
+ SHA1_JOB *ret = NULL;
+
+ for (i = 0; i < SHA1_MB_CE_MAX_LANES; i++) {
+ if (LANE_IS_FINISHED(state, i)) {
+
+ state->unused_lanes <<= 4;
+ state->unused_lanes |= i;
+ state->num_lanes_inuse--;
+ ret = state->ldata[i].job_in_lane;
+ ret->status = STS_COMPLETED;
+ state->ldata[i].job_in_lane = NULL;
+ break;
+ }
+ }
+ return ret;
+}
+
+static void sha1_mb_mgr_insert_job(SHA1_MB_JOB_MGR * state, SHA1_JOB * job)
+{
+ int lane_idx;
+ //add job into lanes
+ lane_idx = state->unused_lanes & 0xf;
+ //fatal error
+ assert(lane_idx < SHA1_MB_CE_MAX_LANES);
+ state->lens[lane_idx] = (job->len << 4) | lane_idx;
+ state->ldata[lane_idx].job_in_lane = job;
+ state->unused_lanes >>= 4;
+ state->num_lanes_inuse++;
+}
+
+SHA1_JOB *sha1_mb_mgr_submit_ce(SHA1_MB_JOB_MGR * state, SHA1_JOB * job)
+{
+#ifndef NDEBUG
+ int lane_idx;
+#endif
+ SHA1_JOB *ret;
+
+ //add job into lanes
+ sha1_mb_mgr_insert_job(state, job);
+
+ ret = sha1_mb_mgr_free_lane(state);
+ if (ret != NULL) {
+ return ret;
+ }
+ // submit waits until all lanes have data
+ if (state->num_lanes_inuse < SHA1_MB_CE_MAX_LANES)
+ return NULL;
+#ifndef NDEBUG
+ lane_idx = sha1_mb_mgr_do_jobs(state);
+ assert(lane_idx != -1);
+#else
+ sha1_mb_mgr_do_jobs(state);
+#endif
+
+ //~ i = lane_idx;
+ ret = sha1_mb_mgr_free_lane(state);
+ return ret;
+}
+
+SHA1_JOB *sha1_mb_mgr_flush_ce(SHA1_MB_JOB_MGR * state)
+{
+ SHA1_JOB *ret;
+ ret = sha1_mb_mgr_free_lane(state);
+ if (ret) {
+ return ret;
+ }
+
+ sha1_mb_mgr_do_jobs(state);
+ return sha1_mb_mgr_free_lane(state);
+
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S
new file mode 100644
index 000000000..bb1929d76
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S
@@ -0,0 +1,36 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#include "aarch64_multibinary.h"
+
+
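+//each mbin_interface line emits a public entry point that branches through a
+//function pointer the aarch64 dispatcher resolves on first call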
+mbin_interface sha1_ctx_mgr_submit
+mbin_interface sha1_ctx_mgr_init
+mbin_interface sha1_ctx_mgr_flush
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S
new file mode 100644
index 000000000..22f736793
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S
@@ -0,0 +1,194 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crypto
+ .text
+ .align 2
+ .p2align 3,,7
+
+/*
+Macros
+*/
+
+.macro declare_var_vector_reg name:req,reg:req
+ \name\()_q .req q\reg
+ \name\()_v .req v\reg
+ \name\()_s .req s\reg
+.endm
+
+/**
+macro for rounds 4-67: sha1h derives the next e value from abcd, the chosen
+sha1c/sha1p/sha1m instruction runs four rounds, the add pre-computes
+message + constant for a later round group, and sha1su1/sha1su0 extend the
+message schedule
+*/
+.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req,k:req
+ sha1h \e0\()_s, \abcd\()_s
+ \inst \abcd\()_q,\e1\()_s,\tmp1\()_v.4s
+ add \tmp1\()_v.4s,\msg3\()_v.4s,\k\()_v.4s
+ sha1su1 \msg0\()_v.4s,\msg3\()_v.4s
+ sha1su0 \msg1\()_v.4s,\msg2\()_v.4s,\msg3\()_v.4s
+.endm
+
+
+/*
+Variable list
+*/
+
+ declare_var_vector_reg key_0,28
+ declare_var_vector_reg key_1,29
+ declare_var_vector_reg key_2,30
+ declare_var_vector_reg key_3,31
+
+
+/*
+digest variables
+*/
+ declare_var_vector_reg abcd,0
+ declare_var_vector_reg e0,1
+ declare_var_vector_reg e1,2
+ declare_var_vector_reg abcd_saved,3
+ declare_var_vector_reg e0_saved,4
+/*
+Message variables
+*/
+ declare_var_vector_reg msg_0,16
+ declare_var_vector_reg msg_1,17
+ declare_var_vector_reg msg_2,18
+ declare_var_vector_reg msg_3,19
+/*
+Temporary variables
+*/
+ declare_var_vector_reg tmp_0,5
+ declare_var_vector_reg tmp_1,6
+
+/*
+ void sha1_mb_ce_x1(SHA1_JOB * job, int len);
+*/
+/*
+Arguments list
+*/
+ job .req x0
+ len .req w1
+ data .req x2
+ tmp .req x3
+ .global sha1_mb_ce_x1
+ .type sha1_mb_ce_x1, %function
+sha1_mb_ce_x1:
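+	//job->buffer is the first field of SHA1_JOB; the working digest lives at
+	//job+64 (ABCD) and job+80 (E)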
+ ldr data, [job]
+ ldr abcd_q, [job, 64]
+ ldr e0_s, [job, 80]
+ adr tmp, KEY
+ ld1 {key_0_v.4s-key_3_v.4s},[tmp]
+
+start_loop:
+
+ //load msgs
+ ld1 {msg_0_v.4s-msg_3_v.4s},[data]
+
+ //adjust loop parameter
+ add data,data,64
+ sub len, len, #1
+ cmp len, 0
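+	//nothing below this point sets the flags, so the bgt at the end of the
+	//block still tests this compare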
+ //backup digest
+ mov abcd_saved_v.16b,abcd_v.16b
+ mov e0_saved_v.16b,e0_v.16b
+
+ rev32 msg_0_v.16b,msg_0_v.16b
+ rev32 msg_1_v.16b,msg_1_v.16b
+ add tmp_0_v.4s,msg_0_v.4s,key_0_v.4s
+ rev32 msg_2_v.16b,msg_2_v.16b
+ add tmp_1_v.4s,msg_1_v.4s,key_0_v.4s
+ rev32 msg_3_v.16b,msg_3_v.16b
+
+ /* rounds 0-3 */
+ sha1h e1_s,abcd_s
+ sha1c abcd_q,e0_s,tmp_0_v.4s
+ add tmp_0_v.4s,msg_2_v.4s,key_0_v.4s
+ sha1su0 msg_0_v.4s,msg_1_v.4s,msg_2_v.4s
+
+	sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_0 /* rounds 4-7 */
+	sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_0 /* rounds 8-11 */
+	sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 12-15 */
+	sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_1 /* rounds 16-19 */
+	sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 20-23 */
+	sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_1 /* rounds 24-27 */
+	sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 28-31 */
+	sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2 /* rounds 32-35 */
+	sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_2 /* rounds 36-39 */
+	sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_2 /* rounds 40-43 */
+	sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_2 /* rounds 44-47 */
+	sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2 /* rounds 48-51 */
+	sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_3 /* rounds 52-55 */
+	sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_3 /* rounds 56-59 */
+	sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_3 /* rounds 60-63 */
+	sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_3 /* rounds 64-67 */
+
+ /* rounds 68-71 */
+ sha1h e0_s,abcd_s
+ sha1p abcd_q,e1_s,tmp_1_v.4s
+ add tmp_1_v.4s,msg_3_v.4s,key_3_v.4s
+ sha1su1 msg_0_v.4s,msg_3_v.4s
+
+ /* rounds 72-75 */
+ sha1h e1_s,abcd_s
+ sha1p abcd_q,e0_s,tmp_0_v.4s
+
+ /* rounds 76-79 */
+ sha1h e0_s,abcd_s
+ sha1p abcd_q,e1_s,tmp_1_v.4s
+
+
+
+ add abcd_v.4s,abcd_v.4s,abcd_saved_v.4s
+ add e0_v.2s,e0_v.2s,e0_saved_v.2s
+
+
+ bgt start_loop
+ str abcd_q, [job, 64]
+ str e0_s, [job, 80]
+
+ ret
+
+ .size sha1_mb_ce_x1, .-sha1_mb_ce_x1
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
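+//SHA-1 round constants K0..K3, each replicated across all four vector lanes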
+KEY:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S
new file mode 100644
index 000000000..93f653ad2
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S
@@ -0,0 +1,253 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crypto
+ .text
+ .align 2
+ .p2align 3,,7
+
+/*
+Macros
+*/
+
+.macro declare_var_vector_reg name:req,reg:req
+ \name\()_q .req q\reg
+ \name\()_v .req v\reg
+ \name\()_s .req s\reg
+.endm
+
+/**
+macro for rounds 4-67: the same round and message-schedule sequence as the x1
+kernel, issued back to back for both lanes so the two independent SHA-1 states
+hide each other's instruction latency
+*/
+.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req,k:req
+ sha1h l0_\e0\()_s, l0_\abcd\()_s
+ sha1h l1_\e0\()_s, l1_\abcd\()_s
+
+ \inst l0_\abcd\()_q,l0_\e1\()_s,l0_\tmp1\()_v.4s
+ \inst l1_\abcd\()_q,l1_\e1\()_s,l1_\tmp1\()_v.4s
+
+ add l0_\tmp1\()_v.4s,l0_\msg3\()_v.4s,\k\()_v.4s
+ add l1_\tmp1\()_v.4s,l1_\msg3\()_v.4s,\k\()_v.4s
+
+ sha1su1 l0_\msg0\()_v.4s,l0_\msg3\()_v.4s
+ sha1su1 l1_\msg0\()_v.4s,l1_\msg3\()_v.4s
+
+ sha1su0 l0_\msg1\()_v.4s,l0_\msg2\()_v.4s,l0_\msg3\()_v.4s
+ sha1su0 l1_\msg1\()_v.4s,l1_\msg2\()_v.4s,l1_\msg3\()_v.4s
+.endm
+
+
+/*
+Variable list
+*/
+
+ declare_var_vector_reg key_0,28
+ declare_var_vector_reg key_1,29
+ declare_var_vector_reg key_2,30
+ declare_var_vector_reg key_3,31
+
+
+/*
+lane variables
+*/
+ declare_var_vector_reg l0_abcd,0
+ declare_var_vector_reg l0_e0,1
+ declare_var_vector_reg l0_e1,2
+ declare_var_vector_reg l0_abcd_saved,3
+ declare_var_vector_reg l0_e0_saved,4
+ declare_var_vector_reg l0_tmp_0,5
+ declare_var_vector_reg l0_tmp_1,6
+ declare_var_vector_reg l0_msg_0,16
+ declare_var_vector_reg l0_msg_1,17
+ declare_var_vector_reg l0_msg_2,18
+ declare_var_vector_reg l0_msg_3,19
+
+ declare_var_vector_reg l1_abcd,7
+ declare_var_vector_reg l1_e0,8
+ declare_var_vector_reg l1_e1,9
+ declare_var_vector_reg l1_abcd_saved,24
+ declare_var_vector_reg l1_e0_saved,25
+ declare_var_vector_reg l1_tmp_0,26
+ declare_var_vector_reg l1_tmp_1,27
+ declare_var_vector_reg l1_msg_0,20
+ declare_var_vector_reg l1_msg_1,21
+ declare_var_vector_reg l1_msg_2,22
+ declare_var_vector_reg l1_msg_3,23
+
+/*
+ void sha1_mb_ce_x2(SHA1_JOB * job_0, SHA1_JOB * job_1, int len);
+*/
+ l0_job .req x0
+ l1_job .req x1
+ len .req w2
+
+ l0_data .req x3
+ l1_data .req x4
+ tmp .req x5
+ .global sha1_mb_ce_x2
+ .type sha1_mb_ce_x2, %function
+sha1_mb_ce_x2:
+	//v8/v9 hold lane-1 state and their low halves d8/d9 are callee-saved under AAPCS64, so spill them
+ stp d8, d9, [sp, -256]!
+
+ adr tmp, KEY
+ ld1 {key_0_v.4s-key_3_v.4s},[tmp]
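+	//each SHA1_JOB keeps its buffer pointer at offset 0 and its digest at
+	//offsets 64 (ABCD) and 80 (E), the same layout the x1 kernel uses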
+ ldr l0_data, [l0_job]
+ ldr l1_data, [l1_job]
+ ldr l0_abcd_q, [l0_job, 64]
+ ldr l0_e0_s, [l0_job, 80]
+ ldr l1_abcd_q, [l1_job, 64]
+ ldr l1_e0_s, [l1_job, 80]
+
+start_loop:
+
+ //load msgs
+ ld1 {l0_msg_0_v.4s-l0_msg_3_v.4s},[l0_data]
+ ld1 {l1_msg_0_v.4s-l1_msg_3_v.4s},[l1_data]
+
+ //adjust loop parameter
+ add l0_data,l0_data,64
+ add l1_data,l1_data,64
+ sub len, len, #1
+ cmp len, 0
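+	//as in the x1 kernel, no later instruction in the block sets the flags,
+	//so the bgt at the bottom still tests this compare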
+ //backup digest
+ mov l0_abcd_saved_v.16b, l0_abcd_v.16b
+ mov l0_e0_saved_v.16b, l0_e0_v.16b
+ mov l1_abcd_saved_v.16b, l1_abcd_v.16b
+ mov l1_e0_saved_v.16b, l1_e0_v.16b
+
+ rev32 l0_msg_0_v.16b, l0_msg_0_v.16b
+ rev32 l0_msg_1_v.16b, l0_msg_1_v.16b
+ add l0_tmp_0_v.4s, l0_msg_0_v.4s, key_0_v.4s
+ rev32 l0_msg_2_v.16b, l0_msg_2_v.16b
+ add l0_tmp_1_v.4s, l0_msg_1_v.4s, key_0_v.4s
+ rev32 l0_msg_3_v.16b, l0_msg_3_v.16b
+
+ rev32 l1_msg_0_v.16b, l1_msg_0_v.16b
+ rev32 l1_msg_1_v.16b, l1_msg_1_v.16b
+ add l1_tmp_0_v.4s, l1_msg_0_v.4s, key_0_v.4s
+ rev32 l1_msg_2_v.16b, l1_msg_2_v.16b
+ add l1_tmp_1_v.4s, l1_msg_1_v.4s, key_0_v.4s
+ rev32 l1_msg_3_v.16b, l1_msg_3_v.16b
+
+ /* rounds 0-3 */
+ sha1h l0_e1_s, l0_abcd_s
+ sha1c l0_abcd_q, l0_e0_s, l0_tmp_0_v.4s
+ add l0_tmp_0_v.4s, l0_msg_2_v.4s, key_0_v.4s
+ sha1su0 l0_msg_0_v.4s, l0_msg_1_v.4s, l0_msg_2_v.4s
+
+ sha1h l1_e1_s, l1_abcd_s
+ sha1c l1_abcd_q, l1_e0_s, l1_tmp_0_v.4s
+ add l1_tmp_0_v.4s, l1_msg_2_v.4s, key_0_v.4s
+ sha1su0 l1_msg_0_v.4s, l1_msg_1_v.4s, l1_msg_2_v.4s
+
+	sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_0 /* rounds 4-7 */
+	sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_0 /* rounds 8-11 */
+	sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 12-15 */
+	sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_1 /* rounds 16-19 */
+	sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 20-23 */
+	sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_1 /* rounds 24-27 */
+	sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 28-31 */
+	sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2 /* rounds 32-35 */
+	sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_2 /* rounds 36-39 */
+	sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_2 /* rounds 40-43 */
+	sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_2 /* rounds 44-47 */
+	sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2 /* rounds 48-51 */
+	sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_3 /* rounds 52-55 */
+	sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_3 /* rounds 56-59 */
+	sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_3 /* rounds 60-63 */
+	sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_3 /* rounds 64-67 */
+
+ /* rounds 68-71 */
+ sha1h l0_e0_s, l0_abcd_s
+ sha1p l0_abcd_q, l0_e1_s, l0_tmp_1_v.4s
+ add l0_tmp_1_v.4s, l0_msg_3_v.4s, key_3_v.4s
+ sha1su1 l0_msg_0_v.4s, l0_msg_3_v.4s
+
+ sha1h l1_e0_s, l1_abcd_s
+ sha1p l1_abcd_q, l1_e1_s, l1_tmp_1_v.4s
+ add l1_tmp_1_v.4s, l1_msg_3_v.4s, key_3_v.4s
+ sha1su1 l1_msg_0_v.4s, l1_msg_3_v.4s
+
+ /* rounds 72-75 */
+ sha1h l0_e1_s, l0_abcd_s
+ sha1p l0_abcd_q, l0_e0_s, l0_tmp_0_v.4s
+
+ sha1h l1_e1_s, l1_abcd_s
+ sha1p l1_abcd_q, l1_e0_s, l1_tmp_0_v.4s
+
+ /* rounds 76-79 */
+ sha1h l0_e0_s, l0_abcd_s
+ sha1p l0_abcd_q, l0_e1_s, l0_tmp_1_v.4s
+
+ sha1h l1_e0_s, l1_abcd_s
+ sha1p l1_abcd_q, l1_e1_s, l1_tmp_1_v.4s
+
+
+
+ add l0_abcd_v.4s, l0_abcd_v.4s, l0_abcd_saved_v.4s
+ add l0_e0_v.2s, l0_e0_v.2s, l0_e0_saved_v.2s
+ add l1_abcd_v.4s, l1_abcd_v.4s, l1_abcd_saved_v.4s
+ add l1_e0_v.2s, l1_e0_v.2s, l1_e0_saved_v.2s
+
+
+
+
+ bgt start_loop
+
+ str l0_abcd_q, [l0_job, 64]
+ str l0_e0_s, [l0_job, 80]
+
+
+ str l1_abcd_q, [l1_job, 64]
+ str l1_e0_s, [l1_job, 80]
+
+ //pop d8,d9 from stack
+ ldp d8, d9, [sp], 256
+ ret
+
+ .size sha1_mb_ce_x2, .-sha1_mb_ce_x2
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
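+//SHA-1 round constants K0..K3, each replicated across all four vector lanes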
+KEY:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6