diff options
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64')
11 files changed, 2256 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S new file mode 100644 index 000000000..55d6f932f --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_aarch64_x1.S @@ -0,0 +1,294 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + + input_data .req x0 + num_blocks .req w1 + digest .req x2 + + // x2 is reused intentionally between digest/tmp + // due to running out of registers + TMP .req x2 + TMPW .req w2 + sha1key_adr .req x3 + WK .req w3 + WF .req w4 + WA .req w5 + WB .req w6 + WC .req w7 + WD .req w8 + WE .req w9 + WORD0 .req w10 + WORD1 .req w11 + WORD2 .req w12 + WORD3 .req w13 + WORD4 .req w14 + WORD5 .req w15 + WORD6 .req w16 + WORD7 .req w17 + WORD8 .req w18 + WORD9 .req w19 + WORD10 .req w20 + WORD11 .req w21 + WORD12 .req w22 + WORD13 .req w23 + WORD14 .req w24 + WORD15 .req w25 + AA .req w26 + BB .req w27 + CC .req w28 + DD .req w29 + EE .req w30 + + TT .req w0 + +.macro save_stack + stp x16,x17,[sp, -128]! + stp x18,x19,[sp, 16] + stp x20,x21,[sp, 32] + stp x22,x23,[sp, 48] + stp x24,x25,[sp, 64] + stp x26,x27,[sp, 80] + stp x28,x29,[sp, 96] + str x30,[sp, 112] + // have to reuse x2, which is digest address + str x2,[sp, 120] +.endm + +.macro restore_stack + ldp x18,x19,[sp, 16] + ldp x20,x21,[sp, 32] + ldp x22,x23,[sp, 48] + ldp x24,x25,[sp, 64] + ldp x26,x27,[sp, 80] + ldp x28,x29,[sp, 96] + ldr x30,[sp, 112] + ldr x2,[sp, 120] + ldp x16,x17,[sp],128 +.endm +// macro F = (D ^ (B & (C ^ D))) +.macro FUNC_F0 + eor WF, WC, WD + and WF, WB, WF + eor WF, WD, WF +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F1 + eor WF, WB, WC + eor WF, WF, WD +.endm + +// F = ((B & C) | (B & D) | (C & D)) +.macro FUNC_F2 + and TMPW, WB, WC + and WF, WB, WD + orr WF, WF, TMPW + and TMPW, WC, WD + orr WF, WF, TMPW +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F3 + FUNC_F1 +.endm + +.altmacro +.macro load_next_word windex + .if \windex < 16 + load_word_at \windex + .endif +.endm + +.macro SHA1_STEP_00_15 windex:req + rev WORD\windex\(),WORD\windex\() + next_word=\windex+1 + load_next_word %next_word + + ror TMPW,WA,#32-5 + add WE,WE,TMPW + add WE,WE,WK + FUNC_F0 + ror WB,WB,#32-30 + add WE,WE,WORD\windex\() + add 
WE,WE,WF +.endm + +.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req + eor TMPW,\reg_14,\reg_8 + eor \reg_16,\reg_16,\reg_3 + eor \reg_16,\reg_16,TMPW + + ror TMPW,WA,#32-5 + ror \reg_16,\reg_16, #32 - 1 + + add WE,WE,TMPW + add WE,WE,WK + \func_f + ror WB,WB,#32-30 + add WE,WE,\reg_16 + add WE,WE,WF +.endm + +.macro SWAP_STATES + .unreq TT + TT .req WE + .unreq WE + WE .req WD + .unreq WD + WD .req WC + .unreq WC + WC .req WB + .unreq WB + WB .req WA + .unreq WA + WA .req TT +.endm + +.altmacro +.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req + SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\() +.endm + +.macro exec_step windex:req + .if \windex <= 15 + SHA1_STEP_00_15 windex + .else + idx14=((\windex - 14) & 15) + idx8=((\windex - 8) & 15) + idx3=((\windex - 3) & 15) + idx16=(\windex & 15) + .if \windex <= 19 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 20 && \windex <= 39 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 40 && \windex <= 59 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 60 && \windex <= 79 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16 + .endif + .endif + + SWAP_STATES +.endm + +.macro exec_steps idx:req,more:vararg + exec_step \idx + .ifnb \more + exec_steps \more + .endif +.endm + +.altmacro + +.macro load_two_words_at idx0:req,idx1:req + ldp WORD\idx0\(),WORD\idx1\(),[input_data],8 +.endm + +.macro load_word_at idx:req + .if \idx % 2 == 0 + idx1=\idx+1 + load_two_words_at \idx,%idx1 + .endif +.endm + +/* + * void sha1_aarch64_x1(uint32_t *input_data, int num_blocks, uint32_t digest[5]) + */ + .global sha1_aarch64_x1 + .type sha1_aarch64_x1, %function +sha1_aarch64_x1: + cmp num_blocks, #0 + beq .return + + ldp WA,WB,[digest] + ldp WC,WD,[digest,8] + ldr WE,[digest,16] + 
save_stack + +.block_loop: + mov AA, WA + mov BB, WB + mov CC, WC + mov DD, WD + mov EE, WE + + load_word_at 0 + + adr sha1key_adr, KEY_0 + ldr WK, [sha1key_adr] + exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 + + // 20 ~ 39 + adr sha1key_adr, KEY_1 + ldr WK, [sha1key_adr] + exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 + + // 40 ~ 59 + adr sha1key_adr, KEY_2 + ldr WK, [sha1key_adr] + exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 + + // 60 ~ 79 + adr sha1key_adr, KEY_3 + ldr WK, [sha1key_adr] + exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 + + add WA, AA, WA + add WB, BB, WB + add WC, CC, WC + add WD, DD, WD + add WE, EE, WE + + subs num_blocks, num_blocks, 1 + bne .block_loop + + restore_stack + stp WA,WB,[digest] + stp WC,WD,[digest,8] + str WE,[digest,16] + +.return: + ret + + .size sha1_aarch64_x1, .-sha1_aarch64_x1 + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +KEY_0: + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S new file mode 100644 index 000000000..c8b8dd982 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_asimd_common.S @@ -0,0 +1,269 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + +// macro F = (D ^ (B & (C ^ D))) +.macro FUNC_F0 + eor VF.16b, VC.16b, VD.16b + and VF.16b, VB.16b, VF.16b + eor VF.16b, VD.16b, VF.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F1 + eor VF.16b, VB.16b, VC.16b + eor VF.16b, VF.16b, VD.16b +.endm + +// F = ((B & C) | (B & D) | (C & D)) +.macro FUNC_F2 + and vT0.16b, VB.16b, VC.16b + and vT1.16b, VB.16b, VD.16b + and vT2.16b, VC.16b, VD.16b + orr VF.16b, vT0.16b, vT1.16b + orr VF.16b, VF.16b, vT2.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F3 + FUNC_F1 +.endm + +.altmacro +.macro load_next_word windex + .if \windex < 16 + load_x4_word \windex + .endif +.endm + +// FUNC_F0 is merged into STEP_00_15 for efficiency +.macro SHA1_STEP_00_15_F0 windex:req + rev32 WORD\windex\().16b,WORD\windex\().16b + next_word=\windex+1 + load_next_word %next_word + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, VA.4s, 32 - 5 + add VE.4s, VE.4s, VK.4s + sli VT.4s, VA.4s, 5 + eor VF.16b, VC.16b, VD.16b + add VE.4s, VE.4s, WORD\windex\().4s + and VF.16b, VB.16b, VF.16b + add VE.4s, VE.4s, VT.4s + eor VF.16b, VD.16b, VF.16b + ushr VT.4s, VB.4s, 32 - 30 + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + +.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req + eor vT0.16b,\reg_3\().16b,\reg_8\().16b + eor VT.16b,\reg_14\().16b,\reg_16\().16b + eor vT0.16b,vT0.16b,VT.16b + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, vT0.4s, 32 - 1 + add VE.4s, VE.4s, VK.4s + ushr vT1.4s, VA.4s, 32 - 5 + sli VT.4s, vT0.4s, 1 + add VE.4s, VE.4s, VT.4s + sli vT1.4s, VA.4s, 5 + mov \reg_16\().16b,VT.16b + add VE.4s, VE.4s, vT1.4s + ushr VT.4s, VB.4s, 32 - 30 + \func_f + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + + VA .req v0 + VB .req v1 + VC .req v2 + VD .req v3 + VE .req v4 + VT .req v5 + VF .req v6 + VK .req v7 + WORD0 .req v8 + WORD1 .req v9 + WORD2 .req v10 + WORD3 .req v11 + WORD4 .req v12 
+ WORD5 .req v13 + WORD6 .req v14 + WORD7 .req v15 + WORD8 .req v16 + WORD9 .req v17 + WORD10 .req v18 + WORD11 .req v19 + WORD12 .req v20 + WORD13 .req v21 + WORD14 .req v22 + WORD15 .req v23 + vT0 .req v24 + vT1 .req v25 + vT2 .req v26 + vAA .req v27 + vBB .req v28 + vCC .req v29 + vDD .req v30 + vEE .req v31 + TT .req v0 + sha1key_adr .req x15 + +.macro SWAP_STATES + // shifted VB is held in VT after each step + .unreq TT + TT .req VE + .unreq VE + VE .req VD + .unreq VD + VD .req VC + .unreq VC + VC .req VT + .unreq VT + VT .req VB + .unreq VB + VB .req VA + .unreq VA + VA .req TT +.endm + +.altmacro +.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req + SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\() +.endm + +.macro exec_step windex:req + .if \windex <= 15 + SHA1_STEP_00_15_F0 windex + .else + idx14=((\windex - 14) & 15) + idx8=((\windex - 8) & 15) + idx3=((\windex - 3) & 15) + idx16=(\windex & 15) + .if \windex <= 19 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 20 && \windex <= 39 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 40 && \windex <= 59 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 60 && \windex <= 79 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16 + .endif + .endif + + SWAP_STATES + + .if \windex == 79 + // after 80 steps, the registers ABCDET has shifted from + // its orignal order of 012345 to 341520 + // have to swap back for both compile- and run-time correctness + mov v0.16b,v3.16b + .unreq VA + VA .req v0 + + mov vT0.16b,v2.16b + mov v2.16b,v1.16b + mov v1.16b,v4.16b + .unreq VB + VB .req v1 + .unreq VC + VC .req v2 + + mov v3.16b,v5.16b + .unreq VD + VD .req v3 + + mov v4.16b,vT0.16b + .unreq VE + VE .req v4 + + .unreq VT + VT .req v5 + .endif +.endm + +.macro exec_steps idx:req,more:vararg + exec_step 
\idx + .ifnb \more + exec_steps \more + .endif +.endm + +.macro sha1_single + load_x4_word 0 + + mov vAA.16B, VA.16B + mov vBB.16B, VB.16B + mov vCC.16B, VC.16B + mov vDD.16B, VD.16B + mov vEE.16B, VE.16B + + adr sha1key_adr, KEY_0 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 + + // 20 ~ 39 + adr sha1key_adr, KEY_1 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 + + // 40 ~ 59 + adr sha1key_adr, KEY_2 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 + + // 60 ~ 79 + adr sha1key_adr, KEY_3 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 + + add VA.4s, vAA.4s, VA.4s + add VB.4s, vBB.4s, VB.4s + add VC.4s, vCC.4s, VC.4s + add VD.4s, vDD.4s, VD.4s + add VE.4s, vEE.4s, VE.4s +.endm + +.macro sha1_asimd_save_stack + stp d8,d9,[sp, -64]! + stp d10,d11,[sp, 16] + stp d12,d13,[sp, 32] + stp d14,d15,[sp, 48] +.endm + +.macro sha1_asimd_restore_stack + ldp d10,d11,[sp, 16] + ldp d12,d13,[sp, 32] + ldp d14,d15,[sp, 48] + ldp d8,d9,[sp],64 +.endm diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c new file mode 100644 index 000000000..9a9952ff6 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_asimd.c @@ -0,0 +1,250 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdint.h> +#include <string.h> +#include "sha1_mb.h" +#include "memcpy_inline.h" +#include "endian_helper.h" +void sha1_mb_mgr_init_asimd(SHA1_MB_JOB_MGR * state); +SHA1_JOB *sha1_mb_mgr_submit_asimd(SHA1_MB_JOB_MGR * state, SHA1_JOB * job); +SHA1_JOB *sha1_mb_mgr_flush_asimd(SHA1_MB_JOB_MGR * state); +static inline void hash_init_digest(SHA1_WORD_T * digest); +static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len); +static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx); + +void sha1_ctx_mgr_init_asimd(SHA1_HASH_CTX_MGR * mgr) +{ + sha1_mb_mgr_init_asimd(&mgr->mgr); +} + +SHA1_HASH_CTX *sha1_ctx_mgr_submit_asimd(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx, + const void *buffer, uint32_t len, HASH_CTX_FLAG flags) +{ + if (flags & (~HASH_ENTIRE)) { + // User should not pass anything other than FIRST, UPDATE, or LAST + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + // Cannot submit to a currently processing job. + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + // Cannot update a finished job. + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + if (flags & HASH_FIRST) { + // Init digest + hash_init_digest(ctx->job.result_digest); + + // Reset byte counter + ctx->total_length = 0; + + // Clear extra blocks + ctx->partial_block_buffer_length = 0; + } + // If we made it here, there were no errors during this call to submit + ctx->error = HASH_CTX_ERROR_NONE; + + // Store buffer ptr info from user + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + // Store the user's request flags and mark this ctx as currently being processed. + ctx->status = (flags & HASH_LAST) ? 
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + // Advance byte counter + ctx->total_length += len; + + // If there is anything currently buffered in the extra blocks, append to it until it contains a whole block. + // Or if the user's buffer contains less than a whole block, append as much as possible to the extra block. + if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { + // Compute how many bytes to copy from user buffer into extra block + uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + // Copy and update relevant pointers and counters + memcpy_fixedlen(&ctx->partial_block_buffer + [ctx->partial_block_buffer_length], buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + // The extra block should never contain more than 1 block here + assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); + + // If the extra block buffer contains exactly 1 block, it can be hashed. + if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr, &ctx->job); + } + } + + return sha1_ctx_mgr_resubmit(mgr, ctx); +} + +SHA1_HASH_CTX *sha1_ctx_mgr_flush_asimd(SHA1_HASH_CTX_MGR * mgr) +{ + SHA1_HASH_CTX *ctx; + + while (1) { + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_flush_asimd(&mgr->mgr); + + // If flush returned 0, there are no more jobs in flight. + if (!ctx) + return NULL; + + // If flush returned a job, verify that it is safe to return to the user. + // If it is not ready, resubmit the job to finish processing. 
+ ctx = sha1_ctx_mgr_resubmit(mgr, ctx); + + // If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. + if (ctx) + return ctx; + + // Otherwise, all jobs currently being managed by the SHA1_HASH_CTX_MGR still need processing. Loop. + } +} + +static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx) +{ + while (ctx) { + + if (ctx->status & HASH_CTX_STS_COMPLETE) { + ctx->status = HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit + return ctx; + } + // If the extra blocks are empty, begin hashing what remains in the user's buffer. + if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) { + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + + // Only entire blocks can be hashed. Copy remainder to extra blocks buffer. + uint32_t copy_len = len & (SHA1_BLOCK_SIZE - 1); + + if (copy_len) { + len -= copy_len; + memcpy_fixedlen(ctx->partial_block_buffer, + ((const char *)buffer + len), copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + // len should be a multiple of the block size now + assert((len % SHA1_BLOCK_SIZE) == 0); + + // Set len to the number of blocks to be hashed in the user's buffer + len >>= SHA1_LOG2_BLOCK_SIZE; + + if (len) { + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr, + &ctx->job); + continue; + } + } + // If the extra blocks are not empty, then we are either on the last block(s) + // or we need more user input before continuing. 
+ if (ctx->status & HASH_CTX_STS_LAST) { + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length); + + ctx->status = + (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_asimd(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static inline void hash_init_digest(SHA1_WORD_T * digest) +{ + static const SHA1_WORD_T hash_initial_digest[SHA1_DIGEST_NWORDS] = + { SHA1_INITIAL_DIGEST }; + memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest)); +} + +static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len) +{ + uint32_t i = (uint32_t) (total_len & (SHA1_BLOCK_SIZE - 1)); + + memclr_fixedlen(&padblock[i], SHA1_BLOCK_SIZE); + padblock[i] = 0x80; + + // Move i to the end of either 1st or 2nd extra block depending on length + i += ((SHA1_BLOCK_SIZE - 1) & (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) + 1 + + SHA1_PADLENGTHFIELD_SIZE; + +#if SHA1_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) & padblock[i - 16]) = 0; +#endif + + *((uint64_t *) & padblock[i - 8]) = to_be64((uint64_t) total_len << 3); + + return i >> SHA1_LOG2_BLOCK_SIZE; // Number of extra blocks to hash +} + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; +struct slver sha1_ctx_mgr_init_asimd_slver_02020142; +struct slver sha1_ctx_mgr_init_asimd_slver = { 0x0142, 0x02, 0x02 }; + +struct slver sha1_ctx_mgr_submit_asimd_slver_02020143; +struct slver sha1_ctx_mgr_submit_asimd_slver = { 0x0143, 0x02, 0x02 }; + +struct slver sha1_ctx_mgr_flush_asimd_slver_02020144; +struct slver sha1_ctx_mgr_flush_asimd_slver = { 0x0144, 0x02, 0x02 }; diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c new file mode 100644 index 
000000000..e40a344ff --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_ctx_ce.c @@ -0,0 +1,250 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdint.h> +#include <string.h> +#include "sha1_mb.h" +#include "memcpy_inline.h" +#include "endian_helper.h" +void sha1_mb_mgr_init_ce(SHA1_MB_JOB_MGR * state); +SHA1_JOB *sha1_mb_mgr_submit_ce(SHA1_MB_JOB_MGR * state, SHA1_JOB * job); +SHA1_JOB *sha1_mb_mgr_flush_ce(SHA1_MB_JOB_MGR * state); +static inline void hash_init_digest(SHA1_WORD_T * digest); +static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len); +static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx); + +void sha1_ctx_mgr_init_ce(SHA1_HASH_CTX_MGR * mgr) +{ + sha1_mb_mgr_init_ce(&mgr->mgr); +} + +SHA1_HASH_CTX *sha1_ctx_mgr_submit_ce(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx, + const void *buffer, uint32_t len, HASH_CTX_FLAG flags) +{ + if (flags & (~HASH_ENTIRE)) { + // User should not pass anything other than FIRST, UPDATE, or LAST + ctx->error = HASH_CTX_ERROR_INVALID_FLAGS; + return ctx; + } + + if (ctx->status & HASH_CTX_STS_PROCESSING) { + // Cannot submit to a currently processing job. + ctx->error = HASH_CTX_ERROR_ALREADY_PROCESSING; + return ctx; + } + + if ((ctx->status & HASH_CTX_STS_COMPLETE) && !(flags & HASH_FIRST)) { + // Cannot update a finished job. + ctx->error = HASH_CTX_ERROR_ALREADY_COMPLETED; + return ctx; + } + + if (flags & HASH_FIRST) { + // Init digest + hash_init_digest(ctx->job.result_digest); + + // Reset byte counter + ctx->total_length = 0; + + // Clear extra blocks + ctx->partial_block_buffer_length = 0; + } + // If we made it here, there were no errors during this call to submit + ctx->error = HASH_CTX_ERROR_NONE; + + // Store buffer ptr info from user + ctx->incoming_buffer = buffer; + ctx->incoming_buffer_length = len; + + // Store the user's request flags and mark this ctx as currently being processed. + ctx->status = (flags & HASH_LAST) ? 
+ (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_LAST) : + HASH_CTX_STS_PROCESSING; + + // Advance byte counter + ctx->total_length += len; + + // If there is anything currently buffered in the extra blocks, append to it until it contains a whole block. + // Or if the user's buffer contains less than a whole block, append as much as possible to the extra block. + if ((ctx->partial_block_buffer_length) | (len < SHA1_BLOCK_SIZE)) { + // Compute how many bytes to copy from user buffer into extra block + uint32_t copy_len = SHA1_BLOCK_SIZE - ctx->partial_block_buffer_length; + if (len < copy_len) + copy_len = len; + + if (copy_len) { + // Copy and update relevant pointers and counters + memcpy_fixedlen(&ctx->partial_block_buffer + [ctx->partial_block_buffer_length], buffer, copy_len); + + ctx->partial_block_buffer_length += copy_len; + ctx->incoming_buffer = (const void *)((const char *)buffer + copy_len); + ctx->incoming_buffer_length = len - copy_len; + } + // The extra block should never contain more than 1 block here + assert(ctx->partial_block_buffer_length <= SHA1_BLOCK_SIZE); + + // If the extra block buffer contains exactly 1 block, it can be hashed. + if (ctx->partial_block_buffer_length >= SHA1_BLOCK_SIZE) { + ctx->partial_block_buffer_length = 0; + + ctx->job.buffer = ctx->partial_block_buffer; + ctx->job.len = 1; + + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr, &ctx->job); + } + } + + return sha1_ctx_mgr_resubmit(mgr, ctx); +} + +SHA1_HASH_CTX *sha1_ctx_mgr_flush_ce(SHA1_HASH_CTX_MGR * mgr) +{ + SHA1_HASH_CTX *ctx; + + while (1) { + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_flush_ce(&mgr->mgr); + + // If flush returned 0, there are no more jobs in flight. + if (!ctx) + return NULL; + + // If flush returned a job, verify that it is safe to return to the user. + // If it is not ready, resubmit the job to finish processing. + ctx = sha1_ctx_mgr_resubmit(mgr, ctx); + + // If sha1_ctx_mgr_resubmit returned a job, it is ready to be returned. 
+ if (ctx) + return ctx; + + // Otherwise, all jobs currently being managed by the SHA1_HASH_CTX_MGR still need processing. Loop. + } +} + +static SHA1_HASH_CTX *sha1_ctx_mgr_resubmit(SHA1_HASH_CTX_MGR * mgr, SHA1_HASH_CTX * ctx) +{ + while (ctx) { + + if (ctx->status & HASH_CTX_STS_COMPLETE) { + ctx->status = HASH_CTX_STS_COMPLETE; // Clear PROCESSING bit + return ctx; + } + // If the extra blocks are empty, begin hashing what remains in the user's buffer. + if (ctx->partial_block_buffer_length == 0 && ctx->incoming_buffer_length) { + const void *buffer = ctx->incoming_buffer; + uint32_t len = ctx->incoming_buffer_length; + + // Only entire blocks can be hashed. Copy remainder to extra blocks buffer. + uint32_t copy_len = len & (SHA1_BLOCK_SIZE - 1); + + if (copy_len) { + len -= copy_len; + memcpy_fixedlen(ctx->partial_block_buffer, + ((const char *)buffer + len), copy_len); + ctx->partial_block_buffer_length = copy_len; + } + + ctx->incoming_buffer_length = 0; + + // len should be a multiple of the block size now + assert((len % SHA1_BLOCK_SIZE) == 0); + + // Set len to the number of blocks to be hashed in the user's buffer + len >>= SHA1_LOG2_BLOCK_SIZE; + + if (len) { + ctx->job.buffer = (uint8_t *) buffer; + ctx->job.len = len; + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr, + &ctx->job); + continue; + } + } + // If the extra blocks are not empty, then we are either on the last block(s) + // or we need more user input before continuing. 
+ if (ctx->status & HASH_CTX_STS_LAST) { + uint8_t *buf = ctx->partial_block_buffer; + uint32_t n_extra_blocks = hash_pad(buf, ctx->total_length); + + ctx->status = + (HASH_CTX_STS) (HASH_CTX_STS_PROCESSING | HASH_CTX_STS_COMPLETE); + ctx->job.buffer = buf; + ctx->job.len = (uint32_t) n_extra_blocks; + ctx = (SHA1_HASH_CTX *) sha1_mb_mgr_submit_ce(&mgr->mgr, &ctx->job); + continue; + } + + if (ctx) + ctx->status = HASH_CTX_STS_IDLE; + return ctx; + } + + return NULL; +} + +static inline void hash_init_digest(SHA1_WORD_T * digest) +{ + static const SHA1_WORD_T hash_initial_digest[SHA1_DIGEST_NWORDS] = + { SHA1_INITIAL_DIGEST }; + memcpy_fixedlen(digest, hash_initial_digest, sizeof(hash_initial_digest)); +} + +static inline uint32_t hash_pad(uint8_t padblock[SHA1_BLOCK_SIZE * 2], uint64_t total_len) +{ + uint32_t i = (uint32_t) (total_len & (SHA1_BLOCK_SIZE - 1)); + + memclr_fixedlen(&padblock[i], SHA1_BLOCK_SIZE); + padblock[i] = 0x80; + + // Move i to the end of either 1st or 2nd extra block depending on length + i += ((SHA1_BLOCK_SIZE - 1) & (0 - (total_len + SHA1_PADLENGTHFIELD_SIZE + 1))) + 1 + + SHA1_PADLENGTHFIELD_SIZE; + +#if SHA1_PADLENGTHFIELD_SIZE == 16 + *((uint64_t *) & padblock[i - 16]) = 0; +#endif + + *((uint64_t *) & padblock[i - 8]) = to_be64((uint64_t) total_len << 3); + + return i >> SHA1_LOG2_BLOCK_SIZE; // Number of extra blocks to hash +} + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; +struct slver sha1_ctx_mgr_init_ce_slver_02020142; +struct slver sha1_ctx_mgr_init_ce_slver = { 0x0142, 0x02, 0x02 }; + +struct slver sha1_ctx_mgr_submit_ce_slver_02020143; +struct slver sha1_ctx_mgr_submit_ce_slver = { 0x0143, 0x02, 0x02 }; + +struct slver sha1_ctx_mgr_flush_ce_slver_02020144; +struct slver sha1_ctx_mgr_flush_ce_slver = { 0x0144, 0x02, 0x02 }; diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c new file mode 
100644 index 000000000..0942c1a95 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_aarch64_dispatcher.c @@ -0,0 +1,93 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_submit) +{ + + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(sha1_ctx_mgr_submit_ce); + + if (auxval & HWCAP_ASIMD) { + switch (get_micro_arch_id()) { + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through + return PROVIDER_INFO(sha1_ctx_mgr_submit_asimd); + default: + break; + } + } + + return PROVIDER_BASIC(sha1_ctx_mgr_submit); + +} + +DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_init) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(sha1_ctx_mgr_init_ce); + + if (auxval & HWCAP_ASIMD) { + switch (get_micro_arch_id()) { + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through + return PROVIDER_INFO(sha1_ctx_mgr_init_asimd); + default: + break; + } + } + + return PROVIDER_BASIC(sha1_ctx_mgr_init); + +} + +DEFINE_INTERFACE_DISPATCHER(sha1_ctx_mgr_flush) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(sha1_ctx_mgr_flush_ce); + + if (auxval & HWCAP_ASIMD) { + switch (get_micro_arch_id()) { + case MICRO_ARCH_ID(ARM, NEOVERSE_N1): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A57): // fall through + case MICRO_ARCH_ID(ARM, CORTEX_A72): // fall through + return PROVIDER_INFO(sha1_ctx_mgr_flush_asimd); + default: + break; + } + } + + return PROVIDER_BASIC(sha1_ctx_mgr_flush); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S new file mode 100644 index 000000000..012b15c14 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_asimd_x4.S @@ -0,0 
+1,192 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + +#include "sha1_asimd_common.S" + +.macro internal_load windex + // load 64-bytes from each address to maximize usage of cache line + .if \windex == 0 + mov tmp,dataptr + ld1 {WORD0.4s},[data0],16 + ld1 {WORD4.4s},[data0],16 + ld1 {WORD8.4s},[data0],16 + ld1 {WORD12.4s},[data0],16 + + ld1 {WORD1.4s},[data1],16 + ld1 {WORD5.4s},[data1],16 + ld1 {WORD9.4s},[data1],16 + ld1 {WORD13.4s},[data1],16 + + ld1 {WORD2.4s},[data2],16 + ld1 {WORD6.4s},[data2],16 + ld1 {WORD10.4s},[data2],16 + ld1 {WORD14.4s},[data2],16 + + ld1 {WORD3.4s},[data3],16 + ld1 {WORD7.4s},[data3],16 + ld1 {WORD11.4s},[data3],16 + ld1 {WORD15.4s},[data3],16 + + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[0],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[1],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[2],[tmp],16 + st4 {WORD0.s,WORD1.s,WORD2.s,WORD3.s}[3],[tmp],16 + .endif + + .if \windex == 4 + mov tmp,dataptr + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[0],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[1],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[2],[tmp],16 + st4 {WORD4.s,WORD5.s,WORD6.s,WORD7.s}[3],[tmp],16 + .endif + + .if \windex == 8 + mov tmp,dataptr + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[0],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[1],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[2],[tmp],16 + st4 {WORD8.s,WORD9.s,WORD10.s,WORD11.s}[3],[tmp],16 + .endif + + .if \windex == 12 + mov tmp,dataptr + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[0],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[1],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[2],[tmp],16 + st4 {WORD12.s,WORD13.s,WORD14.s,WORD15.s}[3],[tmp],16 + .endif +.endm + +.macro load_x4_word idx:req + internal_load \idx + ld1 {WORD\idx\().16b},[dataptr],16 +.endm + +/* + * void sha1_mb_asimd_x4(SHA1_JOB *j0, SHA1_JOB*j1, SHA1_JOB*j2, SHA1_JOB *j3, int blocks) + */ + job0 .req x0 + job1 .req x1 + job2 .req x2 + job3 .req x3 
+ num_blocks .req w4 + tmp .req x5 + data0 .req x6 + data1 .req x7 + data2 .req x8 + data3 .req x9 + databuf .req x10 + dataptr .req x11 + savedsp .req x12 + + .global sha1_mb_asimd_x4 + .type sha1_mb_asimd_x4, %function +sha1_mb_asimd_x4: + cmp num_blocks, #0 + beq .return + sha1_asimd_save_stack + mov savedsp,sp + sub databuf,sp,256 + mov tmp,63 + bic databuf,databuf,tmp + mov sp,databuf + + add tmp,job0,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[0],[tmp],#16 + ld1 {VE.s}[0],[tmp] + ldr data0,[job0] + + add tmp,job1,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[1],[tmp],#16 + ld1 {VE.s}[1],[tmp] + ldr data1,[job1] + + add tmp,job2,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[2],[tmp],#16 + ld1 {VE.s}[2],[tmp] + ldr data2,[job2] + + add tmp,job3,64 + ld4 {VA.s,VB.s,VC.s,VD.s}[3],[tmp],#16 + ld1 {VE.s}[3],[tmp] + ldr data3,[job3] + +.block_loop: + mov dataptr,databuf + sha1_single + subs num_blocks, num_blocks, 1 + bne .block_loop + + add tmp,job0,64 + st4 {VA.s,VB.s,VC.s,VD.s}[0],[tmp],#16 + st1 {VE.s}[0],[tmp] + + add tmp,job1,64 + st4 {VA.s,VB.s,VC.s,VD.s}[1],[tmp],#16 + st1 {VE.s}[1],[tmp] + + add tmp,job2,64 + st4 {VA.s,VB.s,VC.s,VD.s}[2],[tmp],#16 + st1 {VE.s}[2],[tmp] + + add tmp,job3,64 + st4 {VA.s,VB.s,VC.s,VD.s}[3],[tmp],#16 + st1 {VE.s}[3],[tmp] + + mov sp,savedsp + sha1_asimd_restore_stack +.return: + ret + + .size sha1_mb_asimd_x4, .-sha1_mb_asimd_x4 + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c new file mode 100644 index 000000000..4b34e7b53 --- /dev/null +++ 
b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_asimd.c @@ -0,0 +1,217 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <stddef.h> +#include <sha1_mb.h> +#include <assert.h> +#include "endian_helper.h" + +extern void sha1_aarch64_x1(const uint8_t * data, int num_blocks, uint32_t digest[]); +static inline void sha1_job_x1(SHA1_JOB * job, int blocks) +{ + sha1_aarch64_x1(job->buffer, blocks, job->result_digest); +} + +#ifndef min +#define min(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +#define SHA1_MB_ASIMD_MAX_LANES 4 +void sha1_mb_asimd_x4(SHA1_JOB *, SHA1_JOB *, SHA1_JOB *, SHA1_JOB *, int); + +#define LANE_IS_NOT_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FREE(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane==NULL) +#define LANE_IS_INVALID(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane==NULL) + +void sha1_mb_mgr_init_asimd(SHA1_MB_JOB_MGR * state) +{ + unsigned int i; + + state->unused_lanes = 0xf; + state->num_lanes_inuse = 0; + for (i = 0; i < SHA1_MB_ASIMD_MAX_LANES; i++) { + state->unused_lanes <<= 4; + state->unused_lanes |= SHA1_MB_ASIMD_MAX_LANES - 1 - i; + state->lens[i] = i; + state->ldata[i].job_in_lane = 0; + } + + // lanes > SHA1_MB_ASIMD_MAX_LANES is invalid lane + for (; i < SHA1_MAX_LANES; i++) { + state->lens[i] = 0xf; + state->ldata[i].job_in_lane = 0; + } +} + +static int sha1_mb_mgr_do_jobs(SHA1_MB_JOB_MGR * state) +{ + int lane_idx, len, i, lanes, blocks; + int lane_idx_array[SHA1_MAX_LANES]; + + if (state->num_lanes_inuse == 0) { + return -1; + } + lanes = 0, len = 0; + for (i = 0; i < SHA1_MAX_LANES && lanes < state->num_lanes_inuse; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + if (lanes) + len = min(len, state->lens[i]); + else + len = state->lens[i]; + lane_idx_array[lanes] = i; + lanes++; + } + } + + if (lanes == 0) + return -1; + lane_idx = len & 0xf; + 
len = len & (~0xf); + blocks = len >> 4; + + /* for less-than-3-lane job, ASIMD really does not have much advantage + * compared to scalar due to wasted >= 50% capacity + * therefore we only run ASIMD for 3/4 lanes of data + */ + if (lanes == SHA1_MB_ASIMD_MAX_LANES) { + sha1_mb_asimd_x4(state->ldata[lane_idx_array[0]].job_in_lane, + state->ldata[lane_idx_array[1]].job_in_lane, + state->ldata[lane_idx_array[2]].job_in_lane, + state->ldata[lane_idx_array[3]].job_in_lane, blocks); + } else if (lanes == 3) { + /* in case of 3 lanes, apparently ASIMD will still operate as if + * there were four lanes of data in processing (waste 25% capacity) + * theoretically we can let ASIMD implementation know the number of lanes + * so that it could "at least" save some memory loading time + * but in practice, we can just pass lane 0 as dummy for similar + * cache performance + */ + SHA1_JOB dummy; + dummy.buffer = state->ldata[lane_idx_array[0]].job_in_lane->buffer; + dummy.len = state->ldata[lane_idx_array[0]].job_in_lane->len; + sha1_mb_asimd_x4(state->ldata[lane_idx_array[0]].job_in_lane, + &dummy, + state->ldata[lane_idx_array[1]].job_in_lane, + state->ldata[lane_idx_array[2]].job_in_lane, blocks); + } else { + sha1_job_x1(state->ldata[lane_idx_array[0]].job_in_lane, blocks); + if (lanes >= 2) { + sha1_job_x1(state->ldata[lane_idx_array[1]].job_in_lane, blocks); + } + } + + // only return the min length job + for (i = 0; i < SHA1_MAX_LANES; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + state->lens[i] -= len; + state->ldata[i].job_in_lane->len -= len; + state->ldata[i].job_in_lane->buffer += len << 2; + } + } + return lane_idx; + +} + +static SHA1_JOB *sha1_mb_mgr_free_lane(SHA1_MB_JOB_MGR * state) +{ + int i; + SHA1_JOB *ret = NULL; + + for (i = 0; i < SHA1_MB_ASIMD_MAX_LANES; i++) { + if (LANE_IS_FINISHED(state, i)) { + state->unused_lanes <<= 4; + state->unused_lanes |= i; + state->num_lanes_inuse--; + ret = state->ldata[i].job_in_lane; + ret->status = STS_COMPLETED; + 
state->ldata[i].job_in_lane = NULL; + break; + } + } + return ret; +} + +static void sha1_mb_mgr_insert_job(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ + int lane_idx; + // add job into lanes + lane_idx = state->unused_lanes & 0xf; + // fatal error + assert(lane_idx < SHA1_MB_ASIMD_MAX_LANES); + state->lens[lane_idx] = (job->len << 4) | lane_idx; + state->ldata[lane_idx].job_in_lane = job; + state->unused_lanes >>= 4; + state->num_lanes_inuse++; +} + +SHA1_JOB *sha1_mb_mgr_submit_asimd(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ +#ifndef NDEBUG + int lane_idx; +#endif + SHA1_JOB *ret; + + // add job into lanes + sha1_mb_mgr_insert_job(state, job); + + ret = sha1_mb_mgr_free_lane(state); + if (ret != NULL) { + return ret; + } + // submit will wait all lane has data + if (state->num_lanes_inuse < SHA1_MB_ASIMD_MAX_LANES) + return NULL; +#ifndef NDEBUG + lane_idx = sha1_mb_mgr_do_jobs(state); + assert(lane_idx != -1); +#else + sha1_mb_mgr_do_jobs(state); +#endif + + // ~ i = lane_idx; + ret = sha1_mb_mgr_free_lane(state); + return ret; +} + +SHA1_JOB *sha1_mb_mgr_flush_asimd(SHA1_MB_JOB_MGR * state) +{ + SHA1_JOB *ret; + ret = sha1_mb_mgr_free_lane(state); + if (ret) { + return ret; + } + + sha1_mb_mgr_do_jobs(state); + return sha1_mb_mgr_free_lane(state); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c new file mode 100644 index 000000000..1dfd67d0c --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_mgr_ce.c @@ -0,0 +1,208 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <stddef.h> +#include <sha1_mb.h> +#include <assert.h> + +#ifndef max +#define max(a,b) (((a) > (b)) ? (a) : (b)) +#endif + +#ifndef min +#define min(a,b) (((a) < (b)) ? 
(a) : (b)) +#endif + +#define SHA1_MB_CE_MAX_LANES 2 +#if SHA1_MB_CE_MAX_LANES >=2 +void sha1_mb_ce_x2(SHA1_JOB *, SHA1_JOB *, int); +#endif +void sha1_mb_ce_x1(SHA1_JOB *, int); + +#define LANE_IS_NOT_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FINISHED(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane!=NULL) +#define LANE_IS_FREE(state,i) \ + (((state->lens[i]&(~0xf))==0) && state->ldata[i].job_in_lane==NULL) +#define LANE_IS_INVALID(state,i) \ + (((state->lens[i]&(~0xf))!=0) && state->ldata[i].job_in_lane==NULL) +void sha1_mb_mgr_init_ce(SHA1_MB_JOB_MGR * state) +{ + unsigned int i; + + state->unused_lanes = 0xf; + state->num_lanes_inuse = 0; + for (i = 0; i < SHA1_MB_CE_MAX_LANES; i++) { + state->unused_lanes <<= 4; + state->unused_lanes |= i; + state->lens[i] = i; + state->ldata[i].job_in_lane = 0; + } + + //lanes > SHA1_MB_CE_MAX_LANES is invalid lane + for (; i < SHA1_MAX_LANES; i++) { + state->lens[i] = 0xf; + state->ldata[i].job_in_lane = 0; + } +} + +static int sha1_mb_mgr_do_jobs(SHA1_MB_JOB_MGR * state) +{ + int lane_idx, len, i, lanes; + + int lane_idx_array[SHA1_MAX_LANES]; + + if (state->num_lanes_inuse == 0) { + return -1; + } +#if SHA1_MB_CE_MAX_LANES == 2 + if (state->num_lanes_inuse == 2) { + len = min(state->lens[0], state->lens[1]); + lane_idx = len & 0xf; + len &= ~0xf; + + sha1_mb_ce_x2(state->ldata[0].job_in_lane, + state->ldata[1].job_in_lane, len >> 4); + + } else +#endif + { + lanes = 0, len = 0; + for (i = 0; i < SHA1_MAX_LANES && lanes < state->num_lanes_inuse; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + if (lanes) + len = min(len, state->lens[i]); + else + len = state->lens[i]; + lane_idx_array[lanes] = i; + lanes++; + } + } + if (lanes == 0) + return -1; + lane_idx = len & 0xf; + len = len & (~0xf); + +#if SHA1_MB_CE_MAX_LANES >=2 + if (lanes == 2) { + sha1_mb_ce_x2(state->ldata[lane_idx_array[0]].job_in_lane, + 
state->ldata[lane_idx_array[1]].job_in_lane, len >> 4); + } else +#endif + { + sha1_mb_ce_x1(state->ldata[lane_idx_array[0]].job_in_lane, len >> 4); + } + } + //only return the min length job + for (i = 0; i < SHA1_MAX_LANES; i++) { + if (LANE_IS_NOT_FINISHED(state, i)) { + state->lens[i] -= len; + state->ldata[i].job_in_lane->len -= len; + state->ldata[i].job_in_lane->buffer += len << 2; + } + } + + return lane_idx; + +} + +static SHA1_JOB *sha1_mb_mgr_free_lane(SHA1_MB_JOB_MGR * state) +{ + int i; + SHA1_JOB *ret = NULL; + + for (i = 0; i < SHA1_MB_CE_MAX_LANES; i++) { + if (LANE_IS_FINISHED(state, i)) { + + state->unused_lanes <<= 4; + state->unused_lanes |= i; + state->num_lanes_inuse--; + ret = state->ldata[i].job_in_lane; + ret->status = STS_COMPLETED; + state->ldata[i].job_in_lane = NULL; + break; + } + } + return ret; +} + +static void sha1_mb_mgr_insert_job(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ + int lane_idx; + //add job into lanes + lane_idx = state->unused_lanes & 0xf; + //fatal error + assert(lane_idx < SHA1_MB_CE_MAX_LANES); + state->lens[lane_idx] = (job->len << 4) | lane_idx; + state->ldata[lane_idx].job_in_lane = job; + state->unused_lanes >>= 4; + state->num_lanes_inuse++; +} + +SHA1_JOB *sha1_mb_mgr_submit_ce(SHA1_MB_JOB_MGR * state, SHA1_JOB * job) +{ +#ifndef NDEBUG + int lane_idx; +#endif + SHA1_JOB *ret; + + //add job into lanes + sha1_mb_mgr_insert_job(state, job); + + ret = sha1_mb_mgr_free_lane(state); + if (ret != NULL) { + return ret; + } + //submit will wait all lane has data + if (state->num_lanes_inuse < SHA1_MB_CE_MAX_LANES) + return NULL; +#ifndef NDEBUG + lane_idx = sha1_mb_mgr_do_jobs(state); + assert(lane_idx != -1); +#else + sha1_mb_mgr_do_jobs(state); +#endif + + //~ i = lane_idx; + ret = sha1_mb_mgr_free_lane(state); + return ret; +} + +SHA1_JOB *sha1_mb_mgr_flush_ce(SHA1_MB_JOB_MGR * state) +{ + SHA1_JOB *ret; + ret = sha1_mb_mgr_free_lane(state); + if (ret) { + return ret; + } + + sha1_mb_mgr_do_jobs(state); + return 
sha1_mb_mgr_free_lane(state); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S new file mode 100644 index 000000000..bb1929d76 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_multibinary.S @@ -0,0 +1,36 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + +#include "aarch64_multibinary.h" + + +mbin_interface sha1_ctx_mgr_submit +mbin_interface sha1_ctx_mgr_init +mbin_interface sha1_ctx_mgr_flush diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S new file mode 100644 index 000000000..22f736793 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x1_ce.S @@ -0,0 +1,194 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crypto + .text + .align 2 + .p2align 3,,7 + +/* +Macros +*/ + +.macro declare_var_vector_reg name:req,reg:req + \name\()_q .req q\reg + \name\()_v .req v\reg + \name\()_s .req s\reg +.endm + +/** +maros for round 4-67 +*/ +.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req,k:req + sha1h \e0\()_s, \abcd\()_s + \inst \abcd\()_q,\e1\()_s,\tmp1\()_v.4s + add \tmp1\()_v.4s,\msg3\()_v.4s,\k\()_v.4s + sha1su1 \msg0\()_v.4s,\msg3\()_v.4s + sha1su0 \msg1\()_v.4s,\msg2\()_v.4s,\msg3\()_v.4s +.endm + + +/* +Variable list +*/ + + declare_var_vector_reg key_0,28 + declare_var_vector_reg key_1,29 + declare_var_vector_reg key_2,30 + declare_var_vector_reg key_3,31 + + +/* +digest variables +*/ + declare_var_vector_reg abcd,0 + declare_var_vector_reg e0,1 + declare_var_vector_reg e1,2 + declare_var_vector_reg abcd_saved,3 + declare_var_vector_reg e0_saved,4 +/* +Message variables +*/ + declare_var_vector_reg msg_0,16 + declare_var_vector_reg msg_1,17 + declare_var_vector_reg msg_2,18 + declare_var_vector_reg msg_3,19 +/* +Temporay variables +*/ + declare_var_vector_reg tmp_0,5 + declare_var_vector_reg tmp_1,6 + +/* + void sha1_mb_ce_x1(SHA1_JOB * job, int len); +*/ +/* +Arguements list +*/ + job .req x0 + len .req w1 + data .req x2 + tmp .req x3 + .global sha1_mb_ce_x1 + .type sha1_mb_ce_x1, %function +sha1_mb_ce_x1: + 
ldr data, [job] + ldr abcd_q, [job, 64] + ldr e0_s, [job, 80] + adr tmp, KEY + ld1 {key_0_v.4s-key_3_v.4s},[tmp] + +start_loop: + + //load msgs + ld1 {msg_0_v.4s-msg_3_v.4s},[data] + + //adjust loop parameter + add data,data,64 + sub len, len, #1 + cmp len, 0 + //backup digest + mov abcd_saved_v.16b,abcd_v.16b + mov e0_saved_v.16b,e0_v.16b + + rev32 msg_0_v.16b,msg_0_v.16b + rev32 msg_1_v.16b,msg_1_v.16b + add tmp_0_v.4s,msg_0_v.4s,key_0_v.4s + rev32 msg_2_v.16b,msg_2_v.16b + add tmp_1_v.4s,msg_1_v.4s,key_0_v.4s + rev32 msg_3_v.16b,msg_3_v.16b + + /* rounds 0-3 */ + sha1h e1_s,abcd_s + sha1c abcd_q,e0_s,tmp_0_v.4s + add tmp_0_v.4s,msg_2_v.4s,key_0_v.4s + sha1su0 msg_0_v.4s,msg_1_v.4s,msg_2_v.4s + + sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_0 /* rounds 4-7 */ + sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_0 + sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 12-15 */ + sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_1 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_1 /* rounds 20-23 */ + sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_1 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1 + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_2 /* rounds 36-39 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_2 + sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_2 + sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2 + sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_3 /* rounds 52-55 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_3 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_3 + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_3 
+ + /* rounds 68-71 */ + sha1h e0_s,abcd_s + sha1p abcd_q,e1_s,tmp_1_v.4s + add tmp_1_v.4s,msg_3_v.4s,key_3_v.4s + sha1su1 msg_0_v.4s,msg_3_v.4s + + /* rounds 72-75 */ + sha1h e1_s,abcd_s + sha1p abcd_q,e0_s,tmp_0_v.4s + + /* rounds 76-79 */ + sha1h e0_s,abcd_s + sha1p abcd_q,e1_s,tmp_1_v.4s + + + + add abcd_v.4s,abcd_v.4s,abcd_saved_v.4s + add e0_v.2s,e0_v.2s,e0_saved_v.2s + + + bgt start_loop + str abcd_q, [job, 64] + str e0_s, [job, 80] + + ret + + .size sha1_mb_ce_x1, .-sha1_mb_ce_x1 + .section .rodata.cst16,"aM",@progbits,16 + .align 4 +KEY: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S new file mode 100644 index 000000000..93f653ad2 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/sha1_mb/aarch64/sha1_mb_x2_ce.S @@ -0,0 +1,253 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crypto + .text + .align 2 + .p2align 3,,7 + +/* +Macros +*/ + +.macro declare_var_vector_reg name:req,reg:req + \name\()_q .req q\reg + \name\()_v .req v\reg + \name\()_s .req s\reg +.endm + +/** +maros for round 4-67 +*/ +.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req,k:req + sha1h l0_\e0\()_s, l0_\abcd\()_s + sha1h l1_\e0\()_s, l1_\abcd\()_s + + \inst l0_\abcd\()_q,l0_\e1\()_s,l0_\tmp1\()_v.4s + \inst l1_\abcd\()_q,l1_\e1\()_s,l1_\tmp1\()_v.4s + + add l0_\tmp1\()_v.4s,l0_\msg3\()_v.4s,\k\()_v.4s + add l1_\tmp1\()_v.4s,l1_\msg3\()_v.4s,\k\()_v.4s + + sha1su1 l0_\msg0\()_v.4s,l0_\msg3\()_v.4s + sha1su1 l1_\msg0\()_v.4s,l1_\msg3\()_v.4s + + sha1su0 l0_\msg1\()_v.4s,l0_\msg2\()_v.4s,l0_\msg3\()_v.4s + sha1su0 l1_\msg1\()_v.4s,l1_\msg2\()_v.4s,l1_\msg3\()_v.4s +.endm + + +/* +Variable list +*/ + + declare_var_vector_reg key_0,28 + declare_var_vector_reg key_1,29 + declare_var_vector_reg key_2,30 + declare_var_vector_reg key_3,31 + + +/* +lane variables +*/ + declare_var_vector_reg l0_abcd,0 + declare_var_vector_reg l0_e0,1 + declare_var_vector_reg l0_e1,2 + 
+	/* lane 0 saved digest, scratch, and message schedule (v3-v6, v16-v19) */
+	declare_var_vector_reg	l0_abcd_saved,3
+	declare_var_vector_reg	l0_e0_saved,4
+	declare_var_vector_reg	l0_tmp_0,5
+	declare_var_vector_reg	l0_tmp_1,6
+	declare_var_vector_reg	l0_msg_0,16
+	declare_var_vector_reg	l0_msg_1,17
+	declare_var_vector_reg	l0_msg_2,18
+	declare_var_vector_reg	l0_msg_3,19
+
+	/* lane 1 equivalents; v8/v9 are callee-saved per AAPCS64, hence the
+	   d8/d9 spill in the prologue below */
+	declare_var_vector_reg	l1_abcd,7
+	declare_var_vector_reg	l1_e0,8
+	declare_var_vector_reg	l1_e1,9
+	declare_var_vector_reg	l1_abcd_saved,24
+	declare_var_vector_reg	l1_e0_saved,25
+	declare_var_vector_reg	l1_tmp_0,26
+	declare_var_vector_reg	l1_tmp_1,27
+	declare_var_vector_reg	l1_msg_0,20
+	declare_var_vector_reg	l1_msg_1,21
+	declare_var_vector_reg	l1_msg_2,22
+	declare_var_vector_reg	l1_msg_3,23
+
+/*
+	void sha1_mb_ce_x2(SHA1_JOB * job_0, SHA1_JOB * job_1,int len);
+
+	Hashes 'len' 64-byte blocks of two independent SHA1 jobs in lock-step
+	using the ARMv8 SHA1 crypto extensions; the two lanes are interleaved
+	to hide instruction latency.  Reads each job's buffer pointer from
+	offset 0 and its digest from offsets 64 (ABCD) and 80 (E), and writes
+	the updated digest back to the same offsets on exit.
+*/
+	l0_job	.req	x0
+	l1_job	.req	x1
+	len	.req	w2
+
+	l0_data	.req	x3
+	l1_data	.req	x4
+	tmp	.req	x5
+	.global	sha1_mb_ce_x2
+	.type	sha1_mb_ce_x2, %function
+sha1_mb_ce_x2:
+	//push d8,d9 to stack
+	//(low halves of v8/v9 are callee-saved; the 256-byte frame keeps sp
+	//16-byte aligned)
+	stp	d8, d9, [sp, -256]!
+
+	//NOTE(review): adr has a +/-1MB reach; assumes .rodata.cst16 (KEY)
+	//is linked within range of .text - confirm for large binaries
+	adr	tmp, KEY
+	ld1	{key_0_v.4s-key_3_v.4s},[tmp]
+	//load buffer pointers (job offset 0) and digests (ABCD at 64, E at 80)
+	ldr	l0_data, [l0_job]
+	ldr	l1_data, [l1_job]
+	ldr	l0_abcd_q, [l0_job, 64]
+	ldr	l0_e0_s, [l0_job, 80]
+	ldr	l1_abcd_q, [l1_job, 64]
+	ldr	l1_e0_s, [l1_job, 80]
+
+start_loop:
+
+	//load msgs
+	ld1	{l0_msg_0_v.4s-l0_msg_3_v.4s},[l0_data]
+	ld1	{l1_msg_0_v.4s-l1_msg_3_v.4s},[l1_data]
+
+	//adjust loop parameter
+	add	l0_data,l0_data,64
+	add	l1_data,l1_data,64
+	sub	len, len, #1
+	//set flags now; everything below is SIMD/crypto and never writes
+	//NZCV, so the bgt at the bottom still sees this comparison
+	cmp	len, 0
+	//backup digest
+	mov	l0_abcd_saved_v.16b, l0_abcd_v.16b
+	mov	l0_e0_saved_v.16b, l0_e0_v.16b
+	mov	l1_abcd_saved_v.16b, l1_abcd_v.16b
+	mov	l1_e0_saved_v.16b, l1_e0_v.16b
+
+	//byte-swap the big-endian message words and stage the first W+K values
+	rev32	l0_msg_0_v.16b, l0_msg_0_v.16b
+	rev32	l0_msg_1_v.16b, l0_msg_1_v.16b
+	add	l0_tmp_0_v.4s, l0_msg_0_v.4s, key_0_v.4s
+	rev32	l0_msg_2_v.16b, l0_msg_2_v.16b
+	add	l0_tmp_1_v.4s, l0_msg_1_v.4s, key_0_v.4s
+	rev32	l0_msg_3_v.16b, l0_msg_3_v.16b
+
+	rev32	l1_msg_0_v.16b, l1_msg_0_v.16b
+	rev32	l1_msg_1_v.16b, l1_msg_1_v.16b
+	add	l1_tmp_0_v.4s, l1_msg_0_v.4s, key_0_v.4s
+	rev32	l1_msg_2_v.16b, l1_msg_2_v.16b
+	add	l1_tmp_1_v.4s, l1_msg_1_v.4s, key_0_v.4s
+	rev32	l1_msg_3_v.16b, l1_msg_3_v.16b
+
+	/* rounds 0-3 */
+	sha1h	l0_e1_s, l0_abcd_s
+	sha1c	l0_abcd_q, l0_e0_s, l0_tmp_0_v.4s
+	add	l0_tmp_0_v.4s, l0_msg_2_v.4s, key_0_v.4s
+	sha1su0	l0_msg_0_v.4s, l0_msg_1_v.4s, l0_msg_2_v.4s
+
+	sha1h	l1_e1_s, l1_abcd_s
+	sha1c	l1_abcd_q, l1_e0_s, l1_tmp_0_v.4s
+	add	l1_tmp_0_v.4s, l1_msg_2_v.4s, key_0_v.4s
+	sha1su0	l1_msg_0_v.4s, l1_msg_1_v.4s, l1_msg_2_v.4s
+
+	//the key passed to each macro call is for the W+K block it stages
+	//ahead, hence the apparent offset from the round numbers
+	sha1_4_rounds	sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_0	/* rounds 4-7 */
+	sha1_4_rounds	sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_0	/* rounds 8-11 */
+	sha1_4_rounds	sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1	/* rounds 12-15 */
+	sha1_4_rounds	sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_1	/* rounds 16-19 */
+	sha1_4_rounds	sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_1	/* rounds 20-23 */
+	sha1_4_rounds	sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_1	/* rounds 24-27 */
+
+	sha1_4_rounds	sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_1	/* rounds 28-31 */
+	sha1_4_rounds	sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2	/* rounds 32-35 */
+	sha1_4_rounds	sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_2	/* rounds 36-39 */
+	sha1_4_rounds	sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_2	/* rounds 40-43 */
+	sha1_4_rounds	sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_2	/* rounds 44-47 */
+	sha1_4_rounds	sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_2	/* rounds 48-51 */
+	sha1_4_rounds	sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp_0,e1,tmp_1,key_3	/* rounds 52-55 */
+	sha1_4_rounds	sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp_1,e0,tmp_0,key_3	/* rounds 56-59 */
+	sha1_4_rounds	sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp_0,e1,tmp_1,key_3	/* rounds 60-63 */
+	sha1_4_rounds	sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp_1,e0,tmp_0,key_3	/* rounds 64-67 */
+
+	/* rounds 68-71 */
+	sha1h	l0_e0_s, l0_abcd_s
+	sha1p	l0_abcd_q, l0_e1_s, l0_tmp_1_v.4s
+	add	l0_tmp_1_v.4s, l0_msg_3_v.4s, key_3_v.4s
+	sha1su1	l0_msg_0_v.4s, l0_msg_3_v.4s
+
+	sha1h	l1_e0_s, l1_abcd_s
+	sha1p	l1_abcd_q, l1_e1_s, l1_tmp_1_v.4s
+	add	l1_tmp_1_v.4s, l1_msg_3_v.4s, key_3_v.4s
+	sha1su1	l1_msg_0_v.4s, l1_msg_3_v.4s
+
+	/* rounds 72-75 */
+	sha1h	l0_e1_s, l0_abcd_s
+	sha1p	l0_abcd_q, l0_e0_s, l0_tmp_0_v.4s
+
+	sha1h	l1_e1_s, l1_abcd_s
+	sha1p	l1_abcd_q, l1_e0_s, l1_tmp_0_v.4s
+
+	/* rounds 76-79 */
+	sha1h	l0_e0_s, l0_abcd_s
+	sha1p	l0_abcd_q, l0_e1_s, l0_tmp_1_v.4s
+
+	sha1h	l1_e0_s, l1_abcd_s
+	sha1p	l1_abcd_q, l1_e1_s, l1_tmp_1_v.4s
+
+
+	//fold the block result into the saved digest; only lane 0 of e0 is
+	//stored back, so a .2s add is sufficient for e
+	add	l0_abcd_v.4s, l0_abcd_v.4s, l0_abcd_saved_v.4s
+	add	l0_e0_v.2s, l0_e0_v.2s, l0_e0_saved_v.2s
+	add	l1_abcd_v.4s, l1_abcd_v.4s, l1_abcd_saved_v.4s
+	add	l1_e0_v.2s, l1_e0_v.2s, l1_e0_saved_v.2s
+
+
+
+	//flags are still those of the 'cmp len, 0' at the top of the loop
+	bgt	start_loop
+
+	//write the final digests back into the jobs
+	str	l0_abcd_q, [l0_job, 64]
+	str	l0_e0_s, [l0_job, 80]
+
+
+	str	l1_abcd_q, [l1_job, 64]
+	str	l1_e0_s, [l1_job, 80]
+
+	//pop d8,d9 from stack
+	ldp	d8, d9, [sp], 256
+	ret
+
+	.size	sha1_mb_ce_x2, .-sha1_mb_ce_x2
+	//SHA1 round constants K0..K3, each replicated across four 32-bit lanes
+	.section	.rodata.cst16,"aM",@progbits,16
+	.align	4
+KEY:
+	.word	0x5a827999
+	.word	0x5a827999
+	.word	0x5a827999
+	.word	0x5a827999
+	.word	0x6ed9eba1
+	.word	0x6ed9eba1
+	.word	0x6ed9eba1
+	.word	0x6ed9eba1
+	.word	0x8f1bbcdc
+	.word	0x8f1bbcdc
+	.word	0x8f1bbcdc
+	.word	0x8f1bbcdc
+	.word	0xca62c1d6
+	.word	0xca62c1d6
+	.word	0xca62c1d6
+	.word	0xca62c1d6