author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-04-21 11:54:28 +0000
commit     e6918187568dbd01842d8d1d2c808ce16a894239
tree       64f88b554b444a49f656b6c656111a145cbbaa28  /src/crypto/isa-l/isa-l_crypto/mh_sha256
parent     Initial commit.
Adding upstream version 18.2.2.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/mh_sha256')
22 files changed, 5658 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha256/Makefile.am new file mode 100644 index 000000000..d6e8b61ab --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/Makefile.am @@ -0,0 +1,88 @@ +######################################################################## +# Copyright(c) 2011-2017 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +lsrc_sha256 = mh_sha256/sha256_for_mh_sha256.c + +lsrc_mh_sha256 = mh_sha256/mh_sha256.c \ + mh_sha256/mh_sha256_block_sse.asm \ + mh_sha256/mh_sha256_block_avx.asm \ + mh_sha256/mh_sha256_block_avx2.asm \ + mh_sha256/mh_sha256_multibinary.asm \ + mh_sha256/mh_sha256_finalize_base.c \ + mh_sha256/mh_sha256_update_base.c \ + mh_sha256/mh_sha256_block_base.c + +lsrc_mh_sha256 += mh_sha256/mh_sha256_block_avx512.asm \ + mh_sha256/mh_sha256_avx512.c + +lsrc_x86_64 += $(lsrc_sha256) \ + $(lsrc_mh_sha256) + +lsrc_x86_32 += $(lsrc_x86_64) + +other_src += mh_sha256/mh_sha256_ref.c \ + include/reg_sizes.asm \ + include/multibinary.asm \ + include/test.h \ + mh_sha256/mh_sha256_internal.h + +lsrc_aarch64 += $(lsrc_sha256) \ + mh_sha256/aarch64/mh_sha256_multibinary.S \ + mh_sha256/aarch64/mh_sha256_aarch64_dispatcher.c \ + mh_sha256/aarch64/mh_sha256_block_ce.S \ + mh_sha256/aarch64/mh_sha256_ce.c \ + mh_sha256/mh_sha256.c \ + mh_sha256/mh_sha256_finalize_base.c \ + mh_sha256/mh_sha256_update_base.c \ + mh_sha256/mh_sha256_block_base.c + +lsrc_base_aliases += $(lsrc_sha256) \ + mh_sha256/mh_sha256_base_aliases.c \ + mh_sha256/mh_sha256.c \ + mh_sha256/mh_sha256_finalize_base.c \ + mh_sha256/mh_sha256_update_base.c \ + mh_sha256/mh_sha256_block_base.c + +src_include += -I $(srcdir)/mh_sha256 + +extern_hdrs += include/mh_sha256.h + +check_tests += mh_sha256/mh_sha256_test +unit_tests += mh_sha256/mh_sha256_update_test + +perf_tests += mh_sha256/mh_sha256_perf + + +mh_sha256_test: mh_sha256_ref.o +mh_sha256_mh_sha256_test_LDADD = mh_sha256/mh_sha256_ref.lo libisal_crypto.la + +mh_sha256_update_test: mh_sha256_ref.o +mh_sha256_mh_sha256_update_test_LDADD = mh_sha256/mh_sha256_ref.lo libisal_crypto.la + +mh_sha256_mh_sha256_perf_LDADD = libisal_crypto.la diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_aarch64_dispatcher.c new file mode 100644 index 000000000..155790fc1 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_aarch64_dispatcher.c @@ -0,0 +1,49 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(mh_sha256_update) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA2) + return PROVIDER_INFO(mh_sha256_update_ce); + + return PROVIDER_BASIC(mh_sha256_update); + +} + +DEFINE_INTERFACE_DISPATCHER(mh_sha256_finalize) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA2) + return PROVIDER_INFO(mh_sha256_finalize_ce); + + return PROVIDER_BASIC(mh_sha256_finalize); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_block_ce.S b/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_block_ce.S new file mode 100644 index 000000000..53a78ea7d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_block_ce.S @@ -0,0 +1,731 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
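The two dispatchers above probe the kernel-reported capability bits with getauxval(AT_HWCAP) and hand back the SHA-2 Crypto Extensions provider when the hardware has it, falling back to the portable base code otherwise. A minimal self-contained sketch of the same runtime-selection idea follows; the stub providers and resolve_update() are hypothetical stand-ins for what PROVIDER_INFO/PROVIDER_BASIC resolve to in aarch64_multibinary.h:

#include <stdint.h>
#include <sys/auxv.h>                 /* getauxval, AT_HWCAP */

#ifndef HWCAP_SHA2
#define HWCAP_SHA2 (1UL << 6)         /* Linux arm64 HWCAP bit for SHA-2 instructions */
#endif

typedef int (*update_fn)(void *ctx, const void *buf, uint32_t len);

/* hypothetical providers standing in for mh_sha256_update_ce/_base */
static int update_ce(void *c, const void *b, uint32_t l)   { (void)c; (void)b; (void)l; return 0; }
static int update_base(void *c, const void *b, uint32_t l) { (void)c; (void)b; (void)l; return 0; }

/* Same decision the DEFINE_INTERFACE_DISPATCHER bodies make above. */
static update_fn resolve_update(void)
{
	unsigned long hwcap = getauxval(AT_HWCAP);
	return (hwcap & HWCAP_SHA2) ? update_ce : update_base;
}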
+**********************************************************************/ + .arch armv8-a+crypto + .text + .align 6 + + .global mh_sha256_block_ce + .type mh_sha256_block_ce, %function + +/* +Macros +*/ + +.macro declare_vector_reg name:req,reg:req,default:req + \name .req \default\reg + q_\name .req q\reg + v_\name .req v\reg + s_\name .req s\reg +.endm + +declare_vector_reg lane0_msg0, 0,v +declare_vector_reg lane1_msg0, 1,v +declare_vector_reg lane2_msg0, 2,v +declare_vector_reg lane3_msg0, 3,v + +declare_vector_reg lane0_msg1, 4,v +declare_vector_reg lane1_msg1, 5,v +declare_vector_reg lane2_msg1, 6,v +declare_vector_reg lane3_msg1, 7,v + +declare_vector_reg lane0_msg2, 8,v +declare_vector_reg lane1_msg2, 9,v +declare_vector_reg lane2_msg2, 10,v +declare_vector_reg lane3_msg2, 11,v + +declare_vector_reg lane0_msg3, 12,v +declare_vector_reg lane1_msg3, 13,v +declare_vector_reg lane2_msg3, 14,v +declare_vector_reg lane3_msg3, 15,v + +declare_vector_reg lane0_state0, 16,v +declare_vector_reg lane1_state0, 17,v +declare_vector_reg lane2_state0, 18,v +declare_vector_reg lane3_state0, 19,v + +declare_vector_reg lane0_state1, 20,v +declare_vector_reg lane1_state1, 21,v +declare_vector_reg lane2_state1, 22,v +declare_vector_reg lane3_state1, 23,v + +declare_vector_reg lane0_tmp0, 24,v +declare_vector_reg lane1_tmp0, 25,v +declare_vector_reg lane2_tmp0, 26,v +declare_vector_reg lane3_tmp0, 27,v + +declare_vector_reg lane0_tmp2, 28,v +declare_vector_reg lane1_tmp2, 29,v +declare_vector_reg lane2_tmp2, 30,v +declare_vector_reg lane3_tmp2, 31,v + +declare_vector_reg key, 27,v +declare_vector_reg tmp, 29,v + +/* +void mh_sha256_block_ce(const uint8_t * input_data, + uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], + uint32_t num_blocks); +*/ + x_input_data .req x0 + x_digests .req x1 + x_frame_buffer .req x2 + w_num_blocks .req w3 + + x_digest_addr .req x4 + x_key_addr .req x5 + x_msg_addr .req x6 + x_lane_offs .req x7 + x_offs .req x9 + w_input_data_end .req w10 + x_input_data_end .req x10 + x_tmp .req x11 +mh_sha256_block_ce: + cbz w_num_blocks, .exit + mov w_input_data_end, w_num_blocks + + ubfiz x_input_data_end, x_input_data_end, 10, 32 + add x_input_data_end, x_input_data, x_input_data_end + + adrp x_key_addr, .key_addr + add x_key_addr, x_key_addr, :lo12:.key_addr + + stp d8, d9, [sp, -192]! 
+ + stp d10, d11, [sp, 16] + stp d12, d13, [sp, 32] + stp d14, d15, [sp, 48] + + .p2align 3,,7 +.start_loop: + mov x_lane_offs, 0 + mov x_digest_addr, x_digests + +.lane_loop: + add x_msg_addr, x_input_data, x_lane_offs, lsl 2 + + .p2align 3,,7 + mov x_offs, 64 + mov x_tmp, x_digest_addr + ld4 {v_lane0_state0.S-v_lane3_state0.S}[0], [x_tmp], x_offs + ld4 {v_lane0_state0.S-v_lane3_state0.S}[1], [x_tmp], x_offs + ld4 {v_lane0_state0.S-v_lane3_state0.S}[2], [x_tmp], x_offs + ld4 {v_lane0_state0.S-v_lane3_state0.S}[3], [x_tmp], x_offs + + add x_tmp, x_digest_addr, 256 + ld4 {v_lane0_state1.S-v_lane3_state1.S}[0], [x_tmp], x_offs + ld4 {v_lane0_state1.S-v_lane3_state1.S}[1], [x_tmp], x_offs + ld4 {v_lane0_state1.S-v_lane3_state1.S}[2], [x_tmp], x_offs + ld4 {v_lane0_state1.S-v_lane3_state1.S}[3], [x_tmp], x_offs + + ld4 {v_lane0_msg0.S-v_lane3_msg0.S}[0], [x_msg_addr], x_offs + ld4 {v_lane0_msg0.S-v_lane3_msg0.S}[1], [x_msg_addr], x_offs + ld4 {v_lane0_msg0.S-v_lane3_msg0.S}[2], [x_msg_addr], x_offs + ld4 {v_lane0_msg0.S-v_lane3_msg0.S}[3], [x_msg_addr], x_offs + + ld4 {v_lane0_msg1.S-v_lane3_msg1.S}[0], [x_msg_addr], x_offs + ld4 {v_lane0_msg1.S-v_lane3_msg1.S}[1], [x_msg_addr], x_offs + ld4 {v_lane0_msg1.S-v_lane3_msg1.S}[2], [x_msg_addr], x_offs + ld4 {v_lane0_msg1.S-v_lane3_msg1.S}[3], [x_msg_addr], x_offs + + ld4 {v_lane0_msg2.S-v_lane3_msg2.S}[0], [x_msg_addr], x_offs + ld4 {v_lane0_msg2.S-v_lane3_msg2.S}[1], [x_msg_addr], x_offs + ld4 {v_lane0_msg2.S-v_lane3_msg2.S}[2], [x_msg_addr], x_offs + ld4 {v_lane0_msg2.S-v_lane3_msg2.S}[3], [x_msg_addr], x_offs + + ld4 {v_lane0_msg3.S-v_lane3_msg3.S}[0], [x_msg_addr], x_offs + ld4 {v_lane0_msg3.S-v_lane3_msg3.S}[1], [x_msg_addr], x_offs + ld4 {v_lane0_msg3.S-v_lane3_msg3.S}[2], [x_msg_addr], x_offs + ld4 {v_lane0_msg3.S-v_lane3_msg3.S}[3], [x_msg_addr], x_offs + + // reverse for little endian + rev32 v_lane0_msg0.16b, v_lane0_msg0.16b + rev32 v_lane1_msg0.16b, v_lane1_msg0.16b + rev32 v_lane2_msg0.16b, v_lane2_msg0.16b + rev32 v_lane3_msg0.16b, v_lane3_msg0.16b + + rev32 v_lane0_msg1.16b, v_lane0_msg1.16b + rev32 v_lane1_msg1.16b, v_lane1_msg1.16b + rev32 v_lane2_msg1.16b, v_lane2_msg1.16b + rev32 v_lane3_msg1.16b, v_lane3_msg1.16b + + rev32 v_lane0_msg2.16b, v_lane0_msg2.16b + rev32 v_lane1_msg2.16b, v_lane1_msg2.16b + rev32 v_lane2_msg2.16b, v_lane2_msg2.16b + rev32 v_lane3_msg2.16b, v_lane3_msg2.16b + + rev32 v_lane0_msg3.16b, v_lane0_msg3.16b + rev32 v_lane1_msg3.16b, v_lane1_msg3.16b + rev32 v_lane2_msg3.16b, v_lane2_msg3.16b + rev32 v_lane3_msg3.16b, v_lane3_msg3.16b + + // rounds 0-3 + ldr q_key, [x_key_addr] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg0.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg0.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg0.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg0.4s + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + str q_lane0_state1, [sp, 64] + str q_lane1_state1, [sp, 80] + str q_lane2_state1, [sp, 96] + str q_lane3_state1, [sp, 112] + + mov x_offs, 64 + mov x_tmp, x_digest_addr + ld4 {v_lane0_tmp2.S-v_lane3_tmp2.S}[0], [x_tmp], x_offs + ld4 {v_lane0_tmp2.S-v_lane3_tmp2.S}[1], [x_tmp], x_offs + ld4 {v_lane0_tmp2.S-v_lane3_tmp2.S}[2], [x_tmp], x_offs + ld4 {v_lane0_tmp2.S-v_lane3_tmp2.S}[3], [x_tmp], x_offs + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, 
v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg0.4s, v_lane0_msg1.4s + sha256su0 v_lane1_msg0.4s, v_lane1_msg1.4s + sha256su0 v_lane2_msg0.4s, v_lane2_msg1.4s + sha256su0 v_lane3_msg0.4s, v_lane3_msg1.4s + + sha256su1 v_lane0_msg0.4s, v_lane0_msg2.4s, v_lane0_msg3.4s + sha256su1 v_lane1_msg0.4s, v_lane1_msg2.4s, v_lane1_msg3.4s + sha256su1 v_lane2_msg0.4s, v_lane2_msg2.4s, v_lane2_msg3.4s + sha256su1 v_lane3_msg0.4s, v_lane3_msg2.4s, v_lane3_msg3.4s + + // rounds 4-7 + ldr q_key, [x_key_addr, 16] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg1.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg1.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg1.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg1.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg1.4s, v_lane0_msg2.4s + sha256su0 v_lane1_msg1.4s, v_lane1_msg2.4s + sha256su0 v_lane2_msg1.4s, v_lane2_msg2.4s + sha256su0 v_lane3_msg1.4s, v_lane3_msg2.4s + + sha256su1 v_lane0_msg1.4s, v_lane0_msg3.4s, v_lane0_msg0.4s + sha256su1 v_lane1_msg1.4s, v_lane1_msg3.4s, v_lane1_msg0.4s + sha256su1 v_lane2_msg1.4s, v_lane2_msg3.4s, v_lane2_msg0.4s + sha256su1 v_lane3_msg1.4s, v_lane3_msg3.4s, v_lane3_msg0.4s + + // rounds 8-11 + ldr q_key, [x_key_addr, 32] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg2.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg2.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg2.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg2.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg2.4s, v_lane0_msg3.4s + sha256su0 v_lane1_msg2.4s, v_lane1_msg3.4s + sha256su0 v_lane2_msg2.4s, v_lane2_msg3.4s + sha256su0 v_lane3_msg2.4s, v_lane3_msg3.4s + + sha256su1 v_lane0_msg2.4s, v_lane0_msg0.4s, v_lane0_msg1.4s + sha256su1 v_lane1_msg2.4s, v_lane1_msg0.4s, v_lane1_msg1.4s + sha256su1 v_lane2_msg2.4s, v_lane2_msg0.4s, v_lane2_msg1.4s + sha256su1 v_lane3_msg2.4s, v_lane3_msg0.4s, v_lane3_msg1.4s + + // rounds 12-15 + ldr q_key, [x_key_addr, 48] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg3.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg3.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg3.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg3.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, 
v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg3.4s, v_lane0_msg0.4s + sha256su0 v_lane1_msg3.4s, v_lane1_msg0.4s + sha256su0 v_lane2_msg3.4s, v_lane2_msg0.4s + sha256su0 v_lane3_msg3.4s, v_lane3_msg0.4s + + sha256su1 v_lane0_msg3.4s, v_lane0_msg1.4s, v_lane0_msg2.4s + sha256su1 v_lane1_msg3.4s, v_lane1_msg1.4s, v_lane1_msg2.4s + sha256su1 v_lane2_msg3.4s, v_lane2_msg1.4s, v_lane2_msg2.4s + sha256su1 v_lane3_msg3.4s, v_lane3_msg1.4s, v_lane3_msg2.4s + + // rounds 16-19 + ldr q_key, [x_key_addr, 64] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg0.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg0.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg0.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg0.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg0.4s, v_lane0_msg1.4s + sha256su0 v_lane1_msg0.4s, v_lane1_msg1.4s + sha256su0 v_lane2_msg0.4s, v_lane2_msg1.4s + sha256su0 v_lane3_msg0.4s, v_lane3_msg1.4s + + sha256su1 v_lane0_msg0.4s, v_lane0_msg2.4s, v_lane0_msg3.4s + sha256su1 v_lane1_msg0.4s, v_lane1_msg2.4s, v_lane1_msg3.4s + sha256su1 v_lane2_msg0.4s, v_lane2_msg2.4s, v_lane2_msg3.4s + sha256su1 v_lane3_msg0.4s, v_lane3_msg2.4s, v_lane3_msg3.4s + + // rounds 20-23 + ldr q_key, [x_key_addr, 80] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg1.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg1.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg1.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg1.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg1.4s, v_lane0_msg2.4s + sha256su0 v_lane1_msg1.4s, v_lane1_msg2.4s + sha256su0 v_lane2_msg1.4s, v_lane2_msg2.4s + sha256su0 v_lane3_msg1.4s, v_lane3_msg2.4s + + sha256su1 v_lane0_msg1.4s, v_lane0_msg3.4s, v_lane0_msg0.4s + sha256su1 v_lane1_msg1.4s, v_lane1_msg3.4s, v_lane1_msg0.4s + sha256su1 v_lane2_msg1.4s, v_lane2_msg3.4s, v_lane2_msg0.4s + sha256su1 
v_lane3_msg1.4s, v_lane3_msg3.4s, v_lane3_msg0.4s + + // rounds 24-27 + ldr q_key, [x_key_addr, 96] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg2.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg2.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg2.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg2.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg2.4s, v_lane0_msg3.4s + sha256su0 v_lane1_msg2.4s, v_lane1_msg3.4s + sha256su0 v_lane2_msg2.4s, v_lane2_msg3.4s + sha256su0 v_lane3_msg2.4s, v_lane3_msg3.4s + + sha256su1 v_lane0_msg2.4s, v_lane0_msg0.4s, v_lane0_msg1.4s + sha256su1 v_lane1_msg2.4s, v_lane1_msg0.4s, v_lane1_msg1.4s + sha256su1 v_lane2_msg2.4s, v_lane2_msg0.4s, v_lane2_msg1.4s + sha256su1 v_lane3_msg2.4s, v_lane3_msg0.4s, v_lane3_msg1.4s + + // rounds 28-31 + ldr q_key, [x_key_addr, 112] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg3.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg3.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg3.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg3.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg3.4s, v_lane0_msg0.4s + sha256su0 v_lane1_msg3.4s, v_lane1_msg0.4s + sha256su0 v_lane2_msg3.4s, v_lane2_msg0.4s + sha256su0 v_lane3_msg3.4s, v_lane3_msg0.4s + + sha256su1 v_lane0_msg3.4s, v_lane0_msg1.4s, v_lane0_msg2.4s + sha256su1 v_lane1_msg3.4s, v_lane1_msg1.4s, v_lane1_msg2.4s + sha256su1 v_lane2_msg3.4s, v_lane2_msg1.4s, v_lane2_msg2.4s + sha256su1 v_lane3_msg3.4s, v_lane3_msg1.4s, v_lane3_msg2.4s + + // rounds 32-35 + ldr q_key, [x_key_addr, 128] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg0.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg0.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg0.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg0.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, 
v_lane3_tmp0.4s + + sha256su0 v_lane0_msg0.4s, v_lane0_msg1.4s + sha256su0 v_lane1_msg0.4s, v_lane1_msg1.4s + sha256su0 v_lane2_msg0.4s, v_lane2_msg1.4s + sha256su0 v_lane3_msg0.4s, v_lane3_msg1.4s + + sha256su1 v_lane0_msg0.4s, v_lane0_msg2.4s, v_lane0_msg3.4s + sha256su1 v_lane1_msg0.4s, v_lane1_msg2.4s, v_lane1_msg3.4s + sha256su1 v_lane2_msg0.4s, v_lane2_msg2.4s, v_lane2_msg3.4s + sha256su1 v_lane3_msg0.4s, v_lane3_msg2.4s, v_lane3_msg3.4s + + // rounds 36-39 + ldr q_key, [x_key_addr, 144] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg1.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg1.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg1.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg1.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg1.4s, v_lane0_msg2.4s + sha256su0 v_lane1_msg1.4s, v_lane1_msg2.4s + sha256su0 v_lane2_msg1.4s, v_lane2_msg2.4s + sha256su0 v_lane3_msg1.4s, v_lane3_msg2.4s + + sha256su1 v_lane0_msg1.4s, v_lane0_msg3.4s, v_lane0_msg0.4s + sha256su1 v_lane1_msg1.4s, v_lane1_msg3.4s, v_lane1_msg0.4s + sha256su1 v_lane2_msg1.4s, v_lane2_msg3.4s, v_lane2_msg0.4s + sha256su1 v_lane3_msg1.4s, v_lane3_msg3.4s, v_lane3_msg0.4s + + // rounds 40-43 + ldr q_key, [x_key_addr, 160] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg2.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg2.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg2.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg2.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg2.4s, v_lane0_msg3.4s + sha256su0 v_lane1_msg2.4s, v_lane1_msg3.4s + sha256su0 v_lane2_msg2.4s, v_lane2_msg3.4s + sha256su0 v_lane3_msg2.4s, v_lane3_msg3.4s + + sha256su1 v_lane0_msg2.4s, v_lane0_msg0.4s, v_lane0_msg1.4s + sha256su1 v_lane1_msg2.4s, v_lane1_msg0.4s, v_lane1_msg1.4s + sha256su1 v_lane2_msg2.4s, v_lane2_msg0.4s, v_lane2_msg1.4s + sha256su1 v_lane3_msg2.4s, v_lane3_msg0.4s, v_lane3_msg1.4s + + // rounds 44-47 + ldr q_key, [x_key_addr, 176] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg3.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg3.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg3.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg3.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, 
v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + sha256su0 v_lane0_msg3.4s, v_lane0_msg0.4s + sha256su0 v_lane1_msg3.4s, v_lane1_msg0.4s + sha256su0 v_lane2_msg3.4s, v_lane2_msg0.4s + sha256su0 v_lane3_msg3.4s, v_lane3_msg0.4s + + sha256su1 v_lane0_msg3.4s, v_lane0_msg1.4s, v_lane0_msg2.4s + sha256su1 v_lane1_msg3.4s, v_lane1_msg1.4s, v_lane1_msg2.4s + sha256su1 v_lane2_msg3.4s, v_lane2_msg1.4s, v_lane2_msg2.4s + sha256su1 v_lane3_msg3.4s, v_lane3_msg1.4s, v_lane3_msg2.4s + + // rounds 48-51 + ldr q_key, [x_key_addr, 192] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg0.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg0.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg0.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg0.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + // rounds 52-55 + ldr q_key, [x_key_addr, 208] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg1.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg1.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg1.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg1.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + // rounds 56-59 + ldr q_key, [x_key_addr, 224] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg2.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg2.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg2.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg2.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + // rounds 60-63 + ldr q_key, [x_key_addr, 
240] + add v_lane0_tmp0.4s, v_key.4s, v_lane0_msg3.4s + add v_lane1_tmp0.4s, v_key.4s, v_lane1_msg3.4s + add v_lane2_tmp0.4s, v_key.4s, v_lane2_msg3.4s + add v_lane3_tmp0.4s, v_key.4s, v_lane3_msg3.4s + + mov v_lane0_tmp2.16b, v_lane0_state0.16b + mov v_lane1_tmp2.16b, v_lane1_state0.16b + mov v_lane2_tmp2.16b, v_lane2_state0.16b + mov v_lane3_tmp2.16b, v_lane3_state0.16b + + sha256h q_lane0_state0, q_lane0_state1, v_lane0_tmp0.4s + sha256h q_lane1_state0, q_lane1_state1, v_lane1_tmp0.4s + sha256h q_lane2_state0, q_lane2_state1, v_lane2_tmp0.4s + sha256h q_lane3_state0, q_lane3_state1, v_lane3_tmp0.4s + + sha256h2 q_lane0_state1, q_lane0_tmp2, v_lane0_tmp0.4s + sha256h2 q_lane1_state1, q_lane1_tmp2, v_lane1_tmp0.4s + sha256h2 q_lane2_state1, q_lane2_tmp2, v_lane2_tmp0.4s + sha256h2 q_lane3_state1, q_lane3_tmp2, v_lane3_tmp0.4s + + mov x_offs, 64 + mov x_tmp, x_digest_addr + ld4 {v_lane0_tmp0.S-v_lane3_tmp0.S}[0], [x_tmp], x_offs + ld4 {v_lane0_tmp0.S-v_lane3_tmp0.S}[1], [x_tmp], x_offs + ld4 {v_lane0_tmp0.S-v_lane3_tmp0.S}[2], [x_tmp], x_offs + ld4 {v_lane0_tmp0.S-v_lane3_tmp0.S}[3], [x_tmp], x_offs + + add v_lane0_state0.4s, v_lane0_tmp0.4s, v_lane0_state0.4s + add v_lane1_state0.4s, v_lane1_tmp0.4s, v_lane1_state0.4s + add v_lane2_state0.4s, v_lane2_tmp0.4s, v_lane2_state0.4s + add v_lane3_state0.4s, v_lane3_tmp0.4s, v_lane3_state0.4s + + mov x_offs, 64 + mov x_tmp, x_digest_addr + st4 {v_lane0_state0.S-v_lane3_state0.S}[0], [x_tmp], x_offs + st4 {v_lane0_state0.S-v_lane3_state0.S}[1], [x_tmp], x_offs + st4 {v_lane0_state0.S-v_lane3_state0.S}[2], [x_tmp], x_offs + st4 {v_lane0_state0.S-v_lane3_state0.S}[3], [x_tmp], x_offs + + ldp q_lane0_tmp2, q_lane1_tmp2, [sp, 64] + ldp q_lane2_tmp2, q_lane3_tmp2, [sp, 96] + + add v_lane0_state1.4s, v_lane0_tmp2.4s, v_lane0_state1.4s + add v_lane1_state1.4s, v_lane1_tmp2.4s, v_lane1_state1.4s + add v_lane2_state1.4s, v_lane2_tmp2.4s, v_lane2_state1.4s + add v_lane3_state1.4s, v_lane3_tmp2.4s, v_lane3_state1.4s + + mov x_offs, 64 + add x_tmp, x_digest_addr, 256 + st4 {v_lane0_state1.S-v_lane3_state1.S}[0], [x_tmp], x_offs + st4 {v_lane0_state1.S-v_lane3_state1.S}[1], [x_tmp], x_offs + st4 {v_lane0_state1.S-v_lane3_state1.S}[2], [x_tmp], x_offs + st4 {v_lane0_state1.S-v_lane3_state1.S}[3], [x_tmp], x_offs + + add x_digest_addr, x_digest_addr, 16 + add x_lane_offs, x_lane_offs, 4 + cmp x_lane_offs, 16 + bne .lane_loop + + add x_input_data, x_input_data, 1024 + cmp x_input_data, x_input_data_end + bne .start_loop + + ldp d10, d11, [sp, 16] + ldp d12, d13, [sp, 32] + ldp d14, d15, [sp, 48] + ldp d8, d9, [sp], 192 +.exit: + ret + .size mh_sha256_block_ce, .-mh_sha256_block_ce + + .section .rodata + .align 4 + .set .key_addr,. 
+ 0 + .type K, %object + .size K, 256 +K: + .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5 + .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174 + .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da + .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967 + .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85 + .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070 + .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3 + .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_ce.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_ce.c new file mode 100644 index 000000000..c42333ed5 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_ce.c @@ -0,0 +1,53 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
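As the prototype comment above shows, the block routine's digests argument is laid out word-major: digests[w][s] holds digest word w of segment s, so one 64-byte row carries the same word for all 16 segments, which is why the ld4/st4 sequences walk memory with a 64-byte stride (x_offs). A sketch of initializing that layout, assuming HASH_SEGS is 16, SHA256_DIGEST_WORDS is 8, and MH_SHA256_H0..H7 are the standard SHA-256 initial values (consistent with the loop in mh_sha256_init further down):

#include <stdint.h>

#define HASH_SEGS           16   /* assumed: 16 interleaved segments per 1 KB block */
#define SHA256_DIGEST_WORDS  8

static void init_seg_digests(uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS])
{
	/* standard SHA-256 H0..H7 */
	static const uint32_t H[8] = {
		0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a,
		0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
	};
	for (int w = 0; w < SHA256_DIGEST_WORDS; w++)
		for (int s = 0; s < HASH_SEGS; s++)
			digests[w][s] = H[w];  /* row w = word w for all 16 segments */
}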
+**********************************************************************/ +#include <string.h> +#include "mh_sha256_internal.h" + +void mh_sha256_block_ce(const uint8_t * input_data, + uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); +/***************mh_sha256_update***********/ +// mh_sha256_update_ce.c +#define MH_SHA256_UPDATE_FUNCTION mh_sha256_update_ce +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_ce +#include "mh_sha256_update_base.c" +#undef MH_SHA256_UPDATE_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +/***************mh_sha256_finalize AND mh_sha256_tail***********/ +// mh_sha256_tail is used to calculate the last incomplete src data block +// mh_sha256_finalize is a mh_sha256_ctx wrapper of mh_sha256_tail +// mh_sha256_finalize_ce.c and mh_sha256_tail_ce.c +#define MH_SHA256_FINALIZE_FUNCTION mh_sha256_finalize_ce +#define MH_SHA256_TAIL_FUNCTION mh_sha256_tail_ce +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_ce +#include "mh_sha256_finalize_base.c" +#undef MH_SHA256_FINALIZE_FUNCTION +#undef MH_SHA256_TAIL_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_multibinary.S b/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_multibinary.S new file mode 100644 index 000000000..54eece175 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/aarch64/mh_sha256_multibinary.S @@ -0,0 +1,35 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
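mh_sha256_ce.c above generates its _ce entry points without duplicating logic: it defines the name macros, re-includes the shared base sources, then #undefs them for the next variant. The sketch below shows the same one-body-many-names technique compressed into a single file via a macro (hypothetical names); the upstream code achieves it with #include instead so each variant can live in its own translation unit:

#include <stdio.h>

/* one shared body, stamped out under several names */
#define DEFINE_FOLD(NAME, STEP)                       \
	static int NAME(const int *v, int n)          \
	{                                             \
		int acc = 0;                          \
		for (int i = 0; i < n; i++)           \
			acc = STEP(acc, v[i]);        \
		return acc;                           \
	}

static int add_step(int a, int b) { return a + b; }
static int xor_step(int a, int b) { return a ^ b; }

DEFINE_FOLD(fold_add, add_step)  /* plays the role of mh_sha256_update_ce */
DEFINE_FOLD(fold_xor, xor_step)  /* ...a second variant, e.g. _base      */

int main(void)
{
	int v[] = { 1, 2, 3, 4 };
	printf("%d %d\n", fold_add(v, 4), fold_xor(v, 4));  /* 10 4 */
	return 0;
}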
+**********************************************************************/ + + +#include "aarch64_multibinary.h" + + +mbin_interface mh_sha256_update +mbin_interface mh_sha256_finalize diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256.c new file mode 100644 index 000000000..242c3e218 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256.c @@ -0,0 +1,143 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
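mbin_interface declares each exported symbol and wires it to the matching DEFINE_INTERFACE_DISPATCHER resolver from the dispatcher file above. A plausible C rendering of that lazy-binding shape, with hypothetical names (the real macro emits assembly and its exact mechanics live in aarch64_multibinary.h):

#include <stdint.h>

typedef int (*upd_fn)(void *ctx, const void *buf, uint32_t len);

static int upd_resolver(void *ctx, const void *buf, uint32_t len);
static upd_fn upd_ptr = upd_resolver;      /* starts at the resolver */

static int update_base(void *c, const void *b, uint32_t l)
{ (void)c; (void)b; (void)l; return 0; }   /* stand-in provider */

static int upd_resolver(void *ctx, const void *buf, uint32_t len)
{
	upd_ptr = update_base;             /* first call: pick a provider */
	return upd_ptr(ctx, buf, len);     /* then forward the call */
}

int mh_update(void *ctx, const void *buf, uint32_t len)
{
	return upd_ptr(ctx, buf, len);     /* later calls bypass the resolver */
}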
+**********************************************************************/ + +#include <string.h> +#include "mh_sha256_internal.h" + +int mh_sha256_init(struct mh_sha256_ctx *ctx) +{ + uint32_t(*mh_sha256_segs_digests)[HASH_SEGS]; + uint32_t i; + + if (ctx == NULL) + return MH_SHA256_CTX_ERROR_NULL; + + memset(ctx, 0, sizeof(*ctx)); + + mh_sha256_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha256_interim_digests; + for (i = 0; i < HASH_SEGS; i++) { + mh_sha256_segs_digests[0][i] = MH_SHA256_H0; + mh_sha256_segs_digests[1][i] = MH_SHA256_H1; + mh_sha256_segs_digests[2][i] = MH_SHA256_H2; + mh_sha256_segs_digests[3][i] = MH_SHA256_H3; + mh_sha256_segs_digests[4][i] = MH_SHA256_H4; + mh_sha256_segs_digests[5][i] = MH_SHA256_H5; + mh_sha256_segs_digests[6][i] = MH_SHA256_H6; + mh_sha256_segs_digests[7][i] = MH_SHA256_H7; + } + + return MH_SHA256_CTX_ERROR_NONE; +} + +#if (!defined(NOARCH)) && (defined(__i386__) || defined(__x86_64__) \ + || defined( _M_X64) || defined(_M_IX86)) +/***************mh_sha256_update***********/ +// mh_sha256_update_sse.c +#define MH_SHA256_UPDATE_FUNCTION mh_sha256_update_sse +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_sse +#include "mh_sha256_update_base.c" +#undef MH_SHA256_UPDATE_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +// mh_sha256_update_avx.c +#define MH_SHA256_UPDATE_FUNCTION mh_sha256_update_avx +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_avx +#include "mh_sha256_update_base.c" +#undef MH_SHA256_UPDATE_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +// mh_sha256_update_avx2.c +#define MH_SHA256_UPDATE_FUNCTION mh_sha256_update_avx2 +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_avx2 +#include "mh_sha256_update_base.c" +#undef MH_SHA256_UPDATE_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +/***************mh_sha256_finalize AND mh_sha256_tail***********/ +// mh_sha256_tail is used to calculate the last incomplete src data block +// mh_sha256_finalize is a mh_sha256_ctx wrapper of mh_sha256_tail + +// mh_sha256_finalize_sse.c and mh_sha256_tail_sse.c +#define MH_SHA256_FINALIZE_FUNCTION mh_sha256_finalize_sse +#define MH_SHA256_TAIL_FUNCTION mh_sha256_tail_sse +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_sse +#include "mh_sha256_finalize_base.c" +#undef MH_SHA256_FINALIZE_FUNCTION +#undef MH_SHA256_TAIL_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +// mh_sha256_finalize_avx.c and mh_sha256_tail_avx.c +#define MH_SHA256_FINALIZE_FUNCTION mh_sha256_finalize_avx +#define MH_SHA256_TAIL_FUNCTION mh_sha256_tail_avx +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_avx +#include "mh_sha256_finalize_base.c" +#undef MH_SHA256_FINALIZE_FUNCTION +#undef MH_SHA256_TAIL_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +// mh_sha256_finalize_avx2.c and mh_sha256_tail_avx2.c +#define MH_SHA256_FINALIZE_FUNCTION mh_sha256_finalize_avx2 +#define MH_SHA256_TAIL_FUNCTION mh_sha256_tail_avx2 +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_avx2 +#include "mh_sha256_finalize_base.c" +#undef MH_SHA256_FINALIZE_FUNCTION +#undef MH_SHA256_TAIL_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +/***************version info***********/ + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; +// Version info +struct slver mh_sha256_init_slver_000002b1; +struct slver mh_sha256_init_slver = { 0x02b1, 0x00, 0x00 }; + +// mh_sha256_update version info +struct slver mh_sha256_update_sse_slver_000002b4; +struct slver mh_sha256_update_sse_slver = { 0x02b4, 0x00, 0x00 }; + +struct slver mh_sha256_update_avx_slver_020002b6; +struct slver mh_sha256_update_avx_slver = { 
0x02b6, 0x00, 0x02 }; + +struct slver mh_sha256_update_avx2_slver_040002b8; +struct slver mh_sha256_update_avx2_slver = { 0x02b8, 0x00, 0x04 }; + +// mh_sha256_finalize version info +struct slver mh_sha256_finalize_sse_slver_000002b5; +struct slver mh_sha256_finalize_sse_slver = { 0x02b5, 0x00, 0x00 }; + +struct slver mh_sha256_finalize_avx_slver_020002b7; +struct slver mh_sha256_finalize_avx_slver = { 0x02b7, 0x00, 0x02 }; + +struct slver mh_sha256_finalize_avx2_slver_040002b9; +struct slver mh_sha256_finalize_avx2_slver = { 0x02b9, 0x00, 0x04 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_avx512.c new file mode 100644 index 000000000..35fb0fbad --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_avx512.c @@ -0,0 +1,70 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <string.h> +#include "mh_sha256_internal.h" + +#ifdef HAVE_AS_KNOWS_AVX512 + +/***************mh_sha256_update***********/ +// mh_sha256_update_avx512.c +#define MH_SHA256_UPDATE_FUNCTION mh_sha256_update_avx512 +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_avx512 +#include "mh_sha256_update_base.c" +#undef MH_SHA256_UPDATE_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +/***************mh_sha256_finalize AND mh_sha256_tail***********/ +// mh_sha256_tail is used to calculate the last incomplete src data block +// mh_sha256_finalize is a mh_sha256_ctx wrapper of mh_sha256_tail +// mh_sha256_finalize_avx512.c and mh_sha256_tail_avx512.c +#define MH_SHA256_FINALIZE_FUNCTION mh_sha256_finalize_avx512 +#define MH_SHA256_TAIL_FUNCTION mh_sha256_tail_avx512 +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_avx512 +#include "mh_sha256_finalize_base.c" +#undef MH_SHA256_FINALIZE_FUNCTION +#undef MH_SHA256_TAIL_FUNCTION +#undef MH_SHA256_BLOCK_FUNCTION + +/***************version info***********/ +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// mh_sha256_update version info +struct slver mh_sha256_update_avx512_slver_060002bc; +struct slver mh_sha256_update_avx512_slver = { 0x02bc, 0x00, 0x06 }; + +// mh_sha256_finalize version info +struct slver mh_sha256_finalize_avx512_slver_060002bd; +struct slver mh_sha256_finalize_avx512_slver = { 0x02bd, 0x00, 0x06 }; + +#endif // HAVE_AS_KNOWS_AVX512 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_base_aliases.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_base_aliases.c new file mode 100644 index 000000000..343ffb024 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_base_aliases.c @@ -0,0 +1,40 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
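The slver records here and in mh_sha256.c are version markers baked into the binary. Comparing the initializers with the symbol names, the numeric suffix appears to pack core, ver and snum in hex (e.g. 060002bc = core 0x06, ver 0x00, snum 0x02bc), with core 0x00/0x02/0x04/0x06 tracking the sse/avx/avx2/avx512 variants in the records above. A small check of that reading:

#include <stdint.h>
#include <stdio.h>

struct slver {                /* same shape as the records above */
	uint16_t snum;
	uint8_t  ver;
	uint8_t  core;
};

int main(void)
{
	/* mirrors mh_sha256_update_avx512_slver_060002bc */
	struct slver s = { 0x02bc, 0x00, 0x06 };
	printf("%02x%02x%04x\n", s.core, s.ver, s.snum);  /* prints 060002bc */
	return 0;
}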
+**********************************************************************/ +#include "mh_sha256_internal.h" +#include <string.h> +int mh_sha256_update(struct mh_sha256_ctx *ctx, const void *buffer, uint32_t len) +{ + return mh_sha256_update_base(ctx, buffer, len); + +} + +int mh_sha256_finalize(struct mh_sha256_ctx *ctx, void *mh_sha256_digest) +{ + return mh_sha256_finalize_base(ctx, mh_sha256_digest); +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx.asm new file mode 100644 index 000000000..c2eff350d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx.asm @@ -0,0 +1,557 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
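mh_sha256_base_aliases.c is the no-dispatcher build: on targets without a multibinary layer it binds the public mh_sha256_update/mh_sha256_finalize names straight to the _base implementations. Whichever binding is in effect, callers use the same three-call sequence; a usage sketch, assuming mh_sha256.h (installed via extern_hdrs in the Makefile above) exposes mh_sha256_ctx, SHA256_DIGEST_WORDS and the MH_SHA256_CTX_ERROR_* codes seen in these sources:

#include <stdio.h>
#include <stdint.h>
#include "mh_sha256.h"

int main(void)
{
	static const uint8_t data[] = "hello, mh_sha256";
	uint32_t digest[SHA256_DIGEST_WORDS];   /* 8 words = 32 bytes */
	struct mh_sha256_ctx ctx;

	if (mh_sha256_init(&ctx) != MH_SHA256_CTX_ERROR_NONE)
		return 1;
	mh_sha256_update(&ctx, data, sizeof(data) - 1);
	mh_sha256_finalize(&ctx, digest);

	for (int i = 0; i < SHA256_DIGEST_WORDS; i++)
		printf("%08x", digest[i]);
	putchar('\n');
	return 0;
}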
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA256 using AVX +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha256 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*8*16 ;BYTES*DWORDS*SEGS + +; Common definitions +%define ROUND tmp4 +%define TBL tmp5 + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define SZ 4 +%define SZ4 4*SZ +%define ROUNDS 64*SZ4 + +%define a xmm0 +%define b xmm1 +%define c xmm2 +%define d xmm3 +%define e xmm4 +%define f xmm5 +%define g xmm6 +%define h xmm7 + +%define a0 xmm8 +%define a1 xmm9 +%define a2 xmm10 + +%define TT0 xmm14 +%define TT1 xmm13 +%define TT2 xmm12 +%define TT3 xmm11 +%define TT4 xmm10 +%define TT5 xmm9 + +%define T1 xmm14 +%define TMP xmm15 + 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro ROTATE_ARGS 0 +%xdefine TMP_ h +%xdefine h g +%xdefine g f +%xdefine f e +%xdefine e d +%xdefine d c +%xdefine c b +%xdefine b a +%xdefine a TMP_ +%endm + +; PRORD reg, imm, tmp +%macro PRORD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpslld %%tmp, %%reg, (32-(%%imm)) + vpsrld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; non-destructive +; PRORD_nd reg, imm, tmp, src +%macro PRORD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpslld %%tmp, %%src, (32-(%%imm)) + vpsrld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; PRORD dst/src, amt +%macro PRORD 2 + PRORD %1, %2, TMP +%endmacro + +; PRORD_nd dst, src, amt +%macro PRORD_nd 3 + PRORD_nd %1, %3, TMP, %2 +%endmacro + +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_00_15_R 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + PRORD_nd a0, e, (11-6) ; sig1: a0 = (e >> 5) + + vpxor a2, f, g ; ch: a2 = f^g + vpand a2, e ; ch: a2 = (f^g)&e + vpxor a2, g ; a2 = ch + + PRORD_nd a1, e, 25 ; sig1: a1 = (e >> 25) + vmovdqa %%T1, [SZ4*(%%i&0xf) + %%data] + vpaddd %%T1, %%T1, [TBL + ROUND] ; T1 = W + K + vpxor a0, a0, e ; sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 ; sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddd h, h, a2 ; h = h + ch + PRORD_nd a2, a, (13-2) ; sig0: a2 = (a >> 11) + vpaddd h, h, %%T1 ; h = h + ch + W + K + vpxor a0, a0, a1 ; a0 = sigma1 + PRORD_nd a1, a, 22 ; sig0: a1 = (a >> 22) + vpxor %%T1, a, c ; maj: T1 = a^c + add ROUND, SZ4 ; ROUND++ + vpand %%T1, %%T1, b ; maj: T1 = (a^c)&b + vpaddd h, h, a0 + + vpaddd d, d, h + + vpxor a2, a2, a ; sig0: a2 = a ^ (a >> 11) + PRORD a2, 2 ; sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a2, a2, a1 ; a2 = sig0 + vpand a1, a, c ; maj: a1 = a&c + vpor a1, a1, %%T1 ; a1 = maj + vpaddd h, h, a1 ; h = h + ch + W + K + maj + vpaddd h, h, a2 ; h = h + ch + W + K + maj + sigma0 + + ROTATE_ARGS +%endm +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_00_15_W 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + PRORD_nd a0, e, (11-6) ; sig1: a0 = (e >> 5) + + vpxor a2, f, g ; ch: a2 = f^g + vpand a2, e ; ch: a2 = (f^g)&e + vpxor a2, g ; a2 = ch + + PRORD_nd a1, e, 25 ; sig1: a1 = (e >> 25) + vmovdqa [SZ4*(%%i&0xf) + %%data], %%T1 + vpaddd %%T1, %%T1, [TBL + ROUND] ; T1 = W + K + vpxor a0, a0, e ; sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 ; sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddd h, h, a2 ; h = h + ch + PRORD_nd a2, a, (13-2) ; sig0: a2 = (a >> 11) + vpaddd h, h, %%T1 ; h = h + ch + W + K + vpxor a0, a0, a1 ; a0 = sigma1 + PRORD_nd a1, a, 22 ; sig0: a1 = (a >> 22) + vpxor %%T1, a, c ; maj: T1 = a^c + add ROUND, SZ4 ; ROUND++ + vpand %%T1, %%T1, b ; maj: T1 = (a^c)&b + vpaddd h, h, a0 + + vpaddd d, d, h + + vpxor a2, a2, a ; sig0: a2 = a ^ (a >> 11) + PRORD a2, 2 ; sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a2, a2, a1 ; a2 = sig0 + vpand a1, a, c ; maj: a1 = a&c + vpor a1, a1, %%T1 ; a1 = maj + vpaddd h, h, a1 ; h = h + ch + W + K + maj + vpaddd h, h, a2 ; h = h + ch + W + K + maj + sigma0 + + ROTATE_ARGS +%endm + +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_16_XX 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + vmovdqa %%T1, [SZ4*((%%i-15)&0xf) + %%data] + vmovdqa a1, [SZ4*((%%i-2)&0xf) + %%data] + vmovdqa a0, %%T1 + PRORD %%T1, 18-7 + vmovdqa a2, a1 + PRORD a1, 19-17 + vpxor %%T1, %%T1, a0 + PRORD %%T1, 7 + vpxor a1, a1, a2 + PRORD a1, 17 + vpsrld a0, a0, 3 + 
vpxor %%T1, %%T1, a0 + vpsrld a2, a2, 10 + vpxor a1, a1, a2 + vpaddd %%T1, %%T1, [SZ4*((%%i-16)&0xf) + %%data] + vpaddd a1, a1, [SZ4*((%%i-7)&0xf) + %%data] + vpaddd %%T1, %%T1, a1 + + ROUND_00_15_W %%T1, %%i, %%data +%endm + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h7 | h7 | h7 | ...| h7 | | Ha| Hb | Hc |...| Hp | + +align 32 + +;void mh_sha256_block_avx(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, including segment digests (uint32_t digests[16][8]) +; arg 2 pointer to aligned_frame_buffer, which is used to hold the big-endian data. +; arg 3 number of 1KB blocks +; +mk_global mh_sha256_block_avx, function, internal +func(mh_sha256_block_avx) + endbranch + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 16 Bytes needed by avx + and rsp, ~0x0F + lea TBL,[TABLE] + + %assign I 0 ; copy segs_digests into stack + %rep 8 + VMOVPS a, [mh_digests_p + I*64 + 16*0] + VMOVPS b, [mh_digests_p + I*64 + 16*1] + VMOVPS c, [mh_digests_p + I*64 + 16*2] + VMOVPS d, [mh_digests_p + I*64 + 16*3] + + vmovdqa [rsp + I*64 + 16*0], a + vmovdqa [rsp + I*64 + 16*1], b + vmovdqa [rsp + I*64 + 16*2], c + vmovdqa [rsp + I*64 + 16*3], d + %assign I (I+1) + %endrep + +.block_loop: + ;transform to big-endian data and store on aligned_frame + vmovdqa TMP, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*8 to DWORD*4_SEGS*8*4 + %assign I 0 + %rep 16 + VMOVPS TT0,[mh_in_p + I*64+0*16] + VMOVPS TT1,[mh_in_p + I*64+1*16] + VMOVPS TT2,[mh_in_p + I*64+2*16] + VMOVPS TT3,[mh_in_p + I*64+3*16] + + vpshufb TT0, TMP + vmovdqa [mh_data_p +(I)*16 +0*256],TT0 + vpshufb TT1, TMP + vmovdqa [mh_data_p +(I)*16 +1*256],TT1 + vpshufb TT2, TMP + vmovdqa [mh_data_p +(I)*16 +2*256],TT2 + vpshufb TT3, TMP + vmovdqa [mh_data_p +(I)*16 +3*256],TT3 + %assign I (I+1) + %endrep + + mov mh_segs, 0 ;start from the first 4 segments + mov pref, 1024 ;avoid prefetching repeatedly + .segs_loop: + xor ROUND, ROUND + ;; Initialize digests + vmovdqa a, [rsp + 0*64 + mh_segs] + vmovdqa b, [rsp + 1*64 + mh_segs] + vmovdqa c, [rsp + 2*64 + mh_segs] + vmovdqa d, [rsp + 3*64 + mh_segs] + vmovdqa e, [rsp + 4*64 + mh_segs] + vmovdqa f, [rsp + 5*64 + mh_segs] + vmovdqa g, [rsp + 6*64 + mh_segs] + vmovdqa h, [rsp + 7*64 + mh_segs] + + %assign i 0 + %rep 4 + ROUND_00_15_R TT0, (i*4+0), mh_data_p + ROUND_00_15_R TT1, (i*4+1), mh_data_p + ROUND_00_15_R TT2, (i*4+2), mh_data_p + ROUND_00_15_R TT3, (i*4+3), mh_data_p + %assign i (i+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] + + %assign i 16 + %rep 48 + %if i = 48 + PREFETCH_X [mh_in_p + pref+128*1] + %endif + ROUND_16_XX T1, i, mh_data_p + %assign i (i+1) + %endrep + + ;; add old digest + vpaddd a, a, [rsp + 0*64 + mh_segs] + vpaddd b, b, [rsp + 1*64 + mh_segs] + vpaddd c, c, [rsp + 2*64 + mh_segs] + vpaddd d, d, [rsp + 3*64 + mh_segs] + vpaddd e, e, [rsp + 4*64 + mh_segs] + vpaddd f, f, [rsp + 5*64 + mh_segs] + vpaddd g, g, [rsp + 6*64 + mh_segs] + vpaddd h, h, [rsp + 7*64 + mh_segs] + + ; write out digests + vmovdqa [rsp + 0*64 + mh_segs], a + vmovdqa [rsp + 1*64 + mh_segs], b + vmovdqa [rsp + 2*64 + mh_segs], c + vmovdqa [rsp + 3*64 + mh_segs], d + vmovdqa [rsp + 4*64 +
mh_segs], e + vmovdqa [rsp + 5*64 + mh_segs], f + vmovdqa [rsp + 6*64 + mh_segs], g + vmovdqa [rsp + 7*64 + mh_segs], h + + add pref, 256 + add mh_data_p, 256 + add mh_segs, 16 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 8 + vmovdqa a, [rsp + I*64 + 16*0] + vmovdqa b, [rsp + I*64 + 16*1] + vmovdqa c, [rsp + I*64 + 16*2] + vmovdqa d, [rsp + I*64 + 16*3] + + VMOVPS [mh_digests_p + I*64 + 16*0], a + VMOVPS [mh_digests_p + I*64 + 16*1], b + VMOVPS [mh_digests_p + I*64 + 16*2], c + VMOVPS [mh_digests_p + I*64 + 16*3], d + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=64 + +align 64 +TABLE: + dq 0x428a2f98428a2f98, 0x428a2f98428a2f98 + dq 0x7137449171374491, 0x7137449171374491 + dq 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf + dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5 + dq 0x3956c25b3956c25b, 0x3956c25b3956c25b + dq 0x59f111f159f111f1, 0x59f111f159f111f1 + dq 0x923f82a4923f82a4, 0x923f82a4923f82a4 + dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5 + dq 0xd807aa98d807aa98, 0xd807aa98d807aa98 + dq 0x12835b0112835b01, 0x12835b0112835b01 + dq 0x243185be243185be, 0x243185be243185be + dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3 + dq 0x72be5d7472be5d74, 0x72be5d7472be5d74 + dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe + dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7 + dq 0xc19bf174c19bf174, 0xc19bf174c19bf174 + dq 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1 + dq 0xefbe4786efbe4786, 0xefbe4786efbe4786 + dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6 + dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc + dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f + dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa + dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc + dq 0x76f988da76f988da, 0x76f988da76f988da + dq 0x983e5152983e5152, 0x983e5152983e5152 + dq 0xa831c66da831c66d, 0xa831c66da831c66d + dq 0xb00327c8b00327c8, 0xb00327c8b00327c8 + dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7 + dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3 + dq 0xd5a79147d5a79147, 0xd5a79147d5a79147 + dq 0x06ca635106ca6351, 0x06ca635106ca6351 + dq 0x1429296714292967, 0x1429296714292967 + dq 0x27b70a8527b70a85, 0x27b70a8527b70a85 + dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138 + dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc + dq 0x53380d1353380d13, 0x53380d1353380d13 + dq 0x650a7354650a7354, 0x650a7354650a7354 + dq 0x766a0abb766a0abb, 0x766a0abb766a0abb + dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e + dq 0x92722c8592722c85, 0x92722c8592722c85 + dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1 + dq 0xa81a664ba81a664b, 0xa81a664ba81a664b + dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70 + dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3 + dq 0xd192e819d192e819, 0xd192e819d192e819 + dq 0xd6990624d6990624, 0xd6990624d6990624 + dq 0xf40e3585f40e3585, 0xf40e3585f40e3585 + dq 0x106aa070106aa070, 0x106aa070106aa070 + dq 0x19a4c11619a4c116, 0x19a4c11619a4c116 + dq 0x1e376c081e376c08, 0x1e376c081e376c08 + dq 0x2748774c2748774c, 0x2748774c2748774c + dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5 + dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3 + dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a + dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f + dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3 + dq 0x748f82ee748f82ee, 0x748f82ee748f82ee + dq 0x78a5636f78a5636f, 0x78a5636f78a5636f + dq 0x84c8781484c87814, 0x84c8781484c87814 + dq 0x8cc702088cc70208, 0x8cc702088cc70208 + dq 0x90befffa90befffa, 0x90befffa90befffa + dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb + dq 
0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7 + dq 0xc67178f2c67178f2, 0xc67178f2c67178f2 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx2.asm new file mode 100644 index 000000000..c2b3f2c59 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx2.asm @@ -0,0 +1,616 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA256 using AVX-2 +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha256 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*8*16 ;BYTES*DWORDS*SEGS + +; Common definitions +%define ROUND tmp4 +%define TBL tmp5 + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define SZ 4 +%define SZ8 8*SZ +%define ROUNDS 64*SZ8 + +%define a ymm0 +%define b ymm1 +%define c ymm2 +%define d ymm3 +%define e ymm4 +%define f ymm5 +%define g ymm6 +%define h ymm7 + +%define a0 ymm8 +%define a1 ymm9 +%define a2 ymm10 + +%define TT0 ymm14 +%define TT1 ymm13 +%define TT2 ymm12 +%define TT3 ymm11 +%define TT4 ymm10 +%define TT5 ymm9 + +%define T1 ymm14 +%define TMP ymm15 + 
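+; Note: each ymm register holds one SHA-256 working variable for 8 of the
+; 16 interleaved segments, so .segs_loop below makes two passes per 1KB
+; block (mh_segs = 0 then 32, the byte offsets of segments 0-7 and 8-15
+; within each 64-byte row of segs_digests).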
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro ROTATE_ARGS 0 +%xdefine TMP_ h +%xdefine h g +%xdefine g f +%xdefine f e +%xdefine e d +%xdefine d c +%xdefine c b +%xdefine b a +%xdefine a TMP_ +%endm + +; PRORD reg, imm, tmp +%macro PRORD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpslld %%tmp, %%reg, (32-(%%imm)) + vpsrld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; non-destructive +; PRORD_nd reg, imm, tmp, src +%macro PRORD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpslld %%tmp, %%src, (32-(%%imm)) + vpsrld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; PRORD dst/src, amt +%macro PRORD 2 + PRORD %1, %2, TMP +%endmacro + +; PRORD_nd dst, src, amt +%macro PRORD_nd 3 + PRORD_nd %1, %3, TMP, %2 +%endmacro + +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_00_15_R 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + PRORD_nd a0, e, (11-6) ; sig1: a0 = (e >> 5) + + vpxor a2, f, g ; ch: a2 = f^g + vpand a2, a2, e ; ch: a2 = (f^g)&e + vpxor a2, a2, g ; a2 = ch + + PRORD_nd a1, e, 25 ; sig1: a1 = (e >> 25) + vmovdqa %%T1, [SZ8*(%%i&0xf) + %%data] + vpaddd %%T1, %%T1, [TBL + ROUND] ; T1 = W + K + vpxor a0, a0, e ; sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 ; sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddd h, h, a2 ; h = h + ch + PRORD_nd a2, a, (13-2) ; sig0: a2 = (a >> 11) + vpaddd h, h, %%T1 ; h = h + ch + W + K + vpxor a0, a0, a1 ; a0 = sigma1 + PRORD_nd a1, a, 22 ; sig0: a1 = (a >> 22) + vpxor %%T1, a, c ; maj: T1 = a^c + add ROUND, SZ8 ; ROUND++ + vpand %%T1, %%T1, b ; maj: T1 = (a^c)&b + vpaddd h, h, a0 + + vpaddd d, d, h + + vpxor a2, a2, a ; sig0: a2 = a ^ (a >> 11) + PRORD a2, 2 ; sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a2, a2, a1 ; a2 = sig0 + vpand a1, a, c ; maj: a1 = a&c + vpor a1, a1, %%T1 ; a1 = maj + vpaddd h, h, a1 ; h = h + ch + W + K + maj + vpaddd h, h, a2 ; h = h + ch + W + K + maj + sigma0 + + ROTATE_ARGS +%endm + +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_00_15_W 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + PRORD_nd a0, e, (11-6) ; sig1: a0 = (e >> 5) + + vpxor a2, f, g ; ch: a2 = f^g + vpand a2, a2, e ; ch: a2 = (f^g)&e + vpxor a2, a2, g ; a2 = ch + + PRORD_nd a1, e, 25 ; sig1: a1 = (e >> 25) + vmovdqa [SZ8*(%%i&0xf) + %%data], %%T1 + vpaddd %%T1, %%T1, [TBL + ROUND] ; T1 = W + K + vpxor a0, a0, e ; sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 ; sig1: a0 = (e >> 6) ^ (e >> 11) + vpaddd h, h, a2 ; h = h + ch + PRORD_nd a2, a, (13-2) ; sig0: a2 = (a >> 11) + vpaddd h, h, %%T1 ; h = h + ch + W + K + vpxor a0, a0, a1 ; a0 = sigma1 + PRORD_nd a1, a, 22 ; sig0: a1 = (a >> 22) + vpxor %%T1, a, c ; maj: T1 = a^c + add ROUND, SZ8 ; ROUND++ + vpand %%T1, %%T1, b ; maj: T1 = (a^c)&b + vpaddd h, h, a0 + + vpaddd d, d, h + + vpxor a2, a2, a ; sig0: a2 = a ^ (a >> 11) + PRORD a2, 2 ; sig0: a2 = (a >> 2) ^ (a >> 13) + vpxor a2, a2, a1 ; a2 = sig0 + vpand a1, a, c ; maj: a1 = a&c + vpor a1, a1, %%T1 ; a1 = maj + vpaddd h, h, a1 ; h = h + ch + W + K + maj + vpaddd h, h, a2 ; h = h + ch + W + K + maj + sigma0 + + ROTATE_ARGS +%endm + +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_16_XX 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + vmovdqa %%T1, [SZ8*((%%i-15)&0xf) + %%data] + vmovdqa a1, [SZ8*((%%i-2)&0xf) + %%data] + vmovdqa a0, %%T1 + PRORD %%T1, 18-7 + vmovdqa a2, a1 + PRORD a1, 19-17 + vpxor %%T1, %%T1, a0 + PRORD %%T1, 7 + vpxor a1, a1, a2 + PRORD a1, 17 + 
vpsrld a0, a0, 3 + vpxor %%T1, %%T1, a0 + vpsrld a2, a2, 10 + vpxor a1, a1, a2 + vpaddd %%T1, %%T1, [SZ8*((%%i-16)&0xf) + %%data] + vpaddd a1, a1, [SZ8*((%%i-7)&0xf) + %%data] + vpaddd %%T1, %%T1, a1 + + ROUND_00_15_W %%T1, %%i, %%data +%endm + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h7 | h7 | h7 | ...| h7 | | Ha| Hb | Hc |...| Hp | + +align 32 + +;void mh_sha256_block_avx2(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, including segment digests (uint32_t digests[16][8]) +; arg 2 pointer to aligned_frame_buffer, which is used to hold the big-endian data. +; arg 3 number of 1KB blocks +; +mk_global mh_sha256_block_avx2, function, internal +func(mh_sha256_block_avx2) + endbranch + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 32 Bytes needed by avx2 + and rsp, ~0x1F + lea TBL,[TABLE] + + %assign I 0 ; copy segs_digests into stack + %rep 4 + VMOVPS a, [mh_digests_p + I*64*2 + 32*0] + VMOVPS b, [mh_digests_p + I*64*2 + 32*1] + VMOVPS c, [mh_digests_p + I*64*2 + 32*2] + VMOVPS d, [mh_digests_p + I*64*2 + 32*3] + + vmovdqa [rsp + I*64*2 + 32*0], a + vmovdqa [rsp + I*64*2 + 32*1], b + vmovdqa [rsp + I*64*2 + 32*2], c + vmovdqa [rsp + I*64*2 + 32*3], d + %assign I (I+1) + %endrep + +.block_loop: + ;transform to big-endian data and store on aligned_frame + vmovdqa TMP, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*8 to DWORD*8_SEGS*8*2 + %assign I 0 + %rep 16 + VMOVPS TT0,[mh_in_p + I*64+0*32] + VMOVPS TT1,[mh_in_p + I*64+1*32] + + vpshufb TT0, TT0, TMP + vmovdqa [mh_data_p +I*32 +0*512],TT0 + vpshufb TT1, TT1, TMP + vmovdqa [mh_data_p +I*32 +1*512],TT1 + %assign I (I+1) + %endrep + + mov mh_segs, 0 ;start from the first 8 segments + mov pref, 1024 ;avoid prefetching repeatedly + .segs_loop: + xor ROUND, ROUND + ;; Initialize digests + vmovdqa a, [rsp + 0*64 + mh_segs] + vmovdqa b, [rsp + 1*64 + mh_segs] + vmovdqa c, [rsp + 2*64 + mh_segs] + vmovdqa d, [rsp + 3*64 + mh_segs] + vmovdqa e, [rsp + 4*64 + mh_segs] + vmovdqa f, [rsp + 5*64 + mh_segs] + vmovdqa g, [rsp + 6*64 + mh_segs] + vmovdqa h, [rsp + 7*64 + mh_segs] + + %assign i 0 + %rep 4 + ROUND_00_15_R TT0, (i*4+0), mh_data_p + ROUND_00_15_R TT1, (i*4+1), mh_data_p + ROUND_00_15_R TT2, (i*4+2), mh_data_p + ROUND_00_15_R TT3, (i*4+3), mh_data_p + %assign i (i+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] + + %assign i 16 + %rep 48 + ROUND_16_XX T1, i, mh_data_p + %if i % 16 = 8 + PREFETCH_X [mh_in_p + pref+128*(i/16)] + %endif + %assign i (i+1) + %endrep + + ;; add old digest + vpaddd a, a, [rsp + 0*64 + mh_segs] + vpaddd b, b, [rsp + 1*64 + mh_segs] + vpaddd c, c, [rsp + 2*64 + mh_segs] + vpaddd d, d, [rsp + 3*64 + mh_segs] + vpaddd e, e, [rsp + 4*64 + mh_segs] + vpaddd f, f, [rsp + 5*64 + mh_segs] + vpaddd g, g, [rsp + 6*64 + mh_segs] + vpaddd h, h, [rsp + 7*64 + mh_segs] + + ; write out digests + vmovdqa [rsp + 0*64 + mh_segs], a + vmovdqa [rsp + 1*64 + mh_segs], b + vmovdqa [rsp + 2*64 + mh_segs], c + vmovdqa [rsp + 3*64 + mh_segs], d + vmovdqa [rsp + 4*64 + mh_segs], e + vmovdqa [rsp + 5*64 + mh_segs], f + vmovdqa [rsp + 6*64 + mh_segs], g + vmovdqa [rsp + 7*64 + mh_segs], h + + add pref, 512 +
add mh_data_p, 512 + add mh_segs, 32 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 4 + vmovdqa a, [rsp + I*64*2 + 32*0] + vmovdqa b, [rsp + I*64*2 + 32*1] + vmovdqa c, [rsp + I*64*2 + 32*2] + vmovdqa d, [rsp + I*64*2 + 32*3] + + VMOVPS [mh_digests_p + I*64*2 + 32*0], a + VMOVPS [mh_digests_p + I*64*2 + 32*1], b + VMOVPS [mh_digests_p + I*64*2 + 32*2], c + VMOVPS [mh_digests_p + I*64*2 + 32*3], d + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=64 + +align 64 +TABLE: + dq 0x428a2f98428a2f98, 0x428a2f98428a2f98 + dq 0x428a2f98428a2f98, 0x428a2f98428a2f98 + dq 0x7137449171374491, 0x7137449171374491 + dq 0x7137449171374491, 0x7137449171374491 + dq 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf + dq 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf + dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5 + dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5 + dq 0x3956c25b3956c25b, 0x3956c25b3956c25b + dq 0x3956c25b3956c25b, 0x3956c25b3956c25b + dq 0x59f111f159f111f1, 0x59f111f159f111f1 + dq 0x59f111f159f111f1, 0x59f111f159f111f1 + dq 0x923f82a4923f82a4, 0x923f82a4923f82a4 + dq 0x923f82a4923f82a4, 0x923f82a4923f82a4 + dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5 + dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5 + dq 0xd807aa98d807aa98, 0xd807aa98d807aa98 + dq 0xd807aa98d807aa98, 0xd807aa98d807aa98 + dq 0x12835b0112835b01, 0x12835b0112835b01 + dq 0x12835b0112835b01, 0x12835b0112835b01 + dq 0x243185be243185be, 0x243185be243185be + dq 0x243185be243185be, 0x243185be243185be + dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3 + dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3 + dq 0x72be5d7472be5d74, 0x72be5d7472be5d74 + dq 0x72be5d7472be5d74, 0x72be5d7472be5d74 + dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe + dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe + dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7 + dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7 + dq 0xc19bf174c19bf174, 0xc19bf174c19bf174 + dq 0xc19bf174c19bf174, 0xc19bf174c19bf174 + dq 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1 + dq 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1 + dq 0xefbe4786efbe4786, 0xefbe4786efbe4786 + dq 0xefbe4786efbe4786, 0xefbe4786efbe4786 + dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6 + dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6 + dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc + dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc + dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f + dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f + dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa + dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa + dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc + dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc + dq 0x76f988da76f988da, 0x76f988da76f988da + dq 0x76f988da76f988da, 0x76f988da76f988da + dq 0x983e5152983e5152, 0x983e5152983e5152 + dq 0x983e5152983e5152, 0x983e5152983e5152 + dq 0xa831c66da831c66d, 0xa831c66da831c66d + dq 0xa831c66da831c66d, 0xa831c66da831c66d + dq 0xb00327c8b00327c8, 0xb00327c8b00327c8 + dq 0xb00327c8b00327c8, 0xb00327c8b00327c8 + dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7 + dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7 + dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3 + dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3 + dq 0xd5a79147d5a79147, 0xd5a79147d5a79147 + dq 0xd5a79147d5a79147, 0xd5a79147d5a79147 + dq 0x06ca635106ca6351, 0x06ca635106ca6351 + dq 0x06ca635106ca6351, 0x06ca635106ca6351 + dq 0x1429296714292967, 0x1429296714292967 + dq 0x1429296714292967, 0x1429296714292967 + dq 0x27b70a8527b70a85, 0x27b70a8527b70a85 + dq 
0x27b70a8527b70a85, 0x27b70a8527b70a85 + dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138 + dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138 + dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc + dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc + dq 0x53380d1353380d13, 0x53380d1353380d13 + dq 0x53380d1353380d13, 0x53380d1353380d13 + dq 0x650a7354650a7354, 0x650a7354650a7354 + dq 0x650a7354650a7354, 0x650a7354650a7354 + dq 0x766a0abb766a0abb, 0x766a0abb766a0abb + dq 0x766a0abb766a0abb, 0x766a0abb766a0abb + dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e + dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e + dq 0x92722c8592722c85, 0x92722c8592722c85 + dq 0x92722c8592722c85, 0x92722c8592722c85 + dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1 + dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1 + dq 0xa81a664ba81a664b, 0xa81a664ba81a664b + dq 0xa81a664ba81a664b, 0xa81a664ba81a664b + dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70 + dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70 + dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3 + dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3 + dq 0xd192e819d192e819, 0xd192e819d192e819 + dq 0xd192e819d192e819, 0xd192e819d192e819 + dq 0xd6990624d6990624, 0xd6990624d6990624 + dq 0xd6990624d6990624, 0xd6990624d6990624 + dq 0xf40e3585f40e3585, 0xf40e3585f40e3585 + dq 0xf40e3585f40e3585, 0xf40e3585f40e3585 + dq 0x106aa070106aa070, 0x106aa070106aa070 + dq 0x106aa070106aa070, 0x106aa070106aa070 + dq 0x19a4c11619a4c116, 0x19a4c11619a4c116 + dq 0x19a4c11619a4c116, 0x19a4c11619a4c116 + dq 0x1e376c081e376c08, 0x1e376c081e376c08 + dq 0x1e376c081e376c08, 0x1e376c081e376c08 + dq 0x2748774c2748774c, 0x2748774c2748774c + dq 0x2748774c2748774c, 0x2748774c2748774c + dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5 + dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5 + dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3 + dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3 + dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a + dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a + dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f + dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f + dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3 + dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3 + dq 0x748f82ee748f82ee, 0x748f82ee748f82ee + dq 0x748f82ee748f82ee, 0x748f82ee748f82ee + dq 0x78a5636f78a5636f, 0x78a5636f78a5636f + dq 0x78a5636f78a5636f, 0x78a5636f78a5636f + dq 0x84c8781484c87814, 0x84c8781484c87814 + dq 0x84c8781484c87814, 0x84c8781484c87814 + dq 0x8cc702088cc70208, 0x8cc702088cc70208 + dq 0x8cc702088cc70208, 0x8cc702088cc70208 + dq 0x90befffa90befffa, 0x90befffa90befffa + dq 0x90befffa90befffa, 0x90befffa90befffa + dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb + dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb + dq 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7 + dq 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7 + dq 0xc67178f2c67178f2, 0xc67178f2c67178f2 + dq 0xc67178f2c67178f2, 0xc67178f2c67178f2 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + dq 0x0405060700010203, 0x0c0d0e0f08090a0b diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx512.asm new file mode 100644 index 000000000..1ee76ddfc --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_avx512.asm @@ -0,0 +1,682 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. 
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA256 using AVX-512 +;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 + +[bits 64] +default rel +section .text + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + 
%endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha256 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*8*16 ;BYTES*DWORDS*SEGS +; Common definitions +%define ROUND tmp4 +%define TBL tmp5 + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define A zmm0 +%define B zmm1 +%define C zmm2 +%define D zmm3 +%define E zmm4 +%define F zmm5 +%define G zmm6 +%define H zmm7 +%define T1 zmm8 +%define TMP0 zmm9 +%define TMP1 zmm10 +%define TMP2 zmm11 +%define TMP3 zmm12 +%define TMP4 zmm13 +%define TMP5 zmm14 +%define TMP6 zmm15 + +%define W0 zmm16 +%define W1 zmm17 +%define W2 zmm18 +%define W3 zmm19 +%define W4 zmm20 +%define W5 zmm21 +%define W6 zmm22 +%define W7 zmm23 +%define W8 zmm24 +%define W9 zmm25 +%define W10 zmm26 +%define W11 zmm27 +%define W12 zmm28 +%define W13 zmm29 +%define W14 zmm30 +%define W15 zmm31 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro ROTATE_ARGS 0 +%xdefine TMP_ H +%xdefine H G +%xdefine G F +%xdefine F E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%define APPEND(a,b) a %+ b +;; CH(A, B, C) = (A&B) ^ (~A&C) +;; MAJ(E, F, G) = (E&F) ^ (E&G) ^ (F&G) +;; SIGMA0 = ROR_2 ^ ROR_13 ^ ROR_22 +;; SIGMA1 = ROR_6 ^ ROR_11 ^ ROR_25 +;; sigma0 = ROR_7 ^ ROR_18 ^ SHR_3 +;; sigma1 = ROR_17 ^ ROR_19 ^ SHR_10 + +; Main processing loop per round +%macro PROCESS_LOOP 2 +%define %%WT %1 +%define %%ROUND %2 + ;; T1 = H + SIGMA1(E) + CH(E, F, G) + Kt + Wt + ;; T2 = SIGMA0(A) + MAJ(A, B, C) + ;; H=G, G=F, F=E, E=D+T1, D=C, C=B, B=A, A=T1+T2 + + ;; H becomes T2, then add T1 for A + ;; D becomes D + T1 for E + + vpaddd T1, H, TMP3 ; T1 = H + Kt + vmovdqa32 TMP0, E + vprord TMP1, E, 6 ; ROR_6(E) + vprord TMP2, E, 11 ; ROR_11(E) + vprord TMP3, E, 25 ; ROR_25(E) + vpternlogd TMP0, F, G, 0xCA ; TMP0 = CH(E,F,G) + vpaddd T1, T1, %%WT ; T1 = T1 + Wt + vpternlogd TMP1, TMP2, TMP3, 0x96 ; TMP1 = SIGMA1(E) + vpaddd T1, T1, TMP0 ; T1 = T1 + CH(E,F,G) + vpaddd T1, T1, TMP1 ; T1 = T1 + SIGMA1(E) + vpaddd D, D, T1 ; D = D + T1 + + vprord H, A, 2 ; ROR_2(A) + vprord TMP2, A, 13 ; ROR_13(A) + vprord TMP3, A, 22 ; ROR_22(A) + vmovdqa32 TMP0, A + vpternlogd TMP0, B, C, 0xE8 ; TMP0 = MAJ(A,B,C) + vpternlogd H, TMP2, TMP3, 0x96 ; H(T2) = SIGMA0(A) + vpaddd H, H, TMP0 ; H(T2) = SIGMA0(A) + MAJ(A,B,C) + vpaddd H, H, T1 ; H(A) = H(T2) + T1 + + vmovdqa32 TMP3, [TBL + ((%%ROUND+1)*64)] ; Next Kt + + ;; Rotate the args A-H (rotation of names associated with regs) + ROTATE_ARGS +%endmacro + +%macro MSG_SCHED_ROUND_16_63 4 +%define %%WT %1 +%define %%WTp1 %2 +%define %%WTp9 %3 +%define %%WTp14 %4 + vprord TMP4, %%WTp14, 17 ; ROR_17(Wt-2) + vprord TMP5, %%WTp14, 19 ; ROR_19(Wt-2) + vpsrld TMP6, 
%%WTp14, 10 ; SHR_10(Wt-2) + vpternlogd TMP4, TMP5, TMP6, 0x96 ; TMP4 = sigma1(Wt-2) + + vpaddd %%WT, %%WT, TMP4 ; Wt = Wt-16 + sigma1(Wt-2) + vpaddd %%WT, %%WT, %%WTp9 ; Wt = Wt-16 + sigma1(Wt-2) + Wt-7 + + vprord TMP4, %%WTp1, 7 ; ROR_7(Wt-15) + vprord TMP5, %%WTp1, 18 ; ROR_18(Wt-15) + vpsrld TMP6, %%WTp1, 3 ; SHR_3(Wt-15) + vpternlogd TMP4, TMP5, TMP6, 0x96 ; TMP4 = sigma0(Wt-15) + + vpaddd %%WT, %%WT, TMP4 ; Wt = Wt-16 + sigma1(Wt-2) + + ; Wt-7 + sigma0(Wt-15) + +%endmacro + +; Note: this reads in a block of data for one lane; when all 16 are read, +; the data must be transposed to build the msg schedule. This macro is not +; referenced in this file: .block_loop below loads and byte-swaps W0-W15 +; directly into registers. +%macro MSG_SCHED_ROUND_00_15 2 +%define %%WT %1 +%define %%OFFSET %2 + mov inp0, [IN + (%%OFFSET*8)] + vmovups %%WT, [inp0+IDX] +%endmacro + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h7 | h7 | h7 | ...| h7 | | Ha| Hb | Hc |...| Hp | + +[bits 64] +section .text +align 32 + +;void mh_sha256_block_avx512(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, including segment digests (uint32_t digests[16][8]) +; arg 2 pointer to aligned_frame_buffer, which is used to hold the big-endian data. +; arg 3 number of 1KB blocks +; +global mh_sha256_block_avx512 +func(mh_sha256_block_avx512) + endbranch + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 64 Bytes needed by avx512 + and rsp, ~0x3F + lea TBL,[TABLE] + + ; copy segs_digests into stack and ZMM + VMOVPS A, [mh_digests_p + 64*0] + VMOVPS B, [mh_digests_p + 64*1] + VMOVPS C, [mh_digests_p + 64*2] + VMOVPS D, [mh_digests_p + 64*3] + VMOVPS E, [mh_digests_p + 64*4] + VMOVPS F, [mh_digests_p + 64*5] + VMOVPS G, [mh_digests_p + 64*6] + VMOVPS H, [mh_digests_p + 64*7] + +.block_loop: + ; Save digests for later addition + vmovdqa32 [rsp + 64*0], A + vmovdqa32 [rsp + 64*1], B + vmovdqa32 [rsp + 64*2], C + vmovdqa32 [rsp + 64*3], D + vmovdqa32 [rsp + 64*4], E + vmovdqa32 [rsp + 64*5], F + vmovdqa32 [rsp + 64*6], G + vmovdqa32 [rsp + 64*7], H + + vmovdqa32 TMP3, [TBL] ; First K + ;transform to big-endian data and store on aligned_frame + vmovdqa32 TMP2, [PSHUFFLE_BYTE_FLIP_MASK] + ;using the extra 16 ZMM registers instead of the heap +%assign I 0 +%rep 8 +%assign J (I+1) + VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64] + VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64] + + vpshufb APPEND(W,I), APPEND(W,I), TMP2 + vpshufb APPEND(W,J), APPEND(W,J), TMP2 +%assign I (I+2) +%endrep + + ; MSG Schedule for W0-W15 is now complete in registers + ; Process all 64 rounds below; during the first 48, also calculate the + ; next Wt+16 once the current Wt is no longer needed + + ; PROCESS_LOOP_00_47 APPEND(W,J), I, APPEND(W,K), APPEND(W,L), APPEND(W,M) + +%assign I 0 +%assign J 0 +%assign K 1 +%assign L 9 +%assign M 14 +%rep 64 + PROCESS_LOOP APPEND(W,J), I + %if I < 48 + MSG_SCHED_ROUND_16_63 APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M) + %endif + %if I % 8 = 4 + PREFETCH_X [mh_in_p + 1024+128*(I / 8)] + %endif +%assign I (I+1) +%assign J ((J+1)% 16) +%assign K ((K+1)% 16) +%assign L ((L+1)% 16) +%assign M ((M+1)% 16) +%endrep + + ;; add old digest + vpaddd A, A, [rsp + 0*64] + vpaddd B, B, [rsp + 1*64] + vpaddd C, C, [rsp + 2*64] + vpaddd D, D, [rsp + 3*64] + vpaddd E, E, [rsp + 4*64] +
vpaddd F, F, [rsp + 5*64] + vpaddd G, G, [rsp + 6*64] + vpaddd H, H, [rsp + 7*64] + + add mh_in_p, 1024 + sub loops, 1 + jne .block_loop + + ; copy segs_digests back to mh_digests_p + + VMOVPS [mh_digests_p + 64*0], A + VMOVPS [mh_digests_p + 64*1], B + VMOVPS [mh_digests_p + 64*2], C + VMOVPS [mh_digests_p + 64*3], D + VMOVPS [mh_digests_p + 64*4], E + VMOVPS [mh_digests_p + 64*5], F + VMOVPS [mh_digests_p + 64*6], G + VMOVPS [mh_digests_p + 64*7], H + + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data +align 64 +TABLE: + dq 0x428a2f98428a2f98, 0x428a2f98428a2f98 + dq 0x428a2f98428a2f98, 0x428a2f98428a2f98 + dq 0x428a2f98428a2f98, 0x428a2f98428a2f98 + dq 0x428a2f98428a2f98, 0x428a2f98428a2f98 + dq 0x7137449171374491, 0x7137449171374491 + dq 0x7137449171374491, 0x7137449171374491 + dq 0x7137449171374491, 0x7137449171374491 + dq 0x7137449171374491, 0x7137449171374491 + dq 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf + dq 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf + dq 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf + dq 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf + dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5 + dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5 + dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5 + dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5 + dq 0x3956c25b3956c25b, 0x3956c25b3956c25b + dq 0x3956c25b3956c25b, 0x3956c25b3956c25b + dq 0x3956c25b3956c25b, 0x3956c25b3956c25b + dq 0x3956c25b3956c25b, 0x3956c25b3956c25b + dq 0x59f111f159f111f1, 0x59f111f159f111f1 + dq 0x59f111f159f111f1, 0x59f111f159f111f1 + dq 0x59f111f159f111f1, 0x59f111f159f111f1 + dq 0x59f111f159f111f1, 0x59f111f159f111f1 + dq 0x923f82a4923f82a4, 0x923f82a4923f82a4 + dq 0x923f82a4923f82a4, 0x923f82a4923f82a4 + dq 0x923f82a4923f82a4, 0x923f82a4923f82a4 + dq 0x923f82a4923f82a4, 0x923f82a4923f82a4 + dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5 + dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5 + dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5 + dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5 + dq 0xd807aa98d807aa98, 0xd807aa98d807aa98 + dq 0xd807aa98d807aa98, 0xd807aa98d807aa98 + dq 0xd807aa98d807aa98, 0xd807aa98d807aa98 + dq 0xd807aa98d807aa98, 0xd807aa98d807aa98 + dq 0x12835b0112835b01, 0x12835b0112835b01 + dq 0x12835b0112835b01, 0x12835b0112835b01 + dq 0x12835b0112835b01, 0x12835b0112835b01 + dq 0x12835b0112835b01, 0x12835b0112835b01 + dq 0x243185be243185be, 0x243185be243185be + dq 0x243185be243185be, 0x243185be243185be + dq 0x243185be243185be, 0x243185be243185be + dq 0x243185be243185be, 0x243185be243185be + dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3 + dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3 + dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3 + dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3 + dq 0x72be5d7472be5d74, 0x72be5d7472be5d74 + dq 0x72be5d7472be5d74, 0x72be5d7472be5d74 + dq 0x72be5d7472be5d74, 0x72be5d7472be5d74 + dq 0x72be5d7472be5d74, 0x72be5d7472be5d74 + dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe + dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe + dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe + dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe + dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7 + dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7 + dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7 + dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7 + dq 0xc19bf174c19bf174, 0xc19bf174c19bf174 + dq 0xc19bf174c19bf174, 0xc19bf174c19bf174 + dq 0xc19bf174c19bf174, 0xc19bf174c19bf174 + dq 0xc19bf174c19bf174, 0xc19bf174c19bf174 + dq 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1 + dq 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1 + dq 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1 + dq 0xe49b69c1e49b69c1, 
0xe49b69c1e49b69c1 + dq 0xefbe4786efbe4786, 0xefbe4786efbe4786 + dq 0xefbe4786efbe4786, 0xefbe4786efbe4786 + dq 0xefbe4786efbe4786, 0xefbe4786efbe4786 + dq 0xefbe4786efbe4786, 0xefbe4786efbe4786 + dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6 + dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6 + dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6 + dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6 + dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc + dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc + dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc + dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc + dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f + dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f + dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f + dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f + dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa + dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa + dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa + dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa + dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc + dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc + dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc + dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc + dq 0x76f988da76f988da, 0x76f988da76f988da + dq 0x76f988da76f988da, 0x76f988da76f988da + dq 0x76f988da76f988da, 0x76f988da76f988da + dq 0x76f988da76f988da, 0x76f988da76f988da + dq 0x983e5152983e5152, 0x983e5152983e5152 + dq 0x983e5152983e5152, 0x983e5152983e5152 + dq 0x983e5152983e5152, 0x983e5152983e5152 + dq 0x983e5152983e5152, 0x983e5152983e5152 + dq 0xa831c66da831c66d, 0xa831c66da831c66d + dq 0xa831c66da831c66d, 0xa831c66da831c66d + dq 0xa831c66da831c66d, 0xa831c66da831c66d + dq 0xa831c66da831c66d, 0xa831c66da831c66d + dq 0xb00327c8b00327c8, 0xb00327c8b00327c8 + dq 0xb00327c8b00327c8, 0xb00327c8b00327c8 + dq 0xb00327c8b00327c8, 0xb00327c8b00327c8 + dq 0xb00327c8b00327c8, 0xb00327c8b00327c8 + dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7 + dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7 + dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7 + dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7 + dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3 + dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3 + dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3 + dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3 + dq 0xd5a79147d5a79147, 0xd5a79147d5a79147 + dq 0xd5a79147d5a79147, 0xd5a79147d5a79147 + dq 0xd5a79147d5a79147, 0xd5a79147d5a79147 + dq 0xd5a79147d5a79147, 0xd5a79147d5a79147 + dq 0x06ca635106ca6351, 0x06ca635106ca6351 + dq 0x06ca635106ca6351, 0x06ca635106ca6351 + dq 0x06ca635106ca6351, 0x06ca635106ca6351 + dq 0x06ca635106ca6351, 0x06ca635106ca6351 + dq 0x1429296714292967, 0x1429296714292967 + dq 0x1429296714292967, 0x1429296714292967 + dq 0x1429296714292967, 0x1429296714292967 + dq 0x1429296714292967, 0x1429296714292967 + dq 0x27b70a8527b70a85, 0x27b70a8527b70a85 + dq 0x27b70a8527b70a85, 0x27b70a8527b70a85 + dq 0x27b70a8527b70a85, 0x27b70a8527b70a85 + dq 0x27b70a8527b70a85, 0x27b70a8527b70a85 + dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138 + dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138 + dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138 + dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138 + dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc + dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc + dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc + dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc + dq 0x53380d1353380d13, 0x53380d1353380d13 + dq 0x53380d1353380d13, 0x53380d1353380d13 + dq 0x53380d1353380d13, 0x53380d1353380d13 + dq 0x53380d1353380d13, 0x53380d1353380d13 + dq 0x650a7354650a7354, 0x650a7354650a7354 + dq 0x650a7354650a7354, 0x650a7354650a7354 + dq 0x650a7354650a7354, 0x650a7354650a7354 + dq 0x650a7354650a7354, 0x650a7354650a7354 + dq 
0x766a0abb766a0abb, 0x766a0abb766a0abb + dq 0x766a0abb766a0abb, 0x766a0abb766a0abb + dq 0x766a0abb766a0abb, 0x766a0abb766a0abb + dq 0x766a0abb766a0abb, 0x766a0abb766a0abb + dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e + dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e + dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e + dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e + dq 0x92722c8592722c85, 0x92722c8592722c85 + dq 0x92722c8592722c85, 0x92722c8592722c85 + dq 0x92722c8592722c85, 0x92722c8592722c85 + dq 0x92722c8592722c85, 0x92722c8592722c85 + dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1 + dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1 + dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1 + dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1 + dq 0xa81a664ba81a664b, 0xa81a664ba81a664b + dq 0xa81a664ba81a664b, 0xa81a664ba81a664b + dq 0xa81a664ba81a664b, 0xa81a664ba81a664b + dq 0xa81a664ba81a664b, 0xa81a664ba81a664b + dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70 + dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70 + dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70 + dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70 + dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3 + dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3 + dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3 + dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3 + dq 0xd192e819d192e819, 0xd192e819d192e819 + dq 0xd192e819d192e819, 0xd192e819d192e819 + dq 0xd192e819d192e819, 0xd192e819d192e819 + dq 0xd192e819d192e819, 0xd192e819d192e819 + dq 0xd6990624d6990624, 0xd6990624d6990624 + dq 0xd6990624d6990624, 0xd6990624d6990624 + dq 0xd6990624d6990624, 0xd6990624d6990624 + dq 0xd6990624d6990624, 0xd6990624d6990624 + dq 0xf40e3585f40e3585, 0xf40e3585f40e3585 + dq 0xf40e3585f40e3585, 0xf40e3585f40e3585 + dq 0xf40e3585f40e3585, 0xf40e3585f40e3585 + dq 0xf40e3585f40e3585, 0xf40e3585f40e3585 + dq 0x106aa070106aa070, 0x106aa070106aa070 + dq 0x106aa070106aa070, 0x106aa070106aa070 + dq 0x106aa070106aa070, 0x106aa070106aa070 + dq 0x106aa070106aa070, 0x106aa070106aa070 + dq 0x19a4c11619a4c116, 0x19a4c11619a4c116 + dq 0x19a4c11619a4c116, 0x19a4c11619a4c116 + dq 0x19a4c11619a4c116, 0x19a4c11619a4c116 + dq 0x19a4c11619a4c116, 0x19a4c11619a4c116 + dq 0x1e376c081e376c08, 0x1e376c081e376c08 + dq 0x1e376c081e376c08, 0x1e376c081e376c08 + dq 0x1e376c081e376c08, 0x1e376c081e376c08 + dq 0x1e376c081e376c08, 0x1e376c081e376c08 + dq 0x2748774c2748774c, 0x2748774c2748774c + dq 0x2748774c2748774c, 0x2748774c2748774c + dq 0x2748774c2748774c, 0x2748774c2748774c + dq 0x2748774c2748774c, 0x2748774c2748774c + dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5 + dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5 + dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5 + dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5 + dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3 + dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3 + dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3 + dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3 + dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a + dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a + dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a + dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a + dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f + dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f + dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f + dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f + dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3 + dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3 + dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3 + dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3 + dq 0x748f82ee748f82ee, 0x748f82ee748f82ee + dq 0x748f82ee748f82ee, 0x748f82ee748f82ee + dq 0x748f82ee748f82ee, 0x748f82ee748f82ee + dq 0x748f82ee748f82ee, 0x748f82ee748f82ee + dq 0x78a5636f78a5636f, 
0x78a5636f78a5636f + dq 0x78a5636f78a5636f, 0x78a5636f78a5636f + dq 0x78a5636f78a5636f, 0x78a5636f78a5636f + dq 0x78a5636f78a5636f, 0x78a5636f78a5636f + dq 0x84c8781484c87814, 0x84c8781484c87814 + dq 0x84c8781484c87814, 0x84c8781484c87814 + dq 0x84c8781484c87814, 0x84c8781484c87814 + dq 0x84c8781484c87814, 0x84c8781484c87814 + dq 0x8cc702088cc70208, 0x8cc702088cc70208 + dq 0x8cc702088cc70208, 0x8cc702088cc70208 + dq 0x8cc702088cc70208, 0x8cc702088cc70208 + dq 0x8cc702088cc70208, 0x8cc702088cc70208 + dq 0x90befffa90befffa, 0x90befffa90befffa + dq 0x90befffa90befffa, 0x90befffa90befffa + dq 0x90befffa90befffa, 0x90befffa90befffa + dq 0x90befffa90befffa, 0x90befffa90befffa + dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb + dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb + dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb + dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb + dq 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7 + dq 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7 + dq 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7 + dq 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7 + dq 0xc67178f2c67178f2, 0xc67178f2c67178f2 + dq 0xc67178f2c67178f2, 0xc67178f2c67178f2 + dq 0xc67178f2c67178f2, 0xc67178f2c67178f2 + dq 0xc67178f2c67178f2, 0xc67178f2c67178f2 + + +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + dq 0x0405060700010203, 0x0c0d0e0f08090a0b + dq 0x0405060700010203, 0x0c0d0e0f08090a0b + dq 0x0405060700010203, 0x0c0d0e0f08090a0b + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_mh_sha256_block_avx512 +no_mh_sha256_block_avx512: +%endif +%endif ; HAVE_AS_KNOWS_AVX512 + diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_base.c new file mode 100644 index 000000000..8d9a828c6 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_base.c @@ -0,0 +1,188 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include "mh_sha256_internal.h" +#include <string.h> + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +// Base multi-hash SHA256 Functions +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +// store_w is only used for step 0 ~ 15 +#define store_w(s, i, w, ww) (w[i][s] = to_be32(ww[i*HASH_SEGS+s])) +#define Ws(x, s) w[(x) & 15][s] +// update_w is used for step > 15 +#define update_w(s, i, w) \ + Ws(i, s) = Ws(i-16, s) + S0(Ws(i-15, s)) + Ws(i-7, s) + S1(Ws(i-2, s)) +#define update_t2(s, a, b, c) t2[s] = s0(a[s]) + maj(a[s],b[s],c[s]) +#define update_t1(s, h, e, f, g, i, k) \ + t1[s] = h[s] + s1(e[s]) + ch(e[s],f[s],g[s]) + k + Ws(i, s); +#define update_d(s) d[s] += t1[s] +#define update_h(s) h[s] = t1[s] + t2[s] + +// s is an iterator +#define STORE_W(s, i, w, ww) \ + for(s = 0; s < HASH_SEGS; s++) \ + store_w(s, i, w, ww); +#define UPDATE_W(s, i, w) \ + for(s = 0; s < HASH_SEGS; s++) \ + update_w(s, i, w); +#define UPDATE_T2(s, a, b, c) \ + for(s = 0; s < HASH_SEGS; s++) \ + update_t2(s, a, b, c); +#define UPDATE_T1(s, h, e, f, g, i, k) \ + for(s = 0; s < HASH_SEGS; s++) \ + update_t1(s, h, e, f, g, i, k); +#define UPDATE_D(s) \ + for(s = 0; s < HASH_SEGS; s++) \ + update_d(s); +#define UPDATE_H(s) \ + for(s = 0; s < HASH_SEGS; s++) \ + update_h(s); + +static inline void step(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t * f, + uint32_t * g, uint32_t * h, uint32_t k, + uint32_t * t1, uint32_t * t2, uint32_t(*w)[HASH_SEGS], uint32_t * ww) +{ + uint8_t s; + if (i < 16) { + STORE_W(s, i, w, ww); + } else { + UPDATE_W(s, i, w); + } + UPDATE_T2(s, a, b, c); + UPDATE_T1(s, h, e, f, g, i, k); + UPDATE_D(s); + UPDATE_H(s); +} + +static inline void init_abcdefgh(uint32_t * xx, uint32_t n, + uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS]) +{ + uint8_t s; + for (s = 0; s < HASH_SEGS; s++) + xx[s] = digests[n][s]; +} + +static inline void add_abcdefgh(uint32_t * xx, uint32_t n, + uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS]) +{ + uint8_t s; + for (s = 0; s < HASH_SEGS; s++) + digests[n][s] += xx[s]; +} + +/* + * API to perform the 64 steps of the multi-hash algorithm for + * a single block of data. The caller is responsible for providing + * a full block of input. + * + * Arguments: + * input - the pointer to the data + * digests - the space to hold the digests for all segments.
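+ * frame_buffer - the scratch space that holds the interleaved,
+ * big-endian message schedule w[16][HASH_SEGS] for the block.
+ *
+ * A minimal calling sketch (caller-owned buffers; names are for
+ * illustration only, not part of the API):
+ *
+ * uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS]; // seeded per segment
+ * uint8_t frame[MH_SHA256_BLOCK_SIZE];
+ * mh_sha256_single(block_1kb, digests, frame); // consumes one 1KB block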
+ * + * Return: + * N/A + */ +void mh_sha256_single(const uint8_t * input, uint32_t(*digests)[HASH_SEGS], + uint8_t * frame_buffer) +{ + uint8_t i; + uint32_t aa[HASH_SEGS], bb[HASH_SEGS], cc[HASH_SEGS], dd[HASH_SEGS]; + uint32_t ee[HASH_SEGS], ff[HASH_SEGS], gg[HASH_SEGS], hh[HASH_SEGS]; + uint32_t t1[HASH_SEGS], t2[HASH_SEGS]; + uint32_t *ww = (uint32_t *) input; + uint32_t(*w)[HASH_SEGS]; + + const static uint32_t k[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 + }; + + w = (uint32_t(*)[HASH_SEGS]) frame_buffer; + + init_abcdefgh(aa, 0, digests); + init_abcdefgh(bb, 1, digests); + init_abcdefgh(cc, 2, digests); + init_abcdefgh(dd, 3, digests); + init_abcdefgh(ee, 4, digests); + init_abcdefgh(ff, 5, digests); + init_abcdefgh(gg, 6, digests); + init_abcdefgh(hh, 7, digests); + + for (i = 0; i < 64; i += 8) { + step(i, aa, bb, cc, dd, ee, ff, gg, hh, k[i], t1, t2, w, ww); + step(i + 1, hh, aa, bb, cc, dd, ee, ff, gg, k[i + 1], t1, t2, w, ww); + step(i + 2, gg, hh, aa, bb, cc, dd, ee, ff, k[i + 2], t1, t2, w, ww); + step(i + 3, ff, gg, hh, aa, bb, cc, dd, ee, k[i + 3], t1, t2, w, ww); + step(i + 4, ee, ff, gg, hh, aa, bb, cc, dd, k[i + 4], t1, t2, w, ww); + step(i + 5, dd, ee, ff, gg, hh, aa, bb, cc, k[i + 5], t1, t2, w, ww); + step(i + 6, cc, dd, ee, ff, gg, hh, aa, bb, k[i + 6], t1, t2, w, ww); + step(i + 7, bb, cc, dd, ee, ff, gg, hh, aa, k[i + 7], t1, t2, w, ww); + } + + add_abcdefgh(aa, 0, digests); + add_abcdefgh(bb, 1, digests); + add_abcdefgh(cc, 2, digests); + add_abcdefgh(dd, 3, digests); + add_abcdefgh(ee, 4, digests); + add_abcdefgh(ff, 5, digests); + add_abcdefgh(gg, 6, digests); + add_abcdefgh(hh, 7, digests); +} + +void mh_sha256_block_base(const uint8_t * input_data, + uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks) +{ + uint32_t i; + + for (i = 0; i < num_blocks; i++) { + mh_sha256_single(input_data, digests, frame_buffer); + input_data += MH_SHA256_BLOCK_SIZE; + } + + return; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_sse.asm new file mode 100644 index 000000000..b1d6fd9ea --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_block_sse.asm @@ -0,0 +1,557 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
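A note on the unrolled loop in mh_sha256_single above: passing the eight working variables in a rotated order each round (aa..hh, then hh..gg, and so on) avoids copying state between rounds; the same effect can be expressed by indexing one state array modulo 8. The following small C sketch is illustrative only and not part of the library:

    #include <stdint.h>
    #include <stdio.h>

    /* At round i, working variable x (0=a .. 7=h) lives at index (x - i) mod 8.
     * This reproduces the rotated argument pattern of the unrolled loop above. */
    static int rot_idx(int x, int i)
    {
            return (x + 64 - i) % 8;    /* 64 rounds, so i <= 64 */
    }

    int main(void)
    {
            /* Round 1 of the unrolled loop is step(i+1, hh, aa, ..., gg):
             * 'a' is slot 7 (hh) and 'h' is slot 6 (gg). */
            printf("round 1: a -> slot %d, h -> slot %d\n",
                   rot_idx(0, 1), rot_idx(7, 1));   /* prints 7 and 6 */
            return 0;
    }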
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA256 using SSE +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov 
r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha256 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*8*16 ;BYTES*DWORDS*SEGS + +; Common definitions +%define ROUND tmp4 +%define TBL tmp5 + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define MOVPS movups + +%define SZ 4 +%define SZ4 4*SZ +%define ROUNDS 64*SZ4 + +%define a xmm0 +%define b xmm1 +%define c xmm2 +%define d xmm3 +%define e xmm4 +%define f xmm5 +%define g xmm6 +%define h xmm7 + +%define a0 xmm8 +%define a1 xmm9 +%define a2 xmm10 + +%define TT0 xmm14 +%define TT1 xmm13 +%define TT2 xmm12 +%define TT3 xmm11 +%define TT4 xmm10 +%define TT5 xmm9 + +%define T1 xmm14 +%define TMP xmm15 + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro ROTATE_ARGS 0 +%xdefine TMP_ h +%xdefine h g +%xdefine g f +%xdefine f e +%xdefine e d +%xdefine d c +%xdefine c b +%xdefine b a +%xdefine a TMP_ +%endm + + +; PRORD reg, imm, tmp +%macro PRORD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + movdqa %%tmp, %%reg + psrld %%reg, %%imm + pslld %%tmp, (32-(%%imm)) + por %%reg, %%tmp +%endmacro + +; PRORD dst/src, amt +%macro PRORD 2 + PRORD %1, %2, TMP +%endmacro + +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_00_15_R 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + movdqa a0, e ; sig1: a0 = e + movdqa a1, e ; sig1: s1 = e + PRORD a0, (11-6) ; sig1: a0 = (e >> 5) + + movdqa a2, f ; ch: a2 = f + pxor a2, g ; ch: a2 = f^g + pand a2, e ; ch: a2 = (f^g)&e + pxor a2, g ; a2 = ch + + PRORD a1, 25 ; sig1: a1 = (e >> 25) + movdqa %%T1,[SZ4*(%%i&0xf) + %%data] + paddd %%T1,[TBL + ROUND] ; T1 = W + K + pxor a0, e ; sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 ; sig1: a0 = (e >> 6) ^ (e >> 11) + paddd h, a2 ; h = h + ch + movdqa a2, a ; sig0: a2 = a + PRORD a2, (13-2) ; sig0: a2 = (a >> 11) + paddd h, %%T1 ; h = h + ch + W + K + pxor a0, a1 ; a0 = sigma1 + movdqa a1, a ; sig0: a1 = a + movdqa %%T1, a ; maj: T1 = a + PRORD a1, 22 ; sig0: a1 = (a >> 22) + pxor %%T1, c ; maj: T1 = a^c + add ROUND, SZ4 ; ROUND++ + pand %%T1, b ; maj: T1 = (a^c)&b + paddd h, a0 + + paddd d, h + + pxor a2, a ; sig0: a2 = a ^ (a >> 11) + PRORD a2, 2 ; sig0: a2 = (a >> 2) ^ (a >> 13) + pxor a2, a1 ; a2 = sig0 + movdqa a1, a ; maj: a1 = a + pand a1, c ; maj: a1 = a&c + por a1, %%T1 ; a1 = maj + paddd h, a1 ; h = h + ch + W + K + maj + paddd h, a2 ; h = h + ch + W + K + maj + sigma0 + + ROTATE_ARGS +%endm + +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_00_15_W 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + movdqa a0, e ; sig1: a0 = e + movdqa a1, e ; sig1: s1 = e + PRORD a0, (11-6) ; sig1: a0 = (e >> 5) + + movdqa a2, f ; ch: a2 = f + pxor a2, g ; ch: a2 = f^g + pand a2, e ; ch: a2 = (f^g)&e + pxor a2, g ; a2 = ch + + PRORD a1, 25 ; sig1: a1 = (e >> 25) + movdqa [SZ4*(%%i&0xf) + %%data], %%T1 + paddd %%T1,[TBL + ROUND] ; T1 = W + K + pxor a0, e ; sig1: a0 = e ^ (e >> 5) + PRORD a0, 6 ; sig1: a0 = (e >> 6) ^ (e >> 11) + paddd h, a2 ; h = h 
+ ch + movdqa a2, a ; sig0: a2 = a + PRORD a2, (13-2) ; sig0: a2 = (a >> 11) + paddd h, %%T1 ; h = h + ch + W + K + pxor a0, a1 ; a0 = sigma1 + movdqa a1, a ; sig0: a1 = a + movdqa %%T1, a ; maj: T1 = a + PRORD a1, 22 ; sig0: a1 = (a >> 22) + pxor %%T1, c ; maj: T1 = a^c + add ROUND, SZ4 ; ROUND++ + pand %%T1, b ; maj: T1 = (a^c)&b + paddd h, a0 + + paddd d, h + + pxor a2, a ; sig0: a2 = a ^ (a >> 11) + PRORD a2, 2 ; sig0: a2 = (a >> 2) ^ (a >> 13) + pxor a2, a1 ; a2 = sig0 + movdqa a1, a ; maj: a1 = a + pand a1, c ; maj: a1 = a&c + por a1, %%T1 ; a1 = maj + paddd h, a1 ; h = h + ch + W + K + maj + paddd h, a2 ; h = h + ch + W + K + maj + sigma0 + + ROTATE_ARGS +%endm +;; arguments passed implicitly in preprocessor symbols i, a...h +%macro ROUND_16_XX 3 +%define %%T1 %1 +%define %%i %2 +%define %%data %3 + + movdqa %%T1, [SZ4*((%%i-15)&0xf) + %%data] + movdqa a1, [SZ4*((%%i-2)&0xf) + %%data] + movdqa a0, %%T1 + PRORD %%T1, 18-7 + movdqa a2, a1 + PRORD a1, 19-17 + pxor %%T1, a0 + PRORD %%T1, 7 + pxor a1, a2 + PRORD a1, 17 + psrld a0, 3 + pxor %%T1, a0 + psrld a2, 10 + pxor a1, a2 + paddd %%T1, [SZ4*((%%i-16)&0xf) + %%data] + paddd a1, [SZ4*((%%i-7)&0xf) + %%data] + paddd %%T1, a1 + + ROUND_00_15_W %%T1, %%i, %%data + +%endm + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h7 | h7 | h7 | ...| h7 | | Ha| Hb | Hc |...| Hp | + +align 32 + +;void mh_sha256_block_sse(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][8]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. 
+; arg 3 number of 1KB blocks +; +mk_global mh_sha256_block_sse, function, internal +func(mh_sha256_block_sse) + endbranch + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 16 Bytes needed by sse + and rsp, ~0x0F + lea TBL,[TABLE] + + %assign I 0 ; copy segs_digests into stack + %rep 8 + MOVPS a, [mh_digests_p + I*64 + 16*0] + MOVPS b, [mh_digests_p + I*64 + 16*1] + MOVPS c, [mh_digests_p + I*64 + 16*2] + MOVPS d, [mh_digests_p + I*64 + 16*3] + + movdqa [rsp + I*64 + 16*0], a + movdqa [rsp + I*64 + 16*1], b + movdqa [rsp + I*64 + 16*2], c + movdqa [rsp + I*64 + 16*3], d + %assign I (I+1) + %endrep + +.block_loop: + ;transform to big-endian data and store on aligned_frame + movdqa TMP, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*8 to DWORD*4_SEGS*8*4 + %assign I 0 + %rep 16 + MOVPS TT0,[mh_in_p + I*64+0*16] + MOVPS TT1,[mh_in_p + I*64+1*16] + MOVPS TT2,[mh_in_p + I*64+2*16] + MOVPS TT3,[mh_in_p + I*64+3*16] + + pshufb TT0, TMP + movdqa [mh_data_p +(I)*16 +0*256],TT0 + pshufb TT1, TMP + movdqa [mh_data_p +(I)*16 +1*256],TT1 + pshufb TT2, TMP + movdqa [mh_data_p +(I)*16 +2*256],TT2 + pshufb TT3, TMP + movdqa [mh_data_p +(I)*16 +3*256],TT3 + %assign I (I+1) + %endrep + + mov mh_segs, 0 ;start from the first 4 segments + mov pref, 1024 ;avoid prefetch repeadtedly + .segs_loop: + xor ROUND, ROUND + ;; Initialize digests + movdqa a, [rsp + 0*64 + mh_segs] + movdqa b, [rsp + 1*64 + mh_segs] + movdqa c, [rsp + 2*64 + mh_segs] + movdqa d, [rsp + 3*64 + mh_segs] + movdqa e, [rsp + 4*64 + mh_segs] + movdqa f, [rsp + 5*64 + mh_segs] + movdqa g, [rsp + 6*64 + mh_segs] + movdqa h, [rsp + 7*64 + mh_segs] + + %assign i 0 + %rep 4 + ROUND_00_15_R TT0, (i*4+0), mh_data_p + ROUND_00_15_R TT1, (i*4+1), mh_data_p + ROUND_00_15_R TT2, (i*4+2), mh_data_p + ROUND_00_15_R TT3, (i*4+3), mh_data_p + %assign i (i+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] + + %assign i 16 + %rep 48 + %if i = 48 + PREFETCH_X [mh_in_p + pref+128*1] + %endif + ROUND_16_XX T1, i, mh_data_p + %assign i (i+1) + %endrep + + ;; add old digest + paddd a, [rsp + 0*64 + mh_segs] + paddd b, [rsp + 1*64 + mh_segs] + paddd c, [rsp + 2*64 + mh_segs] + paddd d, [rsp + 3*64 + mh_segs] + paddd e, [rsp + 4*64 + mh_segs] + paddd f, [rsp + 5*64 + mh_segs] + paddd g, [rsp + 6*64 + mh_segs] + paddd h, [rsp + 7*64 + mh_segs] + + ; write out digests + movdqa [rsp + 0*64 + mh_segs], a + movdqa [rsp + 1*64 + mh_segs], b + movdqa [rsp + 2*64 + mh_segs], c + movdqa [rsp + 3*64 + mh_segs], d + movdqa [rsp + 4*64 + mh_segs], e + movdqa [rsp + 5*64 + mh_segs], f + movdqa [rsp + 6*64 + mh_segs], g + movdqa [rsp + 7*64 + mh_segs], h + + add pref, 256 + add mh_data_p, 256 + add mh_segs, 16 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 8 + movdqa a, [rsp + I*64 + 16*0] + movdqa b, [rsp + I*64 + 16*1] + movdqa c, [rsp + I*64 + 16*2] + movdqa d, [rsp + I*64 + 16*3] + + MOVPS [mh_digests_p + I*64 + 16*0], a + MOVPS [mh_digests_p + I*64 + 16*1], b + MOVPS [mh_digests_p + I*64 + 16*2], c + MOVPS [mh_digests_p + I*64 + 16*3], d + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=16 + +align 16 +TABLE: + dq 0x428a2f98428a2f98, 0x428a2f98428a2f98 + dq 0x7137449171374491, 0x7137449171374491 + dq 0xb5c0fbcfb5c0fbcf, 
0xb5c0fbcfb5c0fbcf + dq 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5 + dq 0x3956c25b3956c25b, 0x3956c25b3956c25b + dq 0x59f111f159f111f1, 0x59f111f159f111f1 + dq 0x923f82a4923f82a4, 0x923f82a4923f82a4 + dq 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5 + dq 0xd807aa98d807aa98, 0xd807aa98d807aa98 + dq 0x12835b0112835b01, 0x12835b0112835b01 + dq 0x243185be243185be, 0x243185be243185be + dq 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3 + dq 0x72be5d7472be5d74, 0x72be5d7472be5d74 + dq 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe + dq 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7 + dq 0xc19bf174c19bf174, 0xc19bf174c19bf174 + dq 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1 + dq 0xefbe4786efbe4786, 0xefbe4786efbe4786 + dq 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6 + dq 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc + dq 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f + dq 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa + dq 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc + dq 0x76f988da76f988da, 0x76f988da76f988da + dq 0x983e5152983e5152, 0x983e5152983e5152 + dq 0xa831c66da831c66d, 0xa831c66da831c66d + dq 0xb00327c8b00327c8, 0xb00327c8b00327c8 + dq 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7 + dq 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3 + dq 0xd5a79147d5a79147, 0xd5a79147d5a79147 + dq 0x06ca635106ca6351, 0x06ca635106ca6351 + dq 0x1429296714292967, 0x1429296714292967 + dq 0x27b70a8527b70a85, 0x27b70a8527b70a85 + dq 0x2e1b21382e1b2138, 0x2e1b21382e1b2138 + dq 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc + dq 0x53380d1353380d13, 0x53380d1353380d13 + dq 0x650a7354650a7354, 0x650a7354650a7354 + dq 0x766a0abb766a0abb, 0x766a0abb766a0abb + dq 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e + dq 0x92722c8592722c85, 0x92722c8592722c85 + dq 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1 + dq 0xa81a664ba81a664b, 0xa81a664ba81a664b + dq 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70 + dq 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3 + dq 0xd192e819d192e819, 0xd192e819d192e819 + dq 0xd6990624d6990624, 0xd6990624d6990624 + dq 0xf40e3585f40e3585, 0xf40e3585f40e3585 + dq 0x106aa070106aa070, 0x106aa070106aa070 + dq 0x19a4c11619a4c116, 0x19a4c11619a4c116 + dq 0x1e376c081e376c08, 0x1e376c081e376c08 + dq 0x2748774c2748774c, 0x2748774c2748774c + dq 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5 + dq 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3 + dq 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a + dq 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f + dq 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3 + dq 0x748f82ee748f82ee, 0x748f82ee748f82ee + dq 0x78a5636f78a5636f, 0x78a5636f78a5636f + dq 0x84c8781484c87814, 0x84c8781484c87814 + dq 0x8cc702088cc70208, 0x8cc702088cc70208 + dq 0x90befffa90befffa, 0x90befffa90befffa + dq 0xa4506ceba4506ceb, 0xa4506ceba4506ceb + dq 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7 + dq 0xc67178f2c67178f2, 0xc67178f2c67178f2 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_finalize_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_finalize_base.c new file mode 100644 index 000000000..6abb20688 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_finalize_base.c @@ -0,0 +1,121 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
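Before the finalize code below, a note on the PSHUFFLE_BYTE_FLIP_MASK that closes the SSE file above: read as pshufb indices, its bytes (03 02 01 00 07 06 05 04 ...) reverse each 4-byte group, i.e. a per-dword byte swap from little-endian input to the big-endian words SHA-256 expects. A scalar C equivalent, shown only for illustration:

    #include <stdint.h>
    #include <stdio.h>

    /* Scalar equivalent of pshufb with the byte-flip mask above:
     * swap the four bytes of each 32-bit word. */
    static inline uint32_t bswap32(uint32_t x)
    {
            return (x >> 24) | ((x >> 8) & 0x0000ff00u) |
                   ((x << 8) & 0x00ff0000u) | (x << 24);
    }

    int main(void)
    {
            printf("%08x\n", bswap32(0x01020304u));   /* prints 04030201 */
            return 0;
    }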
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/* + * mh_sha256_finalize_base.c contains the prototypes of mh_sha256_finalize_XXX + * and mh_sha256_tail_XXX. Default definitions are base type which generates + * mh_sha256_finalize_base and mh_sha256_tail_base. Other types are generated + * through different predefined macros by mh_sha256.c. + * mh_sha256_tail is used to calculate the last incomplete block of input + * data. mh_sha256_finalize is the mh_sha256_ctx wrapper of mh_sha256_tail. + */ +#ifndef MH_SHA256_FINALIZE_FUNCTION +#include <string.h> +#include "mh_sha256_internal.h" + +#define MH_SHA256_FINALIZE_FUNCTION mh_sha256_finalize_base +#define MH_SHA256_TAIL_FUNCTION mh_sha256_tail_base +#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_base +#define MH_SHA256_FINALIZE_SLVER +#endif + +void MH_SHA256_TAIL_FUNCTION(uint8_t * partial_buffer, uint32_t total_len, + uint32_t(*mh_sha256_segs_digests)[HASH_SEGS], + uint8_t * frame_buffer, uint32_t digests[SHA256_DIGEST_WORDS]) +{ + uint64_t partial_buffer_len, len_in_bit; + + partial_buffer_len = total_len % MH_SHA256_BLOCK_SIZE; + + // Padding the first block + partial_buffer[partial_buffer_len] = 0x80; + partial_buffer_len++; + memset(partial_buffer + partial_buffer_len, 0, + MH_SHA256_BLOCK_SIZE - partial_buffer_len); + + // Calculate the first block without total_length if padding needs 2 block + if (partial_buffer_len > (MH_SHA256_BLOCK_SIZE - 8)) { + MH_SHA256_BLOCK_FUNCTION(partial_buffer, mh_sha256_segs_digests, frame_buffer, + 1); + //Padding the second block + memset(partial_buffer, 0, MH_SHA256_BLOCK_SIZE); + } + //Padding the block + len_in_bit = to_be64((uint64_t) total_len * 8); + *(uint64_t *) (partial_buffer + MH_SHA256_BLOCK_SIZE - 8) = len_in_bit; + MH_SHA256_BLOCK_FUNCTION(partial_buffer, mh_sha256_segs_digests, frame_buffer, 1); + + //Calculate multi-hash SHA256 digests (segment digests as input message) + sha256_for_mh_sha256((uint8_t *) mh_sha256_segs_digests, digests, + 4 * SHA256_DIGEST_WORDS * HASH_SEGS); + + return; +} + +int MH_SHA256_FINALIZE_FUNCTION(struct mh_sha256_ctx *ctx, void *mh_sha256_digest) +{ + uint8_t i; + uint8_t *partial_block_buffer; + uint64_t total_len; + uint32_t(*mh_sha256_segs_digests)[HASH_SEGS]; + uint8_t *aligned_frame_buffer; + + if (ctx == NULL) + return MH_SHA256_CTX_ERROR_NULL; + + total_len 
= ctx->total_length; + partial_block_buffer = ctx->partial_block_buffer; + + /* mh_sha256 tail */ + aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer); + mh_sha256_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha256_interim_digests; + + MH_SHA256_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha256_segs_digests, + aligned_frame_buffer, ctx->mh_sha256_digest); + + /* Output the digests of mh_sha256 */ + if (mh_sha256_digest != NULL) { + for (i = 0; i < SHA256_DIGEST_WORDS; i++) + ((uint32_t *) mh_sha256_digest)[i] = ctx->mh_sha256_digest[i]; + } + + return MH_SHA256_CTX_ERROR_NONE; +} + +#ifdef MH_SHA256_FINALIZE_SLVER +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// Version info +struct slver mh_sha256_finalize_base_slver_000002bb; +struct slver mh_sha256_finalize_base_slver = { 0x02bb, 0x00, 0x00 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_internal.h new file mode 100644 index 000000000..8051e3f36 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_internal.h @@ -0,0 +1,318 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
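The tail function above makes one key decision: after appending the 0x80 terminator, is there still room in the current block for the 8-byte bit-length field, or does padding spill into a second block? A minimal sketch of that decision, assuming the usual MH_SHA256_BLOCK_SIZE of 1024 bytes (64 bytes times 16 segments):

    #include <stdint.h>

    #define MH_SHA256_BLOCK_SIZE 1024   /* assumption: 64-byte SHA block x 16 segments */

    /* Sketch of the branch in mh_sha256_tail_base above: how many padded
     * blocks does the remainder of the input require? */
    static int pad_blocks(uint32_t total_len)
    {
            uint32_t rem = total_len % MH_SHA256_BLOCK_SIZE;

            /* rem data bytes + one 0x80 byte; the length field needs 8 bytes */
            return (rem + 1 > MH_SHA256_BLOCK_SIZE - 8) ? 2 : 1;
    }

For example, pad_blocks(1015) is 1 (1015 + 1 + 8 exactly fills a block), while pad_blocks(1016) is 2.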
+**********************************************************************/ + +#ifndef _MH_SHA256_INTERNAL_H_ +#define _MH_SHA256_INTERNAL_H_ + +/** + * @file mh_sha256_internal.h + * @brief mh_sha256 internal function prototypes and macros + * + * Interface for mh_sha256 internal functions + * + */ +#include <stdint.h> +#include "mh_sha256.h" +#include "endian_helper.h" + +#ifdef __cplusplus + extern "C" { +#endif + +#ifdef _MSC_VER +# define inline __inline +#endif + + // 64byte pointer align +#define ALIGN_64(pointer) ( ((uint64_t)(pointer) + 0x3F)&(~0x3F) ) + + /******************************************************************* + *mh_sha256 constants and macros + ******************************************************************/ + /* mh_sha256 constants */ +#define MH_SHA256_H0 0x6a09e667UL +#define MH_SHA256_H1 0xbb67ae85UL +#define MH_SHA256_H2 0x3c6ef372UL +#define MH_SHA256_H3 0xa54ff53aUL +#define MH_SHA256_H4 0x510e527fUL +#define MH_SHA256_H5 0x9b05688cUL +#define MH_SHA256_H6 0x1f83d9abUL +#define MH_SHA256_H7 0x5be0cd19UL + + /* mh_sha256 macros */ +#define ror32(x, r) (((x)>>(r)) ^ ((x)<<(32-(r)))) + +#define S0(w) (ror32(w,7) ^ ror32(w,18) ^ (w >> 3)) +#define S1(w) (ror32(w,17) ^ ror32(w,19) ^ (w >> 10)) + +#define s0(a) (ror32(a,2) ^ ror32(a,13) ^ ror32(a,22)) +#define s1(e) (ror32(e,6) ^ ror32(e,11) ^ ror32(e,25)) +#define maj(a,b,c) ((a & b) ^ (a & c) ^ (b & c)) +#define ch(e,f,g) ((e & f) ^ (g & ~e)) + + /******************************************************************* + * SHA256 API internal function prototypes + ******************************************************************/ + + /** + * @brief Performs complete SHA256 algorithm. + * + * @param input Pointer to buffer containing the input message. + * @param digest Pointer to digest to update. + * @param len Length of buffer. + * @returns None + */ + void sha256_for_mh_sha256(const uint8_t * input_data, uint32_t * digest, const uint32_t len); + + /** + * @brief Calculate sha256 digest of blocks which size is SHA256_BLOCK_SIZE + * + * @param data Pointer to data buffer containing the input message. + * @param digest Pointer to sha256 digest. + * @returns None + */ + void sha256_single_for_mh_sha256(const uint8_t * data, uint32_t digest[]); + + /******************************************************************* + * mh_sha256 API internal function prototypes + * Multiple versions of Update and Finalize functions are supplied which use + * multiple versions of block and tail process subfunctions. + ******************************************************************/ + + /** + * @brief Tail process for multi-hash sha256. + * + * Calculate the remainder of input data which is less than MH_SHA256_BLOCK_SIZE. + * It will output the final SHA256 digest based on mh_sha256_segs_digests. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha256_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @returns none + * + */ + void mh_sha256_tail(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha256_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha256_digest[SHA256_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha256. 
+ * + * Calculate the remainder of input data which is less than MH_SHA256_BLOCK_SIZE. + * It will output the final SHA256 digest based on mh_sha256_segs_digests. + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha256_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha256_digest mh_sha256 digest + * @returns none + * + */ + void mh_sha256_tail_base(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha256_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha256_digest[SHA256_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha256. + * + * Calculate the remainder of input data which is less than MH_SHA256_BLOCK_SIZE. + * It will output the final SHA256 digest based on mh_sha256_segs_digests. + * + * @requires SSE + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha256_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha256_digest mh_sha256 digest + * @returns none + * + */ + void mh_sha256_tail_sse(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha256_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha256_digest[SHA256_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha256. + * + * Calculate the remainder of input data which is less than MH_SHA256_BLOCK_SIZE. + * It will output the final SHA256 digest based on mh_sha256_segs_digests. + * + * @requires AVX + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha256_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha256_digest mh_sha256 digest + * @returns none + * + */ + void mh_sha256_tail_avx(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha256_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha256_digest[SHA256_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha256. + * + * Calculate the remainder of input data which is less than MH_SHA256_BLOCK_SIZE. + * It will output the final SHA256 digest based on mh_sha256_segs_digests. + * + * @requires AVX2 + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha256_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha256_digest mh_sha256 digest + * @returns none + * + */ + void mh_sha256_tail_avx2(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha256_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha256_digest[SHA256_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha256. + * + * Calculate the remainder of input data which is less than MH_SHA256_BLOCK_SIZE. + * It will output the final SHA256 digest based on mh_sha256_segs_digests. + * + * @requires AVX512 + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha256_segs_digests The digests of all 16 segments . 
+ * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha256_digest mh_sha256 digest + * @returns none + * + */ + void mh_sha256_tail_avx512(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha256_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha256_digest[SHA256_DIGEST_WORDS]); + + /** + * @brief Calculate mh_sha256 digest of blocks which size is MH_SHA256_BLOCK_SIZE*N. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha256_block(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha256 digest of blocks which size is MH_SHA256_BLOCK_SIZE*N. + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha256_block_base(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha256 digest of blocks which size is MH_SHA256_BLOCK_SIZE*N. + * + * @requires SSE + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha256_block_sse(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha256 digest of blocks which size is MH_SHA256_BLOCK_SIZE*N. + * + * @requires AVX + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha256_block_avx(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha256 digest of blocks which size is MH_SHA256_BLOCK_SIZE*N. + * + * @requires AVX2 + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha256_block_avx2(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha256 digest of blocks which size is MH_SHA256_BLOCK_SIZE*N. + * + * @requires AVX512 + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. 
+ * @returns none + * + */ + void mh_sha256_block_avx512(const uint8_t * input_data, uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_multibinary.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_multibinary.asm new file mode 100644 index 000000000..e14fc7eb1 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_multibinary.asm @@ -0,0 +1,77 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2017 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
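With the internal API above in view, here is a sketch of how the block layer is driven, modeled on mh_sha256_ref() later in this diff. It compiles only inside the isa-l_crypto tree (it uses the header above); the wrapper name is hypothetical.

    #include <stdint.h>
    #include "mh_sha256_internal.h"     /* the header above */

    /* Hypothetical wrapper: seed every segment with H0..H7, then run the
     * base block function over nblocks 1KB blocks. */
    static void run_blocks(const uint8_t *data, uint32_t nblocks,
                           uint32_t digests[SHA256_DIGEST_WORDS][HASH_SEGS])
    {
            uint8_t frame[MH_SHA256_BLOCK_SIZE];    /* scratch for the message schedule */
            uint32_t i;

            for (i = 0; i < HASH_SEGS; i++) {       /* every segment starts from H0..H7 */
                    digests[0][i] = MH_SHA256_H0;
                    digests[1][i] = MH_SHA256_H1;
                    digests[2][i] = MH_SHA256_H2;
                    digests[3][i] = MH_SHA256_H3;
                    digests[4][i] = MH_SHA256_H4;
                    digests[5][i] = MH_SHA256_H5;
                    digests[6][i] = MH_SHA256_H6;
                    digests[7][i] = MH_SHA256_H7;
            }
            mh_sha256_block_base(data, digests, frame, nblocks);
    }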
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +%include "reg_sizes.asm" +%include "multibinary.asm" + +%ifidn __OUTPUT_FORMAT__, elf32 + [bits 32] +%else + default rel + [bits 64] + + extern mh_sha256_update_sse + extern mh_sha256_update_avx + extern mh_sha256_update_avx2 + extern mh_sha256_finalize_sse + extern mh_sha256_finalize_avx + extern mh_sha256_finalize_avx2 + + %ifdef HAVE_AS_KNOWS_AVX512 + extern mh_sha256_update_avx512 + extern mh_sha256_finalize_avx512 + %endif + +%endif + +extern mh_sha256_update_base +extern mh_sha256_finalize_base + +mbin_interface mh_sha256_update +mbin_interface mh_sha256_finalize + +%ifidn __OUTPUT_FORMAT__, elf64 + + %ifdef HAVE_AS_KNOWS_AVX512 + mbin_dispatch_init6 mh_sha256_update, mh_sha256_update_base, mh_sha256_update_sse, mh_sha256_update_avx, mh_sha256_update_avx2, mh_sha256_update_avx512 + mbin_dispatch_init6 mh_sha256_finalize, mh_sha256_finalize_base, mh_sha256_finalize_sse, mh_sha256_finalize_avx, mh_sha256_finalize_avx2, mh_sha256_finalize_avx512 + %else + mbin_dispatch_init5 mh_sha256_update, mh_sha256_update_base, mh_sha256_update_sse, mh_sha256_update_avx, mh_sha256_update_avx2 + mbin_dispatch_init5 mh_sha256_finalize, mh_sha256_finalize_base, mh_sha256_finalize_sse, mh_sha256_finalize_avx, mh_sha256_finalize_avx2 + %endif + +%else + mbin_dispatch_init2 mh_sha256_update, mh_sha256_update_base + mbin_dispatch_init2 mh_sha256_finalize, mh_sha256_finalize_base +%endif + +;;; func core, ver, snum +slversion mh_sha256_update, 00, 00, 02b2 +slversion mh_sha256_finalize, 00, 00, 02b3 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_perf.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_perf.c new file mode 100644 index 000000000..8095e4f05 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_perf.c @@ -0,0 +1,180 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
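The multibinary file above wires mh_sha256_update and mh_sha256_finalize to a dispatcher: the first call probes the CPU and patches the entry point to the best available implementation (base/SSE/AVX/AVX2, plus AVX512 when the assembler supports it). A C analogue of that resolve-on-first-call pattern, purely illustrative (the real mbin_* macros are assembly and the probe below is a stand-in):

    #include <stdint.h>
    #include <stdio.h>

    typedef int (*update_fn)(const void *buf, uint32_t len);

    static int update_base(const void *b, uint32_t l) { (void)b; printf("base %u\n", l); return 0; }
    static int update_avx2(const void *b, uint32_t l) { (void)b; printf("avx2 %u\n", l); return 0; }
    static int cpu_has_avx2(void) { return 0; }     /* stand-in for the CPUID probe */

    static int update_resolve(const void *b, uint32_t l);
    static update_fn update = update_resolve;       /* first call lands here */

    static int update_resolve(const void *b, uint32_t l)
    {
            update = cpu_has_avx2() ? update_avx2 : update_base;  /* pick once */
            return update(b, l);                                  /* forward to the real impl */
    }

    int main(void) { update(0, 16); update(0, 32); return 0; }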
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha256.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Loop many times over the same data
+#  define TEST_LEN	16*1024
+#  define TEST_LOOPS	20000
+#  define TEST_TYPE_STR	"_warm"
+#else
+// Uncached test. Pull from large mem base.
+#  define TEST_LEN	16*1024*1024
+#  define TEST_LOOPS	100
+#  define TEST_TYPE_STR	"_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+#define TEST_MEM TEST_LEN
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA256_FUNC_TYPE
+#define MH_SHA256_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha256_update, MH_SHA256_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha256_finalize, MH_SHA256_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+	if((state) != MH_SHA256_CTX_ERROR_NONE){ \
+		printf("The mh_sha256 function failed.\n"); \
+		return 1; \
+	} \
+	}while(0)
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+	long i;
+	for (i = 0; i < buffer_size; i++)
+		buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+	int i;
+	for (i = 0; i < len;) {
+		printf(" %2x", 0xff & buf[i++]);
+		if (i % 32 == 0)
+			printf("\n");
+	}
+	if (i % 32 != 0)
+		printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA256_DIGEST_WORDS],
+		    uint32_t hash_test[SHA256_DIGEST_WORDS])
+{
+	int i;
+	int mh_sha256_fail = 0;
+
+	for (i = 0; i < SHA256_DIGEST_WORDS; i++) {
+		if (hash_test[i] != hash_base[i])
+			mh_sha256_fail++;
+	}
+
+	if (mh_sha256_fail) {
+		printf("mh_sha256 fail test\n");
+		printf("base: ");
+		dump((char *)hash_base, 32);
+		printf("test: ");
+		dump((char *)hash_test, 32);
+	}
+
+	return mh_sha256_fail;
+}
+
+int main(int argc, char *argv[])
+{
+	int i, fail = 0;
+	uint32_t hash_test[SHA256_DIGEST_WORDS], hash_base[SHA256_DIGEST_WORDS];
+	uint8_t *buff = NULL;
+	struct mh_sha256_ctx *update_ctx_test = NULL, *update_ctx_base = NULL;
+	struct perf start, stop;
+
+	printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n");
+
+	buff = malloc(TEST_LEN);
+	update_ctx_test = malloc(sizeof(*update_ctx_test));
+	update_ctx_base = malloc(sizeof(*update_ctx_base));
+
+	if (buff == NULL || update_ctx_base == NULL || update_ctx_test == NULL) {
+		printf("malloc failed, test aborted\n");
+		return -1;
+	}
+	// Random test data
+	rand_buffer(buff, TEST_LEN);
+
+	// mh_sha256 base version
+	mh_sha256_init(update_ctx_base);
+	mh_sha256_update_base(update_ctx_base, buff, TEST_LEN);
+	mh_sha256_finalize_base(update_ctx_base, hash_base);
+
+	perf_start(&start);
+	for (i = 0; i < TEST_LOOPS / 10; i++) {
+		mh_sha256_init(update_ctx_base);
+		mh_sha256_update_base(update_ctx_base, buff, TEST_LEN);
+		mh_sha256_finalize_base(update_ctx_base, hash_base);
+	}
+	perf_stop(&stop);
+	printf("mh_sha256_update_base" TEST_TYPE_STR ": ");
+	perf_print(stop, start, (long long)TEST_MEM * i);
+
+	// Update feature test
+	CHECK_RETURN(mh_sha256_init(update_ctx_test));
+	CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN));
+	CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test));
+
+	perf_start(&start);
+	for (i = 0; i < TEST_LOOPS; i++) {
+		CHECK_RETURN(mh_sha256_init(update_ctx_test));
+		CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN));
+		CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test));
+	}
+	perf_stop(&stop);
+	printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": ");
+	perf_print(stop, start, (long long)TEST_MEM * i);
+
+	// Check results
+	fail = compare_digests(hash_base, hash_test);
+
+	if (fail) {
+		printf("Fail size=%d\n", TEST_LEN);
+		return -1;
+	}
+
+	printf("Pass func check\n");
+
+	return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_ref.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_ref.c
new file mode 100644
index 000000000..2aaefecb0
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_ref.c
@@ -0,0 +1,410 @@
+/**********************************************************************
+  Copyright(c) 2011-2017 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha256_internal.h"
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+	// The macros and sub-functions below already exist in the source
+	// file sha256_for_mh_sha256.c, which is part of the ISA-L library
+	// as internal functions. They are duplicated here because
+	// mh_sha256_ref() must work without linking against the ISA-L
+	// library, so this file keeps its own copies of the essential
+	// sub-functions in its own object file.
+//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +#define W(x) w[(x) & 15] + +#define step(i,a,b,c,d,e,f,g,h,k) \ + if (i<16) W(i) = to_be32(ww[i]); \ + else \ + W(i) = W(i-16) + S0(W(i-15)) + W(i-7) + S1(W(i-2)); \ + t2 = s0(a) + maj(a,b,c); \ + t1 = h + s1(e) + ch(e,f,g) + k + W(i); \ + d += t1; \ + h = t1 + t2; + +void sha256_single_for_mh_sha256_ref(const uint8_t * data, uint32_t digest[]) +{ + uint32_t a, b, c, d, e, f, g, h, t1, t2; + uint32_t w[16]; + uint32_t *ww = (uint32_t *) data; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + f = digest[5]; + g = digest[6]; + h = digest[7]; + + step(0, a, b, c, d, e, f, g, h, 0x428a2f98); + step(1, h, a, b, c, d, e, f, g, 0x71374491); + step(2, g, h, a, b, c, d, e, f, 0xb5c0fbcf); + step(3, f, g, h, a, b, c, d, e, 0xe9b5dba5); + step(4, e, f, g, h, a, b, c, d, 0x3956c25b); + step(5, d, e, f, g, h, a, b, c, 0x59f111f1); + step(6, c, d, e, f, g, h, a, b, 0x923f82a4); + step(7, b, c, d, e, f, g, h, a, 0xab1c5ed5); + step(8, a, b, c, d, e, f, g, h, 0xd807aa98); + step(9, h, a, b, c, d, e, f, g, 0x12835b01); + step(10, g, h, a, b, c, d, e, f, 0x243185be); + step(11, f, g, h, a, b, c, d, e, 0x550c7dc3); + step(12, e, f, g, h, a, b, c, d, 0x72be5d74); + step(13, d, e, f, g, h, a, b, c, 0x80deb1fe); + step(14, c, d, e, f, g, h, a, b, 0x9bdc06a7); + step(15, b, c, d, e, f, g, h, a, 0xc19bf174); + step(16, a, b, c, d, e, f, g, h, 0xe49b69c1); + step(17, h, a, b, c, d, e, f, g, 0xefbe4786); + step(18, g, h, a, b, c, d, e, f, 0x0fc19dc6); + step(19, f, g, h, a, b, c, d, e, 0x240ca1cc); + step(20, e, f, g, h, a, b, c, d, 0x2de92c6f); + step(21, d, e, f, g, h, a, b, c, 0x4a7484aa); + step(22, c, d, e, f, g, h, a, b, 0x5cb0a9dc); + step(23, b, c, d, e, f, g, h, a, 0x76f988da); + step(24, a, b, c, d, e, f, g, h, 0x983e5152); + step(25, h, a, b, c, d, e, f, g, 0xa831c66d); + step(26, g, h, a, b, c, d, e, f, 0xb00327c8); + step(27, f, g, h, a, b, c, d, e, 0xbf597fc7); + step(28, e, f, g, h, a, b, c, d, 0xc6e00bf3); + step(29, d, e, f, g, h, a, b, c, 0xd5a79147); + step(30, c, d, e, f, g, h, a, b, 0x06ca6351); + step(31, b, c, d, e, f, g, h, a, 0x14292967); + step(32, a, b, c, d, e, f, g, h, 0x27b70a85); + step(33, h, a, b, c, d, e, f, g, 0x2e1b2138); + step(34, g, h, a, b, c, d, e, f, 0x4d2c6dfc); + step(35, f, g, h, a, b, c, d, e, 0x53380d13); + step(36, e, f, g, h, a, b, c, d, 0x650a7354); + step(37, d, e, f, g, h, a, b, c, 0x766a0abb); + step(38, c, d, e, f, g, h, a, b, 0x81c2c92e); + step(39, b, c, d, e, f, g, h, a, 0x92722c85); + step(40, a, b, c, d, e, f, g, h, 0xa2bfe8a1); + step(41, h, a, b, c, d, e, f, g, 0xa81a664b); + step(42, g, h, a, b, c, d, e, f, 0xc24b8b70); + step(43, f, g, h, a, b, c, d, e, 0xc76c51a3); + step(44, e, f, g, h, a, b, c, d, 0xd192e819); + step(45, d, e, f, g, h, a, b, c, 0xd6990624); + step(46, c, d, e, f, g, h, a, b, 0xf40e3585); + step(47, b, c, d, e, f, g, h, a, 0x106aa070); + step(48, a, b, c, d, e, f, g, h, 0x19a4c116); + step(49, h, a, b, c, d, e, f, g, 0x1e376c08); + step(50, g, h, a, b, c, d, e, f, 0x2748774c); + step(51, f, g, h, a, b, c, d, e, 0x34b0bcb5); + step(52, e, f, g, h, a, b, c, d, 0x391c0cb3); + step(53, d, e, f, g, h, a, b, c, 0x4ed8aa4a); + step(54, c, d, e, f, g, h, a, b, 0x5b9cca4f); + step(55, b, c, d, e, f, g, h, a, 0x682e6ff3); + step(56, a, b, c, d, e, f, g, h, 0x748f82ee); + step(57, h, a, b, c, d, e, f, g, 0x78a5636f); + step(58, g, h, a, b, c, d, e, f, 
0x84c87814); + step(59, f, g, h, a, b, c, d, e, 0x8cc70208); + step(60, e, f, g, h, a, b, c, d, 0x90befffa); + step(61, d, e, f, g, h, a, b, c, 0xa4506ceb); + step(62, c, d, e, f, g, h, a, b, 0xbef9a3f7); + step(63, b, c, d, e, f, g, h, a, 0xc67178f2); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; + digest[5] += f; + digest[6] += g; + digest[7] += h; +} + +void sha256_for_mh_sha256_ref(const uint8_t * input_data, uint32_t * digest, + const uint32_t len) +{ + uint32_t i, j; + uint8_t buf[2 * SHA256_BLOCK_SIZE]; + + digest[0] = MH_SHA256_H0; + digest[1] = MH_SHA256_H1; + digest[2] = MH_SHA256_H2; + digest[3] = MH_SHA256_H3; + digest[4] = MH_SHA256_H4; + digest[5] = MH_SHA256_H5; + digest[6] = MH_SHA256_H6; + digest[7] = MH_SHA256_H7; + + i = len; + while (i >= SHA256_BLOCK_SIZE) { + sha256_single_for_mh_sha256_ref(input_data, digest); + input_data += SHA256_BLOCK_SIZE; + i -= SHA256_BLOCK_SIZE; + } + + memcpy(buf, input_data, i); + buf[i++] = 0x80; + for (j = i; j < ((2 * SHA256_BLOCK_SIZE) - 8); j++) + buf[j] = 0; + + if (i > SHA256_BLOCK_SIZE - 8) + i = 2 * SHA256_BLOCK_SIZE; + else + i = SHA256_BLOCK_SIZE; + + *(uint64_t *) (buf + i - 8) = to_be64((uint64_t) len * 8); + + sha256_single_for_mh_sha256_ref(buf, digest); + if (i == (2 * SHA256_BLOCK_SIZE)) + sha256_single_for_mh_sha256_ref(buf + SHA256_BLOCK_SIZE, digest); +} + +/* + * buffer to rearrange one segment data from one block. + * + * Layout of new_data: + * segment + * ------------------------- + * w0 | w1 | ... | w15 + * + */ +static inline void transform_input_single(uint32_t * new_data, uint32_t * input, + uint32_t segment) +{ + new_data[16 * segment + 0] = input[16 * 0 + segment]; + new_data[16 * segment + 1] = input[16 * 1 + segment]; + new_data[16 * segment + 2] = input[16 * 2 + segment]; + new_data[16 * segment + 3] = input[16 * 3 + segment]; + new_data[16 * segment + 4] = input[16 * 4 + segment]; + new_data[16 * segment + 5] = input[16 * 5 + segment]; + new_data[16 * segment + 6] = input[16 * 6 + segment]; + new_data[16 * segment + 7] = input[16 * 7 + segment]; + new_data[16 * segment + 8] = input[16 * 8 + segment]; + new_data[16 * segment + 9] = input[16 * 9 + segment]; + new_data[16 * segment + 10] = input[16 * 10 + segment]; + new_data[16 * segment + 11] = input[16 * 11 + segment]; + new_data[16 * segment + 12] = input[16 * 12 + segment]; + new_data[16 * segment + 13] = input[16 * 13 + segment]; + new_data[16 * segment + 14] = input[16 * 14 + segment]; + new_data[16 * segment + 15] = input[16 * 15 + segment]; +} + +// Adapt parameters to sha256_single_for_mh_sha256_ref +#define sha256_update_one_seg(data, digest) \ + sha256_single_for_mh_sha256_ref((const uint8_t *)(data), (uint32_t *)(digest)) + +/* + * buffer to Rearrange all segments data from one block. + * + * Layout of new_data: + * segment + * ------------------------- + * seg0: | w0 | w1 | ... | w15 + * seg1: | w0 | w1 | ... | w15 + * seg2: | w0 | w1 | ... | w15 + * .... + * seg15: | w0 | w1 | ... 
| w15
+ *
+ */
+static inline void transform_input(uint32_t * new_data, uint32_t * input, uint32_t block)
+{
+	uint32_t *current_input = input + block * MH_SHA256_BLOCK_SIZE / 4;
+
+	transform_input_single(new_data, current_input, 0);
+	transform_input_single(new_data, current_input, 1);
+	transform_input_single(new_data, current_input, 2);
+	transform_input_single(new_data, current_input, 3);
+	transform_input_single(new_data, current_input, 4);
+	transform_input_single(new_data, current_input, 5);
+	transform_input_single(new_data, current_input, 6);
+	transform_input_single(new_data, current_input, 7);
+	transform_input_single(new_data, current_input, 8);
+	transform_input_single(new_data, current_input, 9);
+	transform_input_single(new_data, current_input, 10);
+	transform_input_single(new_data, current_input, 11);
+	transform_input_single(new_data, current_input, 12);
+	transform_input_single(new_data, current_input, 13);
+	transform_input_single(new_data, current_input, 14);
+	transform_input_single(new_data, current_input, 15);
+
+}
+
+/*
+ * Calculate all segments' digests from one transformed block.
+ *
+ * Layout of seg_digest:
+ *  segment
+ *  -------------------------
+ *   seg0:  | H0 | H1 | ... | H7
+ *   seg1:  | H0 | H1 | ... | H7
+ *   seg2:  | H0 | H1 | ... | H7
+ *   ....
+ *   seg15: | H0 | H1 | ... | H7
+ *
+ */
+static inline void sha256_update_all_segs(uint32_t * new_data, uint32_t(*mh_sha256_seg_digests)
+					  [SHA256_DIGEST_WORDS])
+{
+	sha256_update_one_seg(&(new_data)[16 * 0], mh_sha256_seg_digests[0]);
+	sha256_update_one_seg(&(new_data)[16 * 1], mh_sha256_seg_digests[1]);
+	sha256_update_one_seg(&(new_data)[16 * 2], mh_sha256_seg_digests[2]);
+	sha256_update_one_seg(&(new_data)[16 * 3], mh_sha256_seg_digests[3]);
+	sha256_update_one_seg(&(new_data)[16 * 4], mh_sha256_seg_digests[4]);
+	sha256_update_one_seg(&(new_data)[16 * 5], mh_sha256_seg_digests[5]);
+	sha256_update_one_seg(&(new_data)[16 * 6], mh_sha256_seg_digests[6]);
+	sha256_update_one_seg(&(new_data)[16 * 7], mh_sha256_seg_digests[7]);
+	sha256_update_one_seg(&(new_data)[16 * 8], mh_sha256_seg_digests[8]);
+	sha256_update_one_seg(&(new_data)[16 * 9], mh_sha256_seg_digests[9]);
+	sha256_update_one_seg(&(new_data)[16 * 10], mh_sha256_seg_digests[10]);
+	sha256_update_one_seg(&(new_data)[16 * 11], mh_sha256_seg_digests[11]);
+	sha256_update_one_seg(&(new_data)[16 * 12], mh_sha256_seg_digests[12]);
+	sha256_update_one_seg(&(new_data)[16 * 13], mh_sha256_seg_digests[13]);
+	sha256_update_one_seg(&(new_data)[16 * 14], mh_sha256_seg_digests[14]);
+	sha256_update_one_seg(&(new_data)[16 * 15], mh_sha256_seg_digests[15]);
+}
+
+void mh_sha256_block_ref(const uint8_t * input_data, uint32_t(*digests)[HASH_SEGS],
+			 uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks)
+{
+	uint32_t i, j;
+	uint32_t *temp_buffer = (uint32_t *) frame_buffer;
+	uint32_t(*trans_digests)[SHA256_DIGEST_WORDS];
+
+	trans_digests = (uint32_t(*)[SHA256_DIGEST_WORDS]) digests;
+
+	// Re-structure seg_digests from 8*16 to 16*8
+	for (j = 0; j < HASH_SEGS; j++) {
+		for (i = 0; i < SHA256_DIGEST_WORDS; i++) {
+			temp_buffer[j * SHA256_DIGEST_WORDS + i] = digests[i][j];
+		}
+	}
+	memcpy(trans_digests, temp_buffer, 4 * SHA256_DIGEST_WORDS * HASH_SEGS);
+
+	// Calculate digests for all segments, leveraging the sha256 API
+	for (i = 0; i < num_blocks; i++) {
+		transform_input(temp_buffer, (uint32_t *) input_data, i);
+		sha256_update_all_segs(temp_buffer, trans_digests);
+	}
+
+	// Re-structure seg_digests from 16*8 back to 8*16
+	for (j = 0; j < HASH_SEGS; j++) {
+void mh_sha256_block_ref(const uint8_t * input_data, uint32_t(*digests)[HASH_SEGS],
+			 uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE], uint32_t num_blocks)
+{
+	uint32_t i, j;
+	uint32_t *temp_buffer = (uint32_t *) frame_buffer;
+	uint32_t(*trans_digests)[SHA256_DIGEST_WORDS];
+
+	trans_digests = (uint32_t(*)[SHA256_DIGEST_WORDS]) digests;
+
+	// Re-structure seg_digests from 8*16 to 16*8
+	for (j = 0; j < HASH_SEGS; j++) {
+		for (i = 0; i < SHA256_DIGEST_WORDS; i++) {
+			temp_buffer[j * SHA256_DIGEST_WORDS + i] = digests[i][j];
+		}
+	}
+	memcpy(trans_digests, temp_buffer, 4 * SHA256_DIGEST_WORDS * HASH_SEGS);
+
+	// Calculate digests for all segments, leveraging the sha256 API
+	for (i = 0; i < num_blocks; i++) {
+		transform_input(temp_buffer, (uint32_t *) input_data, i);
+		sha256_update_all_segs(temp_buffer, trans_digests);
+	}
+
+	// Re-structure seg_digests from 16*8 back to 8*16
+	for (j = 0; j < HASH_SEGS; j++) {
+		for (i = 0; i < SHA256_DIGEST_WORDS; i++) {
+			temp_buffer[i * HASH_SEGS + j] = trans_digests[j][i];
+		}
+	}
+	memcpy(digests, temp_buffer, 4 * SHA256_DIGEST_WORDS * HASH_SEGS);
+
+	return;
+}
+
+void mh_sha256_tail_ref(uint8_t * partial_buffer, uint32_t total_len,
+			uint32_t(*mh_sha256_segs_digests)[HASH_SEGS], uint8_t * frame_buffer,
+			uint32_t digests[SHA256_DIGEST_WORDS])
+{
+	uint64_t partial_buffer_len, len_in_bit;
+
+	partial_buffer_len = total_len % MH_SHA256_BLOCK_SIZE;
+
+	// Pad the first block
+	partial_buffer[partial_buffer_len] = 0x80;
+	partial_buffer_len++;
+	memset(partial_buffer + partial_buffer_len, 0,
+	       MH_SHA256_BLOCK_SIZE - partial_buffer_len);
+
+	// Process the first block without total_length if the padding needs 2 blocks
+	if (partial_buffer_len > (MH_SHA256_BLOCK_SIZE - 8)) {
+		mh_sha256_block_ref(partial_buffer, mh_sha256_segs_digests, frame_buffer, 1);
+		// Pad the second block
+		memset(partial_buffer, 0, MH_SHA256_BLOCK_SIZE);
+	}
+	// Append the message length in bits and process the final block
+	len_in_bit = to_be64((uint64_t) total_len * 8);
+	*(uint64_t *) (partial_buffer + MH_SHA256_BLOCK_SIZE - 8) = len_in_bit;
+	mh_sha256_block_ref(partial_buffer, mh_sha256_segs_digests, frame_buffer, 1);
+
+	// Calculate multi-hash SHA256 digests (segment digests as input message)
+	sha256_for_mh_sha256_ref((uint8_t *) mh_sha256_segs_digests, digests,
+				 4 * SHA256_DIGEST_WORDS * HASH_SEGS);
+
+	return;
+}
+
+void mh_sha256_ref(const void *buffer, uint32_t len, uint32_t * mh_sha256_digest)
+{
+	uint64_t total_len;
+	uint64_t num_blocks;
+	uint32_t mh_sha256_segs_digests[SHA256_DIGEST_WORDS][HASH_SEGS];
+	uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE];
+	uint8_t partial_block_buffer[MH_SHA256_BLOCK_SIZE * 2];
+	uint32_t mh_sha256_hash_dword[SHA256_DIGEST_WORDS];
+	uint32_t i;
+	const uint8_t *input_data = (const uint8_t *)buffer;
+
+	/* Initialize digests of all segments */
+	for (i = 0; i < HASH_SEGS; i++) {
+		mh_sha256_segs_digests[0][i] = MH_SHA256_H0;
+		mh_sha256_segs_digests[1][i] = MH_SHA256_H1;
+		mh_sha256_segs_digests[2][i] = MH_SHA256_H2;
+		mh_sha256_segs_digests[3][i] = MH_SHA256_H3;
+		mh_sha256_segs_digests[4][i] = MH_SHA256_H4;
+		mh_sha256_segs_digests[5][i] = MH_SHA256_H5;
+		mh_sha256_segs_digests[6][i] = MH_SHA256_H6;
+		mh_sha256_segs_digests[7][i] = MH_SHA256_H7;
+	}
+
+	total_len = len;
+
+	// Calculate the number of whole blocks
+	num_blocks = len / MH_SHA256_BLOCK_SIZE;
+	if (num_blocks > 0) {
+		// Process num_blocks blocks
+		mh_sha256_block_ref(input_data, mh_sha256_segs_digests, frame_buffer,
+				    num_blocks);
+		len -= num_blocks * MH_SHA256_BLOCK_SIZE;
+		input_data += num_blocks * MH_SHA256_BLOCK_SIZE;
+	}
+	// Store the partial block
+	if (len != 0) {
+		memcpy(partial_block_buffer, input_data, len);
+	}
+
+	/* Finalize */
+	mh_sha256_tail_ref(partial_block_buffer, total_len, mh_sha256_segs_digests,
+			   frame_buffer, mh_sha256_hash_dword);
+
+	// Output the digests of mh_sha256
+	if (mh_sha256_digest != NULL) {
+		mh_sha256_digest[0] = mh_sha256_hash_dword[0];
+		mh_sha256_digest[1] = mh_sha256_hash_dword[1];
+		mh_sha256_digest[2] = mh_sha256_hash_dword[2];
+		mh_sha256_digest[3] = mh_sha256_hash_dword[3];
+		mh_sha256_digest[4] = mh_sha256_hash_dword[4];
+		mh_sha256_digest[5] = mh_sha256_hash_dword[5];
+		mh_sha256_digest[6] = mh_sha256_hash_dword[6];
+		mh_sha256_digest[7] = mh_sha256_hash_dword[7];
+	}
+
+	return;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_test.c
new file mode 100644
index 000000000..13ab91c16
--- /dev/null
+++
b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_test.c @@ -0,0 +1,217 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha256.h" + +#define TEST_LEN 16*1024 +#define TEST_SIZE 8*1024 +#define TEST_MEM TEST_LEN +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA256_FUNC_TYPE +#define MH_SHA256_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha256_update, MH_SHA256_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha256_finalize, MH_SHA256_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA256_CTX_ERROR_NONE){ \ + printf("The mh_sha256 function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha256_ref(const void *buffer, uint32_t len, uint32_t * mh_sha256_digest); +#define MH_SHA256_REF mh_sha256_ref + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 32 == 0) + printf("\n"); + } + if (i % 32 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_ref[SHA256_DIGEST_WORDS], + uint32_t hash_test[SHA256_DIGEST_WORDS]) +{ + int i; + int mh_sha256_fail = 0; + + for (i = 0; i < SHA256_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_ref[i]) + mh_sha256_fail++; + } + + if (mh_sha256_fail) { + printf("mh_sha256 fail test\n"); + printf("ref: "); + dump((char *)hash_ref, 32); + printf("test: "); + dump((char *)hash_test, 32); + } + + return mh_sha256_fail; +} + +int main(int argc, char *argv[]) +{ + int fail = 0; + uint32_t hash_test[SHA256_DIGEST_WORDS], hash_ref[SHA256_DIGEST_WORDS]; + uint8_t 
*buff = NULL; + int size, offset; + struct mh_sha256_ctx *update_ctx = NULL; + + printf(xstr(TEST_UPDATE_FUNCTION) "_test:\n"); + + srand(TEST_SEED); + + buff = malloc(TEST_LEN); + update_ctx = malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + MH_SHA256_REF(buff, TEST_LEN, hash_ref); + CHECK_RETURN(mh_sha256_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("fail rand1 test\n"); + return -1; + } else + putchar('.'); + + // Test various size messages + for (size = TEST_LEN; size >= 0; size--) { + + // Fill with rand data + rand_buffer(buff, size); + + MH_SHA256_REF(buff, size, hash_ref); + CHECK_RETURN(mh_sha256_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size=%d\n", size); + return -1; + } + + if ((size & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // Test various buffer offsets and sizes + printf("offset tests"); + for (size = TEST_LEN - 256; size > 256; size -= 11) { + for (offset = 0; offset < 256; offset++) { + MH_SHA256_REF(buff + offset, size, hash_ref); + + CHECK_RETURN(mh_sha256_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size=%d\n", size); + return -1; + } + + } + if ((size & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + // Run efence tests + printf("efence tests"); + for (size = TEST_SIZE; size > 0; size--) { + offset = TEST_LEN - size; + + MH_SHA256_REF(buff + offset, size, hash_ref); + + CHECK_RETURN(mh_sha256_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size=%d\n", size); + return -1; + } + + if ((size & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + printf(xstr(TEST_UPDATE_FUNCTION) "_test:"); + printf(" %s\n", fail == 0 ? "Pass" : "Fail"); + + return fail; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_update_base.c new file mode 100644 index 000000000..024ae2b91 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_update_base.c @@ -0,0 +1,110 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/*
+ * mh_sha256_update_base.c contains the prototype of mh_sha256_update_XXX.
+ * The default definitions are for the base type and generate
+ * mh_sha256_update_base. Other types are generated by mh_sha256.c through
+ * different predefined macros.
+ */
+#ifndef MH_SHA256_UPDATE_FUNCTION
+#include "mh_sha256_internal.h"
+#include <string.h>
+
+#define MH_SHA256_UPDATE_FUNCTION mh_sha256_update_base
+#define MH_SHA256_BLOCK_FUNCTION mh_sha256_block_base
+#define MH_SHA256_UPDATE_SLVER
+#endif
+
+int MH_SHA256_UPDATE_FUNCTION(struct mh_sha256_ctx *ctx, const void *buffer, uint32_t len)
+{
+
+	uint8_t *partial_block_buffer;
+	uint64_t partial_block_len;
+	uint64_t num_blocks;
+	uint32_t(*mh_sha256_segs_digests)[HASH_SEGS];
+	uint8_t *aligned_frame_buffer;
+	const uint8_t *input_data = (const uint8_t *)buffer;
+
+	if (ctx == NULL)
+		return MH_SHA256_CTX_ERROR_NULL;
+
+	if (len == 0)
+		return MH_SHA256_CTX_ERROR_NONE;
+
+	partial_block_len = ctx->total_length % MH_SHA256_BLOCK_SIZE;
+	partial_block_buffer = ctx->partial_block_buffer;
+	aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+	mh_sha256_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha256_interim_digests;
+
+	ctx->total_length += len;
+	// Not enough input data for a full block; just buffer it
+	if (len + partial_block_len < MH_SHA256_BLOCK_SIZE) {
+		memcpy(partial_block_buffer + partial_block_len, input_data, len);
+		return MH_SHA256_CTX_ERROR_NONE;
+	}
+	// mh_sha256 calculation for the previous partial block
+	if (partial_block_len != 0) {
+		memcpy(partial_block_buffer + partial_block_len, input_data,
+		       MH_SHA256_BLOCK_SIZE - partial_block_len);
+		// Process the completed block
+		MH_SHA256_BLOCK_FUNCTION(partial_block_buffer, mh_sha256_segs_digests,
+					 aligned_frame_buffer, 1);
+		input_data += MH_SHA256_BLOCK_SIZE - partial_block_len;
+		len -= MH_SHA256_BLOCK_SIZE - partial_block_len;
+		memset(partial_block_buffer, 0, MH_SHA256_BLOCK_SIZE);
+	}
+	// Calculate mh_sha256 for the current whole blocks
+	num_blocks = len / MH_SHA256_BLOCK_SIZE;
+	if (num_blocks > 0) {
+		// Process num_blocks blocks
+		MH_SHA256_BLOCK_FUNCTION(input_data, mh_sha256_segs_digests,
+					 aligned_frame_buffer, num_blocks);
+		len -= num_blocks * MH_SHA256_BLOCK_SIZE;
+		input_data += num_blocks * MH_SHA256_BLOCK_SIZE;
+	}
+	// Store the partial block
+	if (len != 0) {
+		memcpy(partial_block_buffer, input_data, len);
+	}
+
+	return MH_SHA256_CTX_ERROR_NONE;
+
+}
+
+#ifdef MH_SHA256_UPDATE_SLVER
+struct slver {
+	uint16_t snum;
+	uint8_t ver;
+	uint8_t core;
+};
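For context, the update routine above does the standard streaming-hash bookkeeping: first complete any partial block carried in ctx->partial_block_buffer, then run all whole blocks through the block function in one call, and finally stash the new tail for the next update. A minimal sketch of driving this API through the public entry points declared in include/mh_sha256.h (illustrative only, not part of the upstream file):

#include "mh_sha256.h"

/* Sketch: hash a message fed in two arbitrary chunks; any partial
 * block is carried inside the ctx between the two update calls. */
static int hash_in_two_chunks(const uint8_t * msg, uint32_t len,
			      uint32_t digest[SHA256_DIGEST_WORDS])
{
	struct mh_sha256_ctx ctx;
	uint32_t half = len / 2;

	if (mh_sha256_init(&ctx) != MH_SHA256_CTX_ERROR_NONE)
		return -1;
	if (mh_sha256_update(&ctx, msg, half) != MH_SHA256_CTX_ERROR_NONE)
		return -1;
	if (mh_sha256_update(&ctx, msg + half, len - half) != MH_SHA256_CTX_ERROR_NONE)
		return -1;
	return mh_sha256_finalize(&ctx, digest);
}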
+ +// Version info +struct slver mh_sha256_update_base_slver_000002ba; +struct slver mh_sha256_update_base_slver = { 0x02ba, 0x00, 0x00 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_update_test.c new file mode 100644 index 000000000..f5b28bba7 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/mh_sha256_update_test.c @@ -0,0 +1,240 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha256.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA256_FUNC_TYPE
+#define MH_SHA256_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha256_update, MH_SHA256_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha256_finalize, MH_SHA256_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+	if((state) != MH_SHA256_CTX_ERROR_NONE){ \
+		printf("The mh_sha256 function failed.\n"); \
+		return 1; \
+	} \
+	}while(0)
+
+extern void mh_sha256_ref(const void *buffer, uint32_t len, uint32_t * mh_sha256_digest);
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+	long i;
+	for (i = 0; i < buffer_size; i++)
+		buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+	int i;
+	for (i = 0; i < len;) {
+		printf(" %2x", 0xff & buf[i++]);
+		if (i % 32 == 0)
+			printf("\n");
+	}
+	if (i % 32 != 0)
+		printf("\n");
+}
+
+int compare_digests(uint32_t hash_ref[SHA256_DIGEST_WORDS],
+		    uint32_t hash_test[SHA256_DIGEST_WORDS])
+{
+	int i;
+	int mh_sha256_fail = 0;
+
+	for (i = 0; i < SHA256_DIGEST_WORDS; i++) {
+		if (hash_test[i] != hash_ref[i])
+			mh_sha256_fail++;
+	}
+
+	if (mh_sha256_fail) {
+		printf("mh_sha256 fail test\n");
+		printf("ref: ");
+		dump((char *)hash_ref, 32);	// SHA256 digests are 32 bytes
+		printf("test: ");
+		dump((char *)hash_test, 32);
+	}
+
+	return mh_sha256_fail;
+}
+
+int main(int argc, char *argv[])
+{
+	int fail = 0, i;
+	uint32_t hash_test[SHA256_DIGEST_WORDS], hash_ref[SHA256_DIGEST_WORDS];
+	uint8_t *buff = NULL;
+	int update_count;
+	int size1, size2, offset, addr_offset;
+	struct mh_sha256_ctx *update_ctx = NULL;
+	uint8_t *mem_addr = NULL;
+
+	printf(xstr(TEST_UPDATE_FUNCTION) "_test:");
+
+	srand(TEST_SEED);
+
+	buff = malloc(TEST_LEN);
+	update_ctx = malloc(sizeof(*update_ctx));
+
+	if (buff == NULL || update_ctx == NULL) {
+		printf("malloc failed, test aborted\n");
+		return -1;
+	}
+	// Rand test1
+	rand_buffer(buff, TEST_LEN);
+
+	mh_sha256_ref(buff, TEST_LEN, hash_ref);
+
+	CHECK_RETURN(mh_sha256_init(update_ctx));
+	CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+	CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+	fail = compare_digests(hash_ref, hash_test);
+
+	if (fail) {
+		printf("fail rand1 test\n");
+		return -1;
+	} else
+		putchar('.');
+
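Worth noting before the next loop (an editorial gloss, not part of the upstream file): it verifies the partial-block carry logic exhaustively by splitting the same TEST_LEN message at every boundary size1 and hashing it as two updates of size1 and TEST_LEN - size1 bytes; each split must reproduce the one-shot reference digest. For example, size1 = 100 leaves a 36-byte remainder (100 mod 64) buffered in the ctx for the second call to complete.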
+ printf("\n various size messages by update twice tests"); + for (size1 = TEST_LEN; size1 >= 0; size1--) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha256_ref(buff, TEST_LEN, hash_ref); + + // subsequent update + size2 = TEST_LEN - size1; // size2 is different with the former + CHECK_RETURN(mh_sha256_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size1=%d\n", size1); + return -1; + } + + if ((size2 & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // Test various update count + printf("\n various update count tests"); + for (update_count = 1; update_count <= TEST_LEN; update_count++) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha256_ref(buff, TEST_LEN, hash_ref); + + // subsequent update + size1 = TEST_LEN / update_count; + size2 = TEST_LEN - size1 * (update_count - 1); // size2 is different with the former + + CHECK_RETURN(mh_sha256_init(update_ctx)); + for (i = 1, offset = 0; i < update_count; i++) { + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1)); + offset += size1; + } + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size1=%d\n", size1); + return -1; + } + + if ((size2 & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // test various start address of ctx. + printf("\n various start address of ctx test"); + free(update_ctx); + mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10); + for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha256_ref(buff, TEST_LEN, hash_ref); + + // a unaligned offset + update_ctx = (struct mh_sha256_ctx *)(mem_addr + addr_offset); + CHECK_RETURN(mh_sha256_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail addr_offset=%d\n", addr_offset); + return -1; + } + + if ((addr_offset & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail"); + + return fail; + +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha256/sha256_for_mh_sha256.c b/src/crypto/isa-l/isa-l_crypto/mh_sha256/sha256_for_mh_sha256.c new file mode 100644 index 000000000..ea8c9f436 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha256/sha256_for_mh_sha256.c @@ -0,0 +1,176 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "mh_sha256_internal.h"
+#include <string.h>
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+// Reference SHA256 Functions for mh_sha256
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+
+#define W(x) w[(x) & 15]
+
+#define step(i,a,b,c,d,e,f,g,h,k) \
+	if (i<16) W(i) = to_be32(ww[i]); \
+	else \
+		W(i) = W(i-16) + S0(W(i-15)) + W(i-7) + S1(W(i-2)); \
+	t2 = s0(a) + maj(a,b,c); \
+	t1 = h + s1(e) + ch(e,f,g) + k + W(i); \
+	d += t1; \
+	h = t1 + t2;
+
+void sha256_single_for_mh_sha256(const uint8_t * data, uint32_t digest[])
+{
+	uint32_t a, b, c, d, e, f, g, h, t1, t2;
+	uint32_t w[16];
+	uint32_t *ww = (uint32_t *) data;
+
+	a = digest[0];
+	b = digest[1];
+	c = digest[2];
+	d = digest[3];
+	e = digest[4];
+	f = digest[5];
+	g = digest[6];
+	h = digest[7];
+
+	step(0, a, b, c, d, e, f, g, h, 0x428a2f98);
+	step(1, h, a, b, c, d, e, f, g, 0x71374491);
+	step(2, g, h, a, b, c, d, e, f, 0xb5c0fbcf);
+	step(3, f, g, h, a, b, c, d, e, 0xe9b5dba5);
+	step(4, e, f, g, h, a, b, c, d, 0x3956c25b);
+	step(5, d, e, f, g, h, a, b, c, 0x59f111f1);
+	step(6, c, d, e, f, g, h, a, b, 0x923f82a4);
+	step(7, b, c, d, e, f, g, h, a, 0xab1c5ed5);
+	step(8, a, b, c, d, e, f, g, h, 0xd807aa98);
+	step(9, h, a, b, c, d, e, f, g, 0x12835b01);
+	step(10, g, h, a, b, c, d, e, f, 0x243185be);
+	step(11, f, g, h, a, b, c, d, e, 0x550c7dc3);
+	step(12, e, f, g, h, a, b, c, d, 0x72be5d74);
+	step(13, d, e, f, g, h, a, b, c, 0x80deb1fe);
+	step(14, c, d, e, f, g, h, a, b, 0x9bdc06a7);
+	step(15, b, c, d, e, f, g, h, a, 0xc19bf174);
+	step(16, a, b, c, d, e, f, g, h, 0xe49b69c1);
+	step(17, h, a, b, c, d, e, f, g, 0xefbe4786);
+	step(18, g, h, a, b, c, d, e, f, 0x0fc19dc6);
+	step(19, f, g, h, a, b, c, d, e, 0x240ca1cc);
+	step(20, e, f, g, h, a, b, c, d, 0x2de92c6f);
+	step(21, d, e, f, g, h, a, b, c, 0x4a7484aa);
+	step(22, c, d, e, f, g, h, a, b, 0x5cb0a9dc);
+	step(23, b, c, d, e, f, g, h, a, 0x76f988da);
+	step(24, a, b, c, d, e, f, g, h, 0x983e5152);
+	step(25, h, a, b, c, d, e, f, g, 0xa831c66d);
+	step(26, g, h, a, b, c, d, e, f, 0xb00327c8);
+	step(27, f, g, h, a, b, c, d, e, 0xbf597fc7);
+	step(28, e, f, g, h, a, b, c, d, 0xc6e00bf3);
+	step(29, d, e, f, g, h, a, b, c, 0xd5a79147);
+	step(30, c, d, e, f, g, h, a, b, 0x06ca6351);
+	step(31, b, c, d, e, f, g, h, a, 0x14292967);
+	step(32, a, b, c, d, e, f, g, h, 0x27b70a85);
+	step(33, h, a, b, c, d, e, f, g, 0x2e1b2138);
+	step(34, g, h, a, b, c, d, e, f, 0x4d2c6dfc);
+	step(35, f, g, h, a, b, c, d, e, 0x53380d13);
+	step(36, e, f, g, h, a, b, c, d, 0x650a7354);
+	step(37, d, e, f, g, h, a, b, c, 0x766a0abb);
+	step(38, c, d, e, f, g, h, a, b, 0x81c2c92e);
+	step(39, b, c, d, e, f, g, h, a, 0x92722c85);
+	step(40, a, b, c, d, e, f, g, h, 0xa2bfe8a1);
+	step(41, h, a, b, c, d, e, f, g, 0xa81a664b);
+	step(42, g, h, a, b, c, d, e, f, 0xc24b8b70);
+	step(43, f, g, h, a, b, c, d, e, 0xc76c51a3);
+	step(44, e, f, g, h, a, b, c, d, 0xd192e819);
+	step(45, d, e, f, g, h, a, b, c, 0xd6990624);
+	step(46, c, d, e, f, g, h, a, b, 0xf40e3585);
+	step(47, b, c, d, e, f, g, h, a, 0x106aa070);
+	step(48, a, b, c, d, e, f, g, h, 0x19a4c116);
+	step(49, h, a, b, c, d, e, f, g, 0x1e376c08);
+	step(50, g, h, a, b, c, d, e, f, 0x2748774c);
+	step(51, f, g, h, a, b, c, d, e, 0x34b0bcb5);
+	step(52, e, f, g, h, a, b, c, d, 0x391c0cb3);
+	step(53, d, e, f, g, h, a, b, c, 0x4ed8aa4a);
+	step(54, c, d, e, f, g, h, a, b, 0x5b9cca4f);
+	step(55, b, c, d, e, f, g, h, a, 0x682e6ff3);
+	step(56, a, b, c, d, e, f, g, h, 0x748f82ee);
+	step(57, h, a, b, c, d, e, f, g, 0x78a5636f);
+	step(58, g, h, a, b, c, d, e, f, 0x84c87814);
+	step(59, f, g, h, a, b, c, d, e, 0x8cc70208);
+	step(60, e, f, g, h, a, b, c, d, 0x90befffa);
+	step(61, d, e, f, g, h, a, b, c, 0xa4506ceb);
+	step(62, c, d, e, f, g, h, a, b, 0xbef9a3f7);
+	step(63, b, c, d, e, f, g, h, a, 0xc67178f2);
+
+	digest[0] += a;
+	digest[1] += b;
+	digest[2] += c;
+	digest[3] += d;
+	digest[4] += e;
+	digest[5] += f;
+	digest[6] += g;
+	digest[7] += h;
+}
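The step() macro above keeps only a sixteen-entry message-schedule window: W(x) maps to w[x & 15], which suffices because round i reads just W(i-2), W(i-7), W(i-15) and W(i-16); the shifted argument lists rotate the eight working variables without any copies. The same rounds can be written as a loop with an explicit rotation, as an illustrative sketch only (not part of the upstream file; K[] stands for the 64 round constants that the unrolled calls inline, and W/S0/S1/s0/s1/ch/maj are the macros from mh_sha256_internal.h and this file):

/* Sketch: loop form of the 64 unrolled step() invocations above. */
static void sha256_rounds_loop(const uint32_t K[64], const uint32_t * ww, uint32_t digest[8])
{
	uint32_t a, b, c, d, e, f, g, h, t1, t2, w[16];
	int i;

	a = digest[0]; b = digest[1]; c = digest[2]; d = digest[3];
	e = digest[4]; f = digest[5]; g = digest[6]; h = digest[7];

	for (i = 0; i < 64; i++) {
		if (i < 16)
			W(i) = to_be32(ww[i]);
		else
			W(i) = W(i - 16) + S0(W(i - 15)) + W(i - 7) + S1(W(i - 2));
		t2 = s0(a) + maj(a, b, c);
		t1 = h + s1(e) + ch(e, f, g) + K[i] + W(i);
		/* rotate h<-g<-f<-e<-d<-c<-b<-a instead of renaming arguments */
		h = g; g = f; f = e; e = d + t1;
		d = c; c = b; b = a; a = t1 + t2;
	}

	digest[0] += a; digest[1] += b; digest[2] += c; digest[3] += d;
	digest[4] += e; digest[5] += f; digest[6] += g; digest[7] += h;
}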
+void sha256_for_mh_sha256(const uint8_t * input_data, uint32_t * digest, const uint32_t len)
+{
+	uint32_t i, j;
+	uint8_t buf[2 * SHA256_BLOCK_SIZE];
+
+	digest[0] = MH_SHA256_H0;
+	digest[1] = MH_SHA256_H1;
+	digest[2] = MH_SHA256_H2;
+	digest[3] = MH_SHA256_H3;
+	digest[4] = MH_SHA256_H4;
+	digest[5] = MH_SHA256_H5;
+	digest[6] = MH_SHA256_H6;
+	digest[7] = MH_SHA256_H7;
+
+	i = len;
+	while (i >= SHA256_BLOCK_SIZE) {
+		sha256_single_for_mh_sha256(input_data, digest);
+		input_data += SHA256_BLOCK_SIZE;
+		i -= SHA256_BLOCK_SIZE;
+	}
+
+	memcpy(buf, input_data, i);
+	buf[i++] = 0x80;
+	for (j = i; j < ((2 * SHA256_BLOCK_SIZE) - 8); j++)
+		buf[j] = 0;
+
+	if (i > SHA256_BLOCK_SIZE - 8)
+		i = 2 * SHA256_BLOCK_SIZE;
+	else
+		i = SHA256_BLOCK_SIZE;
+
+	*(uint64_t *) (buf + i - 8) = to_be64((uint64_t) len * 8);
+
+	sha256_single_for_mh_sha256(buf, digest);
+	if (i == (2 * SHA256_BLOCK_SIZE))
+		sha256_single_for_mh_sha256(buf + SHA256_BLOCK_SIZE, digest);
+}
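The tail handling in sha256_for_mh_sha256() is the FIPS 180-4 padding rule: append a 0x80 byte, zero-fill, and store the message length in bits as a big-endian 64-bit value in the last eight bytes, spilling into a second block when the remainder leaves no room for the marker plus the length field. The block-count rule it encodes, as a small illustrative helper (not part of the upstream file):

/* Sketch: how many blocks the final padding pass above processes. */
static uint32_t sha256_final_blocks(uint32_t len)
{
	uint32_t rem = len % SHA256_BLOCK_SIZE;	/* leftover bytes, 0..63 */

	/* rem bytes + 1 byte of 0x80 + 8 bytes of bit length must fit */
	return (rem + 1 + 8 <= SHA256_BLOCK_SIZE) ? 1 : 2;
}

/* e.g. len = 55 gives 1 final block, len = 56 gives 2, and len = 64
 * gives 1 (the main loop consumed the whole block, so rem = 0). */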