Diffstat
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am  67
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c  137
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c  71
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm  502
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm  509
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm  403
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c  387
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm  494
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c  122
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h  323
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm  83
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c  180
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c  438
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c  217
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c  110
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c  240
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c  212
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am  71
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c  151
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c  67
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm  702
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm  649
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm  500
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm  698
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c  102
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h  202
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm  82
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c  206
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c  248
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c  107
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c  272
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c  78
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c  138
33 files changed, 8768 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am
new file mode 100644
index 000000000..1ec5f35c5
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am
@@ -0,0 +1,67 @@
+########################################################################
+# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+lsrc_sha1 = mh_sha1/sha1_for_mh_sha1.c
+
+lsrc_mh_sha1 = mh_sha1/mh_sha1.c \
+ mh_sha1/mh_sha1_block_sse.asm \
+ mh_sha1/mh_sha1_block_avx.asm \
+ mh_sha1/mh_sha1_block_avx2.asm \
+ mh_sha1/mh_sha1_multibinary.asm \
+ mh_sha1/mh_sha1_finalize_base.c \
+ mh_sha1/mh_sha1_update_base.c \
+ mh_sha1/mh_sha1_block_base.c
+
+lsrc_mh_sha1 += mh_sha1/mh_sha1_block_avx512.asm \
+ mh_sha1/mh_sha1_avx512.c
+
+lsrc += $(lsrc_sha1) \
+ $(lsrc_mh_sha1)
+
+other_src += mh_sha1/mh_sha1_ref.c \
+ include/reg_sizes.asm \
+ include/multibinary.asm \
+ include/test.h \
+ mh_sha1/mh_sha1_internal.h
+
+extern_hdrs += include/mh_sha1.h
+
+check_tests += mh_sha1/mh_sha1_test
+unit_tests += mh_sha1/mh_sha1_update_test
+
+perf_tests += mh_sha1/mh_sha1_perf
+
+
+mh_sha1_test: mh_sha1_ref.o
+mh_sha1_mh_sha1_test_LDADD = mh_sha1/mh_sha1_ref.lo libisal_crypto.la
+
+mh_sha1_update_test: mh_sha1_ref.o
+mh_sha1_mh_sha1_update_test_LDADD = mh_sha1/mh_sha1_ref.lo libisal_crypto.la
+
+mh_sha1_mh_sha1_perf_LDADD = libisal_crypto.la
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c
new file mode 100644
index 000000000..6cb458fc8
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c
@@ -0,0 +1,137 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+int mh_sha1_init(struct mh_sha1_ctx *ctx)
+{
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint32_t i;
+
+ if (ctx == NULL)
+ return MH_SHA1_CTX_ERROR_NULL;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+ for (i = 0; i < HASH_SEGS; i++) {
+ mh_sha1_segs_digests[0][i] = MH_SHA1_H0;
+ mh_sha1_segs_digests[1][i] = MH_SHA1_H1;
+ mh_sha1_segs_digests[2][i] = MH_SHA1_H2;
+ mh_sha1_segs_digests[3][i] = MH_SHA1_H3;
+ mh_sha1_segs_digests[4][i] = MH_SHA1_H4;
+ }
+
+ return MH_SHA1_CTX_ERROR_NONE;
+}
+
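For orientation before the per-ISA instantiations below: callers drive mh_sha1 through the usual init/update/finalize sequence. A minimal usage sketch follows; the (ctx, buffer, len) and (ctx, digest) prototypes are assumptions suggested by the instantiations in this file, since the authoritative declarations live in include/mh_sha1.h, which this diff does not show.

#include <stdint.h>
#include "mh_sha1.h"

int hash_buffer(const uint8_t *buf, uint32_t len,
		uint32_t digest[SHA1_DIGEST_WORDS])
{
	struct mh_sha1_ctx ctx;

	if (mh_sha1_init(&ctx) != MH_SHA1_CTX_ERROR_NONE)
		return -1;
	mh_sha1_update_sse(&ctx, buf, len);	/* may be called repeatedly */
	mh_sha1_finalize_sse(&ctx, digest);	/* pad the tail, emit final digest */
	return 0;
}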
+/***************mh_sha1_update***********/
+// mh_sha1_update_sse.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_sse
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_sse
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+// mh_sha1_update_avx.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+// mh_sha1_update_avx2.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx2
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx2
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail handles the last, incomplete block of src data
+// mh_sha1_finalize is an mh_sha1_ctx wrapper around mh_sha1_tail
+
+// mh_sha1_finalize_sse.c and mh_sha1_tail_sse.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_sse
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_sse
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_sse
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+// mh_sha1_finalize_avx.c and mh_sha1_tail_avx.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+// mh_sha1_finalize_avx2.c and mh_sha1_tail_avx2.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx2
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx2
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx2
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
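The #define/#include/#undef blocks above are a preprocessor template: mh_sha1_update_base.c and mh_sha1_finalize_base.c define their functions under those macro names, so each include stamps out one copy bound to a different SIMD block routine. A stripped-down, self-contained sketch of the pattern with hypothetical names (the real parameter macros are the MH_SHA1_*_FUNCTION ones above):

/* template.c (hypothetical) -- included, never compiled directly */
static void UPDATE_FN(int nblocks)
{
	BLOCK_FN(nblocks);	/* shared body, dispatches to the bound block routine */
}

/* consumer.c (hypothetical) */
#include <stdio.h>
static void block_sse(int n) { printf("sse block x%d\n", n); }
static void block_avx(int n) { printf("avx block x%d\n", n); }

#define UPDATE_FN update_sse
#define BLOCK_FN block_sse
#include "template.c"	/* emits update_sse(), which calls block_sse() */
#undef UPDATE_FN
#undef BLOCK_FN

#define UPDATE_FN update_avx
#define BLOCK_FN block_avx
#include "template.c"	/* emits update_avx(), which calls block_avx() */
#undef UPDATE_FN
#undef BLOCK_FN

int main(void) { update_sse(4); update_avx(4); return 0; }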
+/***************version info***********/
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+// Version info
+struct slver mh_sha1_init_slver_00000271;
+struct slver mh_sha1_init_slver = { 0x0271, 0x00, 0x00 };
+
+// mh_sha1_update version info
+struct slver mh_sha1_update_sse_slver_00000274;
+struct slver mh_sha1_update_sse_slver = { 0x0274, 0x00, 0x00 };
+
+struct slver mh_sha1_update_avx_slver_02000276;
+struct slver mh_sha1_update_avx_slver = { 0x0276, 0x00, 0x02 };
+
+struct slver mh_sha1_update_avx2_slver_04000278;
+struct slver mh_sha1_update_avx2_slver = { 0x0278, 0x00, 0x04 };
+
+// mh_sha1_finalize version info
+struct slver mh_sha1_finalize_sse_slver_00000275;
+struct slver mh_sha1_finalize_sse_slver = { 0x0275, 0x00, 0x00 };
+
+struct slver mh_sha1_finalize_avx_slver_02000277;
+struct slver mh_sha1_finalize_avx_slver = { 0x0277, 0x00, 0x02 };
+
+struct slver mh_sha1_finalize_avx2_slver_04000279;
+struct slver mh_sha1_finalize_avx2_slver = { 0x0279, 0x00, 0x04 };
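A note on the slver pairs above: each _slver_XXXXXXXX suffix is the struct read most-significant field first, core|ver|snum. That is an observation from the pairs in this file, not a documented format; a quick check:

#include <assert.h>
#include <stdint.h>

struct slver { uint16_t snum; uint8_t ver; uint8_t core; };

int main(void)
{
	struct slver s = { 0x0276, 0x00, 0x02 };	/* mh_sha1_update_avx_slver */
	uint32_t suffix = ((uint32_t)s.core << 24) | ((uint32_t)s.ver << 16) | s.snum;
	assert(suffix == 0x02000276);	/* matches mh_sha1_update_avx_slver_02000276 */
	return 0;
}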
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c
new file mode 100644
index 000000000..15f0ae1e2
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c
@@ -0,0 +1,71 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+#ifdef HAVE_AS_KNOWS_AVX512
+
+/***************mh_sha1_update***********/
+// mh_sha1_update_avx512.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx512
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx512
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail handles the last, incomplete block of src data
+// mh_sha1_finalize is an mh_sha1_ctx wrapper around mh_sha1_tail
+// mh_sha1_finalize_avx512.c and mh_sha1_tail_avx512.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx512
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx512
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx512
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************version info***********/
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// mh_sha1_update version info
+struct slver mh_sha1_update_avx512_slver_0600027c;
+struct slver mh_sha1_update_avx512_slver = { 0x027c, 0x00, 0x06 };
+
+// mh_sha1_finalize version info
+struct slver mh_sha1_finalize_avx512_slver_0600027d;
+struct slver mh_sha1_finalize_avx512_slver = { 0x027d, 0x00, 0x06 };
+
+#endif // HAVE_AS_KNOWS_AVX512
+
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm
new file mode 100644
index 000000000..8b6031997
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm
@@ -0,0 +1,502 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 digests using AVX
+;;
+
+%include "reg_sizes.asm"
+default rel
+
+;; Magic functions defined in FIPS 180-1
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF, %%regC,%%regD
+ vpand %%regF, %%regF,%%regB
+ vpxor %%regF, %%regF,%%regD
+%endmacro
+
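MAGIC_F0 is the rounds-0..19 Ch function in its three-instruction form: D ^ (B & (C ^ D)) equals the textbook (B & C) | (~B & D) while needing no NOT. An exhaustive one-bit check in C (a verification sketch, not part of the source):

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t b, c, d;
	for (b = 0; b < 2; b++)
		for (c = 0; c < 2; c++)
			for (d = 0; d < 2; d++)
				assert((d ^ (b & (c ^ d))) == ((b & c) | ((b ^ 1) & d)));
	return 0;
}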
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF,%%regD,%%regC
+ vpxor %%regF,%%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpor %%regF,%%regB,%%regC
+ vpand %%regT,%%regB,%%regC
+ vpand %%regF,%%regF,%%regD
+ vpor %%regF,%%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ vpsrld %%tmp, %%reg, (32-(%%imm))
+ vpslld %%reg, %%reg, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+; non-destructive
+; PROLD_nd reg, imm, tmp, src
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+ vpsrld %%tmp, %%src, (32-(%%imm))
+ vpslld %%reg, %%src, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
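PROLD and PROLD_nd synthesize rotate-left by a constant from two shifts and an OR, since AVX has no vector rotate instruction. The scalar counterpart used by mh_sha1_block_base.c is rol32; it presumably follows the usual idiom below (the real definition lives in mh_sha1_internal.h, not shown in this diff):

static inline uint32_t rol32(uint32_t x, int r)
{
	/* valid for 1 <= r <= 31, covering the 1-, 5- and 30-bit uses here */
	return (x << r) | (x >> (32 - r));
}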
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ vpaddd %%regE, %%regE,[%%data + (%%memW * 16)]
+ PROLD_nd %%regT,5, %%regF,%%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE, %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp2
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+
+;void mh_sha1_block_avx(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including segment digests (uint32_t digests[5][16])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
+; arg 3 number of 1KB blocks
+;
+global mh_sha1_block_avx:function internal
+func(mh_sha1_block_avx)
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 16 Bytes needed by avx
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ VMOVPS A, [mh_digests_p + I*64 + 16*0]
+ VMOVPS B, [mh_digests_p + I*64 + 16*1]
+ VMOVPS C, [mh_digests_p + I*64 + 16*2]
+ VMOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ vmovdqa [rsp + I*64 + 16*0], A
+ vmovdqa [rsp + I*64 + 16*1], B
+ vmovdqa [rsp + I*64 + 16*2], C
+ vmovdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*16]
+ VMOVPS T1,[mh_in_p + I*64+1*16]
+ VMOVPS T2,[mh_in_p + I*64+2*16]
+ VMOVPS T3,[mh_in_p + I*64+3*16]
+
+ vpshufb T0, F
+ vmovdqa [mh_data_p +(I)*16 +0*256],T0
+ vpshufb T1, F
+ vmovdqa [mh_data_p +(I)*16 +1*256],T1
+ vpshufb T2, F
+ vmovdqa [mh_data_p +(I)*16 +2*256],T2
+ vpshufb T3, F
+ vmovdqa [mh_data_p +(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+ mov pref, 1024 ;avoid prefetching repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vmovdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+;; do rounds 20...39
+ vmovdqa K, [K20_39]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ vmovdqa K, [K40_59]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 60...79
+ vmovdqa K, [K60_79]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ vpaddd A, AA
+ vpaddd B, BB
+ vpaddd C, CC
+ vpaddd D, DD
+ vpaddd E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 256
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ vmovdqa A, [rsp + I*64 + 16*0]
+ vmovdqa B, [rsp + I*64 + 16*1]
+ vmovdqa C, [rsp + I*64 + 16*2]
+ vmovdqa D, [rsp + I*64 + 16*3]
+
+ VMOVPS [mh_digests_p + I*64 + 16*0], A
+ VMOVPS [mh_digests_p + I*64 + 16*1], B
+ VMOVPS [mh_digests_p + I*64 + 16*2], C
+ VMOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm
new file mode 100644
index 000000000..77aeda0d7
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm
@@ -0,0 +1,509 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 digests using AVX2
+;;
+
+%include "reg_sizes.asm"
+default rel
+
+;; Magic functions defined in FIPS 180-1
+;;
+;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF, %%regC,%%regD
+ vpand %%regF, %%regF,%%regB
+ vpxor %%regF, %%regF,%%regD
+%endmacro
+
+;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF,%%regD,%%regC
+ vpxor %%regF,%%regF,%%regB
+%endmacro
+
+
+
+;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpor %%regF,%%regB,%%regC
+ vpand %%regT,%%regB,%%regC
+ vpand %%regF,%%regF,%%regD
+ vpor %%regF,%%regF,%%regT
+%endmacro
+
+;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ vpsrld %%tmp, %%reg, (32-%%imm)
+ vpslld %%reg, %%reg, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+ vpsrld %%tmp, %%src, (32-%%imm)
+ vpslld %%reg, %%src, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ vpaddd %%regE, %%regE,[%%data + (%%memW * 32)]
+ PROLD_nd %%regT,5, %%regF,%%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE, %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp2
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+%define A ymm0
+%define B ymm1
+%define C ymm2
+%define D ymm3
+%define E ymm4
+
+%define F ymm5
+%define T0 ymm6
+%define T1 ymm7
+%define T2 ymm8
+%define T3 ymm9
+%define T4 ymm10
+%define T5 ymm11
+%define T6 ymm12
+%define T7 ymm13
+%define T8 ymm14
+%define T9 ymm15
+
+%define AA ymm5
+%define BB ymm6
+%define CC ymm7
+%define DD ymm8
+%define EE ymm9
+%define TMP ymm10
+%define FUN ymm11
+%define K ymm12
+%define W14 ymm13
+%define W15 ymm14
+%define W16 ymm15
+
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+
+;void mh_sha1_block_avx2(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including segment digests (uint32_t digests[5][16])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
+; arg 3 number of 1KB blocks
+;
+global mh_sha1_block_avx2:function internal
+func(mh_sha1_block_avx2)
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 32 Bytes needed by avx2
+ and rsp, ~0x1F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 2
+ VMOVPS A, [mh_digests_p + I*32*5 + 32*0]
+ VMOVPS B, [mh_digests_p + I*32*5 + 32*1]
+ VMOVPS C, [mh_digests_p + I*32*5 + 32*2]
+ VMOVPS D, [mh_digests_p + I*32*5 + 32*3]
+ VMOVPS E, [mh_digests_p + I*32*5 + 32*4]
+
+ vmovdqa [rsp + I*32*5 + 32*0], A
+ vmovdqa [rsp + I*32*5 + 32*1], B
+ vmovdqa [rsp + I*32*5 + 32*2], C
+ vmovdqa [rsp + I*32*5 + 32*3], D
+ vmovdqa [rsp + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*8_SEGS*5*2
+%assign I 0
+%rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*32]
+ VMOVPS T1,[mh_in_p + I*64+1*32]
+
+ vpshufb T0, T0, F
+ vmovdqa [mh_data_p +I*32+0*512],T0
+ vpshufb T1, T1, F
+ vmovdqa [mh_data_p +I*32+1*512],T1
+%assign I (I+1)
+%endrep
+
+ mov mh_segs, 0 ;start from the first 8 segments
+ mov pref, 1024 ;avoid prefetching repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vmovdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+%assign I (I+1)
+%endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 32]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 32]
+ %rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 20...39
+ vmovdqa K, [K20_39]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+;; do rounds 40...59
+ vmovdqa K, [K40_59]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*2]
+ PREFETCH_X [mh_in_p + pref+128*3]
+;; do rounds 60...79
+ vmovdqa K, [K60_79]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ vpaddd A,A, AA
+ vpaddd B,B, BB
+ vpaddd C,C, CC
+ vpaddd D,D, DD
+ vpaddd E,E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 512
+
+ add mh_data_p, 512
+ add mh_segs, 32
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 2
+ vmovdqa A, [rsp + I*32*5 + 32*0]
+ vmovdqa B, [rsp + I*32*5 + 32*1]
+ vmovdqa C, [rsp + I*32*5 + 32*2]
+ vmovdqa D, [rsp + I*32*5 + 32*3]
+ vmovdqa E, [rsp + I*32*5 + 32*4]
+
+ VMOVPS [mh_digests_p + I*32*5 + 32*0], A
+ VMOVPS [mh_digests_p + I*32*5 + 32*1], B
+ VMOVPS [mh_digests_p + I*32*5 + 32*2], C
+ VMOVPS [mh_digests_p + I*32*5 + 32*3], D
+ VMOVPS [mh_digests_p + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=32
+
+align 32
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+ dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm
new file mode 100644
index 000000000..3738c6d40
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm
@@ -0,0 +1,403 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 digests using AVX-512
+;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+default rel
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovdqu64
+;SIMD variables definition
+%define A zmm0
+%define B zmm1
+%define C zmm2
+%define D zmm3
+%define E zmm4
+%define HH0 zmm5
+%define HH1 zmm6
+%define HH2 zmm7
+%define HH3 zmm8
+%define HH4 zmm9
+%define KT zmm10
+%define XTMP0 zmm11
+%define XTMP1 zmm12
+%define SHUF_MASK zmm13
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;using 16 extra ZMM registers to hold the byte-swapped input data
+%define W0 zmm16
+%define W1 zmm17
+%define W2 zmm18
+%define W3 zmm19
+%define W4 zmm20
+%define W5 zmm21
+%define W6 zmm22
+%define W7 zmm23
+%define W8 zmm24
+%define W9 zmm25
+%define W10 zmm26
+%define W11 zmm27
+%define W12 zmm28
+%define W13 zmm29
+%define W14 zmm30
+%define W15 zmm31
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;macros definition
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro PROCESS_LOOP 2
+%define %%WT %1
+%define %%F_IMMED %2
+
+ ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
+ ; E=D, D=C, C=ROTL_30(B), B=A, A=T
+
+ ; Ft
+ ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D)
+ ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D
+ ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)
+
+ vmovdqa32 XTMP1, B ; Copy B
+ vpaddd E, E, %%WT ; E = E + Wt
+ vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D)
+ vpaddd E, E, KT ; E = E + Wt + Kt
+ vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A)
+ vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt
+ vprold B, B, 30 ; B = ROTL_30(B)
+ vpaddd E, E, XTMP0 ; E = T
+
+ ROTATE_ARGS
+%endmacro
+
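PROCESS_LOOP collapses each Ft into a single vpternlogd: the imm8 is the truth table of the three operand bits, indexed here as (b<<2)|(c<<1)|d for the B, C, D operands. The immediates used in the 80-round loop below decode to exactly the FIPS-180 functions: 0xCA = Ch, 0x96 = Parity (three-way XOR, also used by the message schedule), 0xE8 = Maj. A small generator confirming those constants (a verification sketch, not part of the source):

#include <stdio.h>
#include <stdint.h>

typedef uint32_t (*fn3)(uint32_t, uint32_t, uint32_t);

/* imm8 for vpternlogd f(b,c,d): bit (b<<2 | c<<1 | d) holds f's value */
static uint8_t ternlog_imm(fn3 f)
{
	uint8_t imm = 0;
	for (int i = 0; i < 8; i++)
		if (f((i >> 2) & 1, (i >> 1) & 1, i & 1))
			imm |= (uint8_t)(1u << i);
	return imm;
}

static uint32_t ch(uint32_t b, uint32_t c, uint32_t d)     { return (b & c) ^ ((b ^ 1) & d); }
static uint32_t parity(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; }
static uint32_t maj(uint32_t b, uint32_t c, uint32_t d)    { return (b & c) ^ (b & d) ^ (c & d); }

int main(void)
{
	printf("Ch=0x%02X Parity=0x%02X Maj=0x%02X\n",
	       ternlog_imm(ch), ternlog_imm(parity), ternlog_imm(maj));
	/* prints: Ch=0xCA Parity=0x96 Maj=0xE8 */
	return 0;
}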
+%macro MSG_SCHED_ROUND_16_79 4
+%define %%WT %1
+%define %%WTp2 %2
+%define %%WTp8 %3
+%define %%WTp13 %4
+ ; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ Wt-16)
+ ; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt)
+ vpternlogd %%WT, %%WTp2, %%WTp8, 0x96
+ vpxord %%WT, %%WT, %%WTp13
+ vprold %%WT, %%WT, 1
+%endmacro
+
+%define APPEND(a,b) a %+ b
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ ; remove unwind info macros
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp + 0*16], xmm6
+ movdqa [rsp + 1*16], xmm7
+ movdqa [rsp + 2*16], xmm8
+ movdqa [rsp + 3*16], xmm9
+ movdqa [rsp + 4*16], xmm10
+ movdqa [rsp + 5*16], xmm11
+ movdqa [rsp + 6*16], xmm12
+ movdqa [rsp + 7*16], xmm13
+ movdqa [rsp + 8*16], xmm14
+ movdqa [rsp + 9*16], xmm15
+ mov [rsp + 10*16 + 0*8], r12
+ mov [rsp + 10*16 + 1*8], r13
+ mov [rsp + 10*16 + 2*8], r14
+ mov [rsp + 10*16 + 3*8], r15
+ mov [rsp + 10*16 + 4*8], rdi
+ mov [rsp + 10*16 + 5*8], rsi
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp2
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+[bits 64]
+section .text
+align 32
+
+;void mh_sha1_block_avx512(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including segment digests (uint32_t digests[5][16])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
+; arg 3 number of 1KB blocks
+;
+global mh_sha1_block_avx512
+func(mh_sha1_block_avx512)
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; align rsp to 64 Bytes needed by avx512
+ and rsp, ~0x3f
+
+ ; copy segs_digests into registers.
+ VMOVPS HH0, [mh_digests_p + 64*0]
+ VMOVPS HH1, [mh_digests_p + 64*1]
+ VMOVPS HH2, [mh_digests_p + 64*2]
+ VMOVPS HH3, [mh_digests_p + 64*3]
+ VMOVPS HH4, [mh_digests_p + 64*4]
+ ;a mask used to transform to big-endian data
+ vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK]
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ ;using extra 16 ZMM registers instead of stack
+%assign I 0
+%rep 8
+%assign J (I+1)
+ VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64]
+ VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64]
+
+ vpshufb APPEND(W,I), APPEND(W,I), SHUF_MASK
+ vpshufb APPEND(W,J), APPEND(W,J), SHUF_MASK
+%assign I (I+2)
+%endrep
+
+ vmovdqa64 A, HH0
+ vmovdqa64 B, HH1
+ vmovdqa64 C, HH2
+ vmovdqa64 D, HH3
+ vmovdqa64 E, HH4
+
+ vmovdqa32 KT, [K00_19]
+%assign I 0xCA
+%assign J 0
+%assign K 2
+%assign L 8
+%assign M 13
+%assign N 0
+%rep 80
+ PROCESS_LOOP APPEND(W,J), I
+ %if N < 64
+ MSG_SCHED_ROUND_16_79 APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M)
+ %endif
+ %if N = 19
+ vmovdqa32 KT, [K20_39]
+ %assign I 0x96
+ %elif N = 39
+ vmovdqa32 KT, [K40_59]
+ %assign I 0xE8
+ %elif N = 59
+ vmovdqa32 KT, [K60_79]
+ %assign I 0x96
+ %endif
+ %if N % 20 = 19
+ PREFETCH_X [mh_in_p + 1024+128*(N / 20)]
+ PREFETCH_X [mh_in_p + 1024+128*(N / 20 +1)]
+ %endif
+%assign J ((J+1)% 16)
+%assign K ((K+1)% 16)
+%assign L ((L+1)% 16)
+%assign M ((M+1)% 16)
+%assign N (N+1)
+%endrep
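The %rep 80 block keeps only the 16 most recent schedule words, walking the J/K/L/M register indices through W0..W15 modulo 16 while MSG_SCHED_ROUND_16_79 rewrites Wt in place as Wt+16 from Wt+13, Wt+8, Wt+2 and Wt. In scalar C this is the familiar 16-entry ring buffer, as in mh_sha1_block_base.c later in this diff:

/* ring-buffer form of the same recurrence (scalar sketch) */
for (i = 16; i < 80; i++)
	w[i & 15] = rol32(w[(i - 3) & 15] ^ w[(i - 8) & 15] ^
			  w[(i - 14) & 15] ^ w[(i - 16) & 15], 1);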
+
+ ; Add old digest
+ vpaddd HH0,A, HH0
+ vpaddd HH1,B, HH1
+ vpaddd HH2,C, HH2
+ vpaddd HH3,D, HH3
+ vpaddd HH4,E, HH4
+
+ add mh_in_p, 1024
+ sub loops, 1
+ jne .block_loop
+
+ ; copy segs_digests to mh_digests_p
+ VMOVPS [mh_digests_p + 64*0], HH0
+ VMOVPS [mh_digests_p + 64*1], HH1
+ VMOVPS [mh_digests_p + 64*2], HH2
+ VMOVPS [mh_digests_p + 64*3], HH3
+ VMOVPS [mh_digests_p + 64*4], HH4
+
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+
+section .data align=64
+
+align 64
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+
+K20_39: dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+
+K40_59: dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+
+K60_79: dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_mh_sha1_block_avx512
+no_mh_sha1_block_avx512:
+%endif
+%endif ; HAVE_AS_KNOWS_AVX512
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c
new file mode 100644
index 000000000..cdee69a4a
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c
@@ -0,0 +1,387 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "mh_sha1_internal.h"
+#include <string.h>
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+// Base multi-hash SHA1 Functions
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+#define store_w(s, i, w, ww) (w[i][s] = bswap(ww[i*HASH_SEGS+s])) // only used for step 0 ~ 15
+#define update_w(s, i, w) (w[i&15][s] = rol32(w[(i-3)&15][s]^w[(i-8)&15][s]^w[(i-14)&15][s]^w[(i-16)&15][s], 1)) // used for step > 15
+#define update_e_1(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F1(b[s],c[s],d[s]) + K_00_19 + w[i&15][s])
+#define update_e_2(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F2(b[s],c[s],d[s]) + K_20_39 + w[i&15][s])
+#define update_e_3(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F3(b[s],c[s],d[s]) + K_40_59 + w[i&15][s])
+#define update_e_4(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F4(b[s],c[s],d[s]) + K_60_79 + w[i&15][s])
+#define update_b(s, b) (b[s] = rol32(b[s],30))
+
+#define STORE_W(i, w, ww) \
+ store_w(0, i, w, ww); \
+ store_w(1, i, w, ww); \
+ store_w(2, i, w, ww); \
+ store_w(3, i, w, ww); \
+ store_w(4, i, w, ww); \
+ store_w(5, i, w, ww); \
+ store_w(6, i, w, ww); \
+ store_w(7, i, w, ww); \
+ store_w(8, i, w, ww); \
+ store_w(9, i, w, ww); \
+ store_w(10, i, w, ww); \
+ store_w(11, i, w, ww); \
+ store_w(12, i, w, ww); \
+ store_w(13, i, w, ww); \
+ store_w(14, i, w, ww); \
+ store_w(15, i, w, ww)
+
+#define UPDATE_W(i, w) \
+ update_w(0, i, w); \
+ update_w(1, i, w); \
+ update_w(2, i, w); \
+ update_w(3, i, w); \
+ update_w(4, i, w); \
+ update_w(5, i, w); \
+ update_w(6, i, w); \
+ update_w(7, i, w); \
+ update_w(8, i, w); \
+ update_w(9, i, w); \
+ update_w(10, i, w); \
+ update_w(11, i, w); \
+ update_w(12, i, w); \
+ update_w(13, i, w); \
+ update_w(14, i, w); \
+ update_w(15, i, w)
+
+#define UPDATE_E1(a, b, c, d, e, i, w) \
+ update_e_1(0, a, b, c, d, e, i, w); \
+ update_e_1(1, a, b, c, d, e, i, w); \
+ update_e_1(2, a, b, c, d, e, i, w); \
+ update_e_1(3, a, b, c, d, e, i, w); \
+ update_e_1(4, a, b, c, d, e, i, w); \
+ update_e_1(5, a, b, c, d, e, i, w); \
+ update_e_1(6, a, b, c, d, e, i, w); \
+ update_e_1(7, a, b, c, d, e, i, w); \
+ update_e_1(8, a, b, c, d, e, i, w); \
+ update_e_1(9, a, b, c, d, e, i, w); \
+ update_e_1(10, a, b, c, d, e, i, w); \
+ update_e_1(11, a, b, c, d, e, i, w); \
+ update_e_1(12, a, b, c, d, e, i, w); \
+ update_e_1(13, a, b, c, d, e, i, w); \
+ update_e_1(14, a, b, c, d, e, i, w); \
+ update_e_1(15, a, b, c, d, e, i, w)
+
+#define UPDATE_E2(a, b, c, d, e, i, w) \
+ update_e_2(0, a, b, c, d, e, i, w); \
+ update_e_2(1, a, b, c, d, e, i, w); \
+ update_e_2(2, a, b, c, d, e, i, w); \
+ update_e_2(3, a, b, c, d, e, i, w); \
+ update_e_2(4, a, b, c, d, e, i, w); \
+ update_e_2(5, a, b, c, d, e, i, w); \
+ update_e_2(6, a, b, c, d, e, i, w); \
+ update_e_2(7, a, b, c, d, e, i, w); \
+ update_e_2(8, a, b, c, d, e, i, w); \
+ update_e_2(9, a, b, c, d, e, i, w); \
+ update_e_2(10, a, b, c, d, e, i, w); \
+ update_e_2(11, a, b, c, d, e, i, w); \
+ update_e_2(12, a, b, c, d, e, i, w); \
+ update_e_2(13, a, b, c, d, e, i, w); \
+ update_e_2(14, a, b, c, d, e, i, w); \
+ update_e_2(15, a, b, c, d, e, i, w)
+
+#define UPDATE_E3(a, b, c, d, e, i, w) \
+ update_e_3(0, a, b, c, d, e, i, w); \
+ update_e_3(1, a, b, c, d, e, i, w); \
+ update_e_3(2, a, b, c, d, e, i, w); \
+ update_e_3(3, a, b, c, d, e, i, w); \
+ update_e_3(4, a, b, c, d, e, i, w); \
+ update_e_3(5, a, b, c, d, e, i, w); \
+ update_e_3(6, a, b, c, d, e, i, w); \
+ update_e_3(7, a, b, c, d, e, i, w); \
+ update_e_3(8, a, b, c, d, e, i, w); \
+ update_e_3(9, a, b, c, d, e, i, w); \
+ update_e_3(10, a, b, c, d, e, i, w); \
+ update_e_3(11, a, b, c, d, e, i, w); \
+ update_e_3(12, a, b, c, d, e, i, w); \
+ update_e_3(13, a, b, c, d, e, i, w); \
+ update_e_3(14, a, b, c, d, e, i, w); \
+ update_e_3(15, a, b, c, d, e, i, w)
+
+#define UPDATE_E4(a, b, c, d, e, i, w) \
+ update_e_4(0, a, b, c, d, e, i, w); \
+ update_e_4(1, a, b, c, d, e, i, w); \
+ update_e_4(2, a, b, c, d, e, i, w); \
+ update_e_4(3, a, b, c, d, e, i, w); \
+ update_e_4(4, a, b, c, d, e, i, w); \
+ update_e_4(5, a, b, c, d, e, i, w); \
+ update_e_4(6, a, b, c, d, e, i, w); \
+ update_e_4(7, a, b, c, d, e, i, w); \
+ update_e_4(8, a, b, c, d, e, i, w); \
+ update_e_4(9, a, b, c, d, e, i, w); \
+ update_e_4(10, a, b, c, d, e, i, w); \
+ update_e_4(11, a, b, c, d, e, i, w); \
+ update_e_4(12, a, b, c, d, e, i, w); \
+ update_e_4(13, a, b, c, d, e, i, w); \
+ update_e_4(14, a, b, c, d, e, i, w); \
+ update_e_4(15, a, b, c, d, e, i, w)
+
+#define UPDATE_B(b) \
+ update_b(0, b); \
+ update_b(1, b); \
+ update_b(2, b); \
+ update_b(3, b); \
+ update_b(4, b); \
+ update_b(5, b); \
+ update_b(6, b); \
+ update_b(7, b); \
+ update_b(8, b); \
+ update_b(9, b); \
+ update_b(10, b); \
+ update_b(11, b); \
+ update_b(12, b); \
+ update_b(13, b); \
+ update_b(14, b); \
+ update_b(15, b)
+
+static inline void step00_15(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS],
+ uint32_t * ww)
+{
+ STORE_W(i, w, ww);
+ UPDATE_E1(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void step16_19(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS])
+{
+ UPDATE_W(i, w);
+ UPDATE_E1(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+
+}
+
+static inline void step20_39(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS])
+{
+ UPDATE_W(i, w);
+ UPDATE_E2(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void step40_59(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS])
+{
+ UPDATE_W(i, w);
+ UPDATE_E3(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void step60_79(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS])
+{
+ UPDATE_W(i, w);
+ UPDATE_E4(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void init_abcde(uint32_t * xx, uint32_t n,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS])
+{
+ xx[0] = digests[n][0];
+ xx[1] = digests[n][1];
+ xx[2] = digests[n][2];
+ xx[3] = digests[n][3];
+ xx[4] = digests[n][4];
+ xx[5] = digests[n][5];
+ xx[6] = digests[n][6];
+ xx[7] = digests[n][7];
+ xx[8] = digests[n][8];
+ xx[9] = digests[n][9];
+ xx[10] = digests[n][10];
+ xx[11] = digests[n][11];
+ xx[12] = digests[n][12];
+ xx[13] = digests[n][13];
+ xx[14] = digests[n][14];
+ xx[15] = digests[n][15];
+}
+
+static inline void add_abcde(uint32_t * xx, uint32_t n,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS])
+{
+ digests[n][0] += xx[0];
+ digests[n][1] += xx[1];
+ digests[n][2] += xx[2];
+ digests[n][3] += xx[3];
+ digests[n][4] += xx[4];
+ digests[n][5] += xx[5];
+ digests[n][6] += xx[6];
+ digests[n][7] += xx[7];
+ digests[n][8] += xx[8];
+ digests[n][9] += xx[9];
+ digests[n][10] += xx[10];
+ digests[n][11] += xx[11];
+ digests[n][12] += xx[12];
+ digests[n][13] += xx[13];
+ digests[n][14] += xx[14];
+ digests[n][15] += xx[15];
+}
+
+/*
+ * API to perform steps 0-79 of the multi-hash algorithm on a single
+ * block of data. The caller is responsible for supplying a full block
+ * of input.
+ *
+ * Arguments:
+ *   input        - pointer to the input data
+ *   digests      - the space that holds the digests of all segments
+ *   frame_buffer - temp working area used to hold the big-endian data
+ *
+ * Return:
+ *   N/A
+ */
+void mh_sha1_single(const uint8_t * input, uint32_t(*digests)[HASH_SEGS],
+ uint8_t * frame_buffer)
+{
+ uint32_t aa[HASH_SEGS], bb[HASH_SEGS], cc[HASH_SEGS], dd[HASH_SEGS], ee[HASH_SEGS];
+ uint32_t *ww = (uint32_t *) input;
+ uint32_t(*w)[HASH_SEGS];
+
+ w = (uint32_t(*)[HASH_SEGS]) frame_buffer;
+
+ init_abcde(aa, 0, digests);
+ init_abcde(bb, 1, digests);
+ init_abcde(cc, 2, digests);
+ init_abcde(dd, 3, digests);
+ init_abcde(ee, 4, digests);
+
+ step00_15(0, aa, bb, cc, dd, ee, w, ww);
+ step00_15(1, ee, aa, bb, cc, dd, w, ww);
+ step00_15(2, dd, ee, aa, bb, cc, w, ww);
+ step00_15(3, cc, dd, ee, aa, bb, w, ww);
+ step00_15(4, bb, cc, dd, ee, aa, w, ww);
+ step00_15(5, aa, bb, cc, dd, ee, w, ww);
+ step00_15(6, ee, aa, bb, cc, dd, w, ww);
+ step00_15(7, dd, ee, aa, bb, cc, w, ww);
+ step00_15(8, cc, dd, ee, aa, bb, w, ww);
+ step00_15(9, bb, cc, dd, ee, aa, w, ww);
+ step00_15(10, aa, bb, cc, dd, ee, w, ww);
+ step00_15(11, ee, aa, bb, cc, dd, w, ww);
+ step00_15(12, dd, ee, aa, bb, cc, w, ww);
+ step00_15(13, cc, dd, ee, aa, bb, w, ww);
+ step00_15(14, bb, cc, dd, ee, aa, w, ww);
+ step00_15(15, aa, bb, cc, dd, ee, w, ww);
+
+ step16_19(16, ee, aa, bb, cc, dd, w);
+ step16_19(17, dd, ee, aa, bb, cc, w);
+ step16_19(18, cc, dd, ee, aa, bb, w);
+ step16_19(19, bb, cc, dd, ee, aa, w);
+
+ step20_39(20, aa, bb, cc, dd, ee, w);
+ step20_39(21, ee, aa, bb, cc, dd, w);
+ step20_39(22, dd, ee, aa, bb, cc, w);
+ step20_39(23, cc, dd, ee, aa, bb, w);
+ step20_39(24, bb, cc, dd, ee, aa, w);
+ step20_39(25, aa, bb, cc, dd, ee, w);
+ step20_39(26, ee, aa, bb, cc, dd, w);
+ step20_39(27, dd, ee, aa, bb, cc, w);
+ step20_39(28, cc, dd, ee, aa, bb, w);
+ step20_39(29, bb, cc, dd, ee, aa, w);
+ step20_39(30, aa, bb, cc, dd, ee, w);
+ step20_39(31, ee, aa, bb, cc, dd, w);
+ step20_39(32, dd, ee, aa, bb, cc, w);
+ step20_39(33, cc, dd, ee, aa, bb, w);
+ step20_39(34, bb, cc, dd, ee, aa, w);
+ step20_39(35, aa, bb, cc, dd, ee, w);
+ step20_39(36, ee, aa, bb, cc, dd, w);
+ step20_39(37, dd, ee, aa, bb, cc, w);
+ step20_39(38, cc, dd, ee, aa, bb, w);
+ step20_39(39, bb, cc, dd, ee, aa, w);
+
+ step40_59(40, aa, bb, cc, dd, ee, w);
+ step40_59(41, ee, aa, bb, cc, dd, w);
+ step40_59(42, dd, ee, aa, bb, cc, w);
+ step40_59(43, cc, dd, ee, aa, bb, w);
+ step40_59(44, bb, cc, dd, ee, aa, w);
+ step40_59(45, aa, bb, cc, dd, ee, w);
+ step40_59(46, ee, aa, bb, cc, dd, w);
+ step40_59(47, dd, ee, aa, bb, cc, w);
+ step40_59(48, cc, dd, ee, aa, bb, w);
+ step40_59(49, bb, cc, dd, ee, aa, w);
+ step40_59(50, aa, bb, cc, dd, ee, w);
+ step40_59(51, ee, aa, bb, cc, dd, w);
+ step40_59(52, dd, ee, aa, bb, cc, w);
+ step40_59(53, cc, dd, ee, aa, bb, w);
+ step40_59(54, bb, cc, dd, ee, aa, w);
+ step40_59(55, aa, bb, cc, dd, ee, w);
+ step40_59(56, ee, aa, bb, cc, dd, w);
+ step40_59(57, dd, ee, aa, bb, cc, w);
+ step40_59(58, cc, dd, ee, aa, bb, w);
+ step40_59(59, bb, cc, dd, ee, aa, w);
+
+ step60_79(60, aa, bb, cc, dd, ee, w);
+ step60_79(61, ee, aa, bb, cc, dd, w);
+ step60_79(62, dd, ee, aa, bb, cc, w);
+ step60_79(63, cc, dd, ee, aa, bb, w);
+ step60_79(64, bb, cc, dd, ee, aa, w);
+ step60_79(65, aa, bb, cc, dd, ee, w);
+ step60_79(66, ee, aa, bb, cc, dd, w);
+ step60_79(67, dd, ee, aa, bb, cc, w);
+ step60_79(68, cc, dd, ee, aa, bb, w);
+ step60_79(69, bb, cc, dd, ee, aa, w);
+ step60_79(70, aa, bb, cc, dd, ee, w);
+ step60_79(71, ee, aa, bb, cc, dd, w);
+ step60_79(72, dd, ee, aa, bb, cc, w);
+ step60_79(73, cc, dd, ee, aa, bb, w);
+ step60_79(74, bb, cc, dd, ee, aa, w);
+ step60_79(75, aa, bb, cc, dd, ee, w);
+ step60_79(76, ee, aa, bb, cc, dd, w);
+ step60_79(77, dd, ee, aa, bb, cc, w);
+ step60_79(78, cc, dd, ee, aa, bb, w);
+ step60_79(79, bb, cc, dd, ee, aa, w);
+
+ add_abcde(aa, 0, digests);
+ add_abcde(bb, 1, digests);
+ add_abcde(cc, 2, digests);
+ add_abcde(dd, 3, digests);
+ add_abcde(ee, 4, digests);
+}
+
+void mh_sha1_block_base(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks)
+{
+ uint32_t i;
+
+ for (i = 0; i < num_blocks; i++) {
+ mh_sha1_single(input_data, digests, frame_buffer);
+ input_data += MH_SHA1_BLOCK_SIZE;
+ }
+
+ return;
+}
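+
+/*
+ * Illustrative sketch (not part of the library): the interleaved layout
+ * consumed above means word i of segment s sits at input dword
+ * i * HASH_SEGS + s. Extracting one segment's 64-byte SHA1 block in
+ * scalar code could look like this (mh_sha1_extract_seg_example is a
+ * hypothetical helper, shown only to document the layout):
+ */
+static inline void mh_sha1_extract_seg_example(const uint8_t * input,
+					       uint32_t seg, uint32_t out[16])
+{
+	const uint32_t *ww = (const uint32_t *) input;
+	uint32_t i;
+
+	for (i = 0; i < 16; i++)
+		out[i] = bswap(ww[i * HASH_SEGS + seg]);	// same transform as store_w()
+}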
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm
new file mode 100644
index 000000000..1e53cfec9
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm
@@ -0,0 +1,494 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 digests simultaneously using SSE
+;;
+
+%include "reg_sizes.asm"
+default rel
+
+;; Magic functions defined in FIPS 180-1
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regC
+ pxor %%regF,%%regD
+ pand %%regF,%%regB
+ pxor %%regF,%%regD
+%endmacro
+
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regD
+ pxor %%regF,%%regC
+ pxor %%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regB
+ movdqa %%regT,%%regB
+ por %%regF,%%regC
+ pand %%regT,%%regC
+ pand %%regF,%%regD
+ por %%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ movdqa %%tmp, %%reg
+ pslld %%reg, %%imm
+ psrld %%tmp, (32-%%imm)
+ por %%reg, %%tmp
+%endmacro
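+; Note: SSE has no packed rotate instruction, so PROLD emulates
+; rol(reg, imm) with a shift-left/shift-right/or sequence through one
+; scratch register.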
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ paddd %%regE,[%%data + (%%memW * 16)]
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ movdqa %%regF, W16
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
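+; Note: W14/W15/W16 and the 16-entry ring kept in %%data implement the
+; rolling message schedule W(i) = rol32(W(i-3) ^ W(i-8) ^ W(i-14) ^ W(i-16), 1),
+; computed here for four interleaved segments per xmm register.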
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used for storing segs_digests on the stack
+%define RSP_SAVE tmp2
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define MOVPS movups
+
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+
+;void mh_sha1_block_sse(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer, which is used to save the big-endian data.
+; arg 3 number of 1KB blocks
+;
+global mh_sha1_block_sse:function internal
+func(mh_sha1_block_sse)
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+	; align rsp to 16 bytes as required by SSE
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ MOVPS A, [mh_digests_p + I*64 + 16*0]
+ MOVPS B, [mh_digests_p + I*64 + 16*1]
+ MOVPS C, [mh_digests_p + I*64 + 16*2]
+ MOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ movdqa [rsp + I*64 + 16*0], A
+ movdqa [rsp + I*64 + 16*1], B
+ movdqa [rsp + I*64 + 16*2], C
+ movdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+.block_loop:
+	;transform the input to big-endian and store it in the aligned frame buffer
+ movdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ MOVPS T0,[mh_in_p + I*64+0*16]
+ MOVPS T1,[mh_in_p + I*64+1*16]
+ MOVPS T2,[mh_in_p + I*64+2*16]
+ MOVPS T3,[mh_in_p + I*64+3*16]
+
+ pshufb T0, F
+ movdqa [mh_data_p +(I)*16 +0*256],T0
+ pshufb T1, F
+ movdqa [mh_data_p +(I)*16 +1*256],T1
+ pshufb T2, F
+ movdqa [mh_data_p +(I)*16 +2*256],T2
+ pshufb T3, F
+ movdqa [mh_data_p +(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+	mov pref, 1024 ;avoid prefetching the same data repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ movdqa A, [rsp + 0*64 + mh_segs]
+ movdqa B, [rsp + 1*64 + mh_segs]
+ movdqa C, [rsp + 2*64 + mh_segs]
+ movdqa D, [rsp + 3*64 + mh_segs]
+ movdqa E, [rsp + 4*64 + mh_segs]
+
+ movdqa AA, A
+ movdqa BB, B
+ movdqa CC, C
+ movdqa DD, D
+ movdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ movdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ movdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ movdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+;; do rounds 20...39
+ movdqa K, [K20_39]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ movdqa K, [K40_59]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 60...79
+ movdqa K, [K60_79]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ paddd A, AA
+ paddd B, BB
+ paddd C, CC
+ paddd D, DD
+ paddd E, EE
+
+ ; write out digests
+ movdqa [rsp + 0*64 + mh_segs], A
+ movdqa [rsp + 1*64 + mh_segs], B
+ movdqa [rsp + 2*64 + mh_segs], C
+ movdqa [rsp + 3*64 + mh_segs], D
+ movdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 256
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ movdqa A, [rsp + I*64 + 16*0]
+ movdqa B, [rsp + I*64 + 16*1]
+ movdqa C, [rsp + I*64 + 16*2]
+ movdqa D, [rsp + I*64 + 16*3]
+
+ MOVPS [mh_digests_p + I*64 + 16*0], A
+ MOVPS [mh_digests_p + I*64 + 16*1], B
+ MOVPS [mh_digests_p + I*64 + 16*2], C
+ MOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c
new file mode 100644
index 000000000..9eab755a6
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c
@@ -0,0 +1,122 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/*
+ * mh_sha1_finalize_base.c contains the prototypes of mh_sha1_finalize_XXX
+ * and mh_sha1_tail_XXX. The default definitions below are the base type,
+ * which generates mh_sha1_finalize_base and mh_sha1_tail_base. The other
+ * types are generated by mh_sha1.c through different predefined macros
+ * (see the illustrative note after the defaults below).
+ * mh_sha1_tail calculates the last, incomplete block of input data;
+ * mh_sha1_finalize is the mh_sha1_ctx wrapper around mh_sha1_tail.
+ */
+#ifndef MH_SHA1_FINALIZE_FUNCTION
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_base
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_base
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_base
+#define MH_SHA1_FINALIZE_SLVER
+#endif
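+
+/*
+ * Illustrative note: mh_sha1.c can generate an arch-specific variant by
+ * predefining the macros above before including this file, along the
+ * lines of (a sketch, not a verbatim quote of mh_sha1.c):
+ *
+ *	#define MH_SHA1_FINALIZE_FUNCTION	mh_sha1_finalize_sse
+ *	#define MH_SHA1_TAIL_FUNCTION		mh_sha1_tail_sse
+ *	#define MH_SHA1_BLOCK_FUNCTION		mh_sha1_block_sse
+ *	#include "mh_sha1_finalize_base.c"
+ */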
+
+void MH_SHA1_TAIL_FUNCTION(uint8_t * partial_buffer, uint32_t total_len,
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], uint8_t * frame_buffer,
+ uint32_t digests[SHA1_DIGEST_WORDS])
+{
+ uint64_t partial_buffer_len, len_in_bit;
+
+ partial_buffer_len = total_len % MH_SHA1_BLOCK_SIZE;
+
+	// Pad the first block
+ partial_buffer[partial_buffer_len] = 0x80;
+ partial_buffer_len++;
+ memset(partial_buffer + partial_buffer_len, 0,
+ MH_SHA1_BLOCK_SIZE - partial_buffer_len);
+
+	// Process the first block without the total length if the padding needs 2 blocks
+	if (partial_buffer_len > (MH_SHA1_BLOCK_SIZE - 8)) {
+		MH_SHA1_BLOCK_FUNCTION(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1);
+		// Pad the second block
+ memset(partial_buffer, 0, MH_SHA1_BLOCK_SIZE);
+ }
+	// Store the total length in bits and process the final padded block
+	len_in_bit = bswap64((uint64_t) total_len * 8);
+ *(uint64_t *) (partial_buffer + MH_SHA1_BLOCK_SIZE - 8) = len_in_bit;
+ MH_SHA1_BLOCK_FUNCTION(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1);
+
+	// Calculate the multi-hash SHA1 digest (the segment digests are the input message)
+ sha1_for_mh_sha1((uint8_t *) mh_sha1_segs_digests, digests,
+ 4 * SHA1_DIGEST_WORDS * HASH_SEGS);
+
+ return;
+}
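+
+/*
+ * Worked example (illustrative): with total_len = 1000 the remainder is
+ * 1000 % MH_SHA1_BLOCK_SIZE = 1000 bytes; appending 0x80 gives 1001,
+ * which still leaves room for the 8-byte length field (1001 <= 1016),
+ * so a single padded block is processed. With a remainder of 1020
+ * bytes, 1021 > 1016, so one block is processed without the length and
+ * a second, all-zero block carries bswap64(total_len * 8) in its last
+ * 8 bytes.
+ */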
+
+int MH_SHA1_FINALIZE_FUNCTION(struct mh_sha1_ctx *ctx, void *mh_sha1_digest)
+{
+ uint8_t *partial_block_buffer;
+ uint64_t total_len;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint8_t *aligned_frame_buffer;
+
+ if (ctx == NULL)
+ return MH_SHA1_CTX_ERROR_NULL;
+
+ total_len = ctx->total_length;
+ partial_block_buffer = ctx->partial_block_buffer;
+
+ /* mh_sha1 tail */
+ aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+
+ MH_SHA1_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha1_segs_digests,
+ aligned_frame_buffer, ctx->mh_sha1_digest);
+
+ /* Output the digests of mh_sha1 */
+ if (mh_sha1_digest != NULL) {
+ ((uint32_t *) mh_sha1_digest)[0] = ctx->mh_sha1_digest[0];
+ ((uint32_t *) mh_sha1_digest)[1] = ctx->mh_sha1_digest[1];
+ ((uint32_t *) mh_sha1_digest)[2] = ctx->mh_sha1_digest[2];
+ ((uint32_t *) mh_sha1_digest)[3] = ctx->mh_sha1_digest[3];
+ ((uint32_t *) mh_sha1_digest)[4] = ctx->mh_sha1_digest[4];
+ }
+
+ return MH_SHA1_CTX_ERROR_NONE;
+}
+
+#ifdef MH_SHA1_FINALIZE_SLVER
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// Version info
+struct slver mh_sha1_finalize_base_slver_0000027b;
+struct slver mh_sha1_finalize_base_slver = { 0x027b, 0x00, 0x00 };
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h
new file mode 100644
index 000000000..e8f226cbc
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h
@@ -0,0 +1,323 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MH_SHA1_INTERNAL_H_
+#define _MH_SHA1_INTERNAL_H_
+
+/**
+ * @file mh_sha1_internal.h
+ * @brief mh_sha1 internal function prototypes and macros
+ *
+ * Interface for mh_sha1 internal functions
+ *
+ */
+#include <stdint.h>
+#include "mh_sha1.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+#ifdef _MSC_VER
+# define inline __inline
+#endif
+
+ // align a pointer up to the next 64-byte boundary
+#define ALIGN_64(pointer) ( ((uint64_t)(pointer) + 0x3F)&(~0x3F) )
+
+ /*******************************************************************
+  * mh_sha1 constants and macros
+ ******************************************************************/
+ /* mh_sha1 constants */
+#define MH_SHA1_H0 0x67452301UL
+#define MH_SHA1_H1 0xefcdab89UL
+#define MH_SHA1_H2 0x98badcfeUL
+#define MH_SHA1_H3 0x10325476UL
+#define MH_SHA1_H4 0xc3d2e1f0UL
+
+#define K_00_19 0x5a827999UL
+#define K_20_39 0x6ed9eba1UL
+#define K_40_59 0x8f1bbcdcUL
+#define K_60_79 0xca62c1d6UL
+
+ /* mh_sha1 macros */
+#define F1(b,c,d) (d ^ (b & (c ^ d)))
+#define F2(b,c,d) (b ^ c ^ d)
+#define F3(b,c,d) ((b & c) | (d & (b | c)))
+#define F4(b,c,d) (b ^ c ^ d)
+
+#define rol32(x, r) (((x)<<(r)) ^ ((x)>>(32-(r))))
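+ // note: the shifted halves of rol32 never overlap, so '^' is equivalent to '|'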
+
+#define bswap(x) (((x)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | ((x)>>24))
+#define bswap64(x) (((x)<<56) | (((x)&0xff00)<<40) | (((x)&0xff0000)<<24) | \
+ (((x)&0xff000000)<<8) | (((x)&0xff00000000ull)>>8) | \
+ (((x)&0xff0000000000ull)<<24) | \
+ (((x)&0xff000000000000ull)<<40) | \
+ (((x)&0xff00000000000000ull)<<56))
+
+ /*******************************************************************
+ * SHA1 API internal function prototypes
+ ******************************************************************/
+
+ /**
+  * @brief Performs the complete SHA1 algorithm.
+  *
+  * @param input_data Pointer to the buffer containing the input message.
+  * @param digest Pointer to the digest to update.
+  * @param len Length of the buffer.
+  * @returns None
+  */
+ void sha1_for_mh_sha1(const uint8_t * input_data, uint32_t * digest, const uint32_t len);
+
+ /**
+  * @brief Calculate the SHA1 digest of a single block of size SHA1_BLOCK_SIZE.
+  *
+  * @param data Pointer to the data buffer containing one input block.
+  * @param digest Pointer to the SHA1 digest to update.
+  * @returns None
+  */
+ void sha1_single_for_mh_sha1(const uint8_t * data, uint32_t digest[]);
+
+ /*******************************************************************
+ * mh_sha1 API internal function prototypes
+ * Multiple versions of Update and Finalize functions are supplied which use
+ * multiple versions of block and tail process subfunctions.
+ ******************************************************************/
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+  * Calculate the remainder of the input data, which is less than
+  * MH_SHA1_BLOCK_SIZE, and output the final SHA1 digest based on
+  * mh_sha1_segs_digests.
+  *
+  * This function determines what instruction sets are enabled and selects the
+  * appropriate version at runtime.
+  *
+  * @param partial_buffer Pointer to the start addr of the remainder
+  * @param total_len The total length of all sections of input data.
+  * @param mh_sha1_segs_digests The digests of all 16 segments.
+  * @param frame_buffer Pointer to buffer which is a temp working area
+  * @param mh_sha1_digest mh_sha1 digest
+  * @returns none
+ *
+ */
+ void mh_sha1_tail(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+  * Calculate the remainder of the input data, which is less than
+  * MH_SHA1_BLOCK_SIZE, and output the final SHA1 digest based on
+  * mh_sha1_segs_digests.
+  *
+  * @param partial_buffer Pointer to the start addr of the remainder
+  * @param total_len The total length of all sections of input data.
+  * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_base(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+  * Calculate the remainder of the input data, which is less than
+  * MH_SHA1_BLOCK_SIZE, and output the final SHA1 digest based on
+  * mh_sha1_segs_digests.
+  *
+  * @requires SSE
+  *
+  * @param partial_buffer Pointer to the start addr of the remainder
+  * @param total_len The total length of all sections of input data.
+  * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_sse(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+  * Calculate the remainder of the input data, which is less than
+  * MH_SHA1_BLOCK_SIZE, and output the final SHA1 digest based on
+  * mh_sha1_segs_digests.
+  *
+  * @requires AVX
+  *
+  * @param partial_buffer Pointer to the start addr of the remainder
+  * @param total_len The total length of all sections of input data.
+  * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_avx(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+  * Calculate the remainder of the input data, which is less than
+  * MH_SHA1_BLOCK_SIZE, and output the final SHA1 digest based on
+  * mh_sha1_segs_digests.
+  *
+  * @requires AVX2
+  *
+  * @param partial_buffer Pointer to the start addr of the remainder
+  * @param total_len The total length of all sections of input data.
+  * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_avx2(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+  * Calculate the remainder of the input data, which is less than
+  * MH_SHA1_BLOCK_SIZE, and output the final SHA1 digest based on
+  * mh_sha1_segs_digests.
+  *
+  * @requires AVX512
+  *
+  * @param partial_buffer Pointer to the start addr of the remainder
+  * @param total_len The total length of all sections of input data.
+  * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_avx512(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+  * @brief Calculate the mh_sha1 digest of blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+  * @brief Calculate the mh_sha1 digest of blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_base(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+  * @brief Calculate the mh_sha1 digest of blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires SSE
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_sse(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+  * @brief Calculate the mh_sha1 digest of blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_avx(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+  * @brief Calculate the mh_sha1 digest of blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX2
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_avx2(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+  * @brief Calculate the mh_sha1 digest of blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX512
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_avx512(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm
new file mode 100644
index 000000000..83c39a315
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm
@@ -0,0 +1,83 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%define WRT_OPT wrt ..plt
+%else
+%define WRT_OPT
+%endif
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf32
+ [bits 32]
+%else
+ default rel
+ [bits 64]
+
+ extern mh_sha1_update_sse
+ extern mh_sha1_update_avx
+ extern mh_sha1_update_avx2
+ extern mh_sha1_finalize_sse
+ extern mh_sha1_finalize_avx
+ extern mh_sha1_finalize_avx2
+
+ %ifdef HAVE_AS_KNOWS_AVX512
+ extern mh_sha1_update_avx512
+ extern mh_sha1_finalize_avx512
+ %endif
+
+%endif
+
+extern mh_sha1_update_base
+extern mh_sha1_finalize_base
+
+mbin_interface mh_sha1_update
+mbin_interface mh_sha1_finalize
+
+%ifidn __OUTPUT_FORMAT__, elf64
+
+ %ifdef HAVE_AS_KNOWS_AVX512
+ mbin_dispatch_init6 mh_sha1_update, mh_sha1_update_base, mh_sha1_update_sse, mh_sha1_update_avx, mh_sha1_update_avx2, mh_sha1_update_avx512
+ mbin_dispatch_init6 mh_sha1_finalize, mh_sha1_finalize_base, mh_sha1_finalize_sse, mh_sha1_finalize_avx, mh_sha1_finalize_avx2, mh_sha1_finalize_avx512
+ %else
+ mbin_dispatch_init5 mh_sha1_update, mh_sha1_update_base, mh_sha1_update_sse, mh_sha1_update_avx, mh_sha1_update_avx2
+ mbin_dispatch_init5 mh_sha1_finalize, mh_sha1_finalize_base, mh_sha1_finalize_sse, mh_sha1_finalize_avx, mh_sha1_finalize_avx2
+ %endif
+
+%else
+ mbin_dispatch_init2 mh_sha1_update, mh_sha1_update_base
+ mbin_dispatch_init2 mh_sha1_finalize, mh_sha1_finalize_base
+%endif
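+
+;; Note (illustrative): the mbin_dispatch_init* macros wire mh_sha1_update
+;; and mh_sha1_finalize so that the first call probes the CPU features and
+;; rebinds the entry point to the best available version, base -> sse ->
+;; avx -> avx2 (-> avx512 when the assembler supports it).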
+
+;;; func core, ver, snum
+slversion mh_sha1_update, 00, 02, 0272
+slversion mh_sha1_finalize, 00, 02, 0273
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c
new file mode 100644
index 000000000..72b2a5237
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c
@@ -0,0 +1,180 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Loop many times over the same buffer
+# define TEST_LEN 16*1024
+# define TEST_LOOPS 20000
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define TEST_LEN 32*1024*1024
+# define TEST_LOOPS 100
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+#define TEST_MEM TEST_LEN
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE)
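+
+// Illustrative note: compiling with -D MH_SHA1_FUNC_TYPE=_sse makes these
+// expand to mh_sha1_update_sse/mh_sha1_finalize_sse; with the default empty
+// definition they name the runtime-dispatched mh_sha1_update/mh_sha1_finalize.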
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_CTX_ERROR_NONE){ \
+		printf("The mh_sha1 function failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ if (mh_sha1_fail) {
+		printf("mh_sha1 test failed\n");
+		printf("base: ");
+		dump((char *)hash_base, 20);
+		printf("test: ");
+ dump((char *)hash_test, 20);
+ }
+
+ return mh_sha1_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, fail = 0;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ struct mh_sha1_ctx *update_ctx_test = NULL, *update_ctx_base = NULL;
+ struct perf start, stop;
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n");
+
+ buff = malloc(TEST_LEN);
+ update_ctx_test = malloc(sizeof(*update_ctx_test));
+ update_ctx_base = malloc(sizeof(*update_ctx_base));
+
+ if (buff == NULL || update_ctx_base == NULL || update_ctx_test == NULL) {
+		printf("malloc failed, test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ // mh_sha1 base version
+ mh_sha1_init(update_ctx_base);
+ mh_sha1_update_base(update_ctx_base, buff, TEST_LEN);
+ mh_sha1_finalize_base(update_ctx_base, hash_base);
+
+ perf_start(&start);
+ for (i = 0; i < TEST_LOOPS / 10; i++) {
+ mh_sha1_init(update_ctx_base);
+ mh_sha1_update_base(update_ctx_base, buff, TEST_LEN);
+ mh_sha1_finalize_base(update_ctx_base, hash_base);
+ }
+ perf_stop(&stop);
+ printf("mh_sha1_update_base" TEST_TYPE_STR ": ");
+ perf_print(stop, start, (long long)TEST_MEM * i);
+
+ //Update feature test
+ CHECK_RETURN(mh_sha1_init(update_ctx_test));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test));
+
+ perf_start(&start);
+ for (i = 0; i < TEST_LOOPS; i++) {
+ CHECK_RETURN(mh_sha1_init(update_ctx_test));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test));
+ }
+ perf_stop(&stop);
+ printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": ");
+ perf_print(stop, start, (long long)TEST_MEM * i);
+
+ // Check results
+ fail = compare_digests(hash_base, hash_test);
+
+	if (fail) {
+		printf("Fail size=%d\n", TEST_LEN);
+		return -1;
+	}
+
+	printf("Pass func check\n");
+
+ return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c
new file mode 100644
index 000000000..fee928a9c
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c
@@ -0,0 +1,438 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+ // The macros and sub-functions below already exist in the source file
+ // sha1_for_mh_sha1.c, which is part of the ISA-L library as internal
+ // functions. They are duplicated here because of a linking issue:
+ // mh_sha1_ref() needs these macros and sub-functions without linking
+ // against the ISA-L library, so it includes them to keep the essential
+ // sub-functions in its own object file.
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+
+#define W(x) w[(x) & 15]
+
+#define step00_19(i,a,b,c,d,e) \
+ if (i>15) W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ else W(i) = bswap(ww[i]); \
+ e += rol32(a,5) + F1(b,c,d) + 0x5A827999 + W(i); \
+ b = rol32(b,30)
+
+#define step20_39(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F2(b,c,d) + 0x6ED9EBA1 + W(i); \
+ b = rol32(b,30)
+
+#define step40_59(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F3(b,c,d) + 0x8F1BBCDC + W(i); \
+ b = rol32(b,30)
+
+#define step60_79(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F4(b,c,d) + 0xCA62C1D6 + W(i); \
+ b = rol32(b,30)
+
+void sha1_single_for_mh_sha1_ref(const uint8_t * data, uint32_t digest[])
+{
+ uint32_t a, b, c, d, e;
+ uint32_t w[16] = { 0 };
+ uint32_t *ww = (uint32_t *) data;
+
+ a = digest[0];
+ b = digest[1];
+ c = digest[2];
+ d = digest[3];
+ e = digest[4];
+
+ step00_19(0, a, b, c, d, e);
+ step00_19(1, e, a, b, c, d);
+ step00_19(2, d, e, a, b, c);
+ step00_19(3, c, d, e, a, b);
+ step00_19(4, b, c, d, e, a);
+ step00_19(5, a, b, c, d, e);
+ step00_19(6, e, a, b, c, d);
+ step00_19(7, d, e, a, b, c);
+ step00_19(8, c, d, e, a, b);
+ step00_19(9, b, c, d, e, a);
+ step00_19(10, a, b, c, d, e);
+ step00_19(11, e, a, b, c, d);
+ step00_19(12, d, e, a, b, c);
+ step00_19(13, c, d, e, a, b);
+ step00_19(14, b, c, d, e, a);
+ step00_19(15, a, b, c, d, e);
+ step00_19(16, e, a, b, c, d);
+ step00_19(17, d, e, a, b, c);
+ step00_19(18, c, d, e, a, b);
+ step00_19(19, b, c, d, e, a);
+
+ step20_39(20, a, b, c, d, e);
+ step20_39(21, e, a, b, c, d);
+ step20_39(22, d, e, a, b, c);
+ step20_39(23, c, d, e, a, b);
+ step20_39(24, b, c, d, e, a);
+ step20_39(25, a, b, c, d, e);
+ step20_39(26, e, a, b, c, d);
+ step20_39(27, d, e, a, b, c);
+ step20_39(28, c, d, e, a, b);
+ step20_39(29, b, c, d, e, a);
+ step20_39(30, a, b, c, d, e);
+ step20_39(31, e, a, b, c, d);
+ step20_39(32, d, e, a, b, c);
+ step20_39(33, c, d, e, a, b);
+ step20_39(34, b, c, d, e, a);
+ step20_39(35, a, b, c, d, e);
+ step20_39(36, e, a, b, c, d);
+ step20_39(37, d, e, a, b, c);
+ step20_39(38, c, d, e, a, b);
+ step20_39(39, b, c, d, e, a);
+
+ step40_59(40, a, b, c, d, e);
+ step40_59(41, e, a, b, c, d);
+ step40_59(42, d, e, a, b, c);
+ step40_59(43, c, d, e, a, b);
+ step40_59(44, b, c, d, e, a);
+ step40_59(45, a, b, c, d, e);
+ step40_59(46, e, a, b, c, d);
+ step40_59(47, d, e, a, b, c);
+ step40_59(48, c, d, e, a, b);
+ step40_59(49, b, c, d, e, a);
+ step40_59(50, a, b, c, d, e);
+ step40_59(51, e, a, b, c, d);
+ step40_59(52, d, e, a, b, c);
+ step40_59(53, c, d, e, a, b);
+ step40_59(54, b, c, d, e, a);
+ step40_59(55, a, b, c, d, e);
+ step40_59(56, e, a, b, c, d);
+ step40_59(57, d, e, a, b, c);
+ step40_59(58, c, d, e, a, b);
+ step40_59(59, b, c, d, e, a);
+
+ step60_79(60, a, b, c, d, e);
+ step60_79(61, e, a, b, c, d);
+ step60_79(62, d, e, a, b, c);
+ step60_79(63, c, d, e, a, b);
+ step60_79(64, b, c, d, e, a);
+ step60_79(65, a, b, c, d, e);
+ step60_79(66, e, a, b, c, d);
+ step60_79(67, d, e, a, b, c);
+ step60_79(68, c, d, e, a, b);
+ step60_79(69, b, c, d, e, a);
+ step60_79(70, a, b, c, d, e);
+ step60_79(71, e, a, b, c, d);
+ step60_79(72, d, e, a, b, c);
+ step60_79(73, c, d, e, a, b);
+ step60_79(74, b, c, d, e, a);
+ step60_79(75, a, b, c, d, e);
+ step60_79(76, e, a, b, c, d);
+ step60_79(77, d, e, a, b, c);
+ step60_79(78, c, d, e, a, b);
+ step60_79(79, b, c, d, e, a);
+
+ digest[0] += a;
+ digest[1] += b;
+ digest[2] += c;
+ digest[3] += d;
+ digest[4] += e;
+}
+
+void sha1_for_mh_sha1_ref(const uint8_t * input_data, uint32_t * digest, const uint32_t len)
+{
+ uint32_t i, j;
+ uint8_t buf[2 * SHA1_BLOCK_SIZE];
+ union {
+ uint64_t uint;
+ uint8_t uchar[8];
+ } convert;
+ uint8_t *p;
+
+ digest[0] = MH_SHA1_H0;
+ digest[1] = MH_SHA1_H1;
+ digest[2] = MH_SHA1_H2;
+ digest[3] = MH_SHA1_H3;
+ digest[4] = MH_SHA1_H4;
+
+ i = len;
+ while (i >= SHA1_BLOCK_SIZE) {
+ sha1_single_for_mh_sha1_ref(input_data, digest);
+ input_data += SHA1_BLOCK_SIZE;
+ i -= SHA1_BLOCK_SIZE;
+ }
+
+ memcpy(buf, input_data, i);
+ buf[i++] = 0x80;
+ for (j = i; j < ((2 * SHA1_BLOCK_SIZE) - 8); j++)
+ buf[j] = 0;
+
+ if (i > SHA1_BLOCK_SIZE - 8)
+ i = 2 * SHA1_BLOCK_SIZE;
+ else
+ i = SHA1_BLOCK_SIZE;
+
+ convert.uint = 8 * len;
+ p = buf + i - 8;
+ p[0] = convert.uchar[7];
+ p[1] = convert.uchar[6];
+ p[2] = convert.uchar[5];
+ p[3] = convert.uchar[4];
+ p[4] = convert.uchar[3];
+ p[5] = convert.uchar[2];
+ p[6] = convert.uchar[1];
+ p[7] = convert.uchar[0];
+
+ sha1_single_for_mh_sha1_ref(buf, digest);
+ if (i == (2 * SHA1_BLOCK_SIZE))
+ sha1_single_for_mh_sha1_ref(buf + SHA1_BLOCK_SIZE, digest);
+}
+
+/*
+ * Rearrange the data of one segment from one block into new_data.
+ *
+ * Layout of new_data:
+ * segment
+ * -------------------------
+ * w0 | w1 | ... | w15
+ *
+ */
+static inline void transform_input_single(uint32_t * new_data, uint32_t * input,
+ uint32_t segment)
+{
+ new_data[16 * segment + 0] = input[16 * 0 + segment];
+ new_data[16 * segment + 1] = input[16 * 1 + segment];
+ new_data[16 * segment + 2] = input[16 * 2 + segment];
+ new_data[16 * segment + 3] = input[16 * 3 + segment];
+ new_data[16 * segment + 4] = input[16 * 4 + segment];
+ new_data[16 * segment + 5] = input[16 * 5 + segment];
+ new_data[16 * segment + 6] = input[16 * 6 + segment];
+ new_data[16 * segment + 7] = input[16 * 7 + segment];
+ new_data[16 * segment + 8] = input[16 * 8 + segment];
+ new_data[16 * segment + 9] = input[16 * 9 + segment];
+ new_data[16 * segment + 10] = input[16 * 10 + segment];
+ new_data[16 * segment + 11] = input[16 * 11 + segment];
+ new_data[16 * segment + 12] = input[16 * 12 + segment];
+ new_data[16 * segment + 13] = input[16 * 13 + segment];
+ new_data[16 * segment + 14] = input[16 * 14 + segment];
+ new_data[16 * segment + 15] = input[16 * 15 + segment];
+}
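
The unrolled helper above is a 16-word gather; a loop-form equivalent (a sketch, not part of the source) makes the transpose explicit:

    static inline void transform_input_single_loop(uint32_t *new_data,
                                                   uint32_t *input,
                                                   uint32_t segment)
    {
            int w;

            /* word w of segment `segment` comes from column `segment`
             * of the block viewed as a 16x16 word matrix */
            for (w = 0; w < 16; w++)
                    new_data[16 * segment + w] = input[16 * w + segment];
    }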
+
+// Adapt parameters to sha1_single_for_mh_sha1_ref
+#define sha1_update_one_seg(data, digest) \
+ sha1_single_for_mh_sha1_ref((const uint8_t *)(data), (uint32_t *)(digest))
+
+/*
+ * Rearrange all segments' data from one block into the buffer.
+ *
+ * Layout of new_data:
+ * segment
+ * -------------------------
+ * seg0: | w0 | w1 | ... | w15
+ * seg1: | w0 | w1 | ... | w15
+ * seg2: | w0 | w1 | ... | w15
+ * ....
+ * seg15: | w0 | w1 | ... | w15
+ *
+ */
+static inline void transform_input(uint32_t * new_data, uint32_t * input, uint32_t block)
+{
+ uint32_t *current_input = input + block * MH_SHA1_BLOCK_SIZE / 4;
+
+ transform_input_single(new_data, current_input, 0);
+ transform_input_single(new_data, current_input, 1);
+ transform_input_single(new_data, current_input, 2);
+ transform_input_single(new_data, current_input, 3);
+ transform_input_single(new_data, current_input, 4);
+ transform_input_single(new_data, current_input, 5);
+ transform_input_single(new_data, current_input, 6);
+ transform_input_single(new_data, current_input, 7);
+ transform_input_single(new_data, current_input, 8);
+ transform_input_single(new_data, current_input, 9);
+ transform_input_single(new_data, current_input, 10);
+ transform_input_single(new_data, current_input, 11);
+ transform_input_single(new_data, current_input, 12);
+ transform_input_single(new_data, current_input, 13);
+ transform_input_single(new_data, current_input, 14);
+ transform_input_single(new_data, current_input, 15);
+
+}
+
+/*
+ * Calculate all segments' digests from one block into the buffer.
+ *
+ * Layout of seg_digest:
+ * segment
+ * -------------------------
+ * seg0: | H0 | H1 | ... | H4
+ * seg1: | H0 | H1 | ... | H4
+ * seg2: | H0 | H1 | ... | H4
+ * ....
+ * seg15: | H0 | H1 | ... | H4
+ *
+ */
+static inline void sha1_update_all_segs(uint32_t * new_data,
+ uint32_t(*mh_sha1_seg_digests)[SHA1_DIGEST_WORDS])
+{
+ sha1_update_one_seg(&(new_data)[16 * 0], mh_sha1_seg_digests[0]);
+ sha1_update_one_seg(&(new_data)[16 * 1], mh_sha1_seg_digests[1]);
+ sha1_update_one_seg(&(new_data)[16 * 2], mh_sha1_seg_digests[2]);
+ sha1_update_one_seg(&(new_data)[16 * 3], mh_sha1_seg_digests[3]);
+ sha1_update_one_seg(&(new_data)[16 * 4], mh_sha1_seg_digests[4]);
+ sha1_update_one_seg(&(new_data)[16 * 5], mh_sha1_seg_digests[5]);
+ sha1_update_one_seg(&(new_data)[16 * 6], mh_sha1_seg_digests[6]);
+ sha1_update_one_seg(&(new_data)[16 * 7], mh_sha1_seg_digests[7]);
+ sha1_update_one_seg(&(new_data)[16 * 8], mh_sha1_seg_digests[8]);
+ sha1_update_one_seg(&(new_data)[16 * 9], mh_sha1_seg_digests[9]);
+ sha1_update_one_seg(&(new_data)[16 * 10], mh_sha1_seg_digests[10]);
+ sha1_update_one_seg(&(new_data)[16 * 11], mh_sha1_seg_digests[11]);
+ sha1_update_one_seg(&(new_data)[16 * 12], mh_sha1_seg_digests[12]);
+ sha1_update_one_seg(&(new_data)[16 * 13], mh_sha1_seg_digests[13]);
+ sha1_update_one_seg(&(new_data)[16 * 14], mh_sha1_seg_digests[14]);
+ sha1_update_one_seg(&(new_data)[16 * 15], mh_sha1_seg_digests[15]);
+}
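
Both unrolled functions above (transform_input and sha1_update_all_segs) reduce to short loops; equivalent sketches, not part of the source:

    /* transform_input, loop form */
    for (i = 0; i < HASH_SEGS; i++)
            transform_input_single(new_data, current_input, i);

    /* sha1_update_all_segs, loop form */
    for (i = 0; i < HASH_SEGS; i++)
            sha1_update_one_seg(&new_data[16 * i], mh_sha1_seg_digests[i]);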
+
+void mh_sha1_block_ref(const uint8_t * input_data, uint32_t(*digests)[HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks)
+{
+ uint32_t i, j;
+ uint32_t *temp_buffer = (uint32_t *) frame_buffer;
+ uint32_t(*trans_digests)[SHA1_DIGEST_WORDS];
+
+ trans_digests = (uint32_t(*)[SHA1_DIGEST_WORDS]) digests;
+
+ // Re-structure seg_digests from 5*16 to 16*5
+ for (j = 0; j < HASH_SEGS; j++) {
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ temp_buffer[j * SHA1_DIGEST_WORDS + i] = digests[i][j];
+ }
+ }
+ memcpy(trans_digests, temp_buffer, 4 * SHA1_DIGEST_WORDS * HASH_SEGS);
+
+ // Calculate digests for all segments, leveraging sha1 API
+ for (i = 0; i < num_blocks; i++) {
+ transform_input(temp_buffer, (uint32_t *) input_data, i);
+ sha1_update_all_segs(temp_buffer, trans_digests);
+ }
+
+ // Re-structure seg_digests from 16*5 to 5*16
+ for (j = 0; j < HASH_SEGS; j++) {
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ temp_buffer[i * HASH_SEGS + j] = trans_digests[j][i];
+ }
+ }
+ memcpy(digests, temp_buffer, 4 * SHA1_DIGEST_WORDS * HASH_SEGS);
+
+ return;
+}
+
+void mh_sha1_tail_ref(uint8_t * partial_buffer, uint32_t total_len,
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], uint8_t * frame_buffer,
+ uint32_t digests[SHA1_DIGEST_WORDS])
+{
+ uint64_t partial_buffer_len, len_in_bit;
+
+ partial_buffer_len = total_len % MH_SHA1_BLOCK_SIZE;
+
+	// Pad the first block
+ partial_buffer[partial_buffer_len] = 0x80;
+ partial_buffer_len++;
+ memset(partial_buffer + partial_buffer_len, 0,
+ MH_SHA1_BLOCK_SIZE - partial_buffer_len);
+
+	// Process the first block without total_length if padding needs 2 blocks
+ if (partial_buffer_len > (MH_SHA1_BLOCK_SIZE - 8)) {
+ mh_sha1_block_ref(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1);
+		// Pad the second block
+ memset(partial_buffer, 0, MH_SHA1_BLOCK_SIZE);
+ }
+	// Pad the final block
+ len_in_bit = bswap64((uint64_t) total_len * 8);
+ *(uint64_t *) (partial_buffer + MH_SHA1_BLOCK_SIZE - 8) = len_in_bit;
+ mh_sha1_block_ref(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1);
+
+	// Calculate multi-hash SHA1 digests (segment digests as input message)
+ sha1_for_mh_sha1_ref((uint8_t *) mh_sha1_segs_digests, digests,
+ 4 * SHA1_DIGEST_WORDS * HASH_SEGS);
+
+ return;
+}
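
The branch above encodes the usual SHA-style padding rule at mh_sha1 block granularity; restated as a helper (a sketch with a hypothetical name, not part of the source):

    /* After appending the mandatory 0x80 terminator there must still be
     * 8 bytes left for the 64-bit bit-length field, or a second
     * MH_SHA1_BLOCK_SIZE block is required. */
    static inline int mh_sha1_tail_blocks(uint32_t total_len)
    {
            uint32_t used = total_len % MH_SHA1_BLOCK_SIZE + 1;  /* data + 0x80 */

            return (used > MH_SHA1_BLOCK_SIZE - 8) ? 2 : 1;
    }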
+
+void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest)
+{
+ uint64_t total_len;
+ uint64_t num_blocks;
+ uint32_t mh_sha1_segs_digests[SHA1_DIGEST_WORDS][HASH_SEGS];
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE];
+ uint8_t partial_block_buffer[MH_SHA1_BLOCK_SIZE * 2];
+ uint32_t mh_sha1_hash_dword[SHA1_DIGEST_WORDS];
+ uint32_t i;
+ const uint8_t *input_data = (const uint8_t *)buffer;
+
+ /* Initialize digests of all segments */
+ for (i = 0; i < HASH_SEGS; i++) {
+ mh_sha1_segs_digests[0][i] = MH_SHA1_H0;
+ mh_sha1_segs_digests[1][i] = MH_SHA1_H1;
+ mh_sha1_segs_digests[2][i] = MH_SHA1_H2;
+ mh_sha1_segs_digests[3][i] = MH_SHA1_H3;
+ mh_sha1_segs_digests[4][i] = MH_SHA1_H4;
+ }
+
+ total_len = len;
+
+ // Calculate blocks
+ num_blocks = len / MH_SHA1_BLOCK_SIZE;
+ if (num_blocks > 0) {
+		// process num_blocks blocks
+ mh_sha1_block_ref(input_data, mh_sha1_segs_digests, frame_buffer, num_blocks);
+ len -= num_blocks * MH_SHA1_BLOCK_SIZE;
+ input_data += num_blocks * MH_SHA1_BLOCK_SIZE;
+ }
+ // Store the partial block
+ if (len != 0) {
+ memcpy(partial_block_buffer, input_data, len);
+ }
+
+ /* Finalize */
+ mh_sha1_tail_ref(partial_block_buffer, total_len, mh_sha1_segs_digests,
+ frame_buffer, mh_sha1_hash_dword);
+
+ // Output the digests of mh_sha1
+ if (mh_sha1_digest != NULL) {
+ mh_sha1_digest[0] = mh_sha1_hash_dword[0];
+ mh_sha1_digest[1] = mh_sha1_hash_dword[1];
+ mh_sha1_digest[2] = mh_sha1_hash_dword[2];
+ mh_sha1_digest[3] = mh_sha1_hash_dword[3];
+ mh_sha1_digest[4] = mh_sha1_hash_dword[4];
+ }
+
+ return;
+}
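
A one-shot driver for the reference implementation above (a sketch; the buffer contents are arbitrary, and mh_sha1_ref is declared extern here the way the unit tests in this patch do):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>
    #include "mh_sha1.h"

    extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t *mh_sha1_digest);

    int main(void)
    {
            uint8_t buf[4096];
            uint32_t digest[SHA1_DIGEST_WORDS];
            int i;

            memset(buf, 0x5a, sizeof(buf));
            mh_sha1_ref(buf, sizeof(buf), digest);
            for (i = 0; i < SHA1_DIGEST_WORDS; i++)
                    printf("%08x", digest[i]);
            printf("\n");
            return 0;
    }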
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c
new file mode 100644
index 000000000..0279cd709
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c
@@ -0,0 +1,217 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE)
+
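The token-pasting pair above selects which update/finalize variant this test exercises; an illustration of the expansion (the compile-time flag is assumed, not shown in the build files here):

    /* default (MH_SHA1_FUNC_TYPE empty):
     *   TEST_UPDATE_FUNCTION -> mh_sha1_update   (the dispatched entry point)
     * built with -DMH_SHA1_FUNC_TYPE=_base:
     *   TEST_UPDATE_FUNCTION -> mh_sha1_update_base
     *   TEST_FINAL_FUNCTION  -> mh_sha1_finalize_base
     */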
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_CTX_ERROR_NONE){ \
+		printf("The mh_sha1 function failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+#define MH_SHA1_REF mh_sha1_ref
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+		printf("test: ");
+ dump((char *)hash_test, 20);
+ }
+
+ return mh_sha1_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_ref[SHA1_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ int size, offset;
+ struct mh_sha1_ctx *update_ctx = NULL;
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_test:\n");
+
+ srand(TEST_SEED);
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ MH_SHA1_REF(buff, TEST_LEN, hash_ref);
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("fail rand1 test\n");
+ return -1;
+ } else
+ putchar('.');
+
+ // Test various size messages
+ for (size = TEST_LEN; size >= 0; size--) {
+
+ // Fill with rand data
+ rand_buffer(buff, size);
+
+ MH_SHA1_REF(buff, size, hash_ref);
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", size);
+ return -1;
+ }
+
+ if ((size & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various buffer offsets and sizes
+ printf("offset tests");
+ for (size = TEST_LEN - 256; size > 256; size -= 11) {
+ for (offset = 0; offset < 256; offset++) {
+ MH_SHA1_REF(buff + offset, size, hash_ref);
+
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", size);
+ return -1;
+ }
+
+ }
+ if ((size & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Run efence tests
+ printf("efence tests");
+ for (size = TEST_SIZE; size > 0; size--) {
+ offset = TEST_LEN - size;
+
+ MH_SHA1_REF(buff + offset, size, hash_ref);
+
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", size);
+ return -1;
+ }
+
+ if ((size & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_test:");
+ printf(" %s\n", fail == 0 ? "Pass" : "Fail");
+
+ return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c
new file mode 100644
index 000000000..8879b2879
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c
@@ -0,0 +1,110 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/*
+ * mh_sha1_update_base.c contains the template for mh_sha1_update_XXX.
+ * The default definitions below generate mh_sha1_update_base; other
+ * variants are generated by mh_sha1.c through predefined macros.
+ */
+#ifndef MH_SHA1_UPDATE_FUNCTION
+#include "mh_sha1_internal.h"
+#include <string.h>
+
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_base
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_base
+#define MH_SHA1_UPDATE_SLVER
+#endif
+
+int MH_SHA1_UPDATE_FUNCTION(struct mh_sha1_ctx *ctx, const void *buffer, uint32_t len)
+{
+
+ uint8_t *partial_block_buffer;
+ uint64_t partial_block_len;
+ uint64_t num_blocks;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint8_t *aligned_frame_buffer;
+ const uint8_t *input_data = (const uint8_t *)buffer;
+
+ if (ctx == NULL)
+ return MH_SHA1_CTX_ERROR_NULL;
+
+ if (len == 0)
+ return MH_SHA1_CTX_ERROR_NONE;
+
+ partial_block_len = ctx->total_length % MH_SHA1_BLOCK_SIZE;
+ partial_block_buffer = ctx->partial_block_buffer;
+ aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+
+ ctx->total_length += len;
+	// Not enough input data for a full mh_sha1 block
+ if (len + partial_block_len < MH_SHA1_BLOCK_SIZE) {
+ memcpy(partial_block_buffer + partial_block_len, input_data, len);
+ return MH_SHA1_CTX_ERROR_NONE;
+ }
+ // mh_sha1 calculation for the previous partial block
+ if (partial_block_len != 0) {
+ memcpy(partial_block_buffer + partial_block_len, input_data,
+ MH_SHA1_BLOCK_SIZE - partial_block_len);
+		// process one block
+ MH_SHA1_BLOCK_FUNCTION(partial_block_buffer, mh_sha1_segs_digests,
+ aligned_frame_buffer, 1);
+ input_data += MH_SHA1_BLOCK_SIZE - partial_block_len;
+ len -= MH_SHA1_BLOCK_SIZE - partial_block_len;
+ memset(partial_block_buffer, 0, MH_SHA1_BLOCK_SIZE);
+ }
+ // Calculate mh_sha1 for the current blocks
+ num_blocks = len / MH_SHA1_BLOCK_SIZE;
+ if (num_blocks > 0) {
+		// process num_blocks blocks
+ MH_SHA1_BLOCK_FUNCTION(input_data, mh_sha1_segs_digests, aligned_frame_buffer,
+ num_blocks);
+ len -= num_blocks * MH_SHA1_BLOCK_SIZE;
+ input_data += num_blocks * MH_SHA1_BLOCK_SIZE;
+ }
+ // Store the partial block
+ if (len != 0) {
+ memcpy(partial_block_buffer, input_data, len);
+ }
+
+ return MH_SHA1_CTX_ERROR_NONE;
+
+}
+
+#ifdef MH_SHA1_UPDATE_SLVER
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// Version info
+struct slver mh_sha1_update_base_slver_0000027a;
+struct slver mh_sha1_update_base_slver = { 0x027a, 0x00, 0x00 };
+#endif
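
Per the header comment, non-base variants come from re-including this template with the function-name macros predefined; a sketch of the assumed pattern in mh_sha1.c (it mirrors the murmur3 stitched sources later in this patch):

    /* in mh_sha1.c (sketch):
       #define MH_SHA1_UPDATE_FUNCTION  mh_sha1_update_avx
       #define MH_SHA1_BLOCK_FUNCTION   mh_sha1_block_avx
       #include "mh_sha1_update_base.c"
       #undef MH_SHA1_UPDATE_FUNCTION
       #undef MH_SHA1_BLOCK_FUNCTION */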
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c
new file mode 100644
index 000000000..2b1b79179
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c
@@ -0,0 +1,240 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_CTX_ERROR_NONE){ \
+		printf("The mh_sha1 function failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+		printf("test: ");
+ dump((char *)hash_test, 20);
+ }
+
+ return mh_sha1_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0, i;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_ref[SHA1_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ int update_count;
+ int size1, size2, offset, addr_offset;
+ struct mh_sha1_ctx *update_ctx = NULL;
+ uint8_t *mem_addr = NULL;
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_test:");
+
+ srand(TEST_SEED);
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_ref(buff, TEST_LEN, hash_ref);
+
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("fail rand1 test\n");
+ return -1;
+ } else
+ putchar('.');
+
+	// Test various message sizes split across two updates.
+	printf("\n two-update tests with various message sizes");
+ for (size1 = TEST_LEN; size1 >= 0; size1--) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_ref(buff, TEST_LEN, hash_ref);
+
+ // subsequent update
+		size2 = TEST_LEN - size1;	// size2 differs from size1
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size1=%d\n", size1);
+ return -1;
+ }
+
+ if ((size2 & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+	// Test various update counts
+ printf("\n various update count tests");
+ for (update_count = 1; update_count <= TEST_LEN; update_count++) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_ref(buff, TEST_LEN, hash_ref);
+
+ // subsequent update
+ size1 = TEST_LEN / update_count;
+		size2 = TEST_LEN - size1 * (update_count - 1);	// size2 may differ from size1
+
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ for (i = 1, offset = 0; i < update_count; i++) {
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1));
+ offset += size1;
+ }
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size1=%d\n", size1);
+ return -1;
+ }
+
+ if ((size2 & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+	// Test various start addresses of ctx.
+	printf("\n various ctx start address tests");
+ free(update_ctx);
+ mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10);
+ for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_ref(buff, TEST_LEN, hash_ref);
+
+		// an unaligned offset
+ update_ctx = (struct mh_sha1_ctx *)(mem_addr + addr_offset);
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail addr_offset=%d\n", addr_offset);
+ return -1;
+ }
+
+ if ((addr_offset & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail");
+
+ return fail;
+
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c
new file mode 100644
index 000000000..697155b50
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c
@@ -0,0 +1,212 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "mh_sha1_internal.h"
+#include <string.h>
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+// Reference SHA1 Functions for mh_sha1
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+
+#define W(x) w[(x) & 15]
+
+#define step00_19(i,a,b,c,d,e) \
+ if (i>15) W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ else W(i) = bswap(ww[i]); \
+ e += rol32(a,5) + F1(b,c,d) + 0x5A827999 + W(i); \
+ b = rol32(b,30)
+
+#define step20_39(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F2(b,c,d) + 0x6ED9EBA1 + W(i); \
+ b = rol32(b,30)
+
+#define step40_59(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F3(b,c,d) + 0x8F1BBCDC + W(i); \
+ b = rol32(b,30)
+
+#define step60_79(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F4(b,c,d) + 0xCA62C1D6 + W(i); \
+ b = rol32(b,30)
+
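The W(x) macro above keeps only a 16-word ring of the message schedule; one expansion round written as a plain function (a sketch equivalent to the macros, with rol32 as used by them):

    static inline uint32_t sha1_w_expand(uint32_t w[16], int i)
    {
            /* W[i] = rol32(W[i-3] ^ W[i-8] ^ W[i-14] ^ W[i-16], 1), indices mod 16 */
            uint32_t x = rol32(w[(i - 3) & 15] ^ w[(i - 8) & 15] ^
                               w[(i - 14) & 15] ^ w[(i - 16) & 15], 1);

            w[i & 15] = x;
            return x;
    }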
+void sha1_single_for_mh_sha1(const uint8_t * data, uint32_t digest[])
+{
+ uint32_t a, b, c, d, e;
+ uint32_t w[16] = { 0 };
+ uint32_t *ww = (uint32_t *) data;
+
+ a = digest[0];
+ b = digest[1];
+ c = digest[2];
+ d = digest[3];
+ e = digest[4];
+
+ step00_19(0, a, b, c, d, e);
+ step00_19(1, e, a, b, c, d);
+ step00_19(2, d, e, a, b, c);
+ step00_19(3, c, d, e, a, b);
+ step00_19(4, b, c, d, e, a);
+ step00_19(5, a, b, c, d, e);
+ step00_19(6, e, a, b, c, d);
+ step00_19(7, d, e, a, b, c);
+ step00_19(8, c, d, e, a, b);
+ step00_19(9, b, c, d, e, a);
+ step00_19(10, a, b, c, d, e);
+ step00_19(11, e, a, b, c, d);
+ step00_19(12, d, e, a, b, c);
+ step00_19(13, c, d, e, a, b);
+ step00_19(14, b, c, d, e, a);
+ step00_19(15, a, b, c, d, e);
+ step00_19(16, e, a, b, c, d);
+ step00_19(17, d, e, a, b, c);
+ step00_19(18, c, d, e, a, b);
+ step00_19(19, b, c, d, e, a);
+
+ step20_39(20, a, b, c, d, e);
+ step20_39(21, e, a, b, c, d);
+ step20_39(22, d, e, a, b, c);
+ step20_39(23, c, d, e, a, b);
+ step20_39(24, b, c, d, e, a);
+ step20_39(25, a, b, c, d, e);
+ step20_39(26, e, a, b, c, d);
+ step20_39(27, d, e, a, b, c);
+ step20_39(28, c, d, e, a, b);
+ step20_39(29, b, c, d, e, a);
+ step20_39(30, a, b, c, d, e);
+ step20_39(31, e, a, b, c, d);
+ step20_39(32, d, e, a, b, c);
+ step20_39(33, c, d, e, a, b);
+ step20_39(34, b, c, d, e, a);
+ step20_39(35, a, b, c, d, e);
+ step20_39(36, e, a, b, c, d);
+ step20_39(37, d, e, a, b, c);
+ step20_39(38, c, d, e, a, b);
+ step20_39(39, b, c, d, e, a);
+
+ step40_59(40, a, b, c, d, e);
+ step40_59(41, e, a, b, c, d);
+ step40_59(42, d, e, a, b, c);
+ step40_59(43, c, d, e, a, b);
+ step40_59(44, b, c, d, e, a);
+ step40_59(45, a, b, c, d, e);
+ step40_59(46, e, a, b, c, d);
+ step40_59(47, d, e, a, b, c);
+ step40_59(48, c, d, e, a, b);
+ step40_59(49, b, c, d, e, a);
+ step40_59(50, a, b, c, d, e);
+ step40_59(51, e, a, b, c, d);
+ step40_59(52, d, e, a, b, c);
+ step40_59(53, c, d, e, a, b);
+ step40_59(54, b, c, d, e, a);
+ step40_59(55, a, b, c, d, e);
+ step40_59(56, e, a, b, c, d);
+ step40_59(57, d, e, a, b, c);
+ step40_59(58, c, d, e, a, b);
+ step40_59(59, b, c, d, e, a);
+
+ step60_79(60, a, b, c, d, e);
+ step60_79(61, e, a, b, c, d);
+ step60_79(62, d, e, a, b, c);
+ step60_79(63, c, d, e, a, b);
+ step60_79(64, b, c, d, e, a);
+ step60_79(65, a, b, c, d, e);
+ step60_79(66, e, a, b, c, d);
+ step60_79(67, d, e, a, b, c);
+ step60_79(68, c, d, e, a, b);
+ step60_79(69, b, c, d, e, a);
+ step60_79(70, a, b, c, d, e);
+ step60_79(71, e, a, b, c, d);
+ step60_79(72, d, e, a, b, c);
+ step60_79(73, c, d, e, a, b);
+ step60_79(74, b, c, d, e, a);
+ step60_79(75, a, b, c, d, e);
+ step60_79(76, e, a, b, c, d);
+ step60_79(77, d, e, a, b, c);
+ step60_79(78, c, d, e, a, b);
+ step60_79(79, b, c, d, e, a);
+
+ digest[0] += a;
+ digest[1] += b;
+ digest[2] += c;
+ digest[3] += d;
+ digest[4] += e;
+}
+
+void sha1_for_mh_sha1(const uint8_t * input_data, uint32_t * digest, const uint32_t len)
+{
+ uint32_t i, j;
+ uint8_t buf[2 * SHA1_BLOCK_SIZE];
+ union {
+ uint64_t uint;
+ uint8_t uchar[8];
+ } convert;
+ uint8_t *p;
+
+ digest[0] = MH_SHA1_H0;
+ digest[1] = MH_SHA1_H1;
+ digest[2] = MH_SHA1_H2;
+ digest[3] = MH_SHA1_H3;
+ digest[4] = MH_SHA1_H4;
+
+ i = len;
+ while (i >= SHA1_BLOCK_SIZE) {
+ sha1_single_for_mh_sha1(input_data, digest);
+ input_data += SHA1_BLOCK_SIZE;
+ i -= SHA1_BLOCK_SIZE;
+ }
+
+ memcpy(buf, input_data, i);
+ buf[i++] = 0x80;
+ for (j = i; j < ((2 * SHA1_BLOCK_SIZE) - 8); j++)
+ buf[j] = 0;
+
+ if (i > SHA1_BLOCK_SIZE - 8)
+ i = 2 * SHA1_BLOCK_SIZE;
+ else
+ i = SHA1_BLOCK_SIZE;
+
+ convert.uint = 8 * len;
+ p = buf + i - 8;
+ p[0] = convert.uchar[7];
+ p[1] = convert.uchar[6];
+ p[2] = convert.uchar[5];
+ p[3] = convert.uchar[4];
+ p[4] = convert.uchar[3];
+ p[5] = convert.uchar[2];
+ p[6] = convert.uchar[1];
+ p[7] = convert.uchar[0];
+
+ sha1_single_for_mh_sha1(buf, digest);
+ if (i == (2 * SHA1_BLOCK_SIZE))
+ sha1_single_for_mh_sha1(buf + SHA1_BLOCK_SIZE, digest);
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am
new file mode 100644
index 000000000..98cd59efc
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am
@@ -0,0 +1,71 @@
+########################################################################
+# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+# Requires unit mh_sha1
+AM_CFLAGS += -I mh_sha1
+
+lsrc_murmur = mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
+
+lsrc_stitch = mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
+
+lsrc_stitch += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm
+
+lsrc += $(lsrc_murmur) \
+ $(lsrc_stitch)
+
+other_src += include/reg_sizes.asm \
+ include/multibinary.asm \
+ include/test.h \
+ mh_sha1/mh_sha1_internal.h \
+ mh_sha1_murmur3_x64_128/murmur3_x64_128.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
+
+extern_hdrs += include/mh_sha1_murmur3_x64_128.h
+
+unit_tests += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test
+
+perf_tests += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf
+
+
+mh_sha1_murmur3_x64_128_test: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_test_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
+
+mh_sha1_murmur3_x64_128_update_test: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_update_test_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
+
+mh_sha1_murmur3_x64_128_perf: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_perf_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c
new file mode 100644
index 000000000..12cb3644d
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c
@@ -0,0 +1,151 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_murmur3_x64_128_internal.h"
+
+int mh_sha1_murmur3_x64_128_init(struct mh_sha1_murmur3_x64_128_ctx *ctx, uint64_t murmur_seed)
+{
+ uint64_t *murmur3_x64_128_hash;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint32_t i;
+
+ if (ctx == NULL)
+ return MH_SHA1_MURMUR3_CTX_ERROR_NULL;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+ for (i = 0; i < HASH_SEGS; i++) {
+ mh_sha1_segs_digests[0][i] = MH_SHA1_H0;
+ mh_sha1_segs_digests[1][i] = MH_SHA1_H1;
+ mh_sha1_segs_digests[2][i] = MH_SHA1_H2;
+ mh_sha1_segs_digests[3][i] = MH_SHA1_H3;
+ mh_sha1_segs_digests[4][i] = MH_SHA1_H4;
+ }
+
+ murmur3_x64_128_hash = (uint64_t *) ctx->murmur3_x64_128_digest;
+ murmur3_x64_128_hash[0] = murmur_seed;
+ murmur3_x64_128_hash[1] = murmur_seed;
+
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+}
+
+void mh_sha1_murmur3_x64_128_block_base(const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t
+ murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks)
+{
+
+ mh_sha1_block_base(input_data, mh_sha1_digests, frame_buffer, num_blocks);
+
+ murmur3_x64_128_block(input_data,
+ num_blocks * MH_SHA1_BLOCK_SIZE / MUR_BLOCK_SIZE,
+ murmur3_x64_128_digests);
+
+ return;
+}
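
The block-count conversion above is plain arithmetic. Assuming the internal constants MH_SHA1_BLOCK_SIZE == 1024 and MUR_BLOCK_SIZE == 16 (two 64-bit murmur lanes per block), each mh_sha1 block drives 64 murmur3 block rounds:

    /* sketch, under the constant values assumed above */
    uint32_t mur_blocks = num_blocks * MH_SHA1_BLOCK_SIZE / MUR_BLOCK_SIZE;
    /* num_blocks == 1  ->  mur_blocks == 1024 / 16 == 64 */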
+
+/***************mh_sha1_murmur3_x64_128_update***********/
+// mh_sha1_murmur3_x64_128_update_sse.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_sse
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_sse
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+// mh_sha1_murmur3_x64_128_update_avx.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+// mh_sha1_murmur3_x64_128_update_avx2.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx2
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx2
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+/***************mh_sha1_murmur3_x64_128_finalize***********/
+// mh_sha1_murmur3_x64_128_finalize_sse.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_sse
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_sse
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+// mh_sha1_murmur3_x64_128_finalize_avx.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+// mh_sha1_murmur3_x64_128_finalize_avx2.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx2
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx2
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+/***************version info***********/
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// Version info
+struct slver mh_sha1_murmur3_x64_128_init_slver_00000251;
+struct slver mh_sha1_murmur3_x64_128_init_slver = { 0x0251, 0x00, 0x00 };
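
(The symbol-name suffixes below appear to encode the version fields as hex core/ver/snum; e.g. mh_sha1_murmur3_x64_128_update_avx_slver_02000256 pairs with { 0x0256, 0x00, 0x02 }.)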
+
+// mh_sha1_murmur3_x64_128_update version info
+struct slver mh_sha1_murmur3_x64_128_update_sse_slver_00000254;
+struct slver mh_sha1_murmur3_x64_128_update_sse_slver = { 0x0254, 0x00, 0x00 };
+
+struct slver mh_sha1_murmur3_x64_128_update_avx_slver_02000256;
+struct slver mh_sha1_murmur3_x64_128_update_avx_slver = { 0x0256, 0x00, 0x02 };
+
+struct slver mh_sha1_murmur3_x64_128_update_avx2_slver_04000258;
+struct slver mh_sha1_murmur3_x64_128_update_avx2_slver = { 0x0258, 0x00, 0x04 };
+
+// mh_sha1_murmur3_x64_128_finalize version info
+struct slver mh_sha1_murmur3_x64_128_finalize_sse_slver_00000255;
+struct slver mh_sha1_murmur3_x64_128_finalize_sse_slver = { 0x0255, 0x00, 0x00 };
+
+struct slver mh_sha1_murmur3_x64_128_finalize_avx_slver_02000257;
+struct slver mh_sha1_murmur3_x64_128_finalize_avx_slver = { 0x0257, 0x00, 0x02 };
+
+struct slver mh_sha1_murmur3_x64_128_finalize_avx2_slver_04000259;
+struct slver mh_sha1_murmur3_x64_128_finalize_avx2_slver = { 0x0259, 0x00, 0x04 };
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c
new file mode 100644
index 000000000..e380a8795
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c
@@ -0,0 +1,67 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_murmur3_x64_128_internal.h"
+
+#ifdef HAVE_AS_KNOWS_AVX512
+
+/***************mh_sha1_murmur3_x64_128_update***********/
+// mh_sha1_murmur3_x64_128_update_avx512.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx512
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx512
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+/***************mh_sha1_murmur3_x64_128_finalize***********/
+// mh_sha1_murmur3_x64_128_finalize_avx512.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx512
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx512
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+/***************version info***********/
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// mh_sha1_murmur3_x64_128_update version info
+struct slver mh_sha1_murmur3_x64_128_update_avx512_slver_0600025c;
+struct slver mh_sha1_murmur3_x64_128_update_avx512_slver = { 0x025c, 0x00, 0x06 };
+
+// mh_sha1_murmur3_x64_128_finalize version info
+struct slver mh_sha1_murmur3_x64_128_finalize_avx512_slver_0600025d;
+struct slver mh_sha1_murmur3_x64_128_finalize_avx512_slver = { 0x025d, 0x00, 0x06 };
+
+#endif // HAVE_AS_KNOWS_AVX512
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm
new file mode 100644
index 000000000..ca35e6961
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm
@@ -0,0 +1,702 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute mh_sha1 (16-segment SHA1) interleaved with murmur3_x64_128 using AVX
+;;
+
+%include "reg_sizes.asm"
+default rel
+
+;; Magic functions defined in FIPS 180-1
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF, %%regC,%%regD
+ vpand %%regF, %%regF,%%regB
+ vpxor %%regF, %%regF,%%regD
+%endmacro
+
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF,%%regD,%%regC
+ vpxor %%regF,%%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpor %%regF,%%regB,%%regC
+ vpand %%regT,%%regB,%%regC
+ vpand %%regF,%%regF,%%regD
+ vpor %%regF,%%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ vpsrld %%tmp, %%reg, (32-(%%imm))
+ vpslld %%reg, %%reg, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+; non-destructive
+; PROLD_nd reg, imm, tmp, src
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+ vpsrld %%tmp, %%src, (32-(%%imm))
+ vpslld %%reg, %%src, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ vpaddd %%regE, %%regE,[%%data + (%%memW * 16)]
+ PROLD_nd %%regT,5, %%regF,%%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE, %%regE,%%regF
+%endmacro
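
Per 32-bit lane, the macro above performs the familiar scalar SHA-1 round; in C terms (a sketch, matching the reference macros in sha1_for_mh_sha1.c earlier in this patch):

    /* scalar equivalent of SHA1_STEP_00_15, per lane; K is the round
     * constant (%%immCNT) and MAGIC_F one of the F0..F3 functions */
    e += K + W[i] + rol32(a, 5) + MAGIC_F(b, c, d);
    b  = rol32(b, 30);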
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;; Murmur's instructions are interleaved into the macro variants below.
+;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 256 bytes,
+;; so 1 murmur block is inserted into every 4 SHA1_STEP_16_79 calls.
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J
+
+%macro SHA1_STEP_16_79_0 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+ vpsrld %%regF, W16, (32-1)
+ mov mur_data1, [mur_in_p]
+ mov mur_data2, [mur_in_p + 8]
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ imul mur_data1, mur_c1_r
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ imul mur_data2, mur_c2_r
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_1 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ rol mur_data1, R1
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ rol mur_data2, R2
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ imul mur_data1, mur_c2_r
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ imul mur_data2, mur_c1_r
+ PROLD %%regB,30, %%regT
+ add mur_in_p, 16
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_2 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ xor mur_hash1, mur_data1
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+ rol mur_hash1, R3
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ add mur_hash1, mur_hash2
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_3 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ xor mur_hash2, mur_data2
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+ rol mur_hash2, R4
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ add mur_hash2, mur_hash1
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
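
Taken together, the four variants above spread exactly one murmur3_x64_128 block round across four SHA1 steps; in scalar C (a sketch, with rotl64 assumed and C1/C2/N1/N2 as defined further down in this file):

    /* one murmur3_x64_128 block round, as split across _0.._3 above */
    k1 *= C1;  k1 = rotl64(k1, 31);  k1 *= C2;   /* _0 and _1 */
    k2 *= C2;  k2 = rotl64(k2, 33);  k2 *= C1;
    h1 ^= k1;  h1 = rotl64(h1, 27);  h1 += h2;   /* _2 */
    h1 = h1 * 5 + 0x52dce729;                    /* N1 */
    h2 ^= k2;  h2 = rotl64(h2, 31);  h2 += h1;   /* _3 */
    h2 = h2 * 5 + 0x38495ab5;                    /* N2 */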
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ push rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbp
+ pop rbx
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ save_reg rbx, 10*16 + 6*8
+ save_reg rbp, 10*16 + 7*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbx, [rsp + 10*16 + 6*8]
+ mov rbp, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp7
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 |      |  Ea| Eb | Ec |...| Ep |
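+;
+; In C terms (a sketch matching the prototype below), this layout is
+;	uint32_t segs_digests[SHA1_DIGEST_WORDS][HASH_SEGS];	/* [5][16] */
+; where segs_digests[w][s] is word w (A..E) of segment s's running digest.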
+
+align 32
+;void mh_sha1_murmur3_x64_128_block_avx (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including segment digests (uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer, which is used to store the big-endian data
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
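+; A hypothetical C-side call (a sketch only; the real callers are the update
+; and finalize wrappers in this module, and the digest field names here are
+; assumed):
+;	mh_sha1_murmur3_x64_128_block_avx(input, ctx->mh_sha1_interim_digests,
+;			frame_buffer, ctx->murmur3_x64_128_digest,
+;			len / MH_SHA1_BLOCK_SIZE);
+;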
+global mh_sha1_murmur3_x64_128_block_avx:function internal
+func(mh_sha1_murmur3_x64_128_block_avx)
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 16 Bytes needed by avx
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ VMOVPS A, [mh_digests_p + I*64 + 16*0]
+ VMOVPS B, [mh_digests_p + I*64 + 16*1]
+ VMOVPS C, [mh_digests_p + I*64 + 16*2]
+ VMOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ vmovdqa [rsp + I*64 + 16*0], A
+ vmovdqa [rsp + I*64 + 16*1], B
+ vmovdqa [rsp + I*64 + 16*2], C
+ vmovdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+ ;init murmur variables
+	mov	mur_in_p, mh_in_p	;murmur and mh_sha1 step through the input at different strides
+	;load murmur hash digests and the multiplier constants
+ mov mur_hash1, [mur_digest_p]
+ mov mur_hash2, [mur_digest_p + 8]
+ mov mur_c1_r, C1
+ mov mur_c2_r, C2
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*16]
+ VMOVPS T1,[mh_in_p + I*64+1*16]
+ VMOVPS T2,[mh_in_p + I*64+2*16]
+ VMOVPS T3,[mh_in_p + I*64+3*16]
+
+ vpshufb T0, F
+ vmovdqa [mh_data_p +(I)*16 +0*256],T0
+ vpshufb T1, F
+ vmovdqa [mh_data_p +(I)*16 +1*256],T1
+ vpshufb T2, F
+ vmovdqa [mh_data_p +(I)*16 +2*256],T2
+ vpshufb T3, F
+ vmovdqa [mh_data_p +(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vmovdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 20...39
+ vmovdqa K, [K20_39]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ vmovdqa K, [K40_59]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 60...79
+ vmovdqa K, [K60_79]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ vpaddd A, AA
+ vpaddd B, BB
+ vpaddd C, CC
+ vpaddd D, DD
+ vpaddd E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+ ;store murmur-hash digest
+ mov [mur_digest_p], mur_hash1
+ mov [mur_digest_p + 8], mur_hash2
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ vmovdqa A, [rsp + I*64 + 16*0]
+ vmovdqa B, [rsp + I*64 + 16*1]
+ vmovdqa C, [rsp + I*64 + 16*2]
+ vmovdqa D, [rsp + I*64 + 16*3]
+
+ VMOVPS [mh_digests_p + I*64 + 16*0], A
+ VMOVPS [mh_digests_p + I*64 + 16*1], B
+ VMOVPS [mh_digests_p + I*64 + 16*2], C
+ VMOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm
new file mode 100644
index 000000000..c7e9a889b
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm
@@ -0,0 +1,649 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute mh_sha1 (16 interleaved SHA1 segments) stitched with murmur3_x64_128 using AVX2
+;;
+
+%include "reg_sizes.asm"
+default rel
+
+;; Magic functions defined in FIPS 180-1
+;;
+;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF, %%regC,%%regD
+ vpand %%regF, %%regF,%%regB
+ vpxor %%regF, %%regF,%%regD
+%endmacro
+
+;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF,%%regD,%%regC
+ vpxor %%regF,%%regF,%%regB
+%endmacro
+
+
+
+;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpor %%regF,%%regB,%%regC
+ vpand %%regT,%%regB,%%regC
+ vpand %%regF,%%regF,%%regD
+ vpor %%regF,%%regF,%%regT
+%endmacro
+
+;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ vpsrld %%tmp, %%reg, (32-%%imm)
+ vpslld %%reg, %%reg, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+; PROLD_nd reg, imm, tmp, src
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+ vpsrld %%tmp, %%src, (32-%%imm)
+ vpslld %%reg, %%src, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ vpaddd %%regE, %%regE,[%%data + (%%memW * 32)]
+ PROLD_nd %%regT,5, %%regF,%%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE, %%regE,%%regF
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;; Insert murmur's instructions into this macro.
+;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 512 bytes,
+;; so one murmur block (16 bytes) is stitched into every 2 SHA1_STEP_16_79 calls
+;; (see the scalar sketch below).
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J
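+
+;; For reference, the scalar murmur3_x64_128 work spread across the stitched
+;; macros below is, per 16-byte block (a C sketch; C1/C2, R1-R4, M and N1/N2
+;; are the %defines further down):
+;;	k1 = load64(p); k2 = load64(p + 8); p += 16;
+;;	k1 *= C1; k1 = rotl64(k1, R1); k1 *= C2;
+;;	h1 ^= k1; h1 = rotl64(h1, R3); h1 += h2; h1 = h1 * M + N1;
+;;	k2 *= C2; k2 = rotl64(k2, R2); k2 *= C1;
+;;	h2 ^= k2; h2 = rotl64(h2, R4); h2 += h1; h2 = h2 * M + N2;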
+
+%macro SHA1_STEP_16_79_0 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32]
+ mov mur_data1, [mur_in_p]
+ mov mur_data2, [mur_in_p + 8]
+
+ vpsrld %%regF, W16, (32-1)
+ imul mur_data1, mur_c1_r
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ imul mur_data2, mur_c2_r
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF
+ rol mur_data1, R1
+ vpaddd %%regE, %%regE,%%regF
+ rol mur_data2, R2
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ imul mur_data1, mur_c2_r
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ imul mur_data2, mur_c1_r
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+
+%macro SHA1_STEP_16_79_1 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ xor mur_hash1, mur_data1
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32]
+ rol mur_hash1, R3
+ vpxor W16, W16, W14
+ add mur_hash1, mur_hash2
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32]
+ lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ xor mur_hash2, mur_data2
+ vpor %%regF, %%regF, W16
+ rol mur_hash2, R4
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF
+ vpaddd %%regE, %%regE,%%regF
+ add mur_hash2, mur_hash1
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ add mur_in_p, 16
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ push rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbp
+ pop rbx
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ save_reg rbx, 10*16 + 6*8
+ save_reg rbp, 10*16 + 7*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbx, [rsp + 10*16 + 6*8]
+ mov rbp, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp7
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+
+%define pref tmp8
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+%define A ymm0
+%define B ymm1
+%define C ymm2
+%define D ymm3
+%define E ymm4
+
+%define F ymm5
+%define T0 ymm6
+%define T1 ymm7
+%define T2 ymm8
+%define T3 ymm9
+%define T4 ymm10
+%define T5 ymm11
+%define T6 ymm12
+%define T7 ymm13
+%define T8 ymm14
+%define T9 ymm15
+
+%define AA ymm5
+%define BB ymm6
+%define CC ymm7
+%define DD ymm8
+%define EE ymm9
+%define TMP ymm10
+%define FUN ymm11
+%define K ymm12
+%define W14 ymm13
+%define W15 ymm14
+%define W16 ymm15
+
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 |      |  Ea| Eb | Ec |...| Ep |
+
+align 32
+;void mh_sha1_murmur3_x64_128_block_avx2 (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including segment digests (uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer, which is used to store the big-endian data
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
+global mh_sha1_murmur3_x64_128_block_avx2:function internal
+func(mh_sha1_murmur3_x64_128_block_avx2)
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 32 Bytes needed by avx2
+ and rsp, ~0x1F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 2
+ VMOVPS A, [mh_digests_p + I*32*5 + 32*0]
+ VMOVPS B, [mh_digests_p + I*32*5 + 32*1]
+ VMOVPS C, [mh_digests_p + I*32*5 + 32*2]
+ VMOVPS D, [mh_digests_p + I*32*5 + 32*3]
+ VMOVPS E, [mh_digests_p + I*32*5 + 32*4]
+
+ vmovdqa [rsp + I*32*5 + 32*0], A
+ vmovdqa [rsp + I*32*5 + 32*1], B
+ vmovdqa [rsp + I*32*5 + 32*2], C
+ vmovdqa [rsp + I*32*5 + 32*3], D
+ vmovdqa [rsp + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+
+ ;init murmur variables
+	mov	mur_in_p, mh_in_p	;murmur and mh_sha1 step through the input at different strides
+	;load murmur hash digests and the multiplier constants
+ mov mur_hash1, [mur_digest_p]
+ mov mur_hash2, [mur_digest_p + 8]
+ mov mur_c1_r, C1
+ mov mur_c2_r, C2
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*8_SEGS*5*2
+%assign I 0
+%rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*32]
+ VMOVPS T1,[mh_in_p + I*64+1*32]
+
+ vpshufb T0, T0, F
+ vmovdqa [mh_data_p +I*32+0*512],T0
+ vpshufb T1, T1, F
+ vmovdqa [mh_data_p +I*32+1*512],T1
+%assign I (I+1)
+%endrep
+
+ mov mh_segs, 0 ;start from the first 8 segments
+	mov	pref, 1024		;avoid prefetching the same lines repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vmovdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+%assign I (I+1)
+%endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 32]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 32]
+ %rep 4
+ %assign J (I % 2)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 20...39
+ vmovdqa K, [K20_39]
+ %rep 20
+ %assign J (I % 2)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+;; do rounds 40...59
+ vmovdqa K, [K40_59]
+ %rep 20
+ %assign J (I % 2)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*2]
+ PREFETCH_X [mh_in_p + pref+128*3]
+;; do rounds 60...79
+ vmovdqa K, [K60_79]
+ %rep 20
+ %assign J (I % 2)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ vpaddd A,A, AA
+ vpaddd B,B, BB
+ vpaddd C,C, CC
+ vpaddd D,D, DD
+ vpaddd E,E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 512
+
+ add mh_data_p, 512
+ add mh_segs, 32
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+ ;store murmur-hash digest
+ mov [mur_digest_p], mur_hash1
+ mov [mur_digest_p + 8], mur_hash2
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 2
+ vmovdqa A, [rsp + I*32*5 + 32*0]
+ vmovdqa B, [rsp + I*32*5 + 32*1]
+ vmovdqa C, [rsp + I*32*5 + 32*2]
+ vmovdqa D, [rsp + I*32*5 + 32*3]
+ vmovdqa E, [rsp + I*32*5 + 32*4]
+
+ VMOVPS [mh_digests_p + I*32*5 + 32*0], A
+ VMOVPS [mh_digests_p + I*32*5 + 32*1], B
+ VMOVPS [mh_digests_p + I*32*5 + 32*2], C
+ VMOVPS [mh_digests_p + I*32*5 + 32*3], D
+ VMOVPS [mh_digests_p + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=32
+
+align 32
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+ dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm
new file mode 100644
index 000000000..8cc84959e
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm
@@ -0,0 +1,500 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute mh_sha1 (16 interleaved SHA1 segments) stitched with murmur3_x64_128 using AVX-512
+;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+default rel
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovdqu64
+;SIMD variables definition
+%define A zmm0
+%define B zmm1
+%define C zmm2
+%define D zmm3
+%define E zmm4
+%define HH0 zmm5
+%define HH1 zmm6
+%define HH2 zmm7
+%define HH3 zmm8
+%define HH4 zmm9
+%define KT zmm10
+%define XTMP0 zmm11
+%define XTMP1 zmm12
+%define SHUF_MASK zmm13
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;using extra 16 ZMM registers to hold the byte-swapped (big-endian) input data
+%define W0 zmm16
+%define W1 zmm17
+%define W2 zmm18
+%define W3 zmm19
+%define W4 zmm20
+%define W5 zmm21
+%define W6 zmm22
+%define W7 zmm23
+%define W8 zmm24
+%define W9 zmm25
+%define W10 zmm26
+%define W11 zmm27
+%define W12 zmm28
+%define W13 zmm29
+%define W14 zmm30
+%define W15 zmm31
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;macros definition
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro PROCESS_LOOP 2
+%define %%WT %1
+%define %%F_IMMED %2
+
+ ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
+ ; E=D, D=C, C=ROTL_30(B), B=A, A=T
+
+ ; Ft
+ ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D)
+ ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D
+ ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)
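+	; (the F_IMMED truth-table encoding of Ft is sketched after this macro)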
+
+ vmovdqa32 XTMP1, B ; Copy B
+ vpaddd E, E, %%WT ; E = E + Wt
+ vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D)
+ vpaddd E, E, KT ; E = E + Wt + Kt
+ vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A)
+ vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt
+ vprold B, B, 30 ; B = ROTL_30(B)
+ vpaddd E, E, XTMP0 ; E = T
+
+ ROTATE_ARGS
+%endmacro
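+
+; The F_IMMED byte is the 8-bit truth table of Ft over the bit triple
+; (B,C,D).  In scalar C (a sketch), the three functions and the immediates
+; used below are:
+;	uint32_t ch(uint32_t b, uint32_t c, uint32_t d)     {return (b & c) ^ (~b & d);}          /* 0xCA */
+;	uint32_t parity(uint32_t b, uint32_t c, uint32_t d) {return b ^ c ^ d;}                   /* 0x96 */
+;	uint32_t maj(uint32_t b, uint32_t c, uint32_t d)    {return (b & c) ^ (b & d) ^ (c & d);} /* 0xE8 */
+; 0x96 (3-way XOR) also drives the vpternlogd in the schedule macro below.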
+
+;; Insert murmur's instructions into this macro.
+;; Every section_loop of mh_sha1 calls PROCESS_LOOP 80 times and
+;; MSG_SCHED_ROUND_16_79 64 times, processing 1024 bytes.
+;; So one murmur block (16 bytes) is stitched into each of the 64 stitched rounds.
+%macro PROCESS_LOOP_MUR 2
+%define %%WT %1
+%define %%F_IMMED %2
+
+ ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
+ ; E=D, D=C, C=ROTL_30(B), B=A, A=T
+
+ ; Ft
+ ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D)
+ ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D
+ ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)
+
+ mov mur_data1, [mur_in_p]
+ mov mur_data2, [mur_in_p + 8]
+ vmovdqa32 XTMP1, B ; Copy B
+ imul mur_data1, mur_c1_r
+ imul mur_data2, mur_c2_r
+ vpaddd E, E, %%WT ; E = E + Wt
+ rol mur_data1, R1
+ rol mur_data2, R2
+ vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D)
+ imul mur_data1, mur_c2_r
+ imul mur_data2, mur_c1_r
+ vpaddd E, E, KT ; E = E + Wt + Kt
+ xor mur_hash1, mur_data1
+ add mur_in_p, 16
+ vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A)
+ rol mur_hash1, R3
+ vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt
+ add mur_hash1, mur_hash2
+ vprold B, B, 30 ; B = ROTL_30(B)
+ lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+ vpaddd E, E, XTMP0 ; E = T
+ xor mur_hash2, mur_data2
+
+ ROTATE_ARGS
+%endmacro
+
+%macro MSG_SCHED_ROUND_16_79_MUR 4
+%define %%WT %1
+%define %%WTp2 %2
+%define %%WTp8 %3
+%define %%WTp13 %4
+ ; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ Wt-16)
+ ; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt)
+ vpternlogd %%WT, %%WTp2, %%WTp8, 0x96
+ rol mur_hash2, R4
+ vpxord %%WT, %%WT, %%WTp13
+ add mur_hash2, mur_hash1
+ lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+ vprold %%WT, %%WT, 1
+%endmacro
+
+%define APPEND(a,b) a %+ b
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ push rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbp
+ pop rbx
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+	; no proc_frame/unwind info here: plain labels and manual register saves
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp + 0*16], xmm6
+ movdqa [rsp + 1*16], xmm7
+ movdqa [rsp + 2*16], xmm8
+ movdqa [rsp + 3*16], xmm9
+ movdqa [rsp + 4*16], xmm10
+ movdqa [rsp + 5*16], xmm11
+ movdqa [rsp + 6*16], xmm12
+ movdqa [rsp + 7*16], xmm13
+ movdqa [rsp + 8*16], xmm14
+ movdqa [rsp + 9*16], xmm15
+ mov [rsp + 10*16 + 0*8], r12
+ mov [rsp + 10*16 + 1*8], r13
+ mov [rsp + 10*16 + 2*8], r14
+ mov [rsp + 10*16 + 3*8], r15
+ mov [rsp + 10*16 + 4*8], rdi
+ mov [rsp + 10*16 + 5*8], rsi
+ mov [rsp + 10*16 + 6*8], rbx
+ mov [rsp + 10*16 + 7*8], rbp
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbx, [rsp + 10*16 + 6*8]
+ mov rbp, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;variable used to save/restore rsp (segs_digests stay in ZMM registers, not on the stack)
+%define RSP_SAVE tmp7
+
+%define pref tmp8
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 |      |  Ea| Eb | Ec |...| Ep |
+
+[bits 64]
+section .text
+align 32
+
+;void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including segment digests (uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer, which is used to store the big-endian data
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
+global mh_sha1_murmur3_x64_128_block_avx512
+func(mh_sha1_murmur3_x64_128_block_avx512)
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; align rsp to 64 Bytes needed by avx512
+ and rsp, ~0x3f
+
+ ; copy segs_digests into registers.
+ VMOVPS HH0, [mh_digests_p + 64*0]
+ VMOVPS HH1, [mh_digests_p + 64*1]
+ VMOVPS HH2, [mh_digests_p + 64*2]
+ VMOVPS HH3, [mh_digests_p + 64*3]
+ VMOVPS HH4, [mh_digests_p + 64*4]
+ ;a mask used to transform to big-endian data
+ vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK]
+
+ ;init murmur variables
+	mov	mur_in_p, mh_in_p	;murmur and mh_sha1 step through the input at different strides
+	;load murmur hash digests and the multiplier constants
+ mov mur_hash1, [mur_digest_p]
+ mov mur_hash2, [mur_digest_p + 8]
+ mov mur_c1_r, C1
+ mov mur_c2_r, C2
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ ;using extra 16 ZMM registers instead of stack
+%assign I 0
+%rep 8
+%assign J (I+1)
+ VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64]
+ VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64]
+
+ vpshufb APPEND(W,I), APPEND(W,I), SHUF_MASK
+ vpshufb APPEND(W,J), APPEND(W,J), SHUF_MASK
+%assign I (I+2)
+%endrep
+
+ vmovdqa64 A, HH0
+ vmovdqa64 B, HH1
+ vmovdqa64 C, HH2
+ vmovdqa64 D, HH3
+ vmovdqa64 E, HH4
+
+ vmovdqa32 KT, [K00_19]
+%assign I 0xCA
+%assign J 0
+%assign K 2
+%assign L 8
+%assign M 13
+%assign N 0
+%rep 80
+ %if N < 64 ; stitching 64 times
+ PROCESS_LOOP_MUR APPEND(W,J), I
+ MSG_SCHED_ROUND_16_79_MUR APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M)
+ %else ; 64 <= N < 80, without stitching
+ PROCESS_LOOP APPEND(W,J), I
+ %endif
+ %if N = 19
+ vmovdqa32 KT, [K20_39]
+ %assign I 0x96
+ %elif N = 39
+ vmovdqa32 KT, [K40_59]
+ %assign I 0xE8
+ %elif N = 59
+ vmovdqa32 KT, [K60_79]
+ %assign I 0x96
+ %endif
+ %if N % 20 = 19
+ PREFETCH_X [mh_in_p + 1024+128*(N / 20)]
+ PREFETCH_X [mh_in_p + 1024+128*(N / 20 +1)]
+ %endif
+%assign J ((J+1)% 16)
+%assign K ((K+1)% 16)
+%assign L ((L+1)% 16)
+%assign M ((M+1)% 16)
+%assign N (N+1)
+%endrep
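+
+	; The rotating J/K/L/M indices walk a 16-register ring of live
+	; schedule words; the equivalent scalar C (a sketch) is:
+	;	for (t = 16; t < 80; t++)
+	;		W[t & 15] = rotl32(W[t & 15] ^ W[(t + 2) & 15] ^
+	;				   W[(t + 8) & 15] ^ W[(t + 13) & 15], 1);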
+
+ ; Add old digest
+ vpaddd HH0,A, HH0
+ vpaddd HH1,B, HH1
+ vpaddd HH2,C, HH2
+ vpaddd HH3,D, HH3
+ vpaddd HH4,E, HH4
+
+ add mh_in_p, 1024
+ sub loops, 1
+ jne .block_loop
+
+ ;store murmur-hash digest
+ mov [mur_digest_p], mur_hash1
+ mov [mur_digest_p + 8], mur_hash2
+
+ ; copy segs_digests to mh_digests_p
+ VMOVPS [mh_digests_p + 64*0], HH0
+ VMOVPS [mh_digests_p + 64*1], HH1
+ VMOVPS [mh_digests_p + 64*2], HH2
+ VMOVPS [mh_digests_p + 64*3], HH3
+ VMOVPS [mh_digests_p + 64*4], HH4
+
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+
+section .data align=64
+
+align 64
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+
+K20_39: dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+
+K40_59: dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+
+K60_79: dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_sha1_murmur3_x64_128_block_avx512
+no_sha1_murmur3_x64_128_block_avx512:
+%endif
+%endif ; HAVE_AS_KNOWS_AVX512
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm
new file mode 100644
index 000000000..174a8518f
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm
@@ -0,0 +1,698 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute mh_sha1 (16 interleaved SHA1 segments) stitched with murmur3_x64_128 using SSE
+;;
+
+%include "reg_sizes.asm"
+default rel
+
+;; Magic functions defined in FIPS 180-1
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regC
+ pxor %%regF,%%regD
+ pand %%regF,%%regB
+ pxor %%regF,%%regD
+%endmacro
+
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regD
+ pxor %%regF,%%regC
+ pxor %%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regB
+ movdqa %%regT,%%regB
+ por %%regF,%%regC
+ pand %%regT,%%regC
+ pand %%regF,%%regD
+ por %%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ movdqa %%tmp, %%reg
+ pslld %%reg, %%imm
+ psrld %%tmp, (32-%%imm)
+ por %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ paddd %%regE,[%%data + (%%memW * 16)]
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ movdqa %%regF, W16
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+;; Insert murmur's instructions into this macro.
+;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 256 bytes,
+;; so one murmur block (16 bytes) is stitched into every 4 SHA1_STEP_16_79 calls
+;; (see the scalar sketch below).
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J
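+
+;; The four variants split one scalar murmur round (a C sketch) roughly as:
+;;	_0: k1 = load64(p); k2 = load64(p + 8); k1 *= C1; k2 *= C2;
+;;	_1: k1 = rotl64(k1, R1); k2 = rotl64(k2, R2); k1 *= C2; k2 *= C1; p += 16;
+;;	_2: h1 ^= k1; h1 = rotl64(h1, R3); h1 += h2; h1 = h1 * M + N1;
+;;	_3: h2 ^= k2; h2 = rotl64(h2, R4); h2 += h1; h2 = h2 * M + N2;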
+
+%macro SHA1_STEP_16_79_0 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ movdqa %%regF, W16
+ mov mur_data1, [mur_in_p]
+ mov mur_data2, [mur_in_p + 8]
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ imul mur_data1, mur_c1_r
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ imul mur_data2, mur_c2_r
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_1 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ rol mur_data1, R1
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ movdqa %%regF, W16
+ pslld W16, 1
+ rol mur_data2, R2
+ psrld %%regF, (32-1)
+ por %%regF, W16
+
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ imul mur_data1, mur_c2_r
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ imul mur_data2, mur_c1_r
+ PROLD %%regB,30, %%regT
+ add mur_in_p, 16
+ paddd %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_2 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ xor mur_hash1, mur_data1
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ rol mur_hash1, R3
+ movdqa %%regF, W16
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ add mur_hash1, mur_hash2
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_3 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ xor mur_hash2, mur_data2
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ rol mur_hash2, R4
+ movdqa %%regF, W16
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ add mur_hash2, mur_hash1
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+ paddd %%regE,%%regF
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ push rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbp
+ pop rbx
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ save_reg rbx, 10*16 + 6*8
+ save_reg rbp, 10*16 + 7*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbx, [rsp + 10*16 + 6*8]
+ mov rbp, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp7
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define MOVPS movups
+
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 |      |  Ea| Eb | Ec |...| Ep |
+
+align 32
+;void mh_sha1_murmur3_x64_128_block_sse (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including segment digests (uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer, which is used to store the big-endian data
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
+global mh_sha1_murmur3_x64_128_block_sse:function internal
+func(mh_sha1_murmur3_x64_128_block_sse)
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 16 Bytes needed by sse
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ MOVPS A, [mh_digests_p + I*64 + 16*0]
+ MOVPS B, [mh_digests_p + I*64 + 16*1]
+ MOVPS C, [mh_digests_p + I*64 + 16*2]
+ MOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ movdqa [rsp + I*64 + 16*0], A
+ movdqa [rsp + I*64 + 16*1], B
+ movdqa [rsp + I*64 + 16*2], C
+ movdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+ ;init murmur variables
+	mov	mur_in_p, mh_in_p	;murmur and mh_sha1 step through the input at different strides
+	;load murmur hash digests and the multiplier constants
+ mov mur_hash1, [mur_digest_p]
+ mov mur_hash2, [mur_digest_p + 8]
+ mov mur_c1_r, C1
+ mov mur_c2_r, C2
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ movdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ MOVPS T0,[mh_in_p+I*64+0*16]
+ MOVPS T1,[mh_in_p+I*64+1*16]
+ MOVPS T2,[mh_in_p+I*64+2*16]
+ MOVPS T3,[mh_in_p+I*64+3*16]
+
+ pshufb T0, F
+ movdqa [mh_data_p+(I)*16 +0*256],T0
+ pshufb T1, F
+ movdqa [mh_data_p+(I)*16 +1*256],T1
+ pshufb T2, F
+ movdqa [mh_data_p+(I)*16 +2*256],T2
+ pshufb T3, F
+ movdqa [mh_data_p+(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+ .segs_loop:
+ ;; Initialize digests
+ movdqa A, [rsp + 0*64 + mh_segs]
+ movdqa B, [rsp + 1*64 + mh_segs]
+ movdqa C, [rsp + 2*64 + mh_segs]
+ movdqa D, [rsp + 3*64 + mh_segs]
+ movdqa E, [rsp + 4*64 + mh_segs]
+
+ movdqa AA, A
+ movdqa BB, B
+ movdqa CC, C
+ movdqa DD, D
+ movdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ movdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ movdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ movdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 20...39
+ movdqa K, [K20_39]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ movdqa K, [K40_59]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 60...79
+ movdqa K, [K60_79]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ paddd A, AA
+ paddd B, BB
+ paddd C, CC
+ paddd D, DD
+ paddd E, EE
+
+ ; write out digests
+ movdqa [rsp + 0*64 + mh_segs], A
+ movdqa [rsp + 1*64 + mh_segs], B
+ movdqa [rsp + 2*64 + mh_segs], C
+ movdqa [rsp + 3*64 + mh_segs], D
+ movdqa [rsp + 4*64 + mh_segs], E
+
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+ ;store murmur-hash digest
+ mov [mur_digest_p], mur_hash1
+ mov [mur_digest_p + 8], mur_hash2
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ movdqa A, [rsp + I*64 + 16*0]
+ movdqa B, [rsp + I*64 + 16*1]
+ movdqa C, [rsp + I*64 + 16*2]
+ movdqa D, [rsp + I*64 + 16*3]
+
+ MOVPS [mh_digests_p + I*64 + 16*0], A
+ MOVPS [mh_digests_p + I*64 + 16*1], B
+ MOVPS [mh_digests_p + I*64 + 16*2], C
+ MOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
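For orientation: the K00_19..K60_79 constants and the MAGIC_F0..F3 round
functions used by the assembly above are the standard SHA-1 round constants
and boolean functions. Below is a minimal scalar C sketch of the same
80-round compression — an illustration only, not part of the ISA-L sources;
the SIMD code applies this structure to four segments in parallel per XMM
register.

    #include <stdint.h>

    static inline uint32_t rol32(uint32_t x, int n)
    {
            return (x << n) | (x >> (32 - n));
    }

    /* One SHA-1 block compression; w[0..15] holds the big-endian
     * message words, the rest is the expanded schedule. */
    static void sha1_block_sketch(uint32_t h[5], uint32_t w[80])
    {
            uint32_t a = h[0], b = h[1], c = h[2], d = h[3], e = h[4];
            int i;

            for (i = 16; i < 80; i++)   /* schedule expansion, rounds 16..79 */
                    w[i] = rol32(w[i - 3] ^ w[i - 8] ^ w[i - 14] ^ w[i - 16], 1);

            for (i = 0; i < 80; i++) {
                    uint32_t f, k;
                    if (i < 20) {           /* MAGIC_F0, K00_19 */
                            f = (b & c) | (~b & d);
                            k = 0x5A827999;
                    } else if (i < 40) {    /* MAGIC_F1, K20_39 */
                            f = b ^ c ^ d;
                            k = 0x6ED9EBA1;
                    } else if (i < 60) {    /* MAGIC_F2, K40_59 */
                            f = (b & c) | (b & d) | (c & d);
                            k = 0x8F1BBCDC;
                    } else {                /* MAGIC_F3, K60_79 */
                            f = b ^ c ^ d;
                            k = 0xCA62C1D6;
                    }
                    uint32_t t = rol32(a, 5) + f + e + k + w[i];
                    e = d; d = c; c = rol32(b, 30); b = a; a = t;
            }

            h[0] += a; h[1] += b; h[2] += c; h[3] += d; h[4] += e;
    }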
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c
new file mode 100644
index 000000000..6eb998257
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c
@@ -0,0 +1,102 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef FINALIZE_FUNCTION
+#include <stdlib.h> // For NULL
+#include "mh_sha1_murmur3_x64_128_internal.h"
+
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_base
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_base
+#define FINALIZE_FUNCTION_SLVER
+#endif
+
+#define MURMUR_BLOCK_FUNCTION murmur3_x64_128_block
+#define MURMUR_TAIL_FUNCTION murmur3_x64_128_tail
+
+int FINALIZE_FUNCTION(struct mh_sha1_murmur3_x64_128_ctx *ctx, void *mh_sha1_digest,
+ void *murmur3_x64_128_digest)
+{
+ uint8_t *partial_block_buffer, *murmur_tail_data;
+ uint64_t partial_block_len, total_len;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint8_t *aligned_frame_buffer;
+
+ if (ctx == NULL)
+ return MH_SHA1_MURMUR3_CTX_ERROR_NULL;
+
+ total_len = ctx->total_length;
+ partial_block_len = total_len % MH_SHA1_BLOCK_SIZE;
+ partial_block_buffer = ctx->partial_block_buffer;
+
+	// Calculate murmur3 first, because the mh_sha1 tail will modify
+	// partial_block_buffer
+	// (partial_block_buffer = n murmur3 blocks plus 1 murmur3 tail)
+ murmur_tail_data =
+ partial_block_buffer + partial_block_len - partial_block_len % MUR_BLOCK_SIZE;
+ MURMUR_BLOCK_FUNCTION(partial_block_buffer, partial_block_len / MUR_BLOCK_SIZE,
+ ctx->murmur3_x64_128_digest);
+ MURMUR_TAIL_FUNCTION(murmur_tail_data, total_len, ctx->murmur3_x64_128_digest);
+
+ /* mh_sha1 final */
+ aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+
+ MH_SHA1_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha1_segs_digests,
+ aligned_frame_buffer, ctx->mh_sha1_digest);
+
+ /* Output the digests of murmur3 and mh_sha1 */
+ if (mh_sha1_digest != NULL) {
+ ((uint32_t *) mh_sha1_digest)[0] = ctx->mh_sha1_digest[0];
+ ((uint32_t *) mh_sha1_digest)[1] = ctx->mh_sha1_digest[1];
+ ((uint32_t *) mh_sha1_digest)[2] = ctx->mh_sha1_digest[2];
+ ((uint32_t *) mh_sha1_digest)[3] = ctx->mh_sha1_digest[3];
+ ((uint32_t *) mh_sha1_digest)[4] = ctx->mh_sha1_digest[4];
+ }
+
+ if (murmur3_x64_128_digest != NULL) {
+ ((uint32_t *) murmur3_x64_128_digest)[0] = ctx->murmur3_x64_128_digest[0];
+ ((uint32_t *) murmur3_x64_128_digest)[1] = ctx->murmur3_x64_128_digest[1];
+ ((uint32_t *) murmur3_x64_128_digest)[2] = ctx->murmur3_x64_128_digest[2];
+ ((uint32_t *) murmur3_x64_128_digest)[3] = ctx->murmur3_x64_128_digest[3];
+ }
+
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+}
+
+#ifdef FINALIZE_FUNCTION_SLVER
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+ // Version info
+struct slver mh_sha1_murmur3_x64_128_finalize_base_slver_0000025b;
+struct slver mh_sha1_murmur3_x64_128_finalize_base_slver = { 0x025b, 0x00, 0x00 };
+#endif
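The finalize path above consumes whatever remains in partial_block_buffer
twice: first as n full 16-byte murmur3 blocks plus one murmur3 tail, then as
the mh_sha1 tail, which pads and compresses the same bytes. A minimal usage
sketch of the public init/update/finalize API this file backs — as exercised
by the tests later in this series; the wrapper name is illustrative and
error handling is condensed:

    #include <stdio.h>
    #include <stdlib.h>
    #include "mh_sha1_murmur3_x64_128.h"

    /* Compute both digests of one buffer via the streaming API. */
    int stitch_digest_sketch(const void *data, uint32_t len, uint64_t seed)
    {
            uint32_t sha[SHA1_DIGEST_WORDS];
            uint32_t mur[MURMUR3_x64_128_DIGEST_WORDS];
            struct mh_sha1_murmur3_x64_128_ctx *ctx = malloc(sizeof(*ctx));

            if (ctx == NULL)
                    return -1;

            if (mh_sha1_murmur3_x64_128_init(ctx, seed) != MH_SHA1_MURMUR3_CTX_ERROR_NONE ||
                mh_sha1_murmur3_x64_128_update(ctx, data, len) != MH_SHA1_MURMUR3_CTX_ERROR_NONE ||
                mh_sha1_murmur3_x64_128_finalize(ctx, sha, mur) != MH_SHA1_MURMUR3_CTX_ERROR_NONE) {
                    free(ctx);
                    return -1;
            }

            printf("mh_sha1[0]=%08x murmur3[0]=%08x\n", sha[0], mur[0]);
            free(ctx);
            return 0;
    }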
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
new file mode 100644
index 000000000..bb16c58d6
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
@@ -0,0 +1,202 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MH_SHA1_MURMUR3_X64_128_INTERNAL_H_
+#define _MH_SHA1_MURMUR3_X64_128_INTERNAL_H_
+
+/**
+ * @file mh_sha1_murmur3_x64_128_internal.h
+ * @brief mh_sha1_murmur3_x64_128 internal function prototypes and macros
+ *
+ * Interface for mh_sha1_murmur3_x64_128 internal functions
+ *
+ */
+#include <stdint.h>
+#include "mh_sha1_internal.h"
+#include "mh_sha1_murmur3_x64_128.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+#ifdef _MSC_VER
+# define inline __inline
+#endif
+
+ /*******************************************************************
+ * mh_sha1_murmur3_x64_128 API internal function prototypes
+ * Multiple versions of the update and finalize functions are supplied,
+ * each using its own version of the block and tail subfunctions.
+ ******************************************************************/
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ // Each function needs an individual C or ASM file because it is performance-critical.
+ // They are called by mh_sha1_murmur3_x64_128_update_XXX.
+ void mh_sha1_murmur3_x64_128_block (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_base (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires SSE
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_sse (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires AVX
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_avx (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires AVX2
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_avx2 (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires AVX512
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+ /*******************************************************************
+ * murmur hash API
+ ******************************************************************/
+
+ /**
+ * @brief Calculate the murmur3 digest of blocks whose size is 16*N.
+ * @param input_data Pointer to input data to be processed
+ * @param num_blocks The number of 16-byte blocks.
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @returns none
+ *
+ */
+ void murmur3_x64_128_block(const uint8_t * input_data, uint32_t num_blocks,
+ uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]);
+
+ /**
+ * @brief Process the tail, which is less than 16 bytes.
+ * @param tail_buffer Pointer to input data to be processed
+ * @param total_len The total length of the input_data
+ * @param digests Murmur3 digest
+ * @returns none
+ *
+ */
+ void murmur3_x64_128_tail(const uint8_t * tail_buffer, uint32_t total_len,
+ uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
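The prototypes above cover only the per-arch block routines; the matching
update and finalize variants are generated from the base C files, which
guard their defaults behind #ifndef UPDATE_FUNCTION / #ifndef
FINALIZE_FUNCTION. A plausible instantiation sketch for an AVX update
variant — illustrative file name and contents, not the actual ISA-L source:

    /* mh_sha1_murmur3_x64_128_update_avx.c (sketch) */
    #include "mh_sha1_murmur3_x64_128_internal.h"
    #include <string.h>

    #define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx
    #define BLOCK_FUNCTION  mh_sha1_murmur3_x64_128_block_avx
    #include "mh_sha1_murmur3_x64_128_update_base.c"

The finalize variants follow the same pattern via FINALIZE_FUNCTION and
MH_SHA1_TAIL_FUNCTION.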
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
new file mode 100644
index 000000000..96502c32c
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
@@ -0,0 +1,82 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifidn __OUTPUT_FORMAT__, elf64
+%define WRT_OPT wrt ..plt
+%else
+%define WRT_OPT
+%endif
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf32
+ [bits 32]
+%else
+ default rel
+ [bits 64]
+
+ extern mh_sha1_murmur3_x64_128_update_sse
+ extern mh_sha1_murmur3_x64_128_update_avx
+ extern mh_sha1_murmur3_x64_128_update_avx2
+ extern mh_sha1_murmur3_x64_128_finalize_sse
+ extern mh_sha1_murmur3_x64_128_finalize_avx
+ extern mh_sha1_murmur3_x64_128_finalize_avx2
+
+ %ifdef HAVE_AS_KNOWS_AVX512
+ extern mh_sha1_murmur3_x64_128_update_avx512
+ extern mh_sha1_murmur3_x64_128_finalize_avx512
+ %endif
+
+%endif
+
+extern mh_sha1_murmur3_x64_128_update_base
+extern mh_sha1_murmur3_x64_128_finalize_base
+
+mbin_interface mh_sha1_murmur3_x64_128_update
+mbin_interface mh_sha1_murmur3_x64_128_finalize
+
+%ifidn __OUTPUT_FORMAT__, elf64
+
+ %ifdef HAVE_AS_KNOWS_AVX512
+ mbin_dispatch_init6 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base, mh_sha1_murmur3_x64_128_update_sse, mh_sha1_murmur3_x64_128_update_avx, mh_sha1_murmur3_x64_128_update_avx2, mh_sha1_murmur3_x64_128_update_avx512
+ mbin_dispatch_init6 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base, mh_sha1_murmur3_x64_128_finalize_sse, mh_sha1_murmur3_x64_128_finalize_avx, mh_sha1_murmur3_x64_128_finalize_avx2, mh_sha1_murmur3_x64_128_finalize_avx512
+ %else
+ mbin_dispatch_init5 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base, mh_sha1_murmur3_x64_128_update_sse, mh_sha1_murmur3_x64_128_update_avx, mh_sha1_murmur3_x64_128_update_avx2
+ mbin_dispatch_init5 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base, mh_sha1_murmur3_x64_128_finalize_sse, mh_sha1_murmur3_x64_128_finalize_avx, mh_sha1_murmur3_x64_128_finalize_avx2
+ %endif
+
+%else
+ mbin_dispatch_init2 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base
+ mbin_dispatch_init2 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base
+%endif
+
+;;; func core, ver, snum
+slversion mh_sha1_murmur3_x64_128_update, 00, 02, 0252
+slversion mh_sha1_murmur3_x64_128_finalize, 00, 02, 0253
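The mbin_interface / mbin_dispatch_init* macros above implement lazy runtime
dispatch: the exported symbol initially points at a resolver that probes CPU
features once, patches in the best available implementation, and jumps to
it. A rough C analogue of that mechanism — illustrative only; the
feature-test helpers are stand-ins, not real ISA-L functions:

    #include <stdint.h>

    typedef int (*update_fn)(void *ctx, const void *buf, uint32_t len);

    extern int update_base(void *ctx, const void *buf, uint32_t len);
    extern int update_sse(void *ctx, const void *buf, uint32_t len);
    extern int update_avx2(void *ctx, const void *buf, uint32_t len);
    extern int cpu_has_sse(void);   /* stand-in for the real CPUID checks */
    extern int cpu_has_avx2(void);

    static int update_dispatch(void *ctx, const void *buf, uint32_t len);
    static update_fn update_ptr = update_dispatch;

    /* First call resolves the pointer; later calls go straight through. */
    static int update_dispatch(void *ctx, const void *buf, uint32_t len)
    {
            if (cpu_has_avx2())
                    update_ptr = update_avx2;
            else if (cpu_has_sse())
                    update_ptr = update_sse;
            else
                    update_ptr = update_base;
            return update_ptr(ctx, buf, len);
    }

    int mh_update(void *ctx, const void *buf, uint32_t len)
    {
            return update_ptr(ctx, buf, len);
    }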
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c
new file mode 100644
index 000000000..8a17fdfd7
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c
@@ -0,0 +1,206 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1_murmur3_x64_128.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Loop many times over the same buffer
+# define TEST_LEN 16*1024
+# define TEST_LOOPS 20000
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define TEST_LEN 32*1024*1024
+# define TEST_LOOPS 100
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+#define TEST_MEM TEST_LEN
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \
+ printf("The stitch function is failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+
+extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * murmur3_x64_128_digest);
+
+void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest)
+{
+ mh_sha1_ref(buffer, len, mh_sha1_digest);
+ murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest);
+
+ return;
+}
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS],
+ uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+ int murmur3_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) {
+ if (murmur3_test[i] != murmur3_base[i])
+ murmur3_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+ printf("ref: ");
+ dump((char *)hash_test, 20);
+ }
+ if (murmur3_fail) {
+ printf("murmur3 fail test\n");
+ printf("base: ");
+ dump((char *)murmur3_base, 16);
+ printf("ref: ");
+ dump((char *)murmur3_test, 16);
+ }
+
+ return mh_sha1_fail + murmur3_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, fail = 0;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS];
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS],
+ murmur3_base[MURMUR3_x64_128_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL;
+ struct perf start, stop;
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n");
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ // mh_sha1_murmur3 base version
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base);
+ perf_start(&start);
+ for (i = 0; i < TEST_LOOPS / 10; i++) {
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base,
+ murmur3_base);
+ }
+ perf_stop(&stop);
+ printf("mh_sha1_murmur3_x64_128_base" TEST_TYPE_STR ": ");
+ perf_print(stop, start, (long long)TEST_MEM * i);
+
+ //Update feature test
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ perf_start(&start);
+ for (i = 0; i < TEST_LOOPS; i++) {
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+ }
+ perf_stop(&stop);
+ printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": ");
+ perf_print(stop, start, (long long)TEST_MEM * i);
+
+ // Check results
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+	if (fail) {
+		printf("Fail size=%d\n", TEST_LEN);
+		printf("Test failed function test%d\n", fail);
+		return -1;
+	}
+
+	printf("Pass func check\n");
+
+ return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c
new file mode 100644
index 000000000..a2ea8ce92
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c
@@ -0,0 +1,248 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1_murmur3_x64_128.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \
+ printf("The stitch function is failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+
+extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * murmur3_x64_128_digest);
+
+void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest)
+{
+ mh_sha1_ref(buffer, len, mh_sha1_digest);
+ murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest);
+
+ return;
+}
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS],
+ uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+ int murmur3_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) {
+ if (murmur3_test[i] != murmur3_base[i])
+ murmur3_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+ printf("ref: ");
+ dump((char *)hash_test, 20);
+ }
+ if (murmur3_fail) {
+ printf("murmur3 fail test\n");
+ printf("base: ");
+ dump((char *)murmur3_base, 16);
+ printf("ref: ");
+ dump((char *)murmur3_test, 16);
+ }
+
+ return mh_sha1_fail + murmur3_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS];
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS],
+ murmur3_base[MURMUR3_x64_128_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ int size, offset;
+ struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL;
+
+ printf(" " xstr(TEST_UPDATE_FUNCTION) "_test:");
+
+ srand(TEST_SEED);
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("fail rand1 test\n");
+ return -1;
+ } else
+ putchar('.');
+
+ // Test various size messages
+ for (size = TEST_LEN; size >= 0; size--) {
+
+ // Fill with rand data
+ rand_buffer(buff, size);
+
+ mh_sha1_murmur3_x64_128_base(buff, size, TEST_SEED, hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", size);
+ return -1;
+ }
+
+ if ((size & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various buffer offsets and sizes
+ printf("offset tests");
+ for (size = TEST_LEN - 256; size > 256; size -= 11) {
+ for (offset = 0; offset < 256; offset++) {
+ mh_sha1_murmur3_x64_128_base(buff + offset, size, TEST_SEED,
+ hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail =
+ compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size=%d offset=%d\n", size, offset);
+ return -1;
+ }
+
+ }
+ if ((size & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Run efence tests
+ printf("efence tests");
+ for (size = TEST_SIZE; size > 0; size--) {
+ offset = TEST_LEN - size;
+ mh_sha1_murmur3_x64_128_base(buff + offset, size, TEST_SEED,
+ hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size=%d offset=%d\n", size, offset);
+ return -1;
+ }
+
+ if ((size & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail");
+
+ return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c
new file mode 100644
index 000000000..e8d21ac26
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c
@@ -0,0 +1,107 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef UPDATE_FUNCTION
+#include "mh_sha1_murmur3_x64_128_internal.h"
+#include <string.h>
+
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_base
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_base
+#define UPDATE_FUNCTION_SLVER
+#endif
+
+int UPDATE_FUNCTION(struct mh_sha1_murmur3_x64_128_ctx *ctx, const void *buffer, uint32_t len)
+{
+
+ uint8_t *partial_block_buffer;
+ uint64_t partial_block_len;
+ uint64_t num_blocks;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint8_t *aligned_frame_buffer;
+ uint32_t *murmur3_x64_128_digest;
+ const uint8_t *input_data = (const uint8_t *)buffer;
+
+ if (ctx == NULL)
+ return MH_SHA1_MURMUR3_CTX_ERROR_NULL;
+
+ if (len == 0)
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+
+ partial_block_len = ctx->total_length % MH_SHA1_BLOCK_SIZE;
+ partial_block_buffer = ctx->partial_block_buffer;
+ aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+ murmur3_x64_128_digest = ctx->murmur3_x64_128_digest;
+
+ ctx->total_length += len;
+	// Not enough input data for mh_sha1 calculation
+ if (len + partial_block_len < MH_SHA1_BLOCK_SIZE) {
+ memcpy(partial_block_buffer + partial_block_len, input_data, len);
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+ }
+ // mh_sha1 calculation for the previous partial block
+ if (partial_block_len != 0) {
+ memcpy(partial_block_buffer + partial_block_len, input_data,
+ MH_SHA1_BLOCK_SIZE - partial_block_len);
+		// process the single buffered block
+ BLOCK_FUNCTION(partial_block_buffer, mh_sha1_segs_digests,
+ aligned_frame_buffer, murmur3_x64_128_digest, 1);
+ input_data += MH_SHA1_BLOCK_SIZE - partial_block_len;
+ len -= MH_SHA1_BLOCK_SIZE - partial_block_len;
+ memset(partial_block_buffer, 0, MH_SHA1_BLOCK_SIZE);
+ }
+ // Calculate mh_sha1 for the current blocks
+ num_blocks = len / MH_SHA1_BLOCK_SIZE;
+ if (num_blocks > 0) {
+		// process num_blocks full blocks
+ BLOCK_FUNCTION(input_data, mh_sha1_segs_digests, aligned_frame_buffer,
+ murmur3_x64_128_digest, num_blocks);
+ len -= num_blocks * MH_SHA1_BLOCK_SIZE;
+ input_data += num_blocks * MH_SHA1_BLOCK_SIZE;
+ }
+ // Store the partial block
+ if (len != 0) {
+ memcpy(partial_block_buffer, input_data, len);
+ }
+
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+
+}
+
+#ifdef UPDATE_FUNCTION_SLVER
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+ // Version info
+struct slver mh_sha1_murmur3_x64_128_update_base_slver_0000025a;
+struct slver mh_sha1_murmur3_x64_128_update_base_slver = { 0x025a, 0x00, 0x00 };
+#endif
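The update routine above maintains a simple invariant: ctx->total_length
counts every byte fed in, and partial_block_buffer holds the trailing
total_length % MH_SHA1_BLOCK_SIZE bytes not yet compressed. Splitting the
input across update calls therefore changes nothing — the property the
update tests below verify exhaustively. A small equivalence sketch
(illustrative; return-value checks elided, cut must not exceed len):

    #include <assert.h>
    #include <string.h>
    #include "mh_sha1_murmur3_x64_128.h"

    static void split_update_sketch(struct mh_sha1_murmur3_x64_128_ctx *ctx,
                                    const uint8_t *buf, uint32_t len, uint32_t cut)
    {
            uint32_t one[SHA1_DIGEST_WORDS], two[SHA1_DIGEST_WORDS];
            uint32_t mur1[MURMUR3_x64_128_DIGEST_WORDS], mur2[MURMUR3_x64_128_DIGEST_WORDS];

            /* one-shot */
            mh_sha1_murmur3_x64_128_init(ctx, 0x1234);
            mh_sha1_murmur3_x64_128_update(ctx, buf, len);
            mh_sha1_murmur3_x64_128_finalize(ctx, one, mur1);

            /* same data in two pieces */
            mh_sha1_murmur3_x64_128_init(ctx, 0x1234);
            mh_sha1_murmur3_x64_128_update(ctx, buf, cut);
            mh_sha1_murmur3_x64_128_update(ctx, buf + cut, len - cut);
            mh_sha1_murmur3_x64_128_finalize(ctx, two, mur2);

            assert(memcmp(one, two, sizeof(one)) == 0);
            assert(memcmp(mur1, mur2, sizeof(mur1)) == 0);
    }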
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c
new file mode 100644
index 000000000..853e330a4
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c
@@ -0,0 +1,272 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1_murmur3_x64_128.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \
+ printf("The stitch function is failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+
+extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * murmur3_x64_128_digest);
+
+void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest)
+{
+ mh_sha1_ref(buffer, len, mh_sha1_digest);
+ murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest);
+
+ return;
+}
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS],
+ uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+ int murmur3_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) {
+ if (murmur3_test[i] != murmur3_base[i])
+ murmur3_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+ printf("ref: ");
+ dump((char *)hash_test, 20);
+ }
+ if (murmur3_fail) {
+ printf("murmur3 fail test\n");
+ printf("base: ");
+ dump((char *)murmur3_base, 16);
+ printf("ref: ");
+ dump((char *)murmur3_test, 16);
+ }
+
+ return mh_sha1_fail + murmur3_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0, i;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS];
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS],
+ murmur3_base[MURMUR3_x64_128_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ int update_count;
+ int size1, size2, offset, addr_offset;
+ struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL;
+ uint8_t *mem_addr = NULL;
+
+ printf(" " xstr(TEST_UPDATE_FUNCTION) "_test:");
+
+ srand(TEST_SEED);
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("fail rand1 test\n");
+ return -1;
+ } else
+ putchar('.');
+
+	// Test various message sizes split across two update calls.
+ printf("\n various size messages by update twice tests");
+ for (size1 = TEST_LEN; size1 >= 0; size1--) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base,
+ murmur3_base);
+
+ // subsequent update
+		size2 = TEST_LEN - size1;	// size2 differs from size1
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size1=%d\n", size1);
+ return -1;
+ }
+
+ if ((size2 & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various update count
+ printf("\n various update count tests");
+ for (update_count = 1; update_count <= TEST_LEN; update_count++) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base,
+ murmur3_base);
+
+ // subsequent update
+ size1 = TEST_LEN / update_count;
+		size2 = TEST_LEN - size1 * (update_count - 1);	// size2 differs from size1
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ for (i = 1, offset = 0; i < update_count; i++) {
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1));
+ offset += size1;
+ }
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size1=%d\n", size1);
+ return -1;
+ }
+
+ if ((size2 & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+	// Test various start addresses of ctx.
+ printf("\n various start address of ctx test");
+ free(update_ctx);
+	mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10);
+	if (mem_addr == NULL) {
+		printf("malloc failed, test aborted\n");
+		return -1;
+	}
+	for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base,
+ murmur3_base);
+
+		// an unaligned offset
+ update_ctx = (struct mh_sha1_murmur3_x64_128_ctx *)(mem_addr + addr_offset);
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail addr_offset=%d\n", addr_offset);
+ return -1;
+ }
+
+ if ((addr_offset & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail");
+
+ return fail;
+
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c
new file mode 100644
index 000000000..75c3d90b5
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c
@@ -0,0 +1,78 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdlib.h> // for NULL
+#include "murmur3_x64_128_internal.c"
+/*******************************************************************
+ * Single API which can calculate murmur3
+ ******************************************************************/
+/**
+ * @brief Get the digest of murmur3_x64_128 through a single API.
+ *
+ * Uses murmur3_x64_128_block and murmur3_x64_128_tail.
+ * Used to test the murmur3_x64_128 digest.
+ *
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param murmur_seed Seed as an initial digest of murmur3
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns none
+ *
+ */
+void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * murmur3_x64_128_digest)
+{
+ uint64_t *murmur3_x64_128_hash;
+ uint32_t murmur3_x64_128_hash_dword[4];
+ uint8_t *tail_buffer;
+ const uint8_t *input_data = (const uint8_t *)buffer;
+
+	// Initialize murmur3 hash with the seed
+ murmur3_x64_128_hash = (uint64_t *) murmur3_x64_128_hash_dword;
+ murmur3_x64_128_hash[0] = murmur_seed;
+ murmur3_x64_128_hash[1] = murmur_seed;
+
+ // process bodies
+ murmur3_x64_128_block((uint8_t *) input_data, len / MUR_BLOCK_SIZE,
+ murmur3_x64_128_hash_dword);
+
+	// process the tail and finalize
+ tail_buffer = (uint8_t *) input_data + len - len % MUR_BLOCK_SIZE;
+ murmur3_x64_128_tail(tail_buffer, len, murmur3_x64_128_hash_dword);
+
+ // output the digests
+ if (murmur3_x64_128_digest != NULL) {
+ murmur3_x64_128_digest[0] = murmur3_x64_128_hash_dword[0];
+ murmur3_x64_128_digest[1] = murmur3_x64_128_hash_dword[1];
+ murmur3_x64_128_digest[2] = murmur3_x64_128_hash_dword[2];
+ murmur3_x64_128_digest[3] = murmur3_x64_128_hash_dword[3];
+ }
+
+ return;
+}
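A one-shot usage sketch of the helper above (illustrative driver; the digest
is 4 dwords = 128 bits):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    extern void murmur3_x64_128(const void *buffer, uint32_t len,
                                uint64_t murmur_seed,
                                uint32_t *murmur3_x64_128_digest);

    int main(void)
    {
            const char *msg = "hello, murmur3";
            uint32_t digest[4];

            murmur3_x64_128(msg, (uint32_t) strlen(msg), 0x1234, digest);
            printf("%08x%08x%08x%08x\n",
                   digest[0], digest[1], digest[2], digest[3]);
            return 0;
    }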
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
new file mode 100644
index 000000000..6aab002ef
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
@@ -0,0 +1,138 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "mh_sha1_murmur3_x64_128_internal.h"
+#include <stdlib.h> // for NULL
+
+/* murmur3_x64_128 constants */
+// Rotation amounts (in bits) for the circular rotates
+#define MUR_SH1 31
+#define MUR_SH2 33
+#define MUR_SH3 27
+#define MUR_SH4 31
+#define MUR_SH5 33
+
+#define MUR_MUL 5
+#define MUR_ADD1 0x52dce729
+#define MUR_ADD2 0x38495ab5
+
+#define MUR_CON1 0x87c37b91114253d5LLU
+#define MUR_CON2 0x4cf5ad432745937fLLU
+
+#define MUR_FMUL1 0xff51afd7ed558ccdLLU
+#define MUR_FMUL2 0xc4ceb9fe1a85ec53LLU
+
+/* murmur3_x64_128 inline functions */
+static inline uint64_t blockmix64(uint64_t data, uint64_t conA, uint64_t conB, uint64_t shift)
+{
+ data *= conA;
+ data = (data << shift) | (data >> (64 - shift));
+ data *= conB;
+ return data;
+}
+
+static inline uint64_t hashmix64(uint64_t hashA, uint64_t hashB, uint64_t data, uint64_t add,
+ uint64_t shift)
+{
+ hashA ^= data;
+ hashA = (hashA << shift) | (hashA >> (64 - shift));
+ hashA += hashB;
+ hashA = hashA * MUR_MUL + add;
+ return hashA;
+}
+
+void murmur3_x64_128_block(const uint8_t * input_data, uint32_t num_blocks,
+ uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ uint64_t data1, data2;
+ uint64_t *input_qword = (uint64_t *) input_data;
+ uint64_t *hash = (uint64_t *) digests;
+ uint32_t i = 0;
+
+ while (i < num_blocks) {
+ data1 = input_qword[i * 2];
+ data2 = input_qword[i * 2 + 1];
+ data1 = blockmix64(data1, MUR_CON1, MUR_CON2, MUR_SH1);
+ data2 = blockmix64(data2, MUR_CON2, MUR_CON1, MUR_SH2);
+ hash[0] = hashmix64(hash[0], hash[1], data1, MUR_ADD1, MUR_SH3);
+ hash[1] = hashmix64(hash[1], hash[0], data2, MUR_ADD2, MUR_SH4);
+ i++;
+ }
+
+ return;
+}
+
+void murmur3_x64_128_tail(const uint8_t * tail_buffer, uint32_t total_len,
+ uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ uint64_t data1, data2;
+ uint64_t *hash = (uint64_t *) digests;
+ uint64_t tail_len = total_len % 16;
+ uint8_t *tail = (uint8_t *) tail_buffer;
+
+ union {
+ uint64_t hash[2];
+ uint8_t hashB[16];
+ } hashU;
+
+ // tail
+ hashU.hash[0] = hashU.hash[1] = 0;
+
+ while (tail_len-- > 0)
+ hashU.hashB[tail_len] = tail[tail_len];
+
+ data1 = hashU.hash[0];
+ data2 = hashU.hash[1];
+
+ data1 = blockmix64(data1, MUR_CON1, MUR_CON2, MUR_SH1);
+ data2 = blockmix64(data2, MUR_CON2, MUR_CON1, MUR_SH2);
+
+ hash[0] ^= total_len ^ data1;
+ hash[1] ^= total_len ^ data2;
+
+ hash[0] += hash[1];
+ hash[1] += hash[0];
+
+ hash[0] ^= hash[0] >> MUR_SH5;
+ hash[0] *= MUR_FMUL1;
+ hash[0] ^= hash[0] >> MUR_SH5;
+ hash[0] *= MUR_FMUL2;
+ hash[0] ^= hash[0] >> MUR_SH5;
+
+ hash[1] ^= hash[1] >> MUR_SH5;
+ hash[1] *= MUR_FMUL1;
+ hash[1] ^= hash[1] >> MUR_SH5;
+ hash[1] *= MUR_FMUL2;
+ hash[1] ^= hash[1] >> MUR_SH5;
+
+ hash[0] += hash[1];
+ hash[1] += hash[0];
+
+ return;
+}
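The two shift/multiply/shift sequences at the end of murmur3_x64_128_tail
are the standard MurmurHash3 64-bit finalization mix ("fmix64") applied to
each half of the state. Factored out, with MUR_SH5 == 33 and the MUR_FMUL
constants, it reads (sketch only):

    #include <stdint.h>

    /* Avalanche finalizer: every input bit affects every output bit. */
    static inline uint64_t fmix64(uint64_t h)
    {
            h ^= h >> 33;
            h *= 0xff51afd7ed558ccdULL;     /* MUR_FMUL1 */
            h ^= h >> 33;
            h *= 0xc4ceb9fe1a85ec53ULL;     /* MUR_FMUL2 */
            h ^= h >> 33;
            return h;
    }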