path: root/src/crypto/isa-l/isa-l_crypto/mh_sha1
Diffstat
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am  83
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c  55
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c  53
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S  124
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S  384
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c  53
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S  35
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S  269
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c  141
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c  70
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c  40
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm  506
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm  508
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm  406
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c  387
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm  498
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c  122
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h  308
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm  77
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c  180
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c  430
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c  217
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c  110
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c  240
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c  204
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am  89
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c  53
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_internal.h  91
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c  54
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S  224
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S  482
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c  54
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S  34
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S  271
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c  154
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c  67
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c  43
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm  706
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm  653
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm  504
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm  702
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c  102
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h  202
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm  76
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c  206
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c  248
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c  107
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c  272
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c  85
-rw-r--r--  src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c  138
50 files changed, 11117 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am
new file mode 100644
index 000000000..696e9c57d
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am
@@ -0,0 +1,83 @@
+########################################################################
+# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+lsrc_mh_sha1_base = \
+ mh_sha1/mh_sha1_block_base.c \
+ mh_sha1/mh_sha1_finalize_base.c \
+ mh_sha1/mh_sha1_update_base.c \
+ mh_sha1/sha1_for_mh_sha1.c \
+ mh_sha1/mh_sha1.c
+
+lsrc_x86_64 += \
+ $(lsrc_mh_sha1_base) \
+ mh_sha1/mh_sha1_multibinary.asm \
+ mh_sha1/mh_sha1_block_sse.asm \
+ mh_sha1/mh_sha1_block_avx.asm \
+ mh_sha1/mh_sha1_block_avx2.asm \
+ mh_sha1/mh_sha1_block_avx512.asm \
+ mh_sha1/mh_sha1_avx512.c
+
+lsrc_x86_32 += $(lsrc_x86_64)
+
+lsrc_aarch64 += \
+ $(lsrc_mh_sha1_base) \
+ mh_sha1/aarch64/mh_sha1_multibinary.S \
+ mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c \
+ mh_sha1/aarch64/mh_sha1_block_asimd.S \
+ mh_sha1/aarch64/mh_sha1_asimd.c \
+ mh_sha1/aarch64/mh_sha1_block_ce.S \
+ mh_sha1/aarch64/mh_sha1_ce.c
+
+lsrc_base_aliases += \
+ $(lsrc_mh_sha1_base) \
+ mh_sha1/mh_sha1_base_aliases.c
+
+other_src += mh_sha1/mh_sha1_ref.c \
+ include/reg_sizes.asm \
+ include/multibinary.asm \
+ include/test.h \
+ mh_sha1/mh_sha1_internal.h
+
+src_include += -I $(srcdir)/mh_sha1
+
+extern_hdrs += include/mh_sha1.h
+
+check_tests += mh_sha1/mh_sha1_test
+unit_tests += mh_sha1/mh_sha1_update_test
+
+perf_tests += mh_sha1/mh_sha1_perf
+
+
+mh_sha1_test: mh_sha1_ref.o
+mh_sha1_mh_sha1_test_LDADD = mh_sha1/mh_sha1_ref.lo libisal_crypto.la
+
+mh_sha1_update_test: mh_sha1_ref.o
+mh_sha1_mh_sha1_update_test_LDADD = mh_sha1/mh_sha1_ref.lo libisal_crypto.la
+
+mh_sha1_mh_sha1_perf_LDADD = libisal_crypto.la
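
The Makefile above registers the mh_sha1 sources for each architecture, installs include/mh_sha1.h via extern_hdrs, and links the test and perf programs against libisal_crypto.la. For orientation, a minimal caller of the public API would look roughly like the sketch below; it assumes the mh_sha1_init/mh_sha1_update/mh_sha1_finalize signatures and the MH_SHA1_CTX_ERROR_NONE return code that appear later in this patch, and that the mh_sha1 digest is five 32-bit words (20 bytes).

    #include <stdint.h>
    #include "mh_sha1.h"                 /* installed via extern_hdrs above */

    /* Hash a whole buffer in one shot; returns 0 on success. */
    int hash_buffer(const void *buf, uint32_t len, uint32_t digest[5])
    {
            struct mh_sha1_ctx ctx;

            if (mh_sha1_init(&ctx) != MH_SHA1_CTX_ERROR_NONE)
                    return -1;
            if (mh_sha1_update(&ctx, buf, len) != MH_SHA1_CTX_ERROR_NONE)
                    return -1;
            if (mh_sha1_finalize(&ctx, digest) != MH_SHA1_CTX_ERROR_NONE)
                    return -1;
            return 0;
    }
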
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c
new file mode 100644
index 000000000..2ad8871fa
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c
@@ -0,0 +1,55 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(mh_sha1_update)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(mh_sha1_update_ce);
+
+ if (auxval & HWCAP_ASIMD)
+ return PROVIDER_INFO(mh_sha1_update_asimd);
+
+ return PROVIDER_BASIC(mh_sha1_update);
+
+}
+
+DEFINE_INTERFACE_DISPATCHER(mh_sha1_finalize)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(mh_sha1_finalize_ce);
+
+ if (auxval & HWCAP_ASIMD)
+ return PROVIDER_INFO(mh_sha1_finalize_asimd);
+
+ return PROVIDER_BASIC(mh_sha1_finalize);
+
+}
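
The dispatcher above picks an implementation at runtime from the aarch64 hardware capabilities: the SHA-1 crypto-extension kernels when HWCAP_SHA1 is set, the plain ASIMD kernels when only HWCAP_ASIMD is set, and the portable base code otherwise. A standalone sketch of the same probe (Linux/aarch64 assumed; not part of this patch):

    #include <stdio.h>
    #include <sys/auxv.h>
    #include <asm/hwcap.h>

    int main(void)
    {
            unsigned long hwcap = getauxval(AT_HWCAP);

            if (hwcap & HWCAP_SHA1)
                    puts("SHA-1 crypto extensions: mh_sha1_*_ce would be selected");
            else if (hwcap & HWCAP_ASIMD)
                    puts("ASIMD only: mh_sha1_*_asimd would be selected");
            else
                    puts("no SIMD support: base implementation would be selected");
            return 0;
    }
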
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c
new file mode 100644
index 000000000..c913a64df
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c
@@ -0,0 +1,53 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+void mh_sha1_block_asimd(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+/***************mh_sha1_update***********/
+// mh_sha1_update_asimd.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_asimd
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_asimd
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail is used to calculate the last incomplete src data block
+// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail
+// mh_sha1_finalize_asimd.c and mh_sha1_tail_asimd.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_asimd
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_asimd
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_asimd
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
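
mh_sha1_asimd.c is a thin shim: it declares the assembly block kernel and then generates the update and finalize entry points by #defining the function names and #including the shared template sources (mh_sha1_update_base.c, mh_sha1_finalize_base.c). Reduced to its essentials, the pattern looks like the sketch below; the file and function names here are hypothetical, chosen only to show the mechanism.

    /* template.c -- shared body; included once per variant, never compiled alone */
    int TEMPLATE_FUNCTION(int x)
    {
            return BLOCK_FUNCTION(x) + 1;   /* delegate to the arch-specific kernel */
    }

    /* variant_foo.c -- one such translation unit per architecture */
    int my_block_foo(int x);                /* provided elsewhere, e.g. in assembly */

    #define TEMPLATE_FUNCTION my_update_foo
    #define BLOCK_FUNCTION    my_block_foo
    #include "template.c"
    #undef TEMPLATE_FUNCTION
    #undef BLOCK_FUNCTION
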
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S
new file mode 100644
index 000000000..22f716f27
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S
@@ -0,0 +1,124 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+#include "sha1_asimd_common.S"
+
+.macro load_x4_word idx:req
+ ld1 {WORD\idx\().16b},[segs_ptr]
+ add segs_ptr,segs_ptr,#64
+.endm
+
+/*
+ * void mh_sha1_block_asimd (const uint8_t * input_data,
+ * uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ * uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ * uint32_t num_blocks);
+ * arg 0 pointer to input data
+ * arg 1 pointer to digests, including the per-segment digests (uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS])
+ * arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
+ * arg 3 number of 1KB blocks
+ */
+
+ input_data .req x0
+ sha1_digest .req x1
+ data_buf .req x2
+ num_blocks .req w3
+ src .req x4
+ dst .req x5
+ offs .req x6
+ mh_segs .req x7
+ tmp .req x8
+ segs_ptr .req x9
+ block_ctr .req w10
+
+ .global mh_sha1_block_asimd
+ .type mh_sha1_block_asimd, %function
+mh_sha1_block_asimd:
+ cmp num_blocks, #0
+ beq .return
+ sha1_asimd_save_stack
+
+ mov mh_segs, #0
+.seg_loops:
+ add segs_ptr,input_data,mh_segs
+ mov offs, #64
+ add src, sha1_digest, mh_segs
+ ld1 {VA.4S}, [src], offs
+ ld1 {VB.4S}, [src], offs
+ ld1 {VC.4S}, [src], offs
+ ld1 {VD.4S}, [src], offs
+ ld1 {VE.4S}, [src], offs
+ mov block_ctr,num_blocks
+
+.block_loop:
+ sha1_single
+ subs block_ctr, block_ctr, 1
+ bne .block_loop
+
+ mov offs, #64
+ add dst, sha1_digest, mh_segs
+ st1 {VA.4S}, [dst], offs
+ st1 {VB.4S}, [dst], offs
+ st1 {VC.4S}, [dst], offs
+ st1 {VD.4S}, [dst], offs
+ st1 {VE.4S}, [dst], offs
+
+ add mh_segs, mh_segs, #16
+ cmp mh_segs, #64
+ bne .seg_loops
+
+ sha1_asimd_restore_stack
+.return:
+ ret
+
+ .size mh_sha1_block_asimd, .-mh_sha1_block_asimd
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+KEY_0:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+KEY_1:
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+KEY_2:
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+KEY_3:
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
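
The loads in mh_sha1_block_asimd (ld1 of 16 bytes followed by an advance of 64) imply the data layout the multi-hash kernels work on: each 1 KB block is treated as a 16x16 matrix of 32-bit words, one column per hash segment and one row per word of that segment's 64-byte SHA-1 block, so four adjacent segments form one NEON lane group. A small C helper illustrating that striding, written from this reading of the code (the helper itself is hypothetical):

    #include <stdint.h>
    #include <string.h>

    #define HASH_SEGS        16   /* segments hashed in parallel */
    #define SHA1_BLOCK_WORDS 16   /* 32-bit words per 64-byte SHA-1 block */

    /* Gather segment `seg`'s next SHA-1 block out of one interleaved 1 KB chunk. */
    static void gather_segment(const uint8_t block_1k[1024], int seg,
                               uint32_t words[SHA1_BLOCK_WORDS])
    {
            for (int w = 0; w < SHA1_BLOCK_WORDS; w++)
                    memcpy(&words[w], block_1k + 4 * seg + 4 * HASH_SEGS * w,
                           sizeof(uint32_t));
    }
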
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S
new file mode 100644
index 000000000..12d3c5df2
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S
@@ -0,0 +1,384 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crypto
+ .text
+ .align 2
+ .p2align 3,,7
+
+/*
+Macros
+*/
+
+.macro declare_var_vector_reg name:req,reg:req
+ \name\()_q .req q\reg
+ \name\()_v .req v\reg
+ \name\()_s .req s\reg
+.endm
+
+
+
+/*
+Variable list
+*/
+
+ declare_var_vector_reg lane0_msg_0, 0
+ declare_var_vector_reg lane1_msg_0, 1
+ declare_var_vector_reg lane2_msg_0, 2
+ declare_var_vector_reg lane3_msg_0, 3
+ declare_var_vector_reg lane0_msg_1, 4
+ declare_var_vector_reg lane1_msg_1, 5
+ declare_var_vector_reg lane2_msg_1, 6
+ declare_var_vector_reg lane3_msg_1, 7
+ declare_var_vector_reg lane0_msg_2, 8
+ declare_var_vector_reg lane1_msg_2, 9
+ declare_var_vector_reg lane2_msg_2,10
+ declare_var_vector_reg lane3_msg_2,11
+ declare_var_vector_reg lane0_msg_3,12
+ declare_var_vector_reg lane1_msg_3,13
+ declare_var_vector_reg lane2_msg_3,14
+ declare_var_vector_reg lane3_msg_3,15
+
+ declare_var_vector_reg lane0_abcd ,16
+ declare_var_vector_reg lane1_abcd ,17
+ declare_var_vector_reg lane2_abcd ,18
+ declare_var_vector_reg lane3_abcd ,19
+ declare_var_vector_reg lane0_tmp0 ,20
+ declare_var_vector_reg lane1_tmp0 ,21
+ declare_var_vector_reg lane2_tmp0 ,22
+ declare_var_vector_reg lane3_tmp0 ,23
+ declare_var_vector_reg lane0_tmp1 ,24
+ declare_var_vector_reg lane1_tmp1 ,25
+ declare_var_vector_reg lane2_tmp1 ,26
+ declare_var_vector_reg lane3_tmp1 ,27
+
+
+ declare_var_vector_reg e0 ,28
+ declare_var_vector_reg e1 ,29
+ declare_var_vector_reg key ,30
+ declare_var_vector_reg tmp ,31
+
+ key_adr .req x4
+ msg_adr .req x5
+ block_cnt .req x6
+ offs .req x7
+ digest_adr .req x16
+ tmp0_adr .req x17
+ tmp1_adr .req x18
+
+/**
+macros for rounds 4-67
+*/
+.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req
+ sha1h lane0_\tmp0\()_s, lane0_\abcd\()_s
+ sha1h lane1_\tmp0\()_s, lane1_\abcd\()_s
+ sha1h lane2_\tmp0\()_s, lane2_\abcd\()_s
+ sha1h lane3_\tmp0\()_s, lane3_\abcd\()_s
+ mov \e0\()_v.S[0],lane0_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[1],lane1_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[2],lane2_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[3],lane3_\tmp0\()_v.S[0]
+ mov lane0_\tmp0\()_v.S[0],\e1\()_v.S[0]
+ mov lane1_\tmp0\()_v.S[0],\e1\()_v.S[1]
+ mov lane2_\tmp0\()_v.S[0],\e1\()_v.S[2]
+ mov lane3_\tmp0\()_v.S[0],\e1\()_v.S[3]
+ \inst lane0_\abcd\()_q,lane0_\tmp0\()_s,lane0_\tmp1\()_v.4s
+ \inst lane1_\abcd\()_q,lane1_\tmp0\()_s,lane1_\tmp1\()_v.4s
+ \inst lane2_\abcd\()_q,lane2_\tmp0\()_s,lane2_\tmp1\()_v.4s
+ \inst lane3_\abcd\()_q,lane3_\tmp0\()_s,lane3_\tmp1\()_v.4s
+ ld1 {lane0_\tmp0\()_v.4s-lane3_\tmp0\()_v.4s},[\tmp0\()_adr]
+ add lane0_\tmp1\()_v.4s,lane0_\msg3\()_v.4s,key_v.4s
+ add lane1_\tmp1\()_v.4s,lane1_\msg3\()_v.4s,key_v.4s
+ add lane2_\tmp1\()_v.4s,lane2_\msg3\()_v.4s,key_v.4s
+ add lane3_\tmp1\()_v.4s,lane3_\msg3\()_v.4s,key_v.4s
+ st1 {lane0_\tmp1\()_v.4s-lane3_\tmp1\()_v.4s},[\tmp1\()_adr]
+ sha1su1 lane0_\msg0\()_v.4s,lane0_\msg3\()_v.4s
+ sha1su1 lane1_\msg0\()_v.4s,lane1_\msg3\()_v.4s
+ sha1su1 lane2_\msg0\()_v.4s,lane2_\msg3\()_v.4s
+ sha1su1 lane3_\msg0\()_v.4s,lane3_\msg3\()_v.4s
+ sha1su0 lane0_\msg1\()_v.4s,lane0_\msg2\()_v.4s,lane0_\msg3\()_v.4s
+ sha1su0 lane1_\msg1\()_v.4s,lane1_\msg2\()_v.4s,lane1_\msg3\()_v.4s
+ sha1su0 lane2_\msg1\()_v.4s,lane2_\msg2\()_v.4s,lane2_\msg3\()_v.4s
+ sha1su0 lane3_\msg1\()_v.4s,lane3_\msg2\()_v.4s,lane3_\msg3\()_v.4s
+
+.endm
+
+
+/*
+ void mh_sha1_block_ce(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks)
+*/
+/*
+Arguments list
+*/
+ input_data .req x0
+ digests .req x1
+ frame_buffer .req x2
+ num_blocks .req w3
+
+ .global mh_sha1_block_ce
+ .type mh_sha1_block_ce, %function
+mh_sha1_block_ce:
+ //save temp vector registers
+ stp d8, d9, [sp, -128]!
+
+ stp d10, d11, [sp, 16]
+ stp d12, d13, [sp, 32]
+ stp d14, d15, [sp, 48]
+ mov tmp0_adr,frame_buffer
+ add tmp1_adr,tmp0_adr,128
+
+
+start_loop:
+ mov block_cnt,0
+ mov msg_adr,input_data
+lane_loop:
+ mov offs,64
+ adr key_adr,KEY_0
+ //load msg 0
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[3],[msg_adr],offs
+
+ add digest_adr,digests,block_cnt
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs
+ ldr e0_q,[digest_adr]
+
+ //load key_0
+ ldr key_q,[key_adr]
+
+ rev32 lane0_msg_0_v.16b,lane0_msg_0_v.16b
+ rev32 lane1_msg_0_v.16b,lane1_msg_0_v.16b
+ rev32 lane2_msg_0_v.16b,lane2_msg_0_v.16b
+ rev32 lane3_msg_0_v.16b,lane3_msg_0_v.16b
+ rev32 lane0_msg_1_v.16b,lane0_msg_1_v.16b
+ rev32 lane1_msg_1_v.16b,lane1_msg_1_v.16b
+ rev32 lane2_msg_1_v.16b,lane2_msg_1_v.16b
+ rev32 lane3_msg_1_v.16b,lane3_msg_1_v.16b
+ rev32 lane0_msg_2_v.16b,lane0_msg_2_v.16b
+ rev32 lane1_msg_2_v.16b,lane1_msg_2_v.16b
+ rev32 lane2_msg_2_v.16b,lane2_msg_2_v.16b
+ rev32 lane3_msg_2_v.16b,lane3_msg_2_v.16b
+ rev32 lane0_msg_3_v.16b,lane0_msg_3_v.16b
+ rev32 lane1_msg_3_v.16b,lane1_msg_3_v.16b
+ rev32 lane2_msg_3_v.16b,lane2_msg_3_v.16b
+ rev32 lane3_msg_3_v.16b,lane3_msg_3_v.16b
+
+ add lane0_tmp1_v.4s,lane0_msg_1_v.4s,key_v.4s
+ add lane1_tmp1_v.4s,lane1_msg_1_v.4s,key_v.4s
+ add lane2_tmp1_v.4s,lane2_msg_1_v.4s,key_v.4s
+ add lane3_tmp1_v.4s,lane3_msg_1_v.4s,key_v.4s
+ st1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr]
+
+ add lane0_tmp0_v.4s,lane0_msg_0_v.4s,key_v.4s
+ add lane1_tmp0_v.4s,lane1_msg_0_v.4s,key_v.4s
+ add lane2_tmp0_v.4s,lane2_msg_0_v.4s,key_v.4s
+ add lane3_tmp0_v.4s,lane3_msg_0_v.4s,key_v.4s
+
+ /* rounds 0-3 */
+ sha1h lane0_tmp1_s,lane0_abcd_s
+ sha1h lane1_tmp1_s,lane1_abcd_s
+ sha1h lane2_tmp1_s,lane2_abcd_s
+ sha1h lane3_tmp1_s,lane3_abcd_s
+ mov e1_v.S[0],lane0_tmp1_v.S[0]
+ mov e1_v.S[1],lane1_tmp1_v.S[0]
+ mov e1_v.S[2],lane2_tmp1_v.S[0]
+ mov e1_v.S[3],lane3_tmp1_v.S[0]
+ mov lane0_tmp1_v.S[0],e0_v.S[0]
+ mov lane1_tmp1_v.S[0],e0_v.S[1]
+ mov lane2_tmp1_v.S[0],e0_v.S[2]
+ mov lane3_tmp1_v.S[0],e0_v.S[3]
+ sha1c lane0_abcd_q,lane0_tmp1_s,lane0_tmp0_v.4s
+ sha1c lane1_abcd_q,lane1_tmp1_s,lane1_tmp0_v.4s
+ sha1c lane2_abcd_q,lane2_tmp1_s,lane2_tmp0_v.4s
+ sha1c lane3_abcd_q,lane3_tmp1_s,lane3_tmp0_v.4s
+ ld1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr]
+ add lane0_tmp0_v.4s,lane0_msg_2_v.4s,key_v.4s
+ sha1su0 lane0_msg_0_v.4s,lane0_msg_1_v.4s,lane0_msg_2_v.4s
+ add lane1_tmp0_v.4s,lane1_msg_2_v.4s,key_v.4s
+ sha1su0 lane1_msg_0_v.4s,lane1_msg_1_v.4s,lane1_msg_2_v.4s
+ add lane2_tmp0_v.4s,lane2_msg_2_v.4s,key_v.4s
+ sha1su0 lane2_msg_0_v.4s,lane2_msg_1_v.4s,lane2_msg_2_v.4s
+ add lane3_tmp0_v.4s,lane3_msg_2_v.4s,key_v.4s
+ sha1su0 lane3_msg_0_v.4s,lane3_msg_1_v.4s,lane3_msg_2_v.4s
+ st1 {lane0_tmp0_v.4s-lane3_tmp0_v.4s},[tmp0_adr]
+
+ sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 4-7 */
+ sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+
+
+ adr key_adr,KEY_1
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 /* rounds 12-15 */
+ sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 20-23 */
+ sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+
+ adr key_adr,KEY_2
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 36-39 */
+ sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+ sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+
+ adr key_adr,KEY_3
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 52-55 */
+ sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+ sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+
+ //msg2 and msg1 are free
+ mov lane0_msg_2_v.S[0],e1_v.S[0]
+ mov lane1_msg_2_v.S[0],e1_v.S[1]
+ mov lane2_msg_2_v.S[0],e1_v.S[2]
+ mov lane3_msg_2_v.S[0],e1_v.S[3]
+
+ /* rounds 68-71 */
+ sha1h lane0_msg_1_s,lane0_abcd_s
+ sha1h lane1_msg_1_s,lane1_abcd_s
+ sha1h lane2_msg_1_s,lane2_abcd_s
+ sha1h lane3_msg_1_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s
+ sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s
+ sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s
+ sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s
+ add lane0_tmp1_v.4s,lane0_msg_3_v.4s,key_v.4s
+ add lane1_tmp1_v.4s,lane1_msg_3_v.4s,key_v.4s
+ add lane2_tmp1_v.4s,lane2_msg_3_v.4s,key_v.4s
+ add lane3_tmp1_v.4s,lane3_msg_3_v.4s,key_v.4s
+ sha1su1 lane0_msg_0_v.4s,lane0_msg_3_v.4s
+ sha1su1 lane1_msg_0_v.4s,lane1_msg_3_v.4s
+ sha1su1 lane2_msg_0_v.4s,lane2_msg_3_v.4s
+ sha1su1 lane3_msg_0_v.4s,lane3_msg_3_v.4s
+
+ /* rounds 72-75 */
+ sha1h lane0_msg_2_s,lane0_abcd_s
+ sha1h lane1_msg_2_s,lane1_abcd_s
+ sha1h lane2_msg_2_s,lane2_abcd_s
+ sha1h lane3_msg_2_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_1_s,lane0_tmp0_v.4s
+ sha1p lane1_abcd_q,lane1_msg_1_s,lane1_tmp0_v.4s
+ sha1p lane2_abcd_q,lane2_msg_1_s,lane2_tmp0_v.4s
+ sha1p lane3_abcd_q,lane3_msg_1_s,lane3_tmp0_v.4s
+
+ /* rounds 76-79 */
+ sha1h lane0_msg_1_s,lane0_abcd_s
+ sha1h lane1_msg_1_s,lane1_abcd_s
+ sha1h lane2_msg_1_s,lane2_abcd_s
+ sha1h lane3_msg_1_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s
+ sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s
+ sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s
+ sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s
+ add digest_adr,digests,block_cnt
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[digest_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[digest_adr]
+
+ add lane0_abcd_v.4S,lane0_abcd_v.4S,lane0_msg_0_v.4S
+ add lane1_abcd_v.4S,lane1_abcd_v.4S,lane1_msg_0_v.4S
+ add lane2_abcd_v.4S,lane2_abcd_v.4S,lane2_msg_0_v.4S
+ add lane3_abcd_v.4S,lane3_abcd_v.4S,lane3_msg_0_v.4S
+
+ add lane0_msg_1_v.4S,lane0_msg_1_v.4S,lane0_msg_3_v.4S
+ add lane1_msg_1_v.4S,lane1_msg_1_v.4S,lane1_msg_3_v.4S
+ add lane2_msg_1_v.4S,lane2_msg_1_v.4S,lane2_msg_3_v.4S
+ add lane3_msg_1_v.4S,lane3_msg_1_v.4S,lane3_msg_3_v.4S
+
+ add digest_adr,digests,block_cnt
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs
+ st4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[digest_adr]
+
+ add block_cnt,block_cnt,16
+ cmp block_cnt,64
+ add msg_adr,input_data,block_cnt
+ add digest_adr,digests,block_cnt
+ bcc lane_loop
+
+ subs num_blocks,num_blocks,1
+ add input_data,input_data,1024
+ bhi start_loop
+exit_func:
+ //restore temp register
+ ldp d10, d11, [sp, 16]
+ ldp d12, d13, [sp, 32]
+ ldp d14, d15, [sp, 48]
+ ldp d8, d9, [sp], 128
+ ret
+
+ .size mh_sha1_block_ce, .-mh_sha1_block_ce
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+KEY_0:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+KEY_1:
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+KEY_2:
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+KEY_3:
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
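
mh_sha1_block_ce runs four segments per pass using the ARMv8 SHA-1 instructions: sha1h produces the rotated E value, sha1c/sha1p/sha1m perform four rounds of the choose/parity/majority phases, and sha1su0/sha1su1 extend the message schedule. The same building blocks exist as ACLE intrinsics in arm_neon.h; a single-lane sketch (assuming a compiler targeting the crypto extension) of one four-round group and one schedule update:

    #include <arm_neon.h>

    /* Four "choose"-phase rounds for one lane: abcd holds state words A..D,
       e is the fifth word, wk is the next four message words plus round key. */
    static inline void sha1_rounds_c(uint32x4_t *abcd, uint32_t *e, uint32x4_t wk)
    {
            uint32_t e_next = vsha1h_u32(vgetq_lane_u32(*abcd, 0)); /* next E = rotl(A, 30) */
            *abcd = vsha1cq_u32(*abcd, *e, wk);
            *e = e_next;
    }

    /* Message-schedule extension: derive w[i..i+3] from the previous 16 words,
       matching the sha1su0/sha1su1 pairs in the assembly above. */
    static inline uint32x4_t sha1_schedule(uint32x4_t w0, uint32x4_t w4,
                                           uint32x4_t w8, uint32x4_t w12)
    {
            return vsha1su1q_u32(vsha1su0q_u32(w0, w4, w8), w12);
    }
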
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c
new file mode 100644
index 000000000..c35daeab0
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c
@@ -0,0 +1,53 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+void mh_sha1_block_ce(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+/***************mh_sha1_update***********/
+// mh_sha1_update_ce.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_ce
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_ce
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail is used to calculate the last incomplete src data block
+// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail
+// mh_sha1_finalize_ce.c and mh_sha1_tail_ce.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_ce
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_ce
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_ce
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S
new file mode 100644
index 000000000..9a6d0caea
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S
@@ -0,0 +1,35 @@
+/**********************************************************************
+ Copyright(c) 2020 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#include "aarch64_multibinary.h"
+
+
+mbin_interface mh_sha1_update
+mbin_interface mh_sha1_finalize
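
mbin_interface (from aarch64_multibinary.h) emits the exported mh_sha1_update and mh_sha1_finalize symbols as stubs that call through an internal function pointer; on the first call that pointer still targets the dispatcher shown earlier, which selects an implementation and rebinds the pointer so later calls go straight to the chosen kernel. A plain-C analogue of that lazy binding (an illustration of the idea, not the actual macro expansion):

    static int impl_base(int x) { return x + 1; }
    static int impl_fast(int x) { return x + 1; }      /* stand-in for an optimized kernel */

    static int cpu_has_fast_path(void) { return 1; }   /* stand-in for the HWCAP probe */

    static int resolve(int x);
    static int (*entry)(int) = resolve;                /* starts out pointing at the resolver */

    static int resolve(int x)
    {
            entry = cpu_has_fast_path() ? impl_fast : impl_base;
            return entry(x);                           /* first call also does the work */
    }

    int public_entry(int x) { return entry(x); }       /* what mbin_interface would export */
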
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S
new file mode 100644
index 000000000..c8b8dd982
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S
@@ -0,0 +1,269 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+// macro F = (D ^ (B & (C ^ D)))
+.macro FUNC_F0
+ eor VF.16b, VC.16b, VD.16b
+ and VF.16b, VB.16b, VF.16b
+ eor VF.16b, VD.16b, VF.16b
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F1
+ eor VF.16b, VB.16b, VC.16b
+ eor VF.16b, VF.16b, VD.16b
+.endm
+
+// F = ((B & C) | (B & D) | (C & D))
+.macro FUNC_F2
+ and vT0.16b, VB.16b, VC.16b
+ and vT1.16b, VB.16b, VD.16b
+ and vT2.16b, VC.16b, VD.16b
+ orr VF.16b, vT0.16b, vT1.16b
+ orr VF.16b, VF.16b, vT2.16b
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F3
+ FUNC_F1
+.endm
+
+.altmacro
+.macro load_next_word windex
+ .if \windex < 16
+ load_x4_word \windex
+ .endif
+.endm
+
+// FUNC_F0 is merged into STEP_00_15 for efficiency
+.macro SHA1_STEP_00_15_F0 windex:req
+ rev32 WORD\windex\().16b,WORD\windex\().16b
+ next_word=\windex+1
+ load_next_word %next_word
+ // e = (a leftrotate 5) + f + e + k + w[i]
+ ushr VT.4s, VA.4s, 32 - 5
+ add VE.4s, VE.4s, VK.4s
+ sli VT.4s, VA.4s, 5
+ eor VF.16b, VC.16b, VD.16b
+ add VE.4s, VE.4s, WORD\windex\().4s
+ and VF.16b, VB.16b, VF.16b
+ add VE.4s, VE.4s, VT.4s
+ eor VF.16b, VD.16b, VF.16b
+ ushr VT.4s, VB.4s, 32 - 30
+ add VE.4s, VE.4s, VF.4s
+ sli VT.4s, VB.4s, 30
+.endm
+
+.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req
+ eor vT0.16b,\reg_3\().16b,\reg_8\().16b
+ eor VT.16b,\reg_14\().16b,\reg_16\().16b
+ eor vT0.16b,vT0.16b,VT.16b
+ // e = (a leftrotate 5) + f + e + k + w[i]
+ ushr VT.4s, vT0.4s, 32 - 1
+ add VE.4s, VE.4s, VK.4s
+ ushr vT1.4s, VA.4s, 32 - 5
+ sli VT.4s, vT0.4s, 1
+ add VE.4s, VE.4s, VT.4s
+ sli vT1.4s, VA.4s, 5
+ mov \reg_16\().16b,VT.16b
+ add VE.4s, VE.4s, vT1.4s
+ ushr VT.4s, VB.4s, 32 - 30
+ \func_f
+ add VE.4s, VE.4s, VF.4s
+ sli VT.4s, VB.4s, 30
+.endm
+
+ VA .req v0
+ VB .req v1
+ VC .req v2
+ VD .req v3
+ VE .req v4
+ VT .req v5
+ VF .req v6
+ VK .req v7
+ WORD0 .req v8
+ WORD1 .req v9
+ WORD2 .req v10
+ WORD3 .req v11
+ WORD4 .req v12
+ WORD5 .req v13
+ WORD6 .req v14
+ WORD7 .req v15
+ WORD8 .req v16
+ WORD9 .req v17
+ WORD10 .req v18
+ WORD11 .req v19
+ WORD12 .req v20
+ WORD13 .req v21
+ WORD14 .req v22
+ WORD15 .req v23
+ vT0 .req v24
+ vT1 .req v25
+ vT2 .req v26
+ vAA .req v27
+ vBB .req v28
+ vCC .req v29
+ vDD .req v30
+ vEE .req v31
+ TT .req v0
+ sha1key_adr .req x15
+
+.macro SWAP_STATES
+ // shifted VB is held in VT after each step
+ .unreq TT
+ TT .req VE
+ .unreq VE
+ VE .req VD
+ .unreq VD
+ VD .req VC
+ .unreq VC
+ VC .req VT
+ .unreq VT
+ VT .req VB
+ .unreq VB
+ VB .req VA
+ .unreq VA
+ VA .req TT
+.endm
+
+.altmacro
+.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req
+ SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\()
+.endm
+
+.macro exec_step windex:req
+ .if \windex <= 15
+ SHA1_STEP_00_15_F0 windex
+ .else
+ idx14=((\windex - 14) & 15)
+ idx8=((\windex - 8) & 15)
+ idx3=((\windex - 3) & 15)
+ idx16=(\windex & 15)
+ .if \windex <= 19
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 20 && \windex <= 39
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 40 && \windex <= 59
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 60 && \windex <= 79
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .endif
+
+ SWAP_STATES
+
+ .if \windex == 79
+ // after 80 steps, the registers ABCDET have shifted from
+ // their original order of 012345 to 341520
+ // have to swap back for both compile- and run-time correctness
+ mov v0.16b,v3.16b
+ .unreq VA
+ VA .req v0
+
+ mov vT0.16b,v2.16b
+ mov v2.16b,v1.16b
+ mov v1.16b,v4.16b
+ .unreq VB
+ VB .req v1
+ .unreq VC
+ VC .req v2
+
+ mov v3.16b,v5.16b
+ .unreq VD
+ VD .req v3
+
+ mov v4.16b,vT0.16b
+ .unreq VE
+ VE .req v4
+
+ .unreq VT
+ VT .req v5
+ .endif
+.endm
+
+.macro exec_steps idx:req,more:vararg
+ exec_step \idx
+ .ifnb \more
+ exec_steps \more
+ .endif
+.endm
+
+.macro sha1_single
+ load_x4_word 0
+
+ mov vAA.16B, VA.16B
+ mov vBB.16B, VB.16B
+ mov vCC.16B, VC.16B
+ mov vDD.16B, VD.16B
+ mov vEE.16B, VE.16B
+
+ adr sha1key_adr, KEY_0
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
+
+ // 20 ~ 39
+ adr sha1key_adr, KEY_1
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
+
+ // 40 ~ 59
+ adr sha1key_adr, KEY_2
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59
+
+ // 60 ~ 79
+ adr sha1key_adr, KEY_3
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
+
+ add VA.4s, vAA.4s, VA.4s
+ add VB.4s, vBB.4s, VB.4s
+ add VC.4s, vCC.4s, VC.4s
+ add VD.4s, vDD.4s, VD.4s
+ add VE.4s, vEE.4s, VE.4s
+.endm
+
+.macro sha1_asimd_save_stack
+ stp d8,d9,[sp, -64]!
+ stp d10,d11,[sp, 16]
+ stp d12,d13,[sp, 32]
+ stp d14,d15,[sp, 48]
+.endm
+
+.macro sha1_asimd_restore_stack
+ ldp d10,d11,[sp, 16]
+ ldp d12,d13,[sp, 32]
+ ldp d14,d15,[sp, 48]
+ ldp d8,d9,[sp],64
+.endm
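
sha1_asimd_common.S encodes the standard SHA-1 round over four segment lanes at once: FUNC_F0..F3 are the per-phase boolean functions, KEY_0..KEY_3 the round constants, and each step computes e = rotl(a,5) + f(b,c,d) + e + k + w[i] followed by b = rotl(b,30) and a rotation of the working variables (handled here by renaming registers in SWAP_STATES). For reference, the scalar recurrence each lane follows:

    #include <stdint.h>

    static inline uint32_t rotl32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* Round functions, matching FUNC_F0..F3 above (F3 reuses F1). */
    static inline uint32_t f0(uint32_t b, uint32_t c, uint32_t d) { return d ^ (b & (c ^ d)); }
    static inline uint32_t f1(uint32_t b, uint32_t c, uint32_t d) { return b ^ c ^ d; }
    static inline uint32_t f2(uint32_t b, uint32_t c, uint32_t d) { return (b & c) | (b & d) | (c & d); }

    /* One SHA-1 step: f and k are chosen by round number, w is the scheduled word. */
    static inline void sha1_step(uint32_t s[5], uint32_t f, uint32_t k, uint32_t w)
    {
            uint32_t t = rotl32(s[0], 5) + f + s[4] + k + w;
            s[4] = s[3];
            s[3] = s[2];
            s[2] = rotl32(s[1], 30);
            s[1] = s[0];
            s[0] = t;
    }
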
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c
new file mode 100644
index 000000000..e5d8ad86d
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c
@@ -0,0 +1,141 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+int mh_sha1_init(struct mh_sha1_ctx *ctx)
+{
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint32_t i;
+
+ if (ctx == NULL)
+ return MH_SHA1_CTX_ERROR_NULL;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+ for (i = 0; i < HASH_SEGS; i++) {
+ mh_sha1_segs_digests[0][i] = MH_SHA1_H0;
+ mh_sha1_segs_digests[1][i] = MH_SHA1_H1;
+ mh_sha1_segs_digests[2][i] = MH_SHA1_H2;
+ mh_sha1_segs_digests[3][i] = MH_SHA1_H3;
+ mh_sha1_segs_digests[4][i] = MH_SHA1_H4;
+ }
+
+ return MH_SHA1_CTX_ERROR_NONE;
+}
+
+#if (!defined(NOARCH)) && (defined(__i386__) || defined(__x86_64__) \
+ || defined( _M_X64) || defined(_M_IX86))
+/***************mh_sha1_update***********/
+// mh_sha1_update_sse.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_sse
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_sse
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+// mh_sha1_update_avx.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+// mh_sha1_update_avx2.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx2
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx2
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail is used to calculate the last incomplete src data block
+// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail
+
+// mh_sha1_finalize_sse.c and mh_sha1_tail_sse.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_sse
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_sse
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_sse
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+// mh_sha1_finalize_avx.c and mh_sha1_tail_avx.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+// mh_sha1_finalize_avx2.c and mh_sha1_tail_avx2.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx2
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx2
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx2
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************version info***********/
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+// Version info
+struct slver mh_sha1_init_slver_00000271;
+struct slver mh_sha1_init_slver = { 0x0271, 0x00, 0x00 };
+
+// mh_sha1_update version info
+struct slver mh_sha1_update_sse_slver_00000274;
+struct slver mh_sha1_update_sse_slver = { 0x0274, 0x00, 0x00 };
+
+struct slver mh_sha1_update_avx_slver_02000276;
+struct slver mh_sha1_update_avx_slver = { 0x0276, 0x00, 0x02 };
+
+struct slver mh_sha1_update_avx2_slver_04000278;
+struct slver mh_sha1_update_avx2_slver = { 0x0278, 0x00, 0x04 };
+
+// mh_sha1_finalize version info
+struct slver mh_sha1_finalize_sse_slver_00000275;
+struct slver mh_sha1_finalize_sse_slver = { 0x0275, 0x00, 0x00 };
+
+struct slver mh_sha1_finalize_avx_slver_02000277;
+struct slver mh_sha1_finalize_avx_slver = { 0x0277, 0x00, 0x02 };
+
+struct slver mh_sha1_finalize_avx2_slver_04000279;
+struct slver mh_sha1_finalize_avx2_slver = { 0x0279, 0x00, 0x04 };
+
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c
new file mode 100644
index 000000000..1305d048f
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c
@@ -0,0 +1,70 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+#ifdef HAVE_AS_KNOWS_AVX512
+
+/***************mh_sha1_update***********/
+// mh_sha1_update_avx512.c
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx512
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx512
+#include "mh_sha1_update_base.c"
+#undef MH_SHA1_UPDATE_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************mh_sha1_finalize AND mh_sha1_tail***********/
+// mh_sha1_tail is used to calculate the last incomplete src data block
+// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail
+// mh_sha1_finalize_avx512.c and mh_sha1_tail_avx512.c
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx512
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx512
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx512
+#include "mh_sha1_finalize_base.c"
+#undef MH_SHA1_FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+#undef MH_SHA1_BLOCK_FUNCTION
+
+/***************version info***********/
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// mh_sha1_update version info
+struct slver mh_sha1_update_avx512_slver_0600027c;
+struct slver mh_sha1_update_avx512_slver = { 0x027c, 0x00, 0x06 };
+
+// mh_sha1_finalize version info
+struct slver mh_sha1_finalize_avx512_slver_0600027d;
+struct slver mh_sha1_finalize_avx512_slver = { 0x027d, 0x00, 0x06 };
+
+#endif // HAVE_AS_KNOWS_AVX512
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c
new file mode 100644
index 000000000..18cd8161b
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c
@@ -0,0 +1,40 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include "mh_sha1_internal.h"
+#include <string.h>
+int mh_sha1_update(struct mh_sha1_ctx *ctx, const void *buffer, uint32_t len)
+{
+ return mh_sha1_update_base(ctx, buffer, len);
+
+}
+
+int mh_sha1_finalize(struct mh_sha1_ctx *ctx, void *mh_sha1_digest)
+{
+ return mh_sha1_finalize_base(ctx, mh_sha1_digest);
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm
new file mode 100644
index 000000000..f4b5e76a0
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm
@@ -0,0 +1,506 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using AVX
+;;
+
+%include "reg_sizes.asm"
+
+[bits 64]
+default rel
+section .text
+
+;; Magic functions defined in FIPS 180-1
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF, %%regC,%%regD
+ vpand %%regF, %%regF,%%regB
+ vpxor %%regF, %%regF,%%regD
+%endmacro
+
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF,%%regD,%%regC
+ vpxor %%regF,%%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpor %%regF,%%regB,%%regC
+ vpand %%regT,%%regB,%%regC
+ vpand %%regF,%%regF,%%regD
+ vpor %%regF,%%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ vpsrld %%tmp, %%reg, (32-(%%imm))
+ vpslld %%reg, %%reg, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+; non-destructive
+; PROLD_nd reg, imm, tmp, src
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+ vpsrld %%tmp, %%src, (32-(%%imm))
+ vpslld %%reg, %%src, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ vpaddd %%regE, %%regE,[%%data + (%%memW * 16)]
+ PROLD_nd %%regT,5, %%regF,%%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE, %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp2
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+
+;void mh_sha1_block_avx(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, includes the segment digests (uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big-endian data.
+; arg 3 number of 1KB blocks
+;
+mk_global mh_sha1_block_avx, function, internal
+func(mh_sha1_block_avx)
+ endbranch
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 16 Bytes needed by avx
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ VMOVPS A, [mh_digests_p + I*64 + 16*0]
+ VMOVPS B, [mh_digests_p + I*64 + 16*1]
+ VMOVPS C, [mh_digests_p + I*64 + 16*2]
+ VMOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ vmovdqa [rsp + I*64 + 16*0], A
+ vmovdqa [rsp + I*64 + 16*1], B
+ vmovdqa [rsp + I*64 + 16*2], C
+ vmovdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*16]
+ VMOVPS T1,[mh_in_p + I*64+1*16]
+ VMOVPS T2,[mh_in_p + I*64+2*16]
+ VMOVPS T3,[mh_in_p + I*64+3*16]
+
+ vpshufb T0, F
+ vmovdqa [mh_data_p +(I)*16 +0*256],T0
+ vpshufb T1, F
+ vmovdqa [mh_data_p +(I)*16 +1*256],T1
+ vpshufb T2, F
+ vmovdqa [mh_data_p +(I)*16 +2*256],T2
+ vpshufb T3, F
+ vmovdqa [mh_data_p +(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+	mov	pref, 1024		;avoid prefetching repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vmovdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+;; do rounds 20...39
+ vmovdqa K, [K20_39]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ vmovdqa K, [K40_59]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 60...79
+ vmovdqa K, [K60_79]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ vpaddd A, AA
+ vpaddd B, BB
+ vpaddd C, CC
+ vpaddd D, DD
+ vpaddd E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 256
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ vmovdqa A, [rsp + I*64 + 16*0]
+ vmovdqa B, [rsp + I*64 + 16*1]
+ vmovdqa C, [rsp + I*64 + 16*2]
+ vmovdqa D, [rsp + I*64 + 16*3]
+
+ VMOVPS [mh_digests_p + I*64 + 16*0], A
+ VMOVPS [mh_digests_p + I*64 + 16*1], B
+ VMOVPS [mh_digests_p + I*64 + 16*2], C
+ VMOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
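
A note on the round helpers in this AVX kernel: MAGIC_F0 evaluates the SHA-1 Ch function in the reduced form F = D ^ (B & (C ^ D)), one vector operation cheaper than the textbook (B & C) | (~B & D), and MAGIC_F2 builds Maj as ((B | C) & D) | (B & C). A small standalone C sketch (illustrative only, not part of the library) spot-checking the Ch identity:

#include <assert.h>
#include <stdint.h>

/* textbook Ch and the reduced form used by MAGIC_F0 */
static uint32_t ch_ref(uint32_t b, uint32_t c, uint32_t d) { return (b & c) | (~b & d); }
static uint32_t ch_red(uint32_t b, uint32_t c, uint32_t d) { return d ^ (b & (c ^ d)); }

int main(void)
{
	uint32_t x = 0x12345678u;
	for (int i = 0; i < 100000; i++) {
		uint32_t b = x; x = x * 1664525u + 1013904223u;  /* simple LCG inputs */
		uint32_t c = x; x = x * 1664525u + 1013904223u;
		uint32_t d = x; x = x * 1664525u + 1013904223u;
		assert(ch_ref(b, c, d) == ch_red(b, c, d));
	}
	return 0;
}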
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm
new file mode 100644
index 000000000..fed35d83e
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm
@@ -0,0 +1,508 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using AVX-2
+;;
+
+%include "reg_sizes.asm"
+
+[bits 64]
+default rel
+section .text
+
+;; Magic functions defined in FIPS 180-1
+;;
+;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | ((~ B) & D) )
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpand %%regF, %%regB,%%regC
+ vpandn %%regT, %%regB,%%regD
+ vpor %%regF, %%regT,%%regF
+%endmacro
+
+;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF,%%regD,%%regC
+ vpxor %%regF,%%regF,%%regB
+%endmacro
+
+
+
+;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpor %%regF,%%regB,%%regC
+ vpand %%regT,%%regB,%%regC
+ vpand %%regF,%%regF,%%regD
+ vpor %%regF,%%regF,%%regT
+%endmacro
+
+;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ vpsrld %%tmp, %%reg, (32-%%imm)
+ vpslld %%reg, %%reg, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+ vpsrld %%tmp, %%src, (32-%%imm)
+ vpslld %%reg, %%src, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ vpaddd %%regE, %%regE,[%%data + (%%memW * 32)]
+ PROLD_nd %%regT,5, %%regF,%%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE, %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp2
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+%define A ymm0
+%define B ymm1
+%define C ymm2
+%define D ymm3
+%define E ymm4
+
+%define F ymm5
+%define T0 ymm6
+%define T1 ymm7
+%define T2 ymm8
+%define T3 ymm9
+%define T4 ymm10
+%define T5 ymm11
+%define T6 ymm12
+%define T7 ymm13
+%define T8 ymm14
+%define T9 ymm15
+
+%define AA ymm5
+%define BB ymm6
+%define CC ymm7
+%define DD ymm8
+%define EE ymm9
+%define TMP ymm10
+%define FUN ymm11
+%define K ymm12
+%define W14 ymm13
+%define W15 ymm14
+%define W16 ymm15
+
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+
+;void mh_sha1_block_avx2(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, includes the segment digests (uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big-endian data.
+; arg 3 number of 1KB blocks
+;
+mk_global mh_sha1_block_avx2, function, internal
+func(mh_sha1_block_avx2)
+ endbranch
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ test loops, loops
+ jz .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 32 Bytes needed by avx2
+ and rsp, ~0x1F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 2
+ VMOVPS A, [mh_digests_p + I*32*5 + 32*0]
+ VMOVPS B, [mh_digests_p + I*32*5 + 32*1]
+ VMOVPS C, [mh_digests_p + I*32*5 + 32*2]
+ VMOVPS D, [mh_digests_p + I*32*5 + 32*3]
+ VMOVPS E, [mh_digests_p + I*32*5 + 32*4]
+
+ vmovdqa [rsp + I*32*5 + 32*0], A
+ vmovdqa [rsp + I*32*5 + 32*1], B
+ vmovdqa [rsp + I*32*5 + 32*2], C
+ vmovdqa [rsp + I*32*5 + 32*3], D
+ vmovdqa [rsp + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vbroadcasti128 F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*8_SEGS*5*2
+%assign I 0
+%rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*32]
+ VMOVPS T1,[mh_in_p + I*64+1*32]
+
+ vpshufb T0, T0, F
+ vmovdqa [mh_data_p +I*32+0*512],T0
+ vpshufb T1, T1, F
+ vmovdqa [mh_data_p +I*32+1*512],T1
+%assign I (I+1)
+%endrep
+
+ xor mh_segs, mh_segs ;start from the first 8 segments
+	mov	pref, 1024		;avoid prefetching repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vpbroadcastq K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+%assign I (I+1)
+%endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 32]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 32]
+ %rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 20...39
+ vpbroadcastq K, [K20_39]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+;; do rounds 40...59
+ vpbroadcastq K, [K40_59]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*2]
+ PREFETCH_X [mh_in_p + pref+128*3]
+;; do rounds 60...79
+ vpbroadcastq K, [K60_79]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ vpaddd A,A, AA
+ vpaddd B,B, BB
+ vpaddd C,C, CC
+ vpaddd D,D, DD
+ vpaddd E,E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 512
+
+ add mh_data_p, 512
+ add mh_segs, 32
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 2
+ vmovdqa A, [rsp + I*32*5 + 32*0]
+ vmovdqa B, [rsp + I*32*5 + 32*1]
+ vmovdqa C, [rsp + I*32*5 + 32*2]
+ vmovdqa D, [rsp + I*32*5 + 32*3]
+ vmovdqa E, [rsp + I*32*5 + 32*4]
+
+ VMOVPS [mh_digests_p + I*32*5 + 32*0], A
+ VMOVPS [mh_digests_p + I*32*5 + 32*1], B
+ VMOVPS [mh_digests_p + I*32*5 + 32*2], C
+ VMOVPS [mh_digests_p + I*32*5 + 32*3], D
+ VMOVPS [mh_digests_p + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .rodata align=32
+
+align 32
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+K00_19: dq 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6
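
The SHA1_STEP_16_79 macro used above implements the FIPS 180-1 message schedule W[t] = ROTL1(W[t-3] ^ W[t-8] ^ W[t-14] ^ W[t-16]) over a 16-entry ring buffer kept in the frame buffer, with the W14/W15/W16 defines acting as rotating register aliases. A scalar C sketch of the same recurrence for one segment (illustrative; rol32 is defined locally here, not taken from the library):

#include <stdint.h>

static inline uint32_t rol32(uint32_t x, int r)
{
	return (x << r) | (x >> (32 - r));
}

/* Advance the schedule for round t (t >= 16) using a 16-word ring buffer. */
static uint32_t sched_next(uint32_t w[16], int t)
{
	uint32_t v = w[(t - 3) & 15] ^ w[(t - 8) & 15] ^
		     w[(t - 14) & 15] ^ w[(t - 16) & 15];
	w[t & 15] = rol32(v, 1);
	return w[t & 15];
}

The vectorized macro performs the same update, but on eight schedule lanes at once (one 32-bit lane per segment in each ymm register).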
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm
new file mode 100644
index 000000000..a72c21661
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm
@@ -0,0 +1,406 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using AVX-512
+;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+[bits 64]
+default rel
+section .text
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovdqu64
+;SIMD variables definition
+%define A zmm0
+%define B zmm1
+%define C zmm2
+%define D zmm3
+%define E zmm4
+%define HH0 zmm5
+%define HH1 zmm6
+%define HH2 zmm7
+%define HH3 zmm8
+%define HH4 zmm9
+%define KT zmm10
+%define XTMP0 zmm11
+%define XTMP1 zmm12
+%define SHUF_MASK zmm13
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;using extra 16 ZMM registers to place the inverse input data
+%define W0 zmm16
+%define W1 zmm17
+%define W2 zmm18
+%define W3 zmm19
+%define W4 zmm20
+%define W5 zmm21
+%define W6 zmm22
+%define W7 zmm23
+%define W8 zmm24
+%define W9 zmm25
+%define W10 zmm26
+%define W11 zmm27
+%define W12 zmm28
+%define W13 zmm29
+%define W14 zmm30
+%define W15 zmm31
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;macros definition
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro PROCESS_LOOP 2
+%define %%WT %1
+%define %%F_IMMED %2
+
+ ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
+ ; E=D, D=C, C=ROTL_30(B), B=A, A=T
+
+ ; Ft
+ ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D)
+ ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D
+ ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)
+
+ vmovdqa32 XTMP1, B ; Copy B
+ vpaddd E, E, %%WT ; E = E + Wt
+ vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D)
+ vpaddd E, E, KT ; E = E + Wt + Kt
+ vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A)
+ vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt
+ vprold B, B, 30 ; B = ROTL_30(B)
+ vpaddd E, E, XTMP0 ; E = T
+
+ ROTATE_ARGS
+%endmacro
+
+%macro MSG_SCHED_ROUND_16_79 4
+%define %%WT %1
+%define %%WTp2 %2
+%define %%WTp8 %3
+%define %%WTp13 %4
+ ; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ Wt-16)
+ ; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt)
+ vpternlogd %%WT, %%WTp2, %%WTp8, 0x96
+ vpxord %%WT, %%WT, %%WTp13
+ vprold %%WT, %%WT, 1
+%endmacro
+
+%define APPEND(a,b) a %+ b
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ ; remove unwind info macros
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp + 0*16], xmm6
+ movdqa [rsp + 1*16], xmm7
+ movdqa [rsp + 2*16], xmm8
+ movdqa [rsp + 3*16], xmm9
+ movdqa [rsp + 4*16], xmm10
+ movdqa [rsp + 5*16], xmm11
+ movdqa [rsp + 6*16], xmm12
+ movdqa [rsp + 7*16], xmm13
+ movdqa [rsp + 8*16], xmm14
+ movdqa [rsp + 9*16], xmm15
+ mov [rsp + 10*16 + 0*8], r12
+ mov [rsp + 10*16 + 1*8], r13
+ mov [rsp + 10*16 + 2*8], r14
+ mov [rsp + 10*16 + 3*8], r15
+ mov [rsp + 10*16 + 4*8], rdi
+ mov [rsp + 10*16 + 5*8], rsi
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp2
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+[bits 64]
+section .text
+align 32
+
+;void mh_sha1_block_avx512(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, includes the segment digests (uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big-endian data.
+; arg 3 number of 1KB blocks
+;
+global mh_sha1_block_avx512
+func(mh_sha1_block_avx512)
+ endbranch
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; align rsp to 64 Bytes needed by avx512
+ and rsp, ~0x3f
+
+ ; copy segs_digests into registers.
+ VMOVPS HH0, [mh_digests_p + 64*0]
+ VMOVPS HH1, [mh_digests_p + 64*1]
+ VMOVPS HH2, [mh_digests_p + 64*2]
+ VMOVPS HH3, [mh_digests_p + 64*3]
+ VMOVPS HH4, [mh_digests_p + 64*4]
+ ;a mask used to transform to big-endian data
+ vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK]
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ ;using extra 16 ZMM registers instead of stack
+%assign I 0
+%rep 8
+%assign J (I+1)
+ VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64]
+ VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64]
+
+ vpshufb APPEND(W,I), APPEND(W,I), SHUF_MASK
+ vpshufb APPEND(W,J), APPEND(W,J), SHUF_MASK
+%assign I (I+2)
+%endrep
+
+ vmovdqa64 A, HH0
+ vmovdqa64 B, HH1
+ vmovdqa64 C, HH2
+ vmovdqa64 D, HH3
+ vmovdqa64 E, HH4
+
+ vmovdqa32 KT, [K00_19]
+%assign I 0xCA
+%assign J 0
+%assign K 2
+%assign L 8
+%assign M 13
+%assign N 0
+%rep 80
+ PROCESS_LOOP APPEND(W,J), I
+ %if N < 64
+ MSG_SCHED_ROUND_16_79 APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M)
+ %endif
+ %if N = 19
+ vmovdqa32 KT, [K20_39]
+ %assign I 0x96
+ %elif N = 39
+ vmovdqa32 KT, [K40_59]
+ %assign I 0xE8
+ %elif N = 59
+ vmovdqa32 KT, [K60_79]
+ %assign I 0x96
+ %endif
+ %if N % 10 = 9
+ PREFETCH_X [mh_in_p + 1024+128*(N / 10)]
+ %endif
+%assign J ((J+1)% 16)
+%assign K ((K+1)% 16)
+%assign L ((L+1)% 16)
+%assign M ((M+1)% 16)
+%assign N (N+1)
+%endrep
+
+ ; Add old digest
+ vpaddd HH0,A, HH0
+ vpaddd HH1,B, HH1
+ vpaddd HH2,C, HH2
+ vpaddd HH3,D, HH3
+ vpaddd HH4,E, HH4
+
+ add mh_in_p, 1024
+ sub loops, 1
+ jne .block_loop
+
+ ; copy segs_digests to mh_digests_p
+ VMOVPS [mh_digests_p + 64*0], HH0
+ VMOVPS [mh_digests_p + 64*1], HH1
+ VMOVPS [mh_digests_p + 64*2], HH2
+ VMOVPS [mh_digests_p + 64*3], HH3
+ VMOVPS [mh_digests_p + 64*4], HH4
+
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+
+section .data align=64
+
+align 64
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+
+K20_39: dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+
+K40_59: dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+
+K60_79: dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_mh_sha1_block_avx512
+no_mh_sha1_block_avx512:
+%endif
+%endif ; HAVE_AS_KNOWS_AVX512
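
In the AVX-512 kernel the three SHA-1 boolean functions collapse into single vpternlogd instructions: with the copy of B as the destination operand and C, D as sources, the immediates 0xCA, 0x96 and 0xE8 are the 8-entry truth tables of Ch, Parity and Maj. A tiny standalone C sketch (illustrative) that evaluates such an immediate per bit triplet and checks it against the reference functions:

#include <assert.h>
#include <stdint.h>

/* Apply an 8-bit ternary-logic truth table to one bit triplet (a,b,c);
 * a comes from the destination operand, b and c from the two sources. */
static int tern(uint8_t imm, int a, int b, int c)
{
	return (imm >> ((a << 2) | (b << 1) | c)) & 1;
}

int main(void)
{
	for (int b = 0; b < 2; b++)
		for (int c = 0; c < 2; c++)
			for (int d = 0; d < 2; d++) {
				assert(tern(0xCA, b, c, d) == ((b & c) | ((b ^ 1) & d)));      /* Ch     */
				assert(tern(0x96, b, c, d) == (b ^ c ^ d));                    /* Parity */
				assert(tern(0xE8, b, c, d) == ((b & c) | (b & d) | (c & d)));  /* Maj    */
			}
	return 0;
}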
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c
new file mode 100644
index 000000000..402c9741a
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c
@@ -0,0 +1,387 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "mh_sha1_internal.h"
+#include <string.h>
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+// Base multi-hash SHA1 Functions
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+#define store_w(s, i, w, ww) (w[i][s] = to_be32(ww[i*HASH_SEGS+s])) // only used for step 0 ~ 15
+#define update_w(s, i, w) (w[i&15][s] = rol32(w[(i-3)&15][s]^w[(i-8)&15][s]^w[(i-14)&15][s]^w[(i-16)&15][s], 1)) // used for step > 15
+#define update_e_1(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F1(b[s],c[s],d[s]) + K_00_19 + w[i&15][s])
+#define update_e_2(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F2(b[s],c[s],d[s]) + K_20_39 + w[i&15][s])
+#define update_e_3(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F3(b[s],c[s],d[s]) + K_40_59 + w[i&15][s])
+#define update_e_4(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F4(b[s],c[s],d[s]) + K_60_79 + w[i&15][s])
+#define update_b(s, b) (b[s] = rol32(b[s],30))
+
+#define STORE_W(i, w, ww) \
+ store_w(0, i, w, ww); \
+ store_w(1, i, w, ww); \
+ store_w(2, i, w, ww); \
+ store_w(3, i, w, ww); \
+ store_w(4, i, w, ww); \
+ store_w(5, i, w, ww); \
+ store_w(6, i, w, ww); \
+ store_w(7, i, w, ww); \
+ store_w(8, i, w, ww); \
+ store_w(9, i, w, ww); \
+ store_w(10, i, w, ww); \
+ store_w(11, i, w, ww); \
+ store_w(12, i, w, ww); \
+ store_w(13, i, w, ww); \
+ store_w(14, i, w, ww); \
+ store_w(15, i, w, ww)
+
+#define UPDATE_W(i, w) \
+ update_w(0, i, w); \
+ update_w(1, i, w); \
+ update_w(2, i, w); \
+ update_w(3, i, w); \
+ update_w(4, i, w); \
+ update_w(5, i, w); \
+ update_w(6, i, w); \
+ update_w(7, i, w); \
+ update_w(8, i, w); \
+ update_w(9, i, w); \
+ update_w(10, i, w); \
+ update_w(11, i, w); \
+ update_w(12, i, w); \
+ update_w(13, i, w); \
+ update_w(14, i, w); \
+ update_w(15, i, w)
+
+#define UPDATE_E1(a, b, c, d, e, i, w) \
+ update_e_1(0, a, b, c, d, e, i, w); \
+ update_e_1(1, a, b, c, d, e, i, w); \
+ update_e_1(2, a, b, c, d, e, i, w); \
+ update_e_1(3, a, b, c, d, e, i, w); \
+ update_e_1(4, a, b, c, d, e, i, w); \
+ update_e_1(5, a, b, c, d, e, i, w); \
+ update_e_1(6, a, b, c, d, e, i, w); \
+ update_e_1(7, a, b, c, d, e, i, w); \
+ update_e_1(8, a, b, c, d, e, i, w); \
+ update_e_1(9, a, b, c, d, e, i, w); \
+ update_e_1(10, a, b, c, d, e, i, w); \
+ update_e_1(11, a, b, c, d, e, i, w); \
+ update_e_1(12, a, b, c, d, e, i, w); \
+ update_e_1(13, a, b, c, d, e, i, w); \
+ update_e_1(14, a, b, c, d, e, i, w); \
+ update_e_1(15, a, b, c, d, e, i, w)
+
+#define UPDATE_E2(a, b, c, d, e, i, w) \
+ update_e_2(0, a, b, c, d, e, i, w); \
+ update_e_2(1, a, b, c, d, e, i, w); \
+ update_e_2(2, a, b, c, d, e, i, w); \
+ update_e_2(3, a, b, c, d, e, i, w); \
+ update_e_2(4, a, b, c, d, e, i, w); \
+ update_e_2(5, a, b, c, d, e, i, w); \
+ update_e_2(6, a, b, c, d, e, i, w); \
+ update_e_2(7, a, b, c, d, e, i, w); \
+ update_e_2(8, a, b, c, d, e, i, w); \
+ update_e_2(9, a, b, c, d, e, i, w); \
+ update_e_2(10, a, b, c, d, e, i, w); \
+ update_e_2(11, a, b, c, d, e, i, w); \
+ update_e_2(12, a, b, c, d, e, i, w); \
+ update_e_2(13, a, b, c, d, e, i, w); \
+ update_e_2(14, a, b, c, d, e, i, w); \
+ update_e_2(15, a, b, c, d, e, i, w)
+
+#define UPDATE_E3(a, b, c, d, e, i, w) \
+ update_e_3(0, a, b, c, d, e, i, w); \
+ update_e_3(1, a, b, c, d, e, i, w); \
+ update_e_3(2, a, b, c, d, e, i, w); \
+ update_e_3(3, a, b, c, d, e, i, w); \
+ update_e_3(4, a, b, c, d, e, i, w); \
+ update_e_3(5, a, b, c, d, e, i, w); \
+ update_e_3(6, a, b, c, d, e, i, w); \
+ update_e_3(7, a, b, c, d, e, i, w); \
+ update_e_3(8, a, b, c, d, e, i, w); \
+ update_e_3(9, a, b, c, d, e, i, w); \
+ update_e_3(10, a, b, c, d, e, i, w); \
+ update_e_3(11, a, b, c, d, e, i, w); \
+ update_e_3(12, a, b, c, d, e, i, w); \
+ update_e_3(13, a, b, c, d, e, i, w); \
+ update_e_3(14, a, b, c, d, e, i, w); \
+ update_e_3(15, a, b, c, d, e, i, w)
+
+#define UPDATE_E4(a, b, c, d, e, i, w) \
+ update_e_4(0, a, b, c, d, e, i, w); \
+ update_e_4(1, a, b, c, d, e, i, w); \
+ update_e_4(2, a, b, c, d, e, i, w); \
+ update_e_4(3, a, b, c, d, e, i, w); \
+ update_e_4(4, a, b, c, d, e, i, w); \
+ update_e_4(5, a, b, c, d, e, i, w); \
+ update_e_4(6, a, b, c, d, e, i, w); \
+ update_e_4(7, a, b, c, d, e, i, w); \
+ update_e_4(8, a, b, c, d, e, i, w); \
+ update_e_4(9, a, b, c, d, e, i, w); \
+ update_e_4(10, a, b, c, d, e, i, w); \
+ update_e_4(11, a, b, c, d, e, i, w); \
+ update_e_4(12, a, b, c, d, e, i, w); \
+ update_e_4(13, a, b, c, d, e, i, w); \
+ update_e_4(14, a, b, c, d, e, i, w); \
+ update_e_4(15, a, b, c, d, e, i, w)
+
+#define UPDATE_B(b) \
+ update_b(0, b); \
+ update_b(1, b); \
+ update_b(2, b); \
+ update_b(3, b); \
+ update_b(4, b); \
+ update_b(5, b); \
+ update_b(6, b); \
+ update_b(7, b); \
+ update_b(8, b); \
+ update_b(9, b); \
+ update_b(10, b); \
+ update_b(11, b); \
+ update_b(12, b); \
+ update_b(13, b); \
+ update_b(14, b); \
+ update_b(15, b)
+
+static inline void step00_15(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS],
+ uint32_t * ww)
+{
+ STORE_W(i, w, ww);
+ UPDATE_E1(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void step16_19(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS])
+{
+ UPDATE_W(i, w);
+ UPDATE_E1(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void step20_39(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS])
+{
+ UPDATE_W(i, w);
+ UPDATE_E2(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void step40_59(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS])
+{
+ UPDATE_W(i, w);
+ UPDATE_E3(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void step60_79(int i, uint32_t * a, uint32_t * b, uint32_t * c,
+ uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS])
+{
+ UPDATE_W(i, w);
+ UPDATE_E4(a, b, c, d, e, i, w);
+ UPDATE_B(b);
+}
+
+static inline void init_abcde(uint32_t * xx, uint32_t n,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS])
+{
+ xx[0] = digests[n][0];
+ xx[1] = digests[n][1];
+ xx[2] = digests[n][2];
+ xx[3] = digests[n][3];
+ xx[4] = digests[n][4];
+ xx[5] = digests[n][5];
+ xx[6] = digests[n][6];
+ xx[7] = digests[n][7];
+ xx[8] = digests[n][8];
+ xx[9] = digests[n][9];
+ xx[10] = digests[n][10];
+ xx[11] = digests[n][11];
+ xx[12] = digests[n][12];
+ xx[13] = digests[n][13];
+ xx[14] = digests[n][14];
+ xx[15] = digests[n][15];
+}
+
+static inline void add_abcde(uint32_t * xx, uint32_t n,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS])
+{
+ digests[n][0] += xx[0];
+ digests[n][1] += xx[1];
+ digests[n][2] += xx[2];
+ digests[n][3] += xx[3];
+ digests[n][4] += xx[4];
+ digests[n][5] += xx[5];
+ digests[n][6] += xx[6];
+ digests[n][7] += xx[7];
+ digests[n][8] += xx[8];
+ digests[n][9] += xx[9];
+ digests[n][10] += xx[10];
+ digests[n][11] += xx[11];
+ digests[n][12] += xx[12];
+ digests[n][13] += xx[13];
+ digests[n][14] += xx[14];
+ digests[n][15] += xx[15];
+}
+
+/*
+ * Perform the 0-79 steps of the multi-hash algorithm on a single
+ * block of data. The caller is responsible for supplying a full
+ * block of input data.
+ *
+ * Arguments:
+ *	input        - pointer to the input data
+ *	digests      - space that holds the digests of all segments
+ *	frame_buffer - scratch area used to hold the message schedule
+ *
+ * Return:
+ *	N/A
+ */
+void mh_sha1_single(const uint8_t * input, uint32_t(*digests)[HASH_SEGS],
+ uint8_t * frame_buffer)
+{
+ uint32_t aa[HASH_SEGS], bb[HASH_SEGS], cc[HASH_SEGS], dd[HASH_SEGS], ee[HASH_SEGS];
+ uint32_t *ww = (uint32_t *) input;
+ uint32_t(*w)[HASH_SEGS];
+
+ w = (uint32_t(*)[HASH_SEGS]) frame_buffer;
+
+ init_abcde(aa, 0, digests);
+ init_abcde(bb, 1, digests);
+ init_abcde(cc, 2, digests);
+ init_abcde(dd, 3, digests);
+ init_abcde(ee, 4, digests);
+
+ step00_15(0, aa, bb, cc, dd, ee, w, ww);
+ step00_15(1, ee, aa, bb, cc, dd, w, ww);
+ step00_15(2, dd, ee, aa, bb, cc, w, ww);
+ step00_15(3, cc, dd, ee, aa, bb, w, ww);
+ step00_15(4, bb, cc, dd, ee, aa, w, ww);
+ step00_15(5, aa, bb, cc, dd, ee, w, ww);
+ step00_15(6, ee, aa, bb, cc, dd, w, ww);
+ step00_15(7, dd, ee, aa, bb, cc, w, ww);
+ step00_15(8, cc, dd, ee, aa, bb, w, ww);
+ step00_15(9, bb, cc, dd, ee, aa, w, ww);
+ step00_15(10, aa, bb, cc, dd, ee, w, ww);
+ step00_15(11, ee, aa, bb, cc, dd, w, ww);
+ step00_15(12, dd, ee, aa, bb, cc, w, ww);
+ step00_15(13, cc, dd, ee, aa, bb, w, ww);
+ step00_15(14, bb, cc, dd, ee, aa, w, ww);
+ step00_15(15, aa, bb, cc, dd, ee, w, ww);
+
+ step16_19(16, ee, aa, bb, cc, dd, w);
+ step16_19(17, dd, ee, aa, bb, cc, w);
+ step16_19(18, cc, dd, ee, aa, bb, w);
+ step16_19(19, bb, cc, dd, ee, aa, w);
+
+ step20_39(20, aa, bb, cc, dd, ee, w);
+ step20_39(21, ee, aa, bb, cc, dd, w);
+ step20_39(22, dd, ee, aa, bb, cc, w);
+ step20_39(23, cc, dd, ee, aa, bb, w);
+ step20_39(24, bb, cc, dd, ee, aa, w);
+ step20_39(25, aa, bb, cc, dd, ee, w);
+ step20_39(26, ee, aa, bb, cc, dd, w);
+ step20_39(27, dd, ee, aa, bb, cc, w);
+ step20_39(28, cc, dd, ee, aa, bb, w);
+ step20_39(29, bb, cc, dd, ee, aa, w);
+ step20_39(30, aa, bb, cc, dd, ee, w);
+ step20_39(31, ee, aa, bb, cc, dd, w);
+ step20_39(32, dd, ee, aa, bb, cc, w);
+ step20_39(33, cc, dd, ee, aa, bb, w);
+ step20_39(34, bb, cc, dd, ee, aa, w);
+ step20_39(35, aa, bb, cc, dd, ee, w);
+ step20_39(36, ee, aa, bb, cc, dd, w);
+ step20_39(37, dd, ee, aa, bb, cc, w);
+ step20_39(38, cc, dd, ee, aa, bb, w);
+ step20_39(39, bb, cc, dd, ee, aa, w);
+
+ step40_59(40, aa, bb, cc, dd, ee, w);
+ step40_59(41, ee, aa, bb, cc, dd, w);
+ step40_59(42, dd, ee, aa, bb, cc, w);
+ step40_59(43, cc, dd, ee, aa, bb, w);
+ step40_59(44, bb, cc, dd, ee, aa, w);
+ step40_59(45, aa, bb, cc, dd, ee, w);
+ step40_59(46, ee, aa, bb, cc, dd, w);
+ step40_59(47, dd, ee, aa, bb, cc, w);
+ step40_59(48, cc, dd, ee, aa, bb, w);
+ step40_59(49, bb, cc, dd, ee, aa, w);
+ step40_59(50, aa, bb, cc, dd, ee, w);
+ step40_59(51, ee, aa, bb, cc, dd, w);
+ step40_59(52, dd, ee, aa, bb, cc, w);
+ step40_59(53, cc, dd, ee, aa, bb, w);
+ step40_59(54, bb, cc, dd, ee, aa, w);
+ step40_59(55, aa, bb, cc, dd, ee, w);
+ step40_59(56, ee, aa, bb, cc, dd, w);
+ step40_59(57, dd, ee, aa, bb, cc, w);
+ step40_59(58, cc, dd, ee, aa, bb, w);
+ step40_59(59, bb, cc, dd, ee, aa, w);
+
+ step60_79(60, aa, bb, cc, dd, ee, w);
+ step60_79(61, ee, aa, bb, cc, dd, w);
+ step60_79(62, dd, ee, aa, bb, cc, w);
+ step60_79(63, cc, dd, ee, aa, bb, w);
+ step60_79(64, bb, cc, dd, ee, aa, w);
+ step60_79(65, aa, bb, cc, dd, ee, w);
+ step60_79(66, ee, aa, bb, cc, dd, w);
+ step60_79(67, dd, ee, aa, bb, cc, w);
+ step60_79(68, cc, dd, ee, aa, bb, w);
+ step60_79(69, bb, cc, dd, ee, aa, w);
+ step60_79(70, aa, bb, cc, dd, ee, w);
+ step60_79(71, ee, aa, bb, cc, dd, w);
+ step60_79(72, dd, ee, aa, bb, cc, w);
+ step60_79(73, cc, dd, ee, aa, bb, w);
+ step60_79(74, bb, cc, dd, ee, aa, w);
+ step60_79(75, aa, bb, cc, dd, ee, w);
+ step60_79(76, ee, aa, bb, cc, dd, w);
+ step60_79(77, dd, ee, aa, bb, cc, w);
+ step60_79(78, cc, dd, ee, aa, bb, w);
+ step60_79(79, bb, cc, dd, ee, aa, w);
+
+ add_abcde(aa, 0, digests);
+ add_abcde(bb, 1, digests);
+ add_abcde(cc, 2, digests);
+ add_abcde(dd, 3, digests);
+ add_abcde(ee, 4, digests);
+}
+
+void mh_sha1_block_base(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks)
+{
+ uint32_t i;
+
+ for (i = 0; i < num_blocks; i++) {
+ mh_sha1_single(input_data, digests, frame_buffer);
+ input_data += MH_SHA1_BLOCK_SIZE;
+ }
+
+ return;
+}
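
As store_w() above indicates, the 1 KB input block interleaves the 16 segments word by word: 32-bit word i of segment s sits at index i*HASH_SEGS + s. A short C sketch (illustrative; HASH_SEGS assumed to be 16 as in mh_sha1_internal.h) that de-interleaves one such block into contiguous 64-byte sub-blocks, one per segment:

#include <stdint.h>

#define HASH_SEGS     16   /* assumed, matching mh_sha1_internal.h */
#define WORDS_PER_SEG 16   /* one 64-byte SHA-1 sub-block per segment */

/* De-interleave one 1 KB mh_sha1 block into 16 contiguous segments. */
static void split_segments(const uint32_t *block,
			   uint32_t segs[HASH_SEGS][WORDS_PER_SEG])
{
	for (int i = 0; i < WORDS_PER_SEG; i++)
		for (int s = 0; s < HASH_SEGS; s++)
			segs[s][i] = block[i * HASH_SEGS + s];
}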
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm
new file mode 100644
index 000000000..3d75d1649
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm
@@ -0,0 +1,498 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using SSE
+;;
+
+%include "reg_sizes.asm"
+
+[bits 64]
+default rel
+section .text
+
+;; Magic functions defined in FIPS 180-1
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regC
+ pxor %%regF,%%regD
+ pand %%regF,%%regB
+ pxor %%regF,%%regD
+%endmacro
+
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regD
+ pxor %%regF,%%regC
+ pxor %%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regB
+ movdqa %%regT,%%regB
+ por %%regF,%%regC
+ pand %%regT,%%regC
+ pand %%regF,%%regD
+ por %%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ movdqa %%tmp, %%reg
+ pslld %%reg, %%imm
+ psrld %%tmp, (32-%%imm)
+ por %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ paddd %%regE,[%%data + (%%memW * 16)]
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ movdqa %%regF, W16
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ end_prolog
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp2
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define MOVPS movups
+
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
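+;
+; Each row above is 16 dwords (64 bytes): one 32-bit digest word per segment.
+; The SSE code works on 4 segments per pass, so .segs_loop below steps mh_segs
+; through offsets 0, 16, 32 and 48 within every row.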
+
+align 32
+
+;void mh_sha1_block_sse(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including the segment digests (uint32_t digests[5][16])
+; arg 2 pointer to aligned_frame_buffer, which is used to save the big-endian data.
+; arg 3 number of 1KB blocks
+;
+mk_global mh_sha1_block_sse, function, internal
+func(mh_sha1_block_sse)
+ endbranch
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to the 16-byte boundary required by SSE
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ MOVPS A, [mh_digests_p + I*64 + 16*0]
+ MOVPS B, [mh_digests_p + I*64 + 16*1]
+ MOVPS C, [mh_digests_p + I*64 + 16*2]
+ MOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ movdqa [rsp + I*64 + 16*0], A
+ movdqa [rsp + I*64 + 16*1], B
+ movdqa [rsp + I*64 + 16*2], C
+ movdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ movdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ MOVPS T0,[mh_in_p + I*64+0*16]
+ MOVPS T1,[mh_in_p + I*64+1*16]
+ MOVPS T2,[mh_in_p + I*64+2*16]
+ MOVPS T3,[mh_in_p + I*64+3*16]
+
+ pshufb T0, F
+ movdqa [mh_data_p +(I)*16 +0*256],T0
+ pshufb T1, F
+ movdqa [mh_data_p +(I)*16 +1*256],T1
+ pshufb T2, F
+ movdqa [mh_data_p +(I)*16 +2*256],T2
+ pshufb T3, F
+ movdqa [mh_data_p +(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+ mov pref, 1024 ;avoid prefetching the same data repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ movdqa A, [rsp + 0*64 + mh_segs]
+ movdqa B, [rsp + 1*64 + mh_segs]
+ movdqa C, [rsp + 2*64 + mh_segs]
+ movdqa D, [rsp + 3*64 + mh_segs]
+ movdqa E, [rsp + 4*64 + mh_segs]
+
+ movdqa AA, A
+ movdqa BB, B
+ movdqa CC, C
+ movdqa DD, D
+ movdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ movdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ movdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ movdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+;; do rounds 20...39
+ movdqa K, [K20_39]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ movdqa K, [K40_59]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 60...79
+ movdqa K, [K60_79]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ paddd A, AA
+ paddd B, BB
+ paddd C, CC
+ paddd D, DD
+ paddd E, EE
+
+ ; write out digests
+ movdqa [rsp + 0*64 + mh_segs], A
+ movdqa [rsp + 1*64 + mh_segs], B
+ movdqa [rsp + 2*64 + mh_segs], C
+ movdqa [rsp + 3*64 + mh_segs], D
+ movdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 256
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ movdqa A, [rsp + I*64 + 16*0]
+ movdqa B, [rsp + I*64 + 16*1]
+ movdqa C, [rsp + I*64 + 16*2]
+ movdqa D, [rsp + I*64 + 16*3]
+
+ MOVPS [mh_digests_p + I*64 + 16*0], A
+ MOVPS [mh_digests_p + I*64 + 16*1], B
+ MOVPS [mh_digests_p + I*64 + 16*2], C
+ MOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
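+
+; Each dq pair above replicates a 32-bit SHA-1 round constant into all four
+; dword lanes of an xmm register; PSHUFFLE_BYTE_FLIP_MASK reverses the byte
+; order within each dword so little-endian input is hashed as big-endian words.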
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c
new file mode 100644
index 000000000..3058aaa87
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c
@@ -0,0 +1,122 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/*
+ * mh_sha1_finalize_base.c contains the prototypes of mh_sha1_finalize_XXX
+ * and mh_sha1_tail_XXX. The default definitions are the base type, which
+ * generates mh_sha1_finalize_base and mh_sha1_tail_base. Other types are
+ * generated by mh_sha1.c through different predefined macros.
+ * mh_sha1_tail processes the last, incomplete block of input data.
+ * mh_sha1_finalize is the mh_sha1_ctx wrapper of mh_sha1_tail.
+ */
+#ifndef MH_SHA1_FINALIZE_FUNCTION
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_base
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_base
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_base
+#define MH_SHA1_FINALIZE_SLVER
+#endif
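+
+/* For example, mh_sha1.c can presumably generate the SSE variant by
+ * predefining these macros before including this source, along the lines of:
+ *
+ *   #define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_sse
+ *   #define MH_SHA1_TAIL_FUNCTION     mh_sha1_tail_sse
+ *   #define MH_SHA1_BLOCK_FUNCTION    mh_sha1_block_sse
+ *   #include "mh_sha1_finalize_base.c"
+ *
+ * (illustrative sketch only; the exact mechanism lives in mh_sha1.c)
+ */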
+
+void MH_SHA1_TAIL_FUNCTION(uint8_t * partial_buffer, uint32_t total_len,
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], uint8_t * frame_buffer,
+ uint32_t digests[SHA1_DIGEST_WORDS])
+{
+ uint64_t partial_buffer_len, len_in_bit;
+
+ partial_buffer_len = total_len % MH_SHA1_BLOCK_SIZE;
+
+ // Padding the first block
+ partial_buffer[partial_buffer_len] = 0x80;
+ partial_buffer_len++;
+ memset(partial_buffer + partial_buffer_len, 0,
+ MH_SHA1_BLOCK_SIZE - partial_buffer_len);
+
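+ // Two padding blocks are needed when the 0x80 byte pushes the partial
+ // length past MH_SHA1_BLOCK_SIZE - 8 (e.g. total_len % 1024 == 1020 gives
+ // a partial_buffer_len of 1021, so the 64-bit length field no longer fits).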
+ // Process the first block without the total length if the padding needs two blocks
+ if (partial_buffer_len > (MH_SHA1_BLOCK_SIZE - 8)) {
+ MH_SHA1_BLOCK_FUNCTION(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1);
+ //Padding the second block
+ memset(partial_buffer, 0, MH_SHA1_BLOCK_SIZE);
+ }
+ // Append the total length in bits and process the final padded block
+ len_in_bit = to_be64((uint64_t) total_len * 8);
+ *(uint64_t *) (partial_buffer + MH_SHA1_BLOCK_SIZE - 8) = len_in_bit;
+ MH_SHA1_BLOCK_FUNCTION(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1);
+
+ //Calculate multi-hash SHA1 digests (segment digests as input message)
+ sha1_for_mh_sha1((uint8_t *) mh_sha1_segs_digests, digests,
+ 4 * SHA1_DIGEST_WORDS * HASH_SEGS);
+
+ return;
+}
+
+int MH_SHA1_FINALIZE_FUNCTION(struct mh_sha1_ctx *ctx, void *mh_sha1_digest)
+{
+ uint8_t *partial_block_buffer;
+ uint64_t total_len;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint8_t *aligned_frame_buffer;
+
+ if (ctx == NULL)
+ return MH_SHA1_CTX_ERROR_NULL;
+
+ total_len = ctx->total_length;
+ partial_block_buffer = ctx->partial_block_buffer;
+
+ /* mh_sha1 tail */
+ aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+
+ MH_SHA1_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha1_segs_digests,
+ aligned_frame_buffer, ctx->mh_sha1_digest);
+
+ /* Output the digests of mh_sha1 */
+ if (mh_sha1_digest != NULL) {
+ ((uint32_t *) mh_sha1_digest)[0] = ctx->mh_sha1_digest[0];
+ ((uint32_t *) mh_sha1_digest)[1] = ctx->mh_sha1_digest[1];
+ ((uint32_t *) mh_sha1_digest)[2] = ctx->mh_sha1_digest[2];
+ ((uint32_t *) mh_sha1_digest)[3] = ctx->mh_sha1_digest[3];
+ ((uint32_t *) mh_sha1_digest)[4] = ctx->mh_sha1_digest[4];
+ }
+
+ return MH_SHA1_CTX_ERROR_NONE;
+}
+
+#ifdef MH_SHA1_FINALIZE_SLVER
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// Version info
+struct slver mh_sha1_finalize_base_slver_0000027b;
+struct slver mh_sha1_finalize_base_slver = { 0x027b, 0x00, 0x00 };
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h
new file mode 100644
index 000000000..81823048e
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h
@@ -0,0 +1,308 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MH_SHA1_INTERNAL_H_
+#define _MH_SHA1_INTERNAL_H_
+
+/**
+ * @file mh_sha1_internal.h
+ * @brief mh_sha1 internal function prototypes and macros
+ *
+ * Interface for mh_sha1 internal functions
+ *
+ */
+#include <stdint.h>
+#include "mh_sha1.h"
+#include "endian_helper.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+#ifdef _MSC_VER
+# define inline __inline
+#endif
+
+ // 64-byte pointer alignment
+#define ALIGN_64(pointer) ( ((uint64_t)(pointer) + 0x3F)&(~0x3F) )
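+ // e.g. ALIGN_64(0x1001) == 0x1040: the value is rounded up to the next
+ // 64-byte boundary, and already-aligned values are left unchanged.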
+
+ /*******************************************************************
+ *mh_sha1 constants and macros
+ ******************************************************************/
+ /* mh_sha1 constants */
+#define MH_SHA1_H0 0x67452301UL
+#define MH_SHA1_H1 0xefcdab89UL
+#define MH_SHA1_H2 0x98badcfeUL
+#define MH_SHA1_H3 0x10325476UL
+#define MH_SHA1_H4 0xc3d2e1f0UL
+
+#define K_00_19 0x5a827999UL
+#define K_20_39 0x6ed9eba1UL
+#define K_40_59 0x8f1bbcdcUL
+#define K_60_79 0xca62c1d6UL
+
+ /* mh_sha1 macros */
+#define F1(b,c,d) (d ^ (b & (c ^ d)))
+#define F2(b,c,d) (b ^ c ^ d)
+#define F3(b,c,d) ((b & c) | (d & (b | c)))
+#define F4(b,c,d) (b ^ c ^ d)
+
+#define rol32(x, r) (((x)<<(r)) ^ ((x)>>(32-(r))))
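+
+ // F1 is the SHA-1 "choose" function, F3 is "majority", and F2/F4 are parity;
+ // rol32 is a 32-bit left-rotate, valid for 0 < r < 32.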
+
+ /*******************************************************************
+ * SHA1 API internal function prototypes
+ ******************************************************************/
+
+ /**
+ * @brief Performs the complete SHA1 algorithm.
+ *
+ * @param input Pointer to buffer containing the input message.
+ * @param digest Pointer to digest to update.
+ * @param len Length of buffer.
+ * @returns None
+ */
+ void sha1_for_mh_sha1(const uint8_t * input_data, uint32_t * digest, const uint32_t len);
+
+ /*******************************************************************
+ * mh_sha1 API internal function prototypes
+ * Multiple versions of Update and Finalize functions are supplied which use
+ * multiple versions of block and tail process subfunctions.
+ ******************************************************************/
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+ * Process the remainder of the input data, which is less than MH_SHA1_BLOCK_SIZE.
+ * It will output the final SHA1 digest based on mh_sha1_segs_digests.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param partial_buffer Pointer to the start addr of remainder
+ * @param total_len The total length of all sections of input data.
+ * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @returns none
+ *
+ */
+ void mh_sha1_tail(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+ * Process the remainder of the input data, which is less than MH_SHA1_BLOCK_SIZE.
+ * It will output the final SHA1 digest based on mh_sha1_segs_digests.
+ *
+ * @param partial_buffer Pointer to the start addr of remainder
+ * @param total_len The total length of all sections of input data.
+ * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_base(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+ * Process the remainder of the input data, which is less than MH_SHA1_BLOCK_SIZE.
+ * It will output the final SHA1 digest based on mh_sha1_segs_digests.
+ *
+ * @requires SSE
+ *
+ * @param partial_buffer Pointer to the start addr of remainder
+ * @param total_len The total length of all sections of input data.
+ * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_sse(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+ * Process the remainder of the input data, which is less than MH_SHA1_BLOCK_SIZE.
+ * It will output the final SHA1 digest based on mh_sha1_segs_digests.
+ *
+ * @requires AVX
+ *
+ * @param partial_buffer Pointer to the start addr of remainder
+ * @param total_len The total length of all sections of input data.
+ * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_avx(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+ * Process the remainder of the input data, which is less than MH_SHA1_BLOCK_SIZE.
+ * It will output the final SHA1 digest based on mh_sha1_segs_digests.
+ *
+ * @requires AVX2
+ *
+ * @param partial_buffer Pointer to the start addr of remainder
+ * @param total_len The total length of all sections of input data.
+ * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_avx2(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Tail process for multi-hash sha1.
+ *
+ * Process the remainder of the input data, which is less than MH_SHA1_BLOCK_SIZE.
+ * It will output the final SHA1 digest based on mh_sha1_segs_digests.
+ *
+ * @requires AVX512
+ *
+ * @param partial_buffer Pointer to the start addr of remainder
+ * @param total_len The total length of all sections of input data.
+ * @param mh_sha1_segs_digests The digests of all 16 segments.
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param mh_sha1_digest mh_sha1 digest
+ * @returns none
+ *
+ */
+ void mh_sha1_tail_avx512(uint8_t *partial_buffer, uint32_t total_len,
+ uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+ /**
+ * @brief Calculate the mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+ * @brief Calculate the mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_base(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+ * @brief Calculate the mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires SSE
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_sse(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+ * @brief Calculate the mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_avx(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+ * @brief Calculate the mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX2
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_avx2(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+ /**
+ * @brief Calculate the mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX512
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_block_avx512(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm
new file mode 100644
index 000000000..590aa6c5f
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm
@@ -0,0 +1,77 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf32
+ [bits 32]
+%else
+ default rel
+ [bits 64]
+
+ extern mh_sha1_update_sse
+ extern mh_sha1_update_avx
+ extern mh_sha1_update_avx2
+ extern mh_sha1_finalize_sse
+ extern mh_sha1_finalize_avx
+ extern mh_sha1_finalize_avx2
+
+ %ifdef HAVE_AS_KNOWS_AVX512
+ extern mh_sha1_update_avx512
+ extern mh_sha1_finalize_avx512
+ %endif
+
+%endif
+
+extern mh_sha1_update_base
+extern mh_sha1_finalize_base
+
+mbin_interface mh_sha1_update
+mbin_interface mh_sha1_finalize
+
+%ifidn __OUTPUT_FORMAT__, elf64
+
+ %ifdef HAVE_AS_KNOWS_AVX512
+ mbin_dispatch_init6 mh_sha1_update, mh_sha1_update_base, mh_sha1_update_sse, mh_sha1_update_avx, mh_sha1_update_avx2, mh_sha1_update_avx512
+ mbin_dispatch_init6 mh_sha1_finalize, mh_sha1_finalize_base, mh_sha1_finalize_sse, mh_sha1_finalize_avx, mh_sha1_finalize_avx2, mh_sha1_finalize_avx512
+ %else
+ mbin_dispatch_init5 mh_sha1_update, mh_sha1_update_base, mh_sha1_update_sse, mh_sha1_update_avx, mh_sha1_update_avx2
+ mbin_dispatch_init5 mh_sha1_finalize, mh_sha1_finalize_base, mh_sha1_finalize_sse, mh_sha1_finalize_avx, mh_sha1_finalize_avx2
+ %endif
+
+%else
+ mbin_dispatch_init2 mh_sha1_update, mh_sha1_update_base
+ mbin_dispatch_init2 mh_sha1_finalize, mh_sha1_finalize_base
+%endif
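+
+; The mbin_* macros above presumably install stub entry points that probe CPU
+; features on the first call and re-point mh_sha1_update/mh_sha1_finalize at
+; the best supported implementation (base, SSE, AVX, AVX2, or AVX512 when the
+; assembler knows it); other output formats dispatch only to the base C code.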
+
+;;; func core, ver, snum
+slversion mh_sha1_update, 00, 02, 0272
+slversion mh_sha1_finalize, 00, 02, 0273
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c
new file mode 100644
index 000000000..4fd6c09a1
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c
@@ -0,0 +1,180 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Loop many times over the same buffer
+# define TEST_LEN 16*1024
+# define TEST_LOOPS 20000
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define TEST_LEN 32*1024*1024
+# define TEST_LOOPS 100
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+#define TEST_MEM TEST_LEN
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_CTX_ERROR_NONE){ \
+ printf("The mh_sha1 function failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+ printf("test: ");
+ dump((char *)hash_test, 20);
+ }
+
+ return mh_sha1_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, fail = 0;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ struct mh_sha1_ctx *update_ctx_test = NULL, *update_ctx_base = NULL;
+ struct perf start, stop;
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n");
+
+ buff = malloc(TEST_LEN);
+ update_ctx_test = malloc(sizeof(*update_ctx_test));
+ update_ctx_base = malloc(sizeof(*update_ctx_base));
+
+ if (buff == NULL || update_ctx_base == NULL || update_ctx_test == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ // mh_sha1 base version
+ mh_sha1_init(update_ctx_base);
+ mh_sha1_update_base(update_ctx_base, buff, TEST_LEN);
+ mh_sha1_finalize_base(update_ctx_base, hash_base);
+
+ perf_start(&start);
+ for (i = 0; i < TEST_LOOPS / 10; i++) {
+ mh_sha1_init(update_ctx_base);
+ mh_sha1_update_base(update_ctx_base, buff, TEST_LEN);
+ mh_sha1_finalize_base(update_ctx_base, hash_base);
+ }
+ perf_stop(&stop);
+ printf("mh_sha1_update_base" TEST_TYPE_STR ": ");
+ perf_print(stop, start, (long long)TEST_MEM * i);
+
+ //Update feature test
+ CHECK_RETURN(mh_sha1_init(update_ctx_test));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test));
+
+ perf_start(&start);
+ for (i = 0; i < TEST_LOOPS; i++) {
+ CHECK_RETURN(mh_sha1_init(update_ctx_test));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test));
+ }
+ perf_stop(&stop);
+ printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": ");
+ perf_print(stop, start, (long long)TEST_MEM * i);
+
+ // Check results
+ fail = compare_digests(hash_base, hash_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", TEST_LEN);
+ return -1;
+ }
+
+ if (fail)
+ printf("Test failed function test%d\n", fail);
+ else
+ printf("Pass func check\n");
+
+ return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c
new file mode 100644
index 000000000..71caba50e
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c
@@ -0,0 +1,430 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_internal.h"
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+ // The macros and sub-functions below already exist in the source file
+ // sha1_for_mh_sha1.c, which is part of the ISA-L library as internal
+ // functions. They are written twice because of a linking issue:
+ // mh_sha1_ref() needs these macros and sub-functions without linking
+ // against the ISA-L library, so mh_sha1_ref() includes them in order to
+ // keep the essential sub-functions in its own object file.
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+
+#if (__GNUC__ >= 11)
+# define OPT_FIX __attribute__ ((noipa))
+#else
+# define OPT_FIX
+#endif
+
+#define W(x) w[(x) & 15]
+
+#define step00_19(i,a,b,c,d,e) \
+ if (i>15) W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ else W(i) = to_be32(ww[i]); \
+ e += rol32(a,5) + F1(b,c,d) + 0x5A827999 + W(i); \
+ b = rol32(b,30)
+
+#define step20_39(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F2(b,c,d) + 0x6ED9EBA1 + W(i); \
+ b = rol32(b,30)
+
+#define step40_59(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F3(b,c,d) + 0x8F1BBCDC + W(i); \
+ b = rol32(b,30)
+
+#define step60_79(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F4(b,c,d) + 0xCA62C1D6 + W(i); \
+ b = rol32(b,30)
+
+static void OPT_FIX sha1_single_for_mh_sha1_ref(const uint8_t * data, uint32_t digest[])
+{
+ uint32_t a, b, c, d, e;
+ uint32_t w[16] = { 0 };
+ uint32_t *ww = (uint32_t *) data;
+
+ a = digest[0];
+ b = digest[1];
+ c = digest[2];
+ d = digest[3];
+ e = digest[4];
+
+ step00_19(0, a, b, c, d, e);
+ step00_19(1, e, a, b, c, d);
+ step00_19(2, d, e, a, b, c);
+ step00_19(3, c, d, e, a, b);
+ step00_19(4, b, c, d, e, a);
+ step00_19(5, a, b, c, d, e);
+ step00_19(6, e, a, b, c, d);
+ step00_19(7, d, e, a, b, c);
+ step00_19(8, c, d, e, a, b);
+ step00_19(9, b, c, d, e, a);
+ step00_19(10, a, b, c, d, e);
+ step00_19(11, e, a, b, c, d);
+ step00_19(12, d, e, a, b, c);
+ step00_19(13, c, d, e, a, b);
+ step00_19(14, b, c, d, e, a);
+ step00_19(15, a, b, c, d, e);
+ step00_19(16, e, a, b, c, d);
+ step00_19(17, d, e, a, b, c);
+ step00_19(18, c, d, e, a, b);
+ step00_19(19, b, c, d, e, a);
+
+ step20_39(20, a, b, c, d, e);
+ step20_39(21, e, a, b, c, d);
+ step20_39(22, d, e, a, b, c);
+ step20_39(23, c, d, e, a, b);
+ step20_39(24, b, c, d, e, a);
+ step20_39(25, a, b, c, d, e);
+ step20_39(26, e, a, b, c, d);
+ step20_39(27, d, e, a, b, c);
+ step20_39(28, c, d, e, a, b);
+ step20_39(29, b, c, d, e, a);
+ step20_39(30, a, b, c, d, e);
+ step20_39(31, e, a, b, c, d);
+ step20_39(32, d, e, a, b, c);
+ step20_39(33, c, d, e, a, b);
+ step20_39(34, b, c, d, e, a);
+ step20_39(35, a, b, c, d, e);
+ step20_39(36, e, a, b, c, d);
+ step20_39(37, d, e, a, b, c);
+ step20_39(38, c, d, e, a, b);
+ step20_39(39, b, c, d, e, a);
+
+ step40_59(40, a, b, c, d, e);
+ step40_59(41, e, a, b, c, d);
+ step40_59(42, d, e, a, b, c);
+ step40_59(43, c, d, e, a, b);
+ step40_59(44, b, c, d, e, a);
+ step40_59(45, a, b, c, d, e);
+ step40_59(46, e, a, b, c, d);
+ step40_59(47, d, e, a, b, c);
+ step40_59(48, c, d, e, a, b);
+ step40_59(49, b, c, d, e, a);
+ step40_59(50, a, b, c, d, e);
+ step40_59(51, e, a, b, c, d);
+ step40_59(52, d, e, a, b, c);
+ step40_59(53, c, d, e, a, b);
+ step40_59(54, b, c, d, e, a);
+ step40_59(55, a, b, c, d, e);
+ step40_59(56, e, a, b, c, d);
+ step40_59(57, d, e, a, b, c);
+ step40_59(58, c, d, e, a, b);
+ step40_59(59, b, c, d, e, a);
+
+ step60_79(60, a, b, c, d, e);
+ step60_79(61, e, a, b, c, d);
+ step60_79(62, d, e, a, b, c);
+ step60_79(63, c, d, e, a, b);
+ step60_79(64, b, c, d, e, a);
+ step60_79(65, a, b, c, d, e);
+ step60_79(66, e, a, b, c, d);
+ step60_79(67, d, e, a, b, c);
+ step60_79(68, c, d, e, a, b);
+ step60_79(69, b, c, d, e, a);
+ step60_79(70, a, b, c, d, e);
+ step60_79(71, e, a, b, c, d);
+ step60_79(72, d, e, a, b, c);
+ step60_79(73, c, d, e, a, b);
+ step60_79(74, b, c, d, e, a);
+ step60_79(75, a, b, c, d, e);
+ step60_79(76, e, a, b, c, d);
+ step60_79(77, d, e, a, b, c);
+ step60_79(78, c, d, e, a, b);
+ step60_79(79, b, c, d, e, a);
+
+ digest[0] += a;
+ digest[1] += b;
+ digest[2] += c;
+ digest[3] += d;
+ digest[4] += e;
+}
+
+void sha1_for_mh_sha1_ref(const uint8_t * input_data, uint32_t * digest, const uint32_t len)
+{
+ uint32_t i, j;
+ uint8_t buf[2 * SHA1_BLOCK_SIZE];
+
+ digest[0] = MH_SHA1_H0;
+ digest[1] = MH_SHA1_H1;
+ digest[2] = MH_SHA1_H2;
+ digest[3] = MH_SHA1_H3;
+ digest[4] = MH_SHA1_H4;
+
+ i = len;
+ while (i >= SHA1_BLOCK_SIZE) {
+ sha1_single_for_mh_sha1_ref(input_data, digest);
+ input_data += SHA1_BLOCK_SIZE;
+ i -= SHA1_BLOCK_SIZE;
+ }
+
+ memcpy(buf, input_data, i);
+ buf[i++] = 0x80;
+ for (j = i; j < ((2 * SHA1_BLOCK_SIZE) - 8); j++)
+ buf[j] = 0;
+
+ if (i > SHA1_BLOCK_SIZE - 8)
+ i = 2 * SHA1_BLOCK_SIZE;
+ else
+ i = SHA1_BLOCK_SIZE;
+
+ *(uint64_t *) (buf + i - 8) = to_be64((uint64_t) len * 8);
+
+ sha1_single_for_mh_sha1_ref(buf, digest);
+ if (i == (2 * SHA1_BLOCK_SIZE))
+ sha1_single_for_mh_sha1_ref(buf + SHA1_BLOCK_SIZE, digest);
+}
+
+/*
+ * Buffer used to rearrange one segment's data from one block.
+ *
+ * Layout of new_data:
+ * segment
+ * -------------------------
+ * w0 | w1 | ... | w15
+ *
+ */
+static inline void transform_input_single(uint32_t * new_data, uint32_t * input,
+ uint32_t segment)
+{
+ new_data[16 * segment + 0] = input[16 * 0 + segment];
+ new_data[16 * segment + 1] = input[16 * 1 + segment];
+ new_data[16 * segment + 2] = input[16 * 2 + segment];
+ new_data[16 * segment + 3] = input[16 * 3 + segment];
+ new_data[16 * segment + 4] = input[16 * 4 + segment];
+ new_data[16 * segment + 5] = input[16 * 5 + segment];
+ new_data[16 * segment + 6] = input[16 * 6 + segment];
+ new_data[16 * segment + 7] = input[16 * 7 + segment];
+ new_data[16 * segment + 8] = input[16 * 8 + segment];
+ new_data[16 * segment + 9] = input[16 * 9 + segment];
+ new_data[16 * segment + 10] = input[16 * 10 + segment];
+ new_data[16 * segment + 11] = input[16 * 11 + segment];
+ new_data[16 * segment + 12] = input[16 * 12 + segment];
+ new_data[16 * segment + 13] = input[16 * 13 + segment];
+ new_data[16 * segment + 14] = input[16 * 14 + segment];
+ new_data[16 * segment + 15] = input[16 * 15 + segment];
+}
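+
+/* Within one 1KB mh_sha1 block the 16 segments are interleaved dword by dword
+ * (dword index 16*w + segment holds word w of that segment), so the copies
+ * above gather one segment's sixteen words into a contiguous run that the
+ * single-segment SHA-1 routine can consume. */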
+
+// Adapt parameters to sha1_single_for_mh_sha1_ref
+#define sha1_update_one_seg(data, digest) \
+ sha1_single_for_mh_sha1_ref((const uint8_t *)(data), (uint32_t *)(digest))
+
+/*
+ * Buffer used to rearrange all segments' data from one block.
+ *
+ * Layout of new_data:
+ * segment
+ * -------------------------
+ * seg0: | w0 | w1 | ... | w15
+ * seg1: | w0 | w1 | ... | w15
+ * seg2: | w0 | w1 | ... | w15
+ * ....
+ * seg15: | w0 | w1 | ... | w15
+ *
+ */
+static inline void transform_input(uint32_t * new_data, uint32_t * input, uint32_t block)
+{
+ uint32_t *current_input = input + block * MH_SHA1_BLOCK_SIZE / 4;
+
+ transform_input_single(new_data, current_input, 0);
+ transform_input_single(new_data, current_input, 1);
+ transform_input_single(new_data, current_input, 2);
+ transform_input_single(new_data, current_input, 3);
+ transform_input_single(new_data, current_input, 4);
+ transform_input_single(new_data, current_input, 5);
+ transform_input_single(new_data, current_input, 6);
+ transform_input_single(new_data, current_input, 7);
+ transform_input_single(new_data, current_input, 8);
+ transform_input_single(new_data, current_input, 9);
+ transform_input_single(new_data, current_input, 10);
+ transform_input_single(new_data, current_input, 11);
+ transform_input_single(new_data, current_input, 12);
+ transform_input_single(new_data, current_input, 13);
+ transform_input_single(new_data, current_input, 14);
+ transform_input_single(new_data, current_input, 15);
+
+}
+
+/*
+ * Buffer used to calculate all segments' digests from one block.
+ *
+ * Layout of seg_digest:
+ * segment
+ * -------------------------
+ * seg0: | H0 | H1 | ... | H4
+ * seg1: | H0 | H1 | ... | H4
+ * seg2: | H0 | H1 | ... | H4
+ * ....
+ * seg15: | H0 | H1 | ... | H4
+ *
+ */
+static inline void sha1_update_all_segs(uint32_t * new_data,
+ uint32_t(*mh_sha1_seg_digests)[SHA1_DIGEST_WORDS])
+{
+ sha1_update_one_seg(&(new_data)[16 * 0], mh_sha1_seg_digests[0]);
+ sha1_update_one_seg(&(new_data)[16 * 1], mh_sha1_seg_digests[1]);
+ sha1_update_one_seg(&(new_data)[16 * 2], mh_sha1_seg_digests[2]);
+ sha1_update_one_seg(&(new_data)[16 * 3], mh_sha1_seg_digests[3]);
+ sha1_update_one_seg(&(new_data)[16 * 4], mh_sha1_seg_digests[4]);
+ sha1_update_one_seg(&(new_data)[16 * 5], mh_sha1_seg_digests[5]);
+ sha1_update_one_seg(&(new_data)[16 * 6], mh_sha1_seg_digests[6]);
+ sha1_update_one_seg(&(new_data)[16 * 7], mh_sha1_seg_digests[7]);
+ sha1_update_one_seg(&(new_data)[16 * 8], mh_sha1_seg_digests[8]);
+ sha1_update_one_seg(&(new_data)[16 * 9], mh_sha1_seg_digests[9]);
+ sha1_update_one_seg(&(new_data)[16 * 10], mh_sha1_seg_digests[10]);
+ sha1_update_one_seg(&(new_data)[16 * 11], mh_sha1_seg_digests[11]);
+ sha1_update_one_seg(&(new_data)[16 * 12], mh_sha1_seg_digests[12]);
+ sha1_update_one_seg(&(new_data)[16 * 13], mh_sha1_seg_digests[13]);
+ sha1_update_one_seg(&(new_data)[16 * 14], mh_sha1_seg_digests[14]);
+ sha1_update_one_seg(&(new_data)[16 * 15], mh_sha1_seg_digests[15]);
+}
+
+void mh_sha1_block_ref(const uint8_t * input_data, uint32_t(*digests)[HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks)
+{
+ uint32_t i, j;
+ uint32_t *temp_buffer = (uint32_t *) frame_buffer;
+ uint32_t(*trans_digests)[SHA1_DIGEST_WORDS];
+
+ trans_digests = (uint32_t(*)[SHA1_DIGEST_WORDS]) digests;
+
+ // Re-structure seg_digests from 5*16 to 16*5
+ for (j = 0; j < HASH_SEGS; j++) {
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ temp_buffer[j * SHA1_DIGEST_WORDS + i] = digests[i][j];
+ }
+ }
+ memcpy(trans_digests, temp_buffer, 4 * SHA1_DIGEST_WORDS * HASH_SEGS);
+
+ // Calculate digests for all segments, leveraging sha1 API
+ for (i = 0; i < num_blocks; i++) {
+ transform_input(temp_buffer, (uint32_t *) input_data, i);
+ sha1_update_all_segs(temp_buffer, trans_digests);
+ }
+
+ // Re-structure seg_digests from 16*5 to 5*16
+ for (j = 0; j < HASH_SEGS; j++) {
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ temp_buffer[i * HASH_SEGS + j] = trans_digests[j][i];
+ }
+ }
+ memcpy(digests, temp_buffer, 4 * SHA1_DIGEST_WORDS * HASH_SEGS);
+
+ return;
+}
+
+void mh_sha1_tail_ref(uint8_t * partial_buffer, uint32_t total_len,
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], uint8_t * frame_buffer,
+ uint32_t digests[SHA1_DIGEST_WORDS])
+{
+ uint64_t partial_buffer_len, len_in_bit;
+
+ partial_buffer_len = total_len % MH_SHA1_BLOCK_SIZE;
+
+ // Padding the first block
+ partial_buffer[partial_buffer_len] = 0x80;
+ partial_buffer_len++;
+ memset(partial_buffer + partial_buffer_len, 0,
+ MH_SHA1_BLOCK_SIZE - partial_buffer_len);
+
+ // Process the first block without the total length if the padding needs two blocks
+ if (partial_buffer_len > (MH_SHA1_BLOCK_SIZE - 8)) {
+ mh_sha1_block_ref(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1);
+ //Padding the second block
+ memset(partial_buffer, 0, MH_SHA1_BLOCK_SIZE);
+ }
+ // Append the total length in bits and process the final padded block
+ len_in_bit = to_be64((uint64_t) total_len * 8);
+ *(uint64_t *) (partial_buffer + MH_SHA1_BLOCK_SIZE - 8) = len_in_bit;
+ mh_sha1_block_ref(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1);
+
+ //Calculate multi-hash SHA1 digests (segment digests as input message)
+ sha1_for_mh_sha1_ref((uint8_t *) mh_sha1_segs_digests, digests,
+ 4 * SHA1_DIGEST_WORDS * HASH_SEGS);
+
+ return;
+}
+
+void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest)
+{
+ uint64_t total_len;
+ uint64_t num_blocks;
+ uint32_t mh_sha1_segs_digests[SHA1_DIGEST_WORDS][HASH_SEGS];
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE];
+ uint8_t partial_block_buffer[MH_SHA1_BLOCK_SIZE * 2];
+ uint32_t mh_sha1_hash_dword[SHA1_DIGEST_WORDS];
+ uint32_t i;
+ const uint8_t *input_data = (const uint8_t *)buffer;
+
+ /* Initialize digests of all segments */
+ for (i = 0; i < HASH_SEGS; i++) {
+ mh_sha1_segs_digests[0][i] = MH_SHA1_H0;
+ mh_sha1_segs_digests[1][i] = MH_SHA1_H1;
+ mh_sha1_segs_digests[2][i] = MH_SHA1_H2;
+ mh_sha1_segs_digests[3][i] = MH_SHA1_H3;
+ mh_sha1_segs_digests[4][i] = MH_SHA1_H4;
+ }
+
+ total_len = len;
+
+ // Calculate blocks
+ num_blocks = len / MH_SHA1_BLOCK_SIZE;
+ if (num_blocks > 0) {
+ // process num_blocks full blocks
+ mh_sha1_block_ref(input_data, mh_sha1_segs_digests, frame_buffer, num_blocks);
+ len -= num_blocks * MH_SHA1_BLOCK_SIZE;
+ input_data += num_blocks * MH_SHA1_BLOCK_SIZE;
+ }
+ // Store the partial block
+ if (len != 0) {
+ memcpy(partial_block_buffer, input_data, len);
+ }
+
+ /* Finalize */
+ mh_sha1_tail_ref(partial_block_buffer, total_len, mh_sha1_segs_digests,
+ frame_buffer, mh_sha1_hash_dword);
+
+ // Output the digests of mh_sha1
+ if (mh_sha1_digest != NULL) {
+ mh_sha1_digest[0] = mh_sha1_hash_dword[0];
+ mh_sha1_digest[1] = mh_sha1_hash_dword[1];
+ mh_sha1_digest[2] = mh_sha1_hash_dword[2];
+ mh_sha1_digest[3] = mh_sha1_hash_dword[3];
+ mh_sha1_digest[4] = mh_sha1_hash_dword[4];
+ }
+
+ return;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c
new file mode 100644
index 000000000..792c4452b
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c
@@ -0,0 +1,217 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_CTX_ERROR_NONE){ \
+ printf("The mh_sha1 function failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+#define MH_SHA1_REF mh_sha1_ref
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_ref[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_ref[i])
+ mh_sha1_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("ref: ");
+ dump((char *)hash_ref, 20);
+ printf("test: ");
+ dump((char *)hash_test, 20);
+ }
+
+ return mh_sha1_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_ref[SHA1_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ int size, offset;
+ struct mh_sha1_ctx *update_ctx = NULL;
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_test:\n");
+
+ srand(TEST_SEED);
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ MH_SHA1_REF(buff, TEST_LEN, hash_ref);
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("fail rand1 test\n");
+ return -1;
+ } else
+ putchar('.');
+
+ // Test various size messages
+ for (size = TEST_LEN; size >= 0; size--) {
+
+ // Fill with rand data
+ rand_buffer(buff, size);
+
+ MH_SHA1_REF(buff, size, hash_ref);
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", size);
+ return -1;
+ }
+
+ if ((size & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various buffer offsets and sizes
+ printf("offset tests");
+ for (size = TEST_LEN - 256; size > 256; size -= 11) {
+ for (offset = 0; offset < 256; offset++) {
+ MH_SHA1_REF(buff + offset, size, hash_ref);
+
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", size);
+ return -1;
+ }
+
+ }
+ if ((size & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Run efence tests
+ printf("efence tests");
+ for (size = TEST_SIZE; size > 0; size--) {
+ offset = TEST_LEN - size;
+
+ MH_SHA1_REF(buff + offset, size, hash_ref);
+
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", size);
+ return -1;
+ }
+
+ if ((size & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_test:");
+ printf(" %s\n", fail == 0 ? "Pass" : "Fail");
+
+ return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c
new file mode 100644
index 000000000..4af220299
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c
@@ -0,0 +1,110 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/*
+ * mh_sha1_update_base.c contains the prototype of mh_sha1_update_XXX.
+ * The default definitions are the base type, which generates mh_sha1_update_base.
+ * Other types are generated by mh_sha1.c through different predefined macros.
+ */
+#ifndef MH_SHA1_UPDATE_FUNCTION
+#include "mh_sha1_internal.h"
+#include <string.h>
+
+#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_base
+#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_base
+#define MH_SHA1_UPDATE_SLVER
+#endif
+
+int MH_SHA1_UPDATE_FUNCTION(struct mh_sha1_ctx *ctx, const void *buffer, uint32_t len)
+{
+
+ uint8_t *partial_block_buffer;
+ uint64_t partial_block_len;
+ uint64_t num_blocks;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint8_t *aligned_frame_buffer;
+ const uint8_t *input_data = (const uint8_t *)buffer;
+
+ if (ctx == NULL)
+ return MH_SHA1_CTX_ERROR_NULL;
+
+ if (len == 0)
+ return MH_SHA1_CTX_ERROR_NONE;
+
+ partial_block_len = ctx->total_length % MH_SHA1_BLOCK_SIZE;
+ partial_block_buffer = ctx->partial_block_buffer;
+ aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+
+ ctx->total_length += len;
+ // Not enough input data for a full mh_sha1 block
+ if (len + partial_block_len < MH_SHA1_BLOCK_SIZE) {
+ memcpy(partial_block_buffer + partial_block_len, input_data, len);
+ return MH_SHA1_CTX_ERROR_NONE;
+ }
+ // mh_sha1 calculation for the previous partial block
+ if (partial_block_len != 0) {
+ memcpy(partial_block_buffer + partial_block_len, input_data,
+ MH_SHA1_BLOCK_SIZE - partial_block_len);
+ // process the completed partial block
+ MH_SHA1_BLOCK_FUNCTION(partial_block_buffer, mh_sha1_segs_digests,
+ aligned_frame_buffer, 1);
+ input_data += MH_SHA1_BLOCK_SIZE - partial_block_len;
+ len -= MH_SHA1_BLOCK_SIZE - partial_block_len;
+ memset(partial_block_buffer, 0, MH_SHA1_BLOCK_SIZE);
+ }
+ // Calculate mh_sha1 for the current blocks
+ num_blocks = len / MH_SHA1_BLOCK_SIZE;
+ if (num_blocks > 0) {
+ // process num_blocks full blocks
+ MH_SHA1_BLOCK_FUNCTION(input_data, mh_sha1_segs_digests, aligned_frame_buffer,
+ num_blocks);
+ len -= num_blocks * MH_SHA1_BLOCK_SIZE;
+ input_data += num_blocks * MH_SHA1_BLOCK_SIZE;
+ }
+ // Store the partial block
+ if (len != 0) {
+ memcpy(partial_block_buffer, input_data, len);
+ }
+
+ return MH_SHA1_CTX_ERROR_NONE;
+
+}
+
+#ifdef MH_SHA1_UPDATE_SLVER
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+ // Version info
+struct slver mh_sha1_update_base_slver_0000027a;
+struct slver mh_sha1_update_base_slver = { 0x027a, 0x00, 0x00 };
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c
new file mode 100644
index 000000000..942dfd09f
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c
@@ -0,0 +1,240 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE)
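+
+/*
+ * For illustration: with the default (empty) MH_SHA1_FUNC_TYPE this test calls
+ * the dispatched interfaces, i.e. TEST_UPDATE_FUNCTION expands to
+ * mh_sha1_update and TEST_FINAL_FUNCTION to mh_sha1_finalize; building with
+ * e.g. -DMH_SHA1_FUNC_TYPE=_base would select mh_sha1_update_base and
+ * mh_sha1_finalize_base instead.
+ */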
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_CTX_ERROR_NONE){ \
+ printf("The mh_sha1 function is failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_ref[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_ref[i])
+ mh_sha1_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("ref: ");
+ dump((char *)hash_ref, 20);
+ printf("test: ");
+ dump((char *)hash_test, 20);
+ }
+
+ return mh_sha1_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0, i;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_ref[SHA1_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ int update_count;
+ int size1, size2, offset, addr_offset;
+ struct mh_sha1_ctx *update_ctx = NULL;
+ uint8_t *mem_addr = NULL;
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_test:");
+
+ srand(TEST_SEED);
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_ref(buff, TEST_LEN, hash_ref);
+
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("fail rand1 test\n");
+ return -1;
+ } else
+ putchar('.');
+
+ // Test messages of various sizes using two update calls.
+ printf("\n various size messages by update twice tests");
+ for (size1 = TEST_LEN; size1 >= 0; size1--) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_ref(buff, TEST_LEN, hash_ref);
+
+ // subsequent update
+ size2 = TEST_LEN - size1; // size2 differs from size1
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size1=%d\n", size1);
+ return -1;
+ }
+
+ if ((size2 & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various update counts
+ printf("\n various update count tests");
+ for (update_count = 1; update_count <= TEST_LEN; update_count++) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_ref(buff, TEST_LEN, hash_ref);
+
+ // subsequent update
+ size1 = TEST_LEN / update_count;
+ size2 = TEST_LEN - size1 * (update_count - 1); // size2 differs from size1
+
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ for (i = 1, offset = 0; i < update_count; i++) {
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1));
+ offset += size1;
+ }
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail size1=%d\n", size1);
+ return -1;
+ }
+
+ if ((size2 & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various start addresses of ctx.
+ printf("\n various start address of ctx test");
+ free(update_ctx);
+ mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10);
+ if (mem_addr == NULL) {
+ printf("malloc failed, test aborted\n");
+ return -1;
+ }
+ for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_ref(buff, TEST_LEN, hash_ref);
+
+ // an unaligned offset
+ update_ctx = (struct mh_sha1_ctx *)(mem_addr + addr_offset);
+ CHECK_RETURN(mh_sha1_init(update_ctx));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));
+
+ fail = compare_digests(hash_ref, hash_test);
+
+ if (fail) {
+ printf("Fail addr_offset=%d\n", addr_offset);
+ return -1;
+ }
+
+ if ((addr_offset & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail");
+
+ return fail;
+
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c
new file mode 100644
index 000000000..224977e6c
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c
@@ -0,0 +1,204 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "mh_sha1_internal.h"
+#include <string.h>
+
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+// Reference SHA1 Functions for mh_sha1
+////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////
+
+#if (__GNUC__ >= 11)
+# define OPT_FIX __attribute__ ((noipa))
+#else
+# define OPT_FIX
+#endif
+
+#define W(x) w[(x) & 15]
+
+#define step00_19(i,a,b,c,d,e) \
+ if (i>15) W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ else W(i) = to_be32(ww[i]); \
+ e += rol32(a,5) + F1(b,c,d) + 0x5A827999 + W(i); \
+ b = rol32(b,30)
+
+#define step20_39(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F2(b,c,d) + 0x6ED9EBA1 + W(i); \
+ b = rol32(b,30)
+
+#define step40_59(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F3(b,c,d) + 0x8F1BBCDC + W(i); \
+ b = rol32(b,30)
+
+#define step60_79(i,a,b,c,d,e) \
+ W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
+ e += rol32(a,5) + F4(b,c,d) + 0xCA62C1D6 + W(i); \
+ b = rol32(b,30)
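+
+/*
+ * The step macros rely on helpers from mh_sha1_internal.h (rol32, to_be32 and
+ * F1..F4). A minimal sketch of the standard SHA-1 definitions they are
+ * assumed to follow:
+ *
+ * #define F1(b,c,d) ((d) ^ ((b) & ((c) ^ (d)))) // Ch
+ * #define F2(b,c,d) ((b) ^ (c) ^ (d)) // Parity
+ * #define F3(b,c,d) (((b) & (c)) | ((b) & (d)) | ((c) & (d))) // Maj
+ * #define F4(b,c,d) ((b) ^ (c) ^ (d)) // Parity
+ * #define rol32(x,r) (((x) << (r)) | ((x) >> (32 - (r))))
+ */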
+
+static void OPT_FIX sha1_single_for_mh_sha1(const uint8_t * data, uint32_t digest[])
+{
+ uint32_t a, b, c, d, e;
+ uint32_t w[16] = { 0 };
+ uint32_t *ww = (uint32_t *) data;
+
+ a = digest[0];
+ b = digest[1];
+ c = digest[2];
+ d = digest[3];
+ e = digest[4];
+
+ step00_19(0, a, b, c, d, e);
+ step00_19(1, e, a, b, c, d);
+ step00_19(2, d, e, a, b, c);
+ step00_19(3, c, d, e, a, b);
+ step00_19(4, b, c, d, e, a);
+ step00_19(5, a, b, c, d, e);
+ step00_19(6, e, a, b, c, d);
+ step00_19(7, d, e, a, b, c);
+ step00_19(8, c, d, e, a, b);
+ step00_19(9, b, c, d, e, a);
+ step00_19(10, a, b, c, d, e);
+ step00_19(11, e, a, b, c, d);
+ step00_19(12, d, e, a, b, c);
+ step00_19(13, c, d, e, a, b);
+ step00_19(14, b, c, d, e, a);
+ step00_19(15, a, b, c, d, e);
+ step00_19(16, e, a, b, c, d);
+ step00_19(17, d, e, a, b, c);
+ step00_19(18, c, d, e, a, b);
+ step00_19(19, b, c, d, e, a);
+
+ step20_39(20, a, b, c, d, e);
+ step20_39(21, e, a, b, c, d);
+ step20_39(22, d, e, a, b, c);
+ step20_39(23, c, d, e, a, b);
+ step20_39(24, b, c, d, e, a);
+ step20_39(25, a, b, c, d, e);
+ step20_39(26, e, a, b, c, d);
+ step20_39(27, d, e, a, b, c);
+ step20_39(28, c, d, e, a, b);
+ step20_39(29, b, c, d, e, a);
+ step20_39(30, a, b, c, d, e);
+ step20_39(31, e, a, b, c, d);
+ step20_39(32, d, e, a, b, c);
+ step20_39(33, c, d, e, a, b);
+ step20_39(34, b, c, d, e, a);
+ step20_39(35, a, b, c, d, e);
+ step20_39(36, e, a, b, c, d);
+ step20_39(37, d, e, a, b, c);
+ step20_39(38, c, d, e, a, b);
+ step20_39(39, b, c, d, e, a);
+
+ step40_59(40, a, b, c, d, e);
+ step40_59(41, e, a, b, c, d);
+ step40_59(42, d, e, a, b, c);
+ step40_59(43, c, d, e, a, b);
+ step40_59(44, b, c, d, e, a);
+ step40_59(45, a, b, c, d, e);
+ step40_59(46, e, a, b, c, d);
+ step40_59(47, d, e, a, b, c);
+ step40_59(48, c, d, e, a, b);
+ step40_59(49, b, c, d, e, a);
+ step40_59(50, a, b, c, d, e);
+ step40_59(51, e, a, b, c, d);
+ step40_59(52, d, e, a, b, c);
+ step40_59(53, c, d, e, a, b);
+ step40_59(54, b, c, d, e, a);
+ step40_59(55, a, b, c, d, e);
+ step40_59(56, e, a, b, c, d);
+ step40_59(57, d, e, a, b, c);
+ step40_59(58, c, d, e, a, b);
+ step40_59(59, b, c, d, e, a);
+
+ step60_79(60, a, b, c, d, e);
+ step60_79(61, e, a, b, c, d);
+ step60_79(62, d, e, a, b, c);
+ step60_79(63, c, d, e, a, b);
+ step60_79(64, b, c, d, e, a);
+ step60_79(65, a, b, c, d, e);
+ step60_79(66, e, a, b, c, d);
+ step60_79(67, d, e, a, b, c);
+ step60_79(68, c, d, e, a, b);
+ step60_79(69, b, c, d, e, a);
+ step60_79(70, a, b, c, d, e);
+ step60_79(71, e, a, b, c, d);
+ step60_79(72, d, e, a, b, c);
+ step60_79(73, c, d, e, a, b);
+ step60_79(74, b, c, d, e, a);
+ step60_79(75, a, b, c, d, e);
+ step60_79(76, e, a, b, c, d);
+ step60_79(77, d, e, a, b, c);
+ step60_79(78, c, d, e, a, b);
+ step60_79(79, b, c, d, e, a);
+
+ digest[0] += a;
+ digest[1] += b;
+ digest[2] += c;
+ digest[3] += d;
+ digest[4] += e;
+}
+
+void sha1_for_mh_sha1(const uint8_t * input_data, uint32_t * digest, const uint32_t len)
+{
+ uint32_t i, j;
+ uint8_t buf[2 * SHA1_BLOCK_SIZE];
+
+ digest[0] = MH_SHA1_H0;
+ digest[1] = MH_SHA1_H1;
+ digest[2] = MH_SHA1_H2;
+ digest[3] = MH_SHA1_H3;
+ digest[4] = MH_SHA1_H4;
+
+ i = len;
+ while (i >= SHA1_BLOCK_SIZE) {
+ sha1_single_for_mh_sha1(input_data, digest);
+ input_data += SHA1_BLOCK_SIZE;
+ i -= SHA1_BLOCK_SIZE;
+ }
+
+ memcpy(buf, input_data, i);
+ buf[i++] = 0x80;
+ for (j = i; j < ((2 * SHA1_BLOCK_SIZE) - 8); j++)
+ buf[j] = 0;
+
+ if (i > SHA1_BLOCK_SIZE - 8)
+ i = 2 * SHA1_BLOCK_SIZE;
+ else
+ i = SHA1_BLOCK_SIZE;
+
+ *(uint64_t *) (buf + i - 8) = to_be64((uint64_t) len * 8);
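+
+ // Worked example of the padding above (derived from this code): for
+ // len = 120 the while loop consumes one 64-byte block and leaves i = 56
+ // tail bytes; after appending 0x80, i = 57 > SHA1_BLOCK_SIZE - 8, so the
+ // padded tail spans two blocks, the big-endian bit count (120 * 8 = 960)
+ // is stored at buf[120..127], and both halves of buf are hashed below.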
+
+ sha1_single_for_mh_sha1(buf, digest);
+ if (i == (2 * SHA1_BLOCK_SIZE))
+ sha1_single_for_mh_sha1(buf + SHA1_BLOCK_SIZE, digest);
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am
new file mode 100644
index 000000000..e6ea6784c
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am
@@ -0,0 +1,89 @@
+########################################################################
+# Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+########################################################################
+
+lsrc_murmur = mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
+
+lsrc_stitch = mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
+
+lsrc_stitch += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm
+
+lsrc_x86_64 += $(lsrc_murmur) \
+ $(lsrc_stitch)
+
+lsrc_x86_32 += $(lsrc_x86_64)
+
+lsrc_aarch64 += $(lsrc_murmur) \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c \
+ mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c \
+ mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c \
+ mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S \
+ mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c \
+ mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S \
+ mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S
+
+lsrc_base_aliases += $(lsrc_murmur) \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c
+
+other_src += include/reg_sizes.asm \
+ include/multibinary.asm \
+ include/test.h \
+ mh_sha1/mh_sha1_internal.h \
+ mh_sha1_murmur3_x64_128/murmur3_x64_128.c \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
+
+src_include += -I $(srcdir)/mh_sha1_murmur3_x64_128
+
+extern_hdrs += include/mh_sha1_murmur3_x64_128.h
+
+unit_tests += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test \
+ mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test
+
+perf_tests += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf
+
+
+mh_sha1_murmur3_x64_128_test: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_test_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
+
+mh_sha1_murmur3_x64_128_update_test: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_update_test_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
+
+mh_sha1_murmur3_x64_128_perf: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_perf_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c
new file mode 100644
index 000000000..e6993703a
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c
@@ -0,0 +1,53 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include <aarch64_multibinary.h>
+
+DEFINE_INTERFACE_DISPATCHER(mh_sha1_murmur3_x64_128_update)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(mh_sha1_murmur3_update_ce);
+
+ if (auxval & HWCAP_ASIMD)
+ return PROVIDER_INFO(mh_sha1_murmur3_update_asimd);
+
+ return PROVIDER_BASIC(mh_sha1_murmur3_x64_128_update);
+}
+
+DEFINE_INTERFACE_DISPATCHER(mh_sha1_murmur3_x64_128_finalize)
+{
+ unsigned long auxval = getauxval(AT_HWCAP);
+ if (auxval & HWCAP_SHA1)
+ return PROVIDER_INFO(mh_sha1_murmur3_finalize_ce);
+
+ if (auxval & HWCAP_ASIMD)
+ return PROVIDER_INFO(mh_sha1_murmur3_finalize_asimd);
+
+ return PROVIDER_BASIC(mh_sha1_murmur3_x64_128_finalize);
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_internal.h
new file mode 100644
index 000000000..22b33cbd2
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_internal.h
@@ -0,0 +1,91 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MH_SHA1_MURMUR3_AARCH64_INTERNAL_H_
+#define _MH_SHA1_MURMUR3_AARCH64_INTERNAL_H_
+
+/**
+ * @file mh_sha1_murmur3_aarch64_internal.h
+ * @brief mh_sha1_murmur3_aarch64 internal function prototypes and macros
+ *
+ * Interface for mh_sha1_murmur3_aarch64 internal functions
+ *
+ */
+#include <stdint.h>
+#include "mh_sha1_murmur3_x64_128_internal.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires Crypto Extension
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests Digests of the 16 segments
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+void mh_sha1_murmur3_block_ce(const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t
+ murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires ASIMD
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests Digests of the 16 segments
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+void mh_sha1_murmur3_block_asimd(const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t
+ murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+
+#ifdef __cplusplus
+}
+#endif
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c
new file mode 100644
index 000000000..9cac8504e
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c
@@ -0,0 +1,54 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_murmur3_aarch64_internal.h"
+
+extern void mh_sha1_tail_asimd(uint8_t * partial_buffer, uint32_t total_len,
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t * frame_buffer,
+ uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+extern void mh_sha1_block_asimd(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+// mh_sha1_murmur3_update_asimd.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_update_asimd
+#define BLOCK_FUNCTION mh_sha1_murmur3_block_asimd
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+// mh_sha1_murmur3_finalize_asimd.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_finalize_asimd
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_asimd
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S
new file mode 100644
index 000000000..575129f36
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S
@@ -0,0 +1,224 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+#include "sha1_asimd_common.S"
+.macro sha1_step_16_79_interleave0 windex:req
+ // interleaving murmur3 operation
+ .if (\windex % 4) == 0
+ ldp mur_data1, mur_data2, [mur_data], #16
+ .endif
+ .if (\windex % 4) == 1
+ /* rotate left by 31 bits */
+ ror mur_data1, mur_data1, #64-31
+ /* rotate left by 33 bits */
+ ror mur_data2, mur_data2, #64-33
+ .endif
+ .if (\windex % 4) == 2
+ eor mur_hash1, mur_hash1, mur_data1
+ /* rotate left by 27 bits */
+ ror mur_hash1, mur_hash1, #64-27
+ .endif
+ .if (\windex % 4) == 3
+ eor mur_hash2, mur_hash2, mur_data2
+ /* rotate left by 31 bits */
+ ror mur_hash2, mur_hash2, #64-31
+ .endif
+.endm
+
+.macro sha1_step_16_79_interleave1 windex:req
+ // interleaving murmur3 operation
+ .if (\windex % 4) == 0
+ mul mur_data1, mur_data1, mur_c1
+ mul mur_data2, mur_data2, mur_c2
+ .endif
+ .if (\windex % 4) == 1
+ mul mur_data1, mur_data1, mur_c2
+ mul mur_data2, mur_data2, mur_c1
+ .endif
+ .if (\windex % 4) == 2
+ add mur_hash1, mur_hash1, mur_hash2
+ //mur_hash1 = mur_hash1 * 5 + N1
+ add mur_hash1, mur_hash1, mur_hash1, LSL #2
+ add mur_hash1, mur_n1, mur_hash1
+ .endif
+ .if (\windex % 4) == 3
+ add mur_hash2, mur_hash2, mur_hash1
+ // mur_hash2 = mur_hash2 * 5 + N2
+ add mur_hash2, mur_hash2, mur_hash2, LSL #2
+ add mur_hash2, mur_n2, mur_hash2
+ .endif
+.endm
+
+.macro load_x4_word idx:req
+ ld1 {WORD\idx\().16b},[segs_ptr]
+ add segs_ptr,segs_ptr,#64
+.endm
+
+/*
+ * void mh_sha1_murmur3_block_asimd (const uint8_t * input_data,
+ * uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ * uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ * uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ * uint32_t num_blocks);
+ * arg 0 pointer to input data
+ * arg 1 pointer to digests, including segment digests (uint32_t digests[16][5])
+ * arg 2 pointer to aligned_frame_buffer, which is used to save the big-endian data.
+ * arg 3 pointer to murmur3 digest
+ * arg 4 number of 1KB blocks
+ */
+
+ input_data .req x0
+ sha1_digest .req x1
+ data_buf .req x2
+ mur_digest .req x3
+ num_blocks .req w4
+
+ src .req x5
+ dst .req x6
+ offs .req x7
+ mh_segs .req x8
+ tmp .req x9
+ tmpw .req w9
+ segs_ptr .req x10
+ mur_hash1 .req x11
+ mur_hash2 .req x12
+ mur_c1 .req x13
+ mur_c2 .req x14
+ mur_data1 .req x19
+ mur_data2 .req x20
+ mur_data .req x21
+ mur_n1 .req x22
+ mur_n1_w .req w22
+ mur_n2 .req x23
+ mur_n2_w .req w23
+ block_ctr .req w24
+
+ .global mh_sha1_murmur3_block_asimd
+ .type mh_sha1_murmur3_block_asimd, %function
+mh_sha1_murmur3_block_asimd:
+ cmp num_blocks, #0
+ beq .return
+ sha1_asimd_save_stack
+ stp x19, x20, [sp, -48]!
+ stp x21, x22, [sp, 16]
+ stp x23, x24, [sp, 32]
+
+ mov mur_data, input_data
+ ldr mur_hash1, [mur_digest]
+ ldr mur_hash2, [mur_digest, 8]
+ adr mur_c1, C1
+ ldr mur_c1, [mur_c1]
+ adr mur_c2, C2
+ ldr mur_c2, [mur_c2]
+ adr tmp, N1
+ ldr mur_n1_w, [tmp]
+ adr tmp, N2
+ ldr mur_n2_w, [tmp]
+
+ mov mh_segs, #0
+.seg_loops:
+ add segs_ptr,input_data,mh_segs
+ mov offs, #64
+ add src, sha1_digest, mh_segs
+ ld1 {VA.4S}, [src], offs
+ ld1 {VB.4S}, [src], offs
+ ld1 {VC.4S}, [src], offs
+ ld1 {VD.4S}, [src], offs
+ ld1 {VE.4S}, [src], offs
+ mov block_ctr,num_blocks
+
+.block_loop:
+ sha1_single
+ subs block_ctr, block_ctr, 1
+ bne .block_loop
+
+ mov offs, #64
+ add dst, sha1_digest, mh_segs
+ st1 {VA.4S}, [dst], offs
+ st1 {VB.4S}, [dst], offs
+ st1 {VC.4S}, [dst], offs
+ st1 {VD.4S}, [dst], offs
+ st1 {VE.4S}, [dst], offs
+
+ add mh_segs, mh_segs, #16
+ cmp mh_segs, #64
+ bne .seg_loops
+
+ /* save murmur-hash digest */
+ str mur_hash1, [mur_digest], #8
+ str mur_hash2, [mur_digest]
+
+ ldp x21, x22, [sp, 16]
+ ldp x23, x24, [sp, 32]
+ ldp x19, x20, [sp], 48
+ sha1_asimd_restore_stack
+.return:
+ ret
+
+ .size mh_sha1_murmur3_block_asimd, .-mh_sha1_murmur3_block_asimd
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 16
+KEY_0:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+KEY_1:
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+KEY_2:
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+KEY_3:
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+N1:
+ .word 0x52dce729
+ .word 0x52dce729
+ .word 0x52dce729
+ .word 0x52dce729
+N2:
+ .word 0x38495ab5
+ .word 0x38495ab5
+ .word 0x38495ab5
+ .word 0x38495ab5
+C1:
+ .dword 0x87c37b91114253d5
+ .dword 0x87c37b91114253d5
+C2:
+ .dword 0x4cf5ad432745937f
+ .dword 0x4cf5ad432745937f
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S
new file mode 100644
index 000000000..7f4256e20
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S
@@ -0,0 +1,482 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+ .arch armv8-a+crypto
+ .text
+ .align 2
+ .p2align 3,,7
+
+/*
+Macros
+*/
+
+.macro declare_var_vector_reg name:req,reg:req
+ \name\()_q .req q\reg
+ \name\()_v .req v\reg
+ \name\()_s .req s\reg
+.endm
+
+
+
+/*
+Variable list
+*/
+
+ declare_var_vector_reg lane0_msg_0, 0
+ declare_var_vector_reg lane1_msg_0, 1
+ declare_var_vector_reg lane2_msg_0, 2
+ declare_var_vector_reg lane3_msg_0, 3
+ declare_var_vector_reg lane0_msg_1, 4
+ declare_var_vector_reg lane1_msg_1, 5
+ declare_var_vector_reg lane2_msg_1, 6
+ declare_var_vector_reg lane3_msg_1, 7
+ declare_var_vector_reg lane0_msg_2, 8
+ declare_var_vector_reg lane1_msg_2, 9
+ declare_var_vector_reg lane2_msg_2,10
+ declare_var_vector_reg lane3_msg_2,11
+ declare_var_vector_reg lane0_msg_3,12
+ declare_var_vector_reg lane1_msg_3,13
+ declare_var_vector_reg lane2_msg_3,14
+ declare_var_vector_reg lane3_msg_3,15
+
+ declare_var_vector_reg lane0_abcd ,16
+ declare_var_vector_reg lane1_abcd ,17
+ declare_var_vector_reg lane2_abcd ,18
+ declare_var_vector_reg lane3_abcd ,19
+ declare_var_vector_reg lane0_tmp0 ,20
+ declare_var_vector_reg lane1_tmp0 ,21
+ declare_var_vector_reg lane2_tmp0 ,22
+ declare_var_vector_reg lane3_tmp0 ,23
+ declare_var_vector_reg lane0_tmp1 ,24
+ declare_var_vector_reg lane1_tmp1 ,25
+ declare_var_vector_reg lane2_tmp1 ,26
+ declare_var_vector_reg lane3_tmp1 ,27
+
+
+ declare_var_vector_reg e0 ,28
+ declare_var_vector_reg e1 ,29
+ declare_var_vector_reg key ,30
+ declare_var_vector_reg tmp ,31
+
+ key_adr .req x5
+ msg_adr .req x6
+ block_cnt .req x7
+ offs .req x8
+ mur_n1 .req x9
+ mur_n1_w .req w9
+ mur_n2 .req x10
+ mur_n2_w .req w10
+ mur_hash1 .req x11
+ mur_hash2 .req x12
+ mur_c1 .req x13
+ mur_c2 .req x14
+ mur_data1 .req x15
+
+ digest_adr .req x16
+ tmp0_adr .req x17
+ tmp1_adr .req x18
+ mur_data2 .req x19
+ mur_data .req x20
+
+.macro murmur3_00
+ ldp mur_data1, mur_data2, [mur_data], #16
+ mul mur_data1, mur_data1, mur_c1
+ mul mur_data2, mur_data2, mur_c2
+.endm
+
+.macro murmur3_01
+ /* rotate left by 31 bits */
+ ror mur_data1, mur_data1, #64-31
+ /* rotate left by 33 bits */
+ ror mur_data2, mur_data2, #64-33
+ mul mur_data1, mur_data1, mur_c2
+ mul mur_data2, mur_data2, mur_c1
+.endm
+
+.macro murmur3_02
+ eor mur_hash1, mur_hash1, mur_data1
+ /* rotate left by 27 bits */
+ ror mur_hash1, mur_hash1, #64-27
+ add mur_hash1, mur_hash1, mur_hash2
+ // mur_hash1 = mur_hash1 * 5 + N1
+ add mur_hash1, mur_hash1, mur_hash1, LSL #2
+ add mur_hash1, mur_n1, mur_hash1
+.endm
+
+.macro murmur3_03
+ eor mur_hash2, mur_hash2, mur_data2
+ /* rotate left by 31 bits */
+ ror mur_hash2, mur_hash2, #64-31
+ add mur_hash2, mur_hash2, mur_hash1
+ // mur_hash2 = mur_hash2 * 5 + N2
+ add mur_hash2, mur_hash2, mur_hash2, LSL #2
+ add mur_hash2, mur_n2, mur_hash2
+.endm
+
+/**
+ * macros for rounds 4-67
+ * the code executes 16 times per block, allowing the interleaved murmur3 operations to process 256 bytes
+*/
+.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req
+ sha1h lane0_\tmp0\()_s, lane0_\abcd\()_s
+ sha1h lane1_\tmp0\()_s, lane1_\abcd\()_s
+ sha1h lane2_\tmp0\()_s, lane2_\abcd\()_s
+ sha1h lane3_\tmp0\()_s, lane3_\abcd\()_s
+ mov \e0\()_v.S[0],lane0_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[1],lane1_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[2],lane2_\tmp0\()_v.S[0]
+ mov \e0\()_v.S[3],lane3_\tmp0\()_v.S[0]
+ mov lane0_\tmp0\()_v.S[0],\e1\()_v.S[0]
+ mov lane1_\tmp0\()_v.S[0],\e1\()_v.S[1]
+ mov lane2_\tmp0\()_v.S[0],\e1\()_v.S[2]
+ mov lane3_\tmp0\()_v.S[0],\e1\()_v.S[3]
+ \inst lane0_\abcd\()_q,lane0_\tmp0\()_s,lane0_\tmp1\()_v.4s
+ murmur3_00
+ \inst lane1_\abcd\()_q,lane1_\tmp0\()_s,lane1_\tmp1\()_v.4s
+ murmur3_01
+ \inst lane2_\abcd\()_q,lane2_\tmp0\()_s,lane2_\tmp1\()_v.4s
+ murmur3_02
+ \inst lane3_\abcd\()_q,lane3_\tmp0\()_s,lane3_\tmp1\()_v.4s
+ murmur3_03
+ ld1 {lane0_\tmp0\()_v.4s-lane3_\tmp0\()_v.4s},[\tmp0\()_adr]
+ add lane0_\tmp1\()_v.4s,lane0_\msg3\()_v.4s,key_v.4s
+ add lane1_\tmp1\()_v.4s,lane1_\msg3\()_v.4s,key_v.4s
+ add lane2_\tmp1\()_v.4s,lane2_\msg3\()_v.4s,key_v.4s
+ add lane3_\tmp1\()_v.4s,lane3_\msg3\()_v.4s,key_v.4s
+ st1 {lane0_\tmp1\()_v.4s-lane3_\tmp1\()_v.4s},[\tmp1\()_adr]
+ sha1su1 lane0_\msg0\()_v.4s,lane0_\msg3\()_v.4s
+ sha1su1 lane1_\msg0\()_v.4s,lane1_\msg3\()_v.4s
+ sha1su1 lane2_\msg0\()_v.4s,lane2_\msg3\()_v.4s
+ sha1su1 lane3_\msg0\()_v.4s,lane3_\msg3\()_v.4s
+ sha1su0 lane0_\msg1\()_v.4s,lane0_\msg2\()_v.4s,lane0_\msg3\()_v.4s
+ sha1su0 lane1_\msg1\()_v.4s,lane1_\msg2\()_v.4s,lane1_\msg3\()_v.4s
+ sha1su0 lane2_\msg1\()_v.4s,lane2_\msg2\()_v.4s,lane2_\msg3\()_v.4s
+ sha1su0 lane3_\msg1\()_v.4s,lane3_\msg2\()_v.4s,lane3_\msg3\()_v.4s
+.endm
+
+
+/*
+ * void mh_sha1_murmur3_block_ce (const uint8_t * input_data,
+ * uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ * uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ * uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ * uint32_t num_blocks);
+ * arg 0 pointer to input data
+ * arg 1 pointer to digests, including segment digests (uint32_t digests[16][5])
+ * arg 2 pointer to aligned_frame_buffer, which is used to save the big-endian data.
+ * arg 3 pointer to murmur3 digest
+ * arg 4 number of 1KB blocks
+ */
+
+/*
+Argument list
+*/
+ input_data .req x0
+ digests .req x1
+ frame_buffer .req x2
+ mur_digest .req x3
+ num_blocks .req w4
+
+ .global mh_sha1_murmur3_block_ce
+ .type mh_sha1_murmur3_block_ce, %function
+mh_sha1_murmur3_block_ce:
+ // save temp vector registers
+ stp d8, d9, [sp, -80]!
+
+ stp d10, d11, [sp, 16]
+ stp d12, d13, [sp, 32]
+ stp d14, d15, [sp, 48]
+ stp x19, x20, [sp, 64]
+
+ mov mur_data, input_data
+ ldr mur_hash1, [mur_digest]
+ ldr mur_hash2, [mur_digest, 8]
+ adr mur_c1, C1
+ ldr mur_c1, [mur_c1]
+ adr mur_c2, C2
+ ldr mur_c2, [mur_c2]
+ adr tmp0_adr, N1
+ ldr mur_n1_w, [tmp0_adr]
+ adr tmp0_adr, N2
+ ldr mur_n2_w, [tmp0_adr]
+
+ mov tmp0_adr,frame_buffer
+ add tmp1_adr,tmp0_adr,128
+
+
+start_loop:
+ mov block_cnt,0
+ mov msg_adr,input_data
+lane_loop:
+ mov offs,64
+ adr key_adr,KEY_0
+ // load msg 0
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[3],[msg_adr],offs
+
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[1],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[2],[msg_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[3],[msg_adr],offs
+
+ add digest_adr,digests,block_cnt
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs
+ ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs
+ ldr e0_q,[digest_adr]
+
+ // load key_0
+ ldr key_q,[key_adr]
+
+ rev32 lane0_msg_0_v.16b,lane0_msg_0_v.16b
+ rev32 lane1_msg_0_v.16b,lane1_msg_0_v.16b
+ rev32 lane2_msg_0_v.16b,lane2_msg_0_v.16b
+ rev32 lane3_msg_0_v.16b,lane3_msg_0_v.16b
+ rev32 lane0_msg_1_v.16b,lane0_msg_1_v.16b
+ rev32 lane1_msg_1_v.16b,lane1_msg_1_v.16b
+ rev32 lane2_msg_1_v.16b,lane2_msg_1_v.16b
+ rev32 lane3_msg_1_v.16b,lane3_msg_1_v.16b
+ rev32 lane0_msg_2_v.16b,lane0_msg_2_v.16b
+ rev32 lane1_msg_2_v.16b,lane1_msg_2_v.16b
+ rev32 lane2_msg_2_v.16b,lane2_msg_2_v.16b
+ rev32 lane3_msg_2_v.16b,lane3_msg_2_v.16b
+ rev32 lane0_msg_3_v.16b,lane0_msg_3_v.16b
+ rev32 lane1_msg_3_v.16b,lane1_msg_3_v.16b
+ rev32 lane2_msg_3_v.16b,lane2_msg_3_v.16b
+ rev32 lane3_msg_3_v.16b,lane3_msg_3_v.16b
+
+ add lane0_tmp1_v.4s,lane0_msg_1_v.4s,key_v.4s
+ add lane1_tmp1_v.4s,lane1_msg_1_v.4s,key_v.4s
+ add lane2_tmp1_v.4s,lane2_msg_1_v.4s,key_v.4s
+ add lane3_tmp1_v.4s,lane3_msg_1_v.4s,key_v.4s
+ st1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr]
+
+ add lane0_tmp0_v.4s,lane0_msg_0_v.4s,key_v.4s
+ add lane1_tmp0_v.4s,lane1_msg_0_v.4s,key_v.4s
+ add lane2_tmp0_v.4s,lane2_msg_0_v.4s,key_v.4s
+ add lane3_tmp0_v.4s,lane3_msg_0_v.4s,key_v.4s
+
+ /* rounds 0-3 */
+ sha1h lane0_tmp1_s,lane0_abcd_s
+ sha1h lane1_tmp1_s,lane1_abcd_s
+ sha1h lane2_tmp1_s,lane2_abcd_s
+ sha1h lane3_tmp1_s,lane3_abcd_s
+ mov e1_v.S[0],lane0_tmp1_v.S[0]
+ mov e1_v.S[1],lane1_tmp1_v.S[0]
+ mov e1_v.S[2],lane2_tmp1_v.S[0]
+ mov e1_v.S[3],lane3_tmp1_v.S[0]
+ mov lane0_tmp1_v.S[0],e0_v.S[0]
+ mov lane1_tmp1_v.S[0],e0_v.S[1]
+ mov lane2_tmp1_v.S[0],e0_v.S[2]
+ mov lane3_tmp1_v.S[0],e0_v.S[3]
+ sha1c lane0_abcd_q,lane0_tmp1_s,lane0_tmp0_v.4s
+ sha1c lane1_abcd_q,lane1_tmp1_s,lane1_tmp0_v.4s
+ sha1c lane2_abcd_q,lane2_tmp1_s,lane2_tmp0_v.4s
+ sha1c lane3_abcd_q,lane3_tmp1_s,lane3_tmp0_v.4s
+ ld1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr]
+ add lane0_tmp0_v.4s,lane0_msg_2_v.4s,key_v.4s
+ sha1su0 lane0_msg_0_v.4s,lane0_msg_1_v.4s,lane0_msg_2_v.4s
+ add lane1_tmp0_v.4s,lane1_msg_2_v.4s,key_v.4s
+ sha1su0 lane1_msg_0_v.4s,lane1_msg_1_v.4s,lane1_msg_2_v.4s
+ add lane2_tmp0_v.4s,lane2_msg_2_v.4s,key_v.4s
+ sha1su0 lane2_msg_0_v.4s,lane2_msg_1_v.4s,lane2_msg_2_v.4s
+ add lane3_tmp0_v.4s,lane3_msg_2_v.4s,key_v.4s
+ sha1su0 lane3_msg_0_v.4s,lane3_msg_1_v.4s,lane3_msg_2_v.4s
+ st1 {lane0_tmp0_v.4s-lane3_tmp0_v.4s},[tmp0_adr]
+
+ sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 4-7 */
+ sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+
+
+ adr key_adr,KEY_1
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 /* rounds 12-15 */
+ sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 20-23 */
+ sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+
+ adr key_adr,KEY_2
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 36-39 */
+ sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+ sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+
+ adr key_adr,KEY_3
+ ldr key_q,[key_adr]
+ sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 52-55 */
+ sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0
+ sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1
+ sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0
+
+ // msg2 and msg1 are free
+ mov lane0_msg_2_v.S[0],e1_v.S[0]
+ mov lane1_msg_2_v.S[0],e1_v.S[1]
+ mov lane2_msg_2_v.S[0],e1_v.S[2]
+ mov lane3_msg_2_v.S[0],e1_v.S[3]
+
+ /* rounds 68-71 */
+ sha1h lane0_msg_1_s,lane0_abcd_s
+ sha1h lane1_msg_1_s,lane1_abcd_s
+ sha1h lane2_msg_1_s,lane2_abcd_s
+ sha1h lane3_msg_1_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s
+ sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s
+ sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s
+ sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s
+ add lane0_tmp1_v.4s,lane0_msg_3_v.4s,key_v.4s
+ add lane1_tmp1_v.4s,lane1_msg_3_v.4s,key_v.4s
+ add lane2_tmp1_v.4s,lane2_msg_3_v.4s,key_v.4s
+ add lane3_tmp1_v.4s,lane3_msg_3_v.4s,key_v.4s
+ sha1su1 lane0_msg_0_v.4s,lane0_msg_3_v.4s
+ sha1su1 lane1_msg_0_v.4s,lane1_msg_3_v.4s
+ sha1su1 lane2_msg_0_v.4s,lane2_msg_3_v.4s
+ sha1su1 lane3_msg_0_v.4s,lane3_msg_3_v.4s
+
+ /* rounds 72-75 */
+ sha1h lane0_msg_2_s,lane0_abcd_s
+ sha1h lane1_msg_2_s,lane1_abcd_s
+ sha1h lane2_msg_2_s,lane2_abcd_s
+ sha1h lane3_msg_2_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_1_s,lane0_tmp0_v.4s
+ sha1p lane1_abcd_q,lane1_msg_1_s,lane1_tmp0_v.4s
+ sha1p lane2_abcd_q,lane2_msg_1_s,lane2_tmp0_v.4s
+ sha1p lane3_abcd_q,lane3_msg_1_s,lane3_tmp0_v.4s
+
+ /* rounds 76-79 */
+ sha1h lane0_msg_1_s,lane0_abcd_s
+ sha1h lane1_msg_1_s,lane1_abcd_s
+ sha1h lane2_msg_1_s,lane2_abcd_s
+ sha1h lane3_msg_1_s,lane3_abcd_s
+ sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s
+ sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s
+ sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s
+ sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s
+ add digest_adr,digests,block_cnt
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[digest_adr],offs
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[digest_adr],offs
+ ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[digest_adr]
+
+ add lane0_abcd_v.4S,lane0_abcd_v.4S,lane0_msg_0_v.4S
+ add lane1_abcd_v.4S,lane1_abcd_v.4S,lane1_msg_0_v.4S
+ add lane2_abcd_v.4S,lane2_abcd_v.4S,lane2_msg_0_v.4S
+ add lane3_abcd_v.4S,lane3_abcd_v.4S,lane3_msg_0_v.4S
+
+ add lane0_msg_1_v.4S,lane0_msg_1_v.4S,lane0_msg_3_v.4S
+ add lane1_msg_1_v.4S,lane1_msg_1_v.4S,lane1_msg_3_v.4S
+ add lane2_msg_1_v.4S,lane2_msg_1_v.4S,lane2_msg_3_v.4S
+ add lane3_msg_1_v.4S,lane3_msg_1_v.4S,lane3_msg_3_v.4S
+
+ add digest_adr,digests,block_cnt
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs
+ st4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs
+ st4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[digest_adr]
+
+ add block_cnt,block_cnt,16
+ cmp block_cnt,64
+ add msg_adr,input_data,block_cnt
+ add digest_adr,digests,block_cnt
+ bcc lane_loop
+
+ subs num_blocks,num_blocks,1
+ add input_data,input_data,1024
+ bhi start_loop
+
+ /* save murmur-hash digest */
+ str mur_hash1, [mur_digest], #8
+ str mur_hash2, [mur_digest]
+
+exit_func:
+ // restore temp registers
+ ldp d10, d11, [sp, 16]
+ ldp d12, d13, [sp, 32]
+ ldp d14, d15, [sp, 48]
+ ldp x19, x20, [sp, 64]
+ ldp d8, d9, [sp], 80
+ ret
+
+ .size mh_sha1_murmur3_block_ce, .-mh_sha1_murmur3_block_ce
+ .section .rodata.cst16,"aM",@progbits,16
+ .align 4
+KEY_0:
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+ .word 0x5a827999
+KEY_1:
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+ .word 0x6ed9eba1
+KEY_2:
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+ .word 0x8f1bbcdc
+KEY_3:
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+ .word 0xca62c1d6
+
+N1:
+ .word 0x52dce729
+ .word 0x52dce729
+ .word 0x52dce729
+ .word 0x52dce729
+N2:
+ .word 0x38495ab5
+ .word 0x38495ab5
+ .word 0x38495ab5
+ .word 0x38495ab5
+
+C1:
+ .dword 0x87c37b91114253d5
+ .dword 0x87c37b91114253d5
+C2:
+ .dword 0x4cf5ad432745937f
+ .dword 0x4cf5ad432745937f
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c
new file mode 100644
index 000000000..4da674fba
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c
@@ -0,0 +1,54 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_murmur3_aarch64_internal.h"
+
+extern void mh_sha1_tail_ce(uint8_t * partial_buffer, uint32_t total_len,
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS],
+ uint8_t * frame_buffer,
+ uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);
+
+extern void mh_sha1_block_ce(const uint8_t * input_data,
+ uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+
+// mh_sha1_murmur3_update_ce.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_update_ce
+#define BLOCK_FUNCTION mh_sha1_murmur3_block_ce
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+// mh_sha1_murmur3_finalize_ce.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_finalize_ce
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_ce
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S
new file mode 100644
index 000000000..051a6157e
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S
@@ -0,0 +1,34 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#include "aarch64_multibinary.h"
+
+mbin_interface mh_sha1_murmur3_x64_128_update
+mbin_interface mh_sha1_murmur3_x64_128_finalize
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S
new file mode 100644
index 000000000..ccc66f41a
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S
@@ -0,0 +1,271 @@
+/**********************************************************************
+ Copyright(c) 2021 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+ .arch armv8-a
+
+// macro F = (D ^ (B & (C ^ D)))
+.macro FUNC_F0
+ eor VF.16b, VC.16b, VD.16b
+ and VF.16b, VB.16b, VF.16b
+ eor VF.16b, VD.16b, VF.16b
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F1
+ eor VF.16b, VB.16b, VC.16b
+ eor VF.16b, VF.16b, VD.16b
+.endm
+
+// F = ((B & C) | (B & D) | (C & D))
+.macro FUNC_F2
+ and vT0.16b, VB.16b, VC.16b
+ and vT1.16b, VB.16b, VD.16b
+ and vT2.16b, VC.16b, VD.16b
+ orr VF.16b, vT0.16b, vT1.16b
+ orr VF.16b, VF.16b, vT2.16b
+.endm
+
+// F = (B ^ C ^ D)
+.macro FUNC_F3
+ FUNC_F1
+.endm
+
+.altmacro
+.macro load_next_word windex
+ .if \windex < 16
+ load_x4_word \windex
+ .endif
+.endm
+
+// FUNC_F0 is merged into STEP_00_15 for efficiency
+.macro SHA1_STEP_00_15_F0 windex:req
+ rev32 WORD\windex\().16b,WORD\windex\().16b
+ next_word=\windex+1
+ load_next_word %next_word
+ // e = (a leftrotate 5) + f + e + k + w[i]
+ ushr VT.4s, VA.4s, 32 - 5
+ add VE.4s, VE.4s, VK.4s
+ sli VT.4s, VA.4s, 5
+ eor VF.16b, VC.16b, VD.16b
+ add VE.4s, VE.4s, WORD\windex\().4s
+ and VF.16b, VB.16b, VF.16b
+ add VE.4s, VE.4s, VT.4s
+ eor VF.16b, VD.16b, VF.16b
+ ushr VT.4s, VB.4s, 32 - 30
+ add VE.4s, VE.4s, VF.4s
+ sli VT.4s, VB.4s, 30
+.endm
+
+.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req
+ eor vT0.16b,\reg_3\().16b,\reg_8\().16b
+ eor VT.16b,\reg_14\().16b,\reg_16\().16b
+ sha1_step_16_79_interleave0 \windex
+ eor vT0.16b,vT0.16b,VT.16b
+ sha1_step_16_79_interleave1 \windex
+ // e = (a leftrotate 5) + f + e + k + w[i]
+ ushr VT.4s, vT0.4s, 32 - 1
+ add VE.4s, VE.4s, VK.4s
+ ushr vT1.4s, VA.4s, 32 - 5
+ sli VT.4s, vT0.4s, 1
+ add VE.4s, VE.4s, VT.4s
+ sli vT1.4s, VA.4s, 5
+ mov \reg_16\().16b,VT.16b
+ add VE.4s, VE.4s, vT1.4s
+ ushr VT.4s, VB.4s, 32 - 30
+ \func_f
+ add VE.4s, VE.4s, VF.4s
+ sli VT.4s, VB.4s, 30
+.endm
+
+ VA .req v0
+ VB .req v1
+ VC .req v2
+ VD .req v3
+ VE .req v4
+ VT .req v5
+ VF .req v6
+ VK .req v7
+ WORD0 .req v8
+ WORD1 .req v9
+ WORD2 .req v10
+ WORD3 .req v11
+ WORD4 .req v12
+ WORD5 .req v13
+ WORD6 .req v14
+ WORD7 .req v15
+ WORD8 .req v16
+ WORD9 .req v17
+ WORD10 .req v18
+ WORD11 .req v19
+ WORD12 .req v20
+ WORD13 .req v21
+ WORD14 .req v22
+ WORD15 .req v23
+ vT0 .req v24
+ vT1 .req v25
+ vT2 .req v26
+ vAA .req v27
+ vBB .req v28
+ vCC .req v29
+ vDD .req v30
+ vEE .req v31
+ TT .req v0
+ sha1key_adr .req x15
+
+.macro SWAP_STATES
+ // shifted VB is held in VT after each step
+ .unreq TT
+ TT .req VE
+ .unreq VE
+ VE .req VD
+ .unreq VD
+ VD .req VC
+ .unreq VC
+ VC .req VT
+ .unreq VT
+ VT .req VB
+ .unreq VB
+ VB .req VA
+ .unreq VA
+ VA .req TT
+.endm
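+
+// Note (comment added for clarity, not in the original source): SWAP_STATES
+// rotates the roles of the state registers purely through .unreq/.req renaming,
+// so no mov instructions are needed per round; the x86 versions get the same
+// effect with %xdefine in their ROTATE_ARGS macro.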
+
+.altmacro
+.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req
+ SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\()
+.endm
+
+.macro exec_step windex:req
+ .if \windex <= 15
+ SHA1_STEP_00_15_F0 windex
+ .else
+ idx14=((\windex - 14) & 15)
+ idx8=((\windex - 8) & 15)
+ idx3=((\windex - 3) & 15)
+ idx16=(\windex & 15)
+ .if \windex <= 19
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 20 && \windex <= 39
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 40 && \windex <= 59
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .if \windex >= 60 && \windex <= 79
+ SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16
+ .endif
+ .endif
+
+ SWAP_STATES
+
+ .if \windex == 79
+	// after 80 steps, the registers ABCDET have shifted from
+	// their original order of 012345 to 341520,
+	// so swap back for both compile- and run-time correctness
+ mov v0.16b,v3.16b
+ .unreq VA
+ VA .req v0
+
+ mov vT0.16b,v2.16b
+ mov v2.16b,v1.16b
+ mov v1.16b,v4.16b
+ .unreq VB
+ VB .req v1
+ .unreq VC
+ VC .req v2
+
+ mov v3.16b,v5.16b
+ .unreq VD
+ VD .req v3
+
+ mov v4.16b,vT0.16b
+ .unreq VE
+ VE .req v4
+
+ .unreq VT
+ VT .req v5
+ .endif
+.endm
+
+.macro exec_steps idx:req,more:vararg
+ exec_step \idx
+ .ifnb \more
+ exec_steps \more
+ .endif
+.endm
+
+.macro sha1_single
+ load_x4_word 0
+
+ mov vAA.16B, VA.16B
+ mov vBB.16B, VB.16B
+ mov vCC.16B, VC.16B
+ mov vDD.16B, VD.16B
+ mov vEE.16B, VE.16B
+
+ adr sha1key_adr, KEY_0
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
+
+ // 20 ~ 39
+ adr sha1key_adr, KEY_1
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39
+
+ // 40 ~ 59
+ adr sha1key_adr, KEY_2
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59
+
+ // 60 ~ 79
+ adr sha1key_adr, KEY_3
+ ld1 {VK.4s}, [sha1key_adr]
+ exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79
+
+ add VA.4s, vAA.4s, VA.4s
+ add VB.4s, vBB.4s, VB.4s
+ add VC.4s, vCC.4s, VC.4s
+ add VD.4s, vDD.4s, VD.4s
+ add VE.4s, vEE.4s, VE.4s
+.endm
+
+.macro sha1_asimd_save_stack
+ stp d8,d9,[sp, -64]!
+ stp d10,d11,[sp, 16]
+ stp d12,d13,[sp, 32]
+ stp d14,d15,[sp, 48]
+.endm
+
+.macro sha1_asimd_restore_stack
+ ldp d10,d11,[sp, 16]
+ ldp d12,d13,[sp, 32]
+ ldp d14,d15,[sp, 48]
+ ldp d8,d9,[sp],64
+.endm
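+
+// Note (comment added for clarity, not in the original source): the two macros
+// above spill and reload d8-d15 because AAPCS64 requires the low 64 bits of
+// v8-v15 to be preserved across calls; the other vector registers used here
+// are caller-saved and need no spill.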
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c
new file mode 100644
index 000000000..518adb797
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c
@@ -0,0 +1,154 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_murmur3_x64_128_internal.h"
+
+int mh_sha1_murmur3_x64_128_init(struct mh_sha1_murmur3_x64_128_ctx *ctx, uint64_t murmur_seed)
+{
+ uint64_t *murmur3_x64_128_hash;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint32_t i;
+
+ if (ctx == NULL)
+ return MH_SHA1_MURMUR3_CTX_ERROR_NULL;
+
+ memset(ctx, 0, sizeof(*ctx));
+
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+ for (i = 0; i < HASH_SEGS; i++) {
+ mh_sha1_segs_digests[0][i] = MH_SHA1_H0;
+ mh_sha1_segs_digests[1][i] = MH_SHA1_H1;
+ mh_sha1_segs_digests[2][i] = MH_SHA1_H2;
+ mh_sha1_segs_digests[3][i] = MH_SHA1_H3;
+ mh_sha1_segs_digests[4][i] = MH_SHA1_H4;
+ }
+
+ murmur3_x64_128_hash = (uint64_t *) ctx->murmur3_x64_128_digest;
+ murmur3_x64_128_hash[0] = murmur_seed;
+ murmur3_x64_128_hash[1] = murmur_seed;
+
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+}
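+
+/*
+ * Typical call sequence (an illustrative sketch based on the interfaces in this
+ * file and in mh_sha1_murmur3_x64_128_base_aliases.c; buffer/len/seed are
+ * placeholder names):
+ *
+ *     struct mh_sha1_murmur3_x64_128_ctx ctx;
+ *     uint32_t sha1_digest[SHA1_DIGEST_WORDS];
+ *     uint32_t murmur_digest[MURMUR3_x64_128_DIGEST_WORDS];
+ *
+ *     mh_sha1_murmur3_x64_128_init(&ctx, seed);
+ *     mh_sha1_murmur3_x64_128_update(&ctx, buffer, len);   // repeat as data arrives
+ *     mh_sha1_murmur3_x64_128_finalize(&ctx, sha1_digest, murmur_digest);
+ */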
+
+void mh_sha1_murmur3_x64_128_block_base(const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t
+ murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks)
+{
+
+ mh_sha1_block_base(input_data, mh_sha1_digests, frame_buffer, num_blocks);
+
+ murmur3_x64_128_block(input_data,
+ num_blocks * MH_SHA1_BLOCK_SIZE / MUR_BLOCK_SIZE,
+ murmur3_x64_128_digests);
+
+ return;
+}
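+
+/*
+ * Sizing note (derived from the constants used above, not a statement from the
+ * original source): with MH_SHA1_BLOCK_SIZE = 1024 and MUR_BLOCK_SIZE = 16,
+ * each mh_sha1 block drives 64 murmur3_x64_128 block iterations over the same
+ * input.
+ */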
+
+#if (!defined(NOARCH)) && (defined(__i386__) || defined(__x86_64__) \
+ || defined( _M_X64) || defined(_M_IX86))
+/***************mh_sha1_murmur3_x64_128_update***********/
+// mh_sha1_murmur3_x64_128_update_sse.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_sse
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_sse
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+// mh_sha1_murmur3_x64_128_update_avx.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+// mh_sha1_murmur3_x64_128_update_avx2.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx2
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx2
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+/***************mh_sha1_murmur3_x64_128_finalize***********/
+// mh_sha1_murmur3_x64_128_finalize_sse.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_sse
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_sse
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+// mh_sha1_murmur3_x64_128_finalize_avx.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+// mh_sha1_murmur3_x64_128_finalize_avx2.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx2
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx2
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+/***************version info***********/
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// Version info
+struct slver mh_sha1_murmur3_x64_128_init_slver_00000251;
+struct slver mh_sha1_murmur3_x64_128_init_slver = { 0x0251, 0x00, 0x00 };
+
+// mh_sha1_murmur3_x64_128_update version info
+struct slver mh_sha1_murmur3_x64_128_update_sse_slver_00000254;
+struct slver mh_sha1_murmur3_x64_128_update_sse_slver = { 0x0254, 0x00, 0x00 };
+
+struct slver mh_sha1_murmur3_x64_128_update_avx_slver_02000256;
+struct slver mh_sha1_murmur3_x64_128_update_avx_slver = { 0x0256, 0x00, 0x02 };
+
+struct slver mh_sha1_murmur3_x64_128_update_avx2_slver_04000258;
+struct slver mh_sha1_murmur3_x64_128_update_avx2_slver = { 0x0258, 0x00, 0x04 };
+
+// mh_sha1_murmur3_x64_128_finalize version info
+struct slver mh_sha1_murmur3_x64_128_finalize_sse_slver_00000255;
+struct slver mh_sha1_murmur3_x64_128_finalize_sse_slver = { 0x0255, 0x00, 0x00 };
+
+struct slver mh_sha1_murmur3_x64_128_finalize_avx_slver_02000257;
+struct slver mh_sha1_murmur3_x64_128_finalize_avx_slver = { 0x0257, 0x00, 0x02 };
+
+struct slver mh_sha1_murmur3_x64_128_finalize_avx2_slver_04000259;
+struct slver mh_sha1_murmur3_x64_128_finalize_avx2_slver = { 0x0259, 0x00, 0x04 };
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c
new file mode 100644
index 000000000..fbef1ac13
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c
@@ -0,0 +1,67 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_murmur3_x64_128_internal.h"
+
+#ifdef HAVE_AS_KNOWS_AVX512
+
+/***************mh_sha1_murmur3_x64_128_update***********/
+// mh_sha1_murmur3_x64_128_update_avx512.c
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx512
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx512
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+/***************mh_sha1_murmur3_x64_128_finalize***********/
+// mh_sha1_murmur3_x64_128_finalize_avx512.c
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx512
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx512
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+/***************version info***********/
+
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+// mh_sha1_murmur3_x64_128_update version info
+struct slver mh_sha1_murmur3_x64_128_update_avx512_slver_0600025c;
+struct slver mh_sha1_murmur3_x64_128_update_avx512_slver = { 0x025c, 0x00, 0x06 };
+
+// mh_sha1_murmur3_x64_128_finalize version info
+struct slver mh_sha1_murmur3_x64_128_finalize_avx512_slver_0600025d;
+struct slver mh_sha1_murmur3_x64_128_finalize_avx512_slver = { 0x025d, 0x00, 0x06 };
+
+#endif // HAVE_AS_KNOWS_AVX512
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c
new file mode 100644
index 000000000..28f15086d
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c
@@ -0,0 +1,43 @@
+/**********************************************************************
+ Copyright(c) 2019 Arm Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Arm Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+#include "mh_sha1_murmur3_x64_128_internal.h"
+#include <string.h>
+int mh_sha1_murmur3_x64_128_update(struct mh_sha1_murmur3_x64_128_ctx *ctx, const void *buffer,
+ uint32_t len)
+{
+ return mh_sha1_murmur3_x64_128_update_base(ctx, buffer, len);
+
+}
+
+int mh_sha1_murmur3_x64_128_finalize(struct mh_sha1_murmur3_x64_128_ctx *ctx,
+ void *mh_sha1_digest, void *murmur3_x64_128_digest)
+{
+ return mh_sha1_murmur3_x64_128_finalize_base(ctx, mh_sha1_digest,
+ murmur3_x64_128_digest);
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm
new file mode 100644
index 000000000..4611494e0
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm
@@ -0,0 +1,706 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using AVX
+;;
+
+%include "reg_sizes.asm"
+
+[bits 64]
+default rel
+section .text
+
+;; Magic functions defined in FIPS 180-1
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF, %%regC,%%regD
+ vpand %%regF, %%regF,%%regB
+ vpxor %%regF, %%regF,%%regD
+%endmacro
+
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF,%%regD,%%regC
+ vpxor %%regF,%%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpor %%regF,%%regB,%%regC
+ vpand %%regT,%%regB,%%regC
+ vpand %%regF,%%regF,%%regD
+ vpor %%regF,%%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ vpsrld %%tmp, %%reg, (32-(%%imm))
+ vpslld %%reg, %%reg, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+; non-destructive
+; PROLD_nd reg, imm, tmp, src
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+ vpsrld %%tmp, %%src, (32-(%%imm))
+ vpslld %%reg, %%src, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ vpaddd %%regE, %%regE,[%%data + (%%memW * 16)]
+ PROLD_nd %%regT,5, %%regF,%%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE, %%regE,%%regF
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;; Insert murmur3's instructions into this macro.
+;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 256 bytes of input.
+;; So insert 1 murmur3 block into every 4 calls of SHA1_STEP_16_79.
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J
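+
+; For reference (a sketch added here, not in the original source): the scalar
+; murmur3 instructions interleaved below implement, per 16-byte block, roughly
+; the public MurmurHash3 x64_128 round, expressed with this file's names:
+;   k1 = load64(in);    k2 = load64(in + 8)
+;   k1 *= C1;  k1 = rotl64(k1, R1);  k1 *= C2;  h1 ^= k1
+;   h1 = rotl64(h1, R3);  h1 += h2;  h1 = h1*5 + N1
+;   k2 *= C2;  k2 = rotl64(k2, R2);  k2 *= C1;  h2 ^= k2
+;   h2 = rotl64(h2, R4);  h2 += h1;  h2 = h2*5 + N2
+;   in += 16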
+
+%macro SHA1_STEP_16_79_0 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+ vpsrld %%regF, W16, (32-1)
+ mov mur_data1, [mur_in_p]
+ mov mur_data2, [mur_in_p + 8]
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ imul mur_data1, mur_c1_r
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ imul mur_data2, mur_c2_r
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_1 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ rol mur_data1, R1
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ rol mur_data2, R2
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ imul mur_data1, mur_c2_r
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ imul mur_data2, mur_c1_r
+ PROLD %%regB,30, %%regT
+ add mur_in_p, 16
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_2 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ xor mur_hash1, mur_data1
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+ rol mur_hash1, R3
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ add mur_hash1, mur_hash2
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_3 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ xor mur_hash2, mur_data2
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+ rol mur_hash2, R4
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ add mur_hash2, mur_hash1
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ push rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbp
+ pop rbx
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ save_reg rbx, 10*16 + 6*8
+ save_reg rbp, 10*16 + 7*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbx, [rsp + 10*16 + 6*8]
+ mov rbp, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp7
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+;void mh_sha1_murmur3_x64_128_block_avx (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
+mk_global mh_sha1_murmur3_x64_128_block_avx, function, internal
+func(mh_sha1_murmur3_x64_128_block_avx)
+ endbranch
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 16 Bytes needed by avx
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ VMOVPS A, [mh_digests_p + I*64 + 16*0]
+ VMOVPS B, [mh_digests_p + I*64 + 16*1]
+ VMOVPS C, [mh_digests_p + I*64 + 16*2]
+ VMOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ vmovdqa [rsp + I*64 + 16*0], A
+ vmovdqa [rsp + I*64 + 16*1], B
+ vmovdqa [rsp + I*64 + 16*2], C
+ vmovdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+ ;init murmur variables
+	mov	mur_in_p, mh_in_p	;separate pointer: murmur3 and mh_sha1 advance through the input differently
+ ;load murmur hash digests and multiplier
+ mov mur_hash1, [mur_digest_p]
+ mov mur_hash2, [mur_digest_p + 8]
+ mov mur_c1_r, C1
+ mov mur_c2_r, C2
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*16]
+ VMOVPS T1,[mh_in_p + I*64+1*16]
+ VMOVPS T2,[mh_in_p + I*64+2*16]
+ VMOVPS T3,[mh_in_p + I*64+3*16]
+
+ vpshufb T0, F
+ vmovdqa [mh_data_p +(I)*16 +0*256],T0
+ vpshufb T1, F
+ vmovdqa [mh_data_p +(I)*16 +1*256],T1
+ vpshufb T2, F
+ vmovdqa [mh_data_p +(I)*16 +2*256],T2
+ vpshufb T3, F
+ vmovdqa [mh_data_p +(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vmovdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 20...39
+ vmovdqa K, [K20_39]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ vmovdqa K, [K40_59]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 60...79
+ vmovdqa K, [K60_79]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ vpaddd A, AA
+ vpaddd B, BB
+ vpaddd C, CC
+ vpaddd D, DD
+ vpaddd E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+ ;store murmur-hash digest
+ mov [mur_digest_p], mur_hash1
+ mov [mur_digest_p + 8], mur_hash2
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ vmovdqa A, [rsp + I*64 + 16*0]
+ vmovdqa B, [rsp + I*64 + 16*1]
+ vmovdqa C, [rsp + I*64 + 16*2]
+ vmovdqa D, [rsp + I*64 + 16*3]
+
+ VMOVPS [mh_digests_p + I*64 + 16*0], A
+ VMOVPS [mh_digests_p + I*64 + 16*1], B
+ VMOVPS [mh_digests_p + I*64 + 16*2], C
+ VMOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm
new file mode 100644
index 000000000..3fb440bf1
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm
@@ -0,0 +1,653 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using AVX2
+;;
+
+%include "reg_sizes.asm"
+
+[bits 64]
+default rel
+section .text
+
+;; Magic functions defined in FIPS 180-1
+;;
+;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF, %%regC,%%regD
+ vpand %%regF, %%regF,%%regB
+ vpxor %%regF, %%regF,%%regD
+%endmacro
+
+;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpxor %%regF,%%regD,%%regC
+ vpxor %%regF,%%regF,%%regB
+%endmacro
+
+
+
+;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ vpor %%regF,%%regB,%%regC
+ vpand %%regT,%%regB,%%regC
+ vpand %%regF,%%regF,%%regD
+ vpor %%regF,%%regF,%%regT
+%endmacro
+
+;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ vpsrld %%tmp, %%reg, (32-%%imm)
+ vpslld %%reg, %%reg, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+; PROLD reg, imm, tmp
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+ vpsrld %%tmp, %%src, (32-%%imm)
+ vpslld %%reg, %%src, %%imm
+ vpor %%reg, %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ vpaddd %%regE, %%regE,[%%data + (%%memW * 32)]
+ PROLD_nd %%regT,5, %%regF,%%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE, %%regE,%%regF
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32]
+
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF
+ vpaddd %%regE, %%regE,%%regF
+
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;; Insert murmur3's instructions into this macro.
+;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 512 bytes of input.
+;; So insert 1 murmur3 block into every 2 calls of SHA1_STEP_16_79.
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J
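+
+; Split of the murmur3 work (an observation of the scheduling below, not a
+; statement from the original authors): SHA1_STEP_16_79_0 loads the 16-byte
+; murmur3 block and performs the k1/k2 multiplies and rotates;
+; SHA1_STEP_16_79_1 folds them into h1/h2 and advances mur_in_p by 16.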
+
+%macro SHA1_STEP_16_79_0 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32]
+ vpxor W16, W16, W14
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32]
+ mov mur_data1, [mur_in_p]
+ mov mur_data2, [mur_in_p + 8]
+
+ vpsrld %%regF, W16, (32-1)
+ imul mur_data1, mur_c1_r
+ vpslld W16, W16, 1
+ vpor %%regF, %%regF, W16
+ imul mur_data2, mur_c2_r
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF
+ rol mur_data1, R1
+ vpaddd %%regE, %%regE,%%regF
+ rol mur_data2, R2
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ imul mur_data1, mur_c2_r
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ imul mur_data2, mur_c1_r
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+
+%macro SHA1_STEP_16_79_1 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ vpaddd %%regE, %%regE,%%immCNT
+ xor mur_hash1, mur_data1
+ vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32]
+ rol mur_hash1, R3
+ vpxor W16, W16, W14
+ add mur_hash1, mur_hash2
+ vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32]
+ vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32]
+ lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+ vpsrld %%regF, W16, (32-1)
+ vpslld W16, W16, 1
+ xor mur_hash2, mur_data2
+ vpor %%regF, %%regF, W16
+ rol mur_hash2, R4
+ ROTATE_W
+
+ vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF
+ vpaddd %%regE, %%regE,%%regF
+ add mur_hash2, mur_hash1
+ PROLD_nd %%regT,5, %%regF, %%regA
+ vpaddd %%regE, %%regE,%%regT
+ lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ add mur_in_p, 16
+ vpaddd %%regE,%%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ push rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbp
+ pop rbx
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ save_reg rbx, 10*16 + 6*8
+ save_reg rbp, 10*16 + 7*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbx, [rsp + 10*16 + 6*8]
+ mov rbp, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp7
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+
+%define pref tmp8
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+%define A ymm0
+%define B ymm1
+%define C ymm2
+%define D ymm3
+%define E ymm4
+
+%define F ymm5
+%define T0 ymm6
+%define T1 ymm7
+%define T2 ymm8
+%define T3 ymm9
+%define T4 ymm10
+%define T5 ymm11
+%define T6 ymm12
+%define T7 ymm13
+%define T8 ymm14
+%define T9 ymm15
+
+%define AA ymm5
+%define BB ymm6
+%define CC ymm7
+%define DD ymm8
+%define EE ymm9
+%define TMP ymm10
+%define FUN ymm11
+%define K ymm12
+%define W14 ymm13
+%define W15 ymm14
+%define W16 ymm15
+
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+;void mh_sha1_murmur3_x64_128_block_avx2 (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
+mk_global mh_sha1_murmur3_x64_128_block_avx2, function, internal
+func(mh_sha1_murmur3_x64_128_block_avx2)
+ endbranch
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 32 Bytes needed by avx2
+ and rsp, ~0x1F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 2
+ VMOVPS A, [mh_digests_p + I*32*5 + 32*0]
+ VMOVPS B, [mh_digests_p + I*32*5 + 32*1]
+ VMOVPS C, [mh_digests_p + I*32*5 + 32*2]
+ VMOVPS D, [mh_digests_p + I*32*5 + 32*3]
+ VMOVPS E, [mh_digests_p + I*32*5 + 32*4]
+
+ vmovdqa [rsp + I*32*5 + 32*0], A
+ vmovdqa [rsp + I*32*5 + 32*1], B
+ vmovdqa [rsp + I*32*5 + 32*2], C
+ vmovdqa [rsp + I*32*5 + 32*3], D
+ vmovdqa [rsp + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+
+ ;init murmur variables
+	mov	mur_in_p, mh_in_p	;separate pointer: murmur3 and mh_sha1 advance through the input differently
+ ;load murmur hash digests and multiplier
+ mov mur_hash1, [mur_digest_p]
+ mov mur_hash2, [mur_digest_p + 8]
+ mov mur_c1_r, C1
+ mov mur_c2_r, C2
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*8_SEGS*5*2
+%assign I 0
+%rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*32]
+ VMOVPS T1,[mh_in_p + I*64+1*32]
+
+ vpshufb T0, T0, F
+ vmovdqa [mh_data_p +I*32+0*512],T0
+ vpshufb T1, T1, F
+ vmovdqa [mh_data_p +I*32+1*512],T1
+%assign I (I+1)
+%endrep
+
+ mov mh_segs, 0 ;start from the first 8 segments
+	mov	pref, 1024		;avoid prefetching the same data repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vmovdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+%assign I (I+1)
+%endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 32]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 32]
+ %rep 4
+ %assign J (I % 2)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 20...39
+ vmovdqa K, [K20_39]
+ %rep 20
+ %assign J (I % 2)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+;; do rounds 40...59
+ vmovdqa K, [K40_59]
+ %rep 20
+ %assign J (I % 2)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*2]
+ PREFETCH_X [mh_in_p + pref+128*3]
+;; do rounds 60...79
+ vmovdqa K, [K60_79]
+ %rep 20
+ %assign J (I % 2)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ vpaddd A,A, AA
+ vpaddd B,B, BB
+ vpaddd C,C, CC
+ vpaddd D,D, DD
+ vpaddd E,E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 512
+
+ add mh_data_p, 512
+ add mh_segs, 32
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+ ;store murmur-hash digest
+ mov [mur_digest_p], mur_hash1
+ mov [mur_digest_p + 8], mur_hash2
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 2
+ vmovdqa A, [rsp + I*32*5 + 32*0]
+ vmovdqa B, [rsp + I*32*5 + 32*1]
+ vmovdqa C, [rsp + I*32*5 + 32*2]
+ vmovdqa D, [rsp + I*32*5 + 32*3]
+ vmovdqa E, [rsp + I*32*5 + 32*4]
+
+ VMOVPS [mh_digests_p + I*32*5 + 32*0], A
+ VMOVPS [mh_digests_p + I*32*5 + 32*1], B
+ VMOVPS [mh_digests_p + I*32*5 + 32*2], C
+ VMOVPS [mh_digests_p + I*32*5 + 32*3], D
+ VMOVPS [mh_digests_p + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=32
+
+align 32
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+ dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm
new file mode 100644
index 000000000..a5c157078
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm
@@ -0,0 +1,504 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16-segment mh_sha1 stitched with murmur3_x64_128 using AVX-512
+;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+[bits 64]
+default rel
+section .text
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovdqu64
+;SIMD variables definition
+%define A zmm0
+%define B zmm1
+%define C zmm2
+%define D zmm3
+%define E zmm4
+%define HH0 zmm5
+%define HH1 zmm6
+%define HH2 zmm7
+%define HH3 zmm8
+%define HH4 zmm9
+%define KT zmm10
+%define XTMP0 zmm11
+%define XTMP1 zmm12
+%define SHUF_MASK zmm13
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;use the extra 16 ZMM registers to hold the byte-swapped input data
+%define W0 zmm16
+%define W1 zmm17
+%define W2 zmm18
+%define W3 zmm19
+%define W4 zmm20
+%define W5 zmm21
+%define W6 zmm22
+%define W7 zmm23
+%define W8 zmm24
+%define W9 zmm25
+%define W10 zmm26
+%define W11 zmm27
+%define W12 zmm28
+%define W13 zmm29
+%define W14 zmm30
+%define W15 zmm31
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;macros definition
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro PROCESS_LOOP 2
+%define %%WT %1
+%define %%F_IMMED %2
+
+ ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
+ ; E=D, D=C, C=ROTL_30(B), B=A, A=T
+
+ ; Ft
+ ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D)
+ ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D
+ ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)
+
+ vmovdqa32 XTMP1, B ; Copy B
+ vpaddd E, E, %%WT ; E = E + Wt
+ vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D)
+ vpaddd E, E, KT ; E = E + Wt + Kt
+ vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A)
+ vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt
+ vprold B, B, 30 ; B = ROTL_30(B)
+ vpaddd E, E, XTMP0 ; E = T
+
+ ROTATE_ARGS
+%endmacro
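For reference, a scalar sketch of the round each PROCESS_LOOP invocation performs per 32-bit lane (Ft and Kt stand for the range-dependent function and constant selected via %%F_IMMED and KT; rotl32 is a hypothetical helper, not part of this file):

    static inline uint32_t rotl32(uint32_t x, int n) { return (x << n) | (x >> (32 - n)); }

    /* one SHA-1 round: T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt, then rotate the state */
    uint32_t t = rotl32(a, 5) + Ft(b, c, d) + e + Kt + Wt;   /* a..e mirror A..E above */
    e = d; d = c; c = rotl32(b, 30); b = a; a = t;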
+
+;; Murmur's instructions are interleaved into this macro.
+;; Every block_loop of mh_sha1 calls PROCESS_LOOP 80 times and
+;; MSG_SCHED_ROUND_16_79 64 times and processes 1024 bytes.
+;; So one 16-byte murmur block is consumed per stitched round (64 rounds per 1KB block).
+%macro PROCESS_LOOP_MUR 2
+%define %%WT %1
+%define %%F_IMMED %2
+
+ ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
+ ; E=D, D=C, C=ROTL_30(B), B=A, A=T
+
+ ; Ft
+ ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D)
+ ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D
+ ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)
+
+ mov mur_data1, [mur_in_p]
+ mov mur_data2, [mur_in_p + 8]
+ vmovdqa32 XTMP1, B ; Copy B
+ imul mur_data1, mur_c1_r
+ imul mur_data2, mur_c2_r
+ vpaddd E, E, %%WT ; E = E + Wt
+ rol mur_data1, R1
+ rol mur_data2, R2
+ vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D)
+ imul mur_data1, mur_c2_r
+ imul mur_data2, mur_c1_r
+ vpaddd E, E, KT ; E = E + Wt + Kt
+ xor mur_hash1, mur_data1
+ add mur_in_p, 16
+ vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A)
+ rol mur_hash1, R3
+ vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt
+ add mur_hash1, mur_hash2
+ vprold B, B, 30 ; B = ROTL_30(B)
+ lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+ vpaddd E, E, XTMP0 ; E = T
+ xor mur_hash2, mur_data2
+
+ ROTATE_ARGS
+%endmacro
+
+%macro MSG_SCHED_ROUND_16_79_MUR 4
+%define %%WT %1
+%define %%WTp2 %2
+%define %%WTp8 %3
+%define %%WTp13 %4
+ ; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ Wt-16)
+ ; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt)
+ vpternlogd %%WT, %%WTp2, %%WTp8, 0x96
+ rol mur_hash2, R4
+ vpxord %%WT, %%WT, %%WTp13
+ add mur_hash2, mur_hash1
+ lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+ vprold %%WT, %%WT, 1
+%endmacro
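Taken together, the scalar instructions stitched into PROCESS_LOOP_MUR and MSG_SCHED_ROUND_16_79_MUR perform one murmur3_x64_128 block step per stitched round; a C sketch using the R1-R4, N1, N2, C1 and C2 constants defined further down (load64 and rotl64 are hypothetical helpers):

    static inline uint64_t rotl64(uint64_t x, int n) { return (x << n) | (x >> (64 - n)); }

    uint64_t k1 = load64(mur_in_p), k2 = load64(mur_in_p + 8);  /* mov mur_data1/mur_data2 */
    k1 *= C1;  k1 = rotl64(k1, R1);  k1 *= C2;  h1 ^= k1;       /* imul, rol, imul, xor    */
    h1 = rotl64(h1, R3);  h1 += h2;  h1 = h1 * 5 + N1;          /* rol, add, lea           */
    k2 *= C2;  k2 = rotl64(k2, R2);  k2 *= C1;  h2 ^= k2;
    h2 = rotl64(h2, R4);  h2 += h1;  h2 = h2 * 5 + N2;
    mur_in_p += 16;                                             /* advance one murmur block */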
+
+%define APPEND(a,b) a %+ b
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ push rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbp
+ pop rbx
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ ; remove unwind info macros
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp + 0*16], xmm6
+ movdqa [rsp + 1*16], xmm7
+ movdqa [rsp + 2*16], xmm8
+ movdqa [rsp + 3*16], xmm9
+ movdqa [rsp + 4*16], xmm10
+ movdqa [rsp + 5*16], xmm11
+ movdqa [rsp + 6*16], xmm12
+ movdqa [rsp + 7*16], xmm13
+ movdqa [rsp + 8*16], xmm14
+ movdqa [rsp + 9*16], xmm15
+ mov [rsp + 10*16 + 0*8], r12
+ mov [rsp + 10*16 + 1*8], r13
+ mov [rsp + 10*16 + 2*8], r14
+ mov [rsp + 10*16 + 3*8], r15
+ mov [rsp + 10*16 + 4*8], rdi
+ mov [rsp + 10*16 + 5*8], rsi
+ mov [rsp + 10*16 + 6*8], rbx
+ mov [rsp + 10*16 + 7*8], rbp
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbx, [rsp + 10*16 + 6*8]
+ mov rbp, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp7
+
+%define pref tmp8
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep |
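In C terms, the layout sketched above is the mh_sha1_digests argument: one digest word across all 16 segments per 64-byte row, and each row is loaded into a single ZMM register (a sketch, assuming HASH_SEGS == 16):

    uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS];   /* [5][16]                              */
    /* HH0 <- digests[0][0..15], ..., HH4 <- digests[4][0..15]: one 64-byte row per VMOVPS load */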
+
+[bits 64]
+section .text
+align 32
+
+;void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including the segment digests (uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer, which is used to store the big-endian data
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
+global mh_sha1_murmur3_x64_128_block_avx512
+func(mh_sha1_murmur3_x64_128_block_avx512)
+ endbranch
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; align rsp to 64 Bytes needed by avx512
+ and rsp, ~0x3f
+
+ ; copy segs_digests into registers.
+ VMOVPS HH0, [mh_digests_p + 64*0]
+ VMOVPS HH1, [mh_digests_p + 64*1]
+ VMOVPS HH2, [mh_digests_p + 64*2]
+ VMOVPS HH3, [mh_digests_p + 64*3]
+ VMOVPS HH4, [mh_digests_p + 64*4]
+ ;a mask used to transform to big-endian data
+ vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK]
+
+ ;init murmur variables
+	mov	mur_in_p, mh_in_p	;murmur and mh_sha1 step through the input at different strides
+ ;load murmur hash digests and multiplier
+ mov mur_hash1, [mur_digest_p]
+ mov mur_hash2, [mur_digest_p + 8]
+ mov mur_c1_r, C1
+ mov mur_c2_r, C2
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ ;using extra 16 ZMM registers instead of stack
+%assign I 0
+%rep 8
+%assign J (I+1)
+ VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64]
+ VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64]
+
+ vpshufb APPEND(W,I), APPEND(W,I), SHUF_MASK
+ vpshufb APPEND(W,J), APPEND(W,J), SHUF_MASK
+%assign I (I+2)
+%endrep
+
+ vmovdqa64 A, HH0
+ vmovdqa64 B, HH1
+ vmovdqa64 C, HH2
+ vmovdqa64 D, HH3
+ vmovdqa64 E, HH4
+
+ vmovdqa32 KT, [K00_19]
+%assign I 0xCA
+%assign J 0
+%assign K 2
+%assign L 8
+%assign M 13
+%assign N 0
+%rep 80
+ %if N < 64 ; stitching 64 times
+ PROCESS_LOOP_MUR APPEND(W,J), I
+ MSG_SCHED_ROUND_16_79_MUR APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M)
+ %else ; 64 <= N < 80, without stitching
+ PROCESS_LOOP APPEND(W,J), I
+ %endif
+ %if N = 19
+ vmovdqa32 KT, [K20_39]
+ %assign I 0x96
+ %elif N = 39
+ vmovdqa32 KT, [K40_59]
+ %assign I 0xE8
+ %elif N = 59
+ vmovdqa32 KT, [K60_79]
+ %assign I 0x96
+ %endif
+ %if N % 20 = 19
+ PREFETCH_X [mh_in_p + 1024+128*(N / 20)]
+ PREFETCH_X [mh_in_p + 1024+128*(N / 20 +1)]
+ %endif
+%assign J ((J+1)% 16)
+%assign K ((K+1)% 16)
+%assign L ((L+1)% 16)
+%assign M ((M+1)% 16)
+%assign N (N+1)
+%endrep
+
+ ; Add old digest
+ vpaddd HH0,A, HH0
+ vpaddd HH1,B, HH1
+ vpaddd HH2,C, HH2
+ vpaddd HH3,D, HH3
+ vpaddd HH4,E, HH4
+
+ add mh_in_p, 1024
+ sub loops, 1
+ jne .block_loop
+
+ ;store murmur-hash digest
+ mov [mur_digest_p], mur_hash1
+ mov [mur_digest_p + 8], mur_hash2
+
+ ; copy segs_digests to mh_digests_p
+ VMOVPS [mh_digests_p + 64*0], HH0
+ VMOVPS [mh_digests_p + 64*1], HH1
+ VMOVPS [mh_digests_p + 64*2], HH2
+ VMOVPS [mh_digests_p + 64*3], HH3
+ VMOVPS [mh_digests_p + 64*4], HH4
+
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+
+section .data align=64
+
+align 64
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+
+K20_39: dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+
+K40_59: dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+
+K60_79: dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_sha1_murmur3_x64_128_block_avx512
+no_sha1_murmur3_x64_128_block_avx512:
+%endif
+%endif ; HAVE_AS_KNOWS_AVX512
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm
new file mode 100644
index 000000000..ebd1b8b49
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm
@@ -0,0 +1,702 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16-segment mh_sha1 stitched with murmur3_x64_128 using SSE
+;;
+
+%include "reg_sizes.asm"
+
+[bits 64]
+default rel
+section .text
+
+;; Magic functions defined in FIPS 180-1
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regC
+ pxor %%regF,%%regD
+ pand %%regF,%%regB
+ pxor %%regF,%%regD
+%endmacro
+
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regD
+ pxor %%regF,%%regC
+ pxor %%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ movdqa %%regF,%%regB
+ movdqa %%regT,%%regB
+ por %%regF,%%regC
+ pand %%regT,%%regC
+ pand %%regF,%%regD
+ por %%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+ MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
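What the four MAGIC macros compute per 32-bit lane, as a scalar sketch (the MAGIC_F2 form is equivalent to the usual Maj function):

    /* rounds  0-19 */ f0 = d ^ (b & (c ^ d));        /* == Ch(b,c,d)  */
    /* rounds 20-39 */ f1 = b ^ c ^ d;                 /* Parity        */
    /* rounds 40-59 */ f2 = ((b | c) & d) | (b & c);   /* == Maj(b,c,d) */
    /* rounds 60-79 */ f3 = b ^ c ^ d;                 /* Parity        */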
+
+; PROLD reg, imm, tmp
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+ movdqa %%tmp, %%reg
+ pslld %%reg, %%imm
+ psrld %%tmp, (32-%%imm)
+ por %%reg, %%tmp
+%endmacro
+
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ paddd %%regE,[%%data + (%%memW * 16)]
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ movdqa %%regF, W16
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+;; Murmur's instructions are interleaved into these macros.
+;; Every segs_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 256 bytes.
+;; So one 16-byte murmur block is spread across every 4 SHA1_STEP_16_79 calls.
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J
+
+%macro SHA1_STEP_16_79_0 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ movdqa %%regF, W16
+ mov mur_data1, [mur_in_p]
+ mov mur_data2, [mur_in_p + 8]
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ imul mur_data1, mur_c1_r
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ imul mur_data2, mur_c2_r
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_1 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ rol mur_data1, R1
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ movdqa %%regF, W16
+ pslld W16, 1
+ rol mur_data2, R2
+ psrld %%regF, (32-1)
+ por %%regF, W16
+
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ imul mur_data1, mur_c2_r
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ imul mur_data2, mur_c1_r
+ PROLD %%regB,30, %%regT
+ add mur_in_p, 16
+ paddd %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_2 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ xor mur_hash1, mur_data1
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ rol mur_hash1, R3
+ movdqa %%regF, W16
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ add mur_hash1, mur_hash2
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ paddd %%regE,%%regF
+%endmacro
+
+%macro SHA1_STEP_16_79_3 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+ paddd %%regE,%%immCNT
+ movdqa W14, [%%data + ((%%memW - 14) & 15) * 16]
+ xor mur_hash2, mur_data2
+ pxor W16, W14
+ pxor W16, [%%data + ((%%memW - 8) & 15) * 16]
+ pxor W16, [%%data + ((%%memW - 3) & 15) * 16]
+ rol mur_hash2, R4
+ movdqa %%regF, W16
+ pslld W16, 1
+ psrld %%regF, (32-1)
+ por %%regF, W16
+
+ ROTATE_W
+
+ movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF
+ add mur_hash2, mur_hash1
+ paddd %%regE,%%regF
+ movdqa %%regT,%%regA
+ PROLD %%regT,5, %%regF
+ paddd %%regE,%%regT
+ %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+ PROLD %%regB,30, %%regT
+ lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+ paddd %%regE,%%regF
+%endmacro
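Sub-steps _0 through _3 together consume exactly one 16-byte murmur block, so the byte counts stay matched: 64 SHA1_STEP_16_79 calls per segs_loop / 4 = 16 murmur blocks = 256 bytes, the same as the 4 segments x 64 bytes of SHA-1 data handled per segs_loop, and four segs_loop passes per block_loop cover the full 1024-byte block on both sides.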
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ push rbx
+ push rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop rbp
+ pop rbx
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+ alloc_stack stack_size
+ save_xmm128 xmm6, 0*16
+ save_xmm128 xmm7, 1*16
+ save_xmm128 xmm8, 2*16
+ save_xmm128 xmm9, 3*16
+ save_xmm128 xmm10, 4*16
+ save_xmm128 xmm11, 5*16
+ save_xmm128 xmm12, 6*16
+ save_xmm128 xmm13, 7*16
+ save_xmm128 xmm14, 8*16
+ save_xmm128 xmm15, 9*16
+ save_reg r12, 10*16 + 0*8
+ save_reg r13, 10*16 + 1*8
+ save_reg r14, 10*16 + 2*8
+ save_reg r15, 10*16 + 3*8
+ save_reg rdi, 10*16 + 4*8
+ save_reg rsi, 10*16 + 5*8
+ save_reg rbx, 10*16 + 6*8
+ save_reg rbp, 10*16 + 7*8
+ end_prolog
+ mov arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ mov rbx, [rsp + 10*16 + 6*8]
+ mov rbp, [rsp + 10*16 + 7*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp7
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define MOVPS movups
+
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+;void mh_sha1_murmur3_x64_128_block_sse (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, including the segment digests (uint32_t digests[16][5])
+; arg 2 pointer to aligned_frame_buffer, which is used to store the big-endian data
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
+mk_global mh_sha1_murmur3_x64_128_block_sse, function, internal
+func(mh_sha1_murmur3_x64_128_block_sse)
+ endbranch
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 16 Bytes needed by sse
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ MOVPS A, [mh_digests_p + I*64 + 16*0]
+ MOVPS B, [mh_digests_p + I*64 + 16*1]
+ MOVPS C, [mh_digests_p + I*64 + 16*2]
+ MOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ movdqa [rsp + I*64 + 16*0], A
+ movdqa [rsp + I*64 + 16*1], B
+ movdqa [rsp + I*64 + 16*2], C
+ movdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+ ;init murmur variables
+	mov	mur_in_p, mh_in_p	;murmur and mh_sha1 step through the input at different strides
+ ;load murmur hash digests and multiplier
+ mov mur_hash1, [mur_digest_p]
+ mov mur_hash2, [mur_digest_p + 8]
+ mov mur_c1_r, C1
+ mov mur_c2_r, C2
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ movdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ MOVPS T0,[mh_in_p+I*64+0*16]
+ MOVPS T1,[mh_in_p+I*64+1*16]
+ MOVPS T2,[mh_in_p+I*64+2*16]
+ MOVPS T3,[mh_in_p+I*64+3*16]
+
+ pshufb T0, F
+ movdqa [mh_data_p+(I)*16 +0*256],T0
+ pshufb T1, F
+ movdqa [mh_data_p+(I)*16 +1*256],T1
+ pshufb T2, F
+ movdqa [mh_data_p+(I)*16 +2*256],T2
+ pshufb T3, F
+ movdqa [mh_data_p+(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+ .segs_loop:
+ ;; Initialize digests
+ movdqa A, [rsp + 0*64 + mh_segs]
+ movdqa B, [rsp + 1*64 + mh_segs]
+ movdqa C, [rsp + 2*64 + mh_segs]
+ movdqa D, [rsp + 3*64 + mh_segs]
+ movdqa E, [rsp + 4*64 + mh_segs]
+
+ movdqa AA, A
+ movdqa BB, B
+ movdqa CC, C
+ movdqa DD, D
+ movdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ movdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ movdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ movdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 20...39
+ movdqa K, [K20_39]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ movdqa K, [K40_59]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 60...79
+ movdqa K, [K60_79]
+ %rep 20
+ %assign J (I % 4)
+ SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ paddd A, AA
+ paddd B, BB
+ paddd C, CC
+ paddd D, DD
+ paddd E, EE
+
+ ; write out digests
+ movdqa [rsp + 0*64 + mh_segs], A
+ movdqa [rsp + 1*64 + mh_segs], B
+ movdqa [rsp + 2*64 + mh_segs], C
+ movdqa [rsp + 3*64 + mh_segs], D
+ movdqa [rsp + 4*64 + mh_segs], E
+
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop
+
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+ ;store murmur-hash digest
+ mov [mur_digest_p], mur_hash1
+ mov [mur_digest_p + 8], mur_hash2
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ movdqa A, [rsp + I*64 + 16*0]
+ movdqa B, [rsp + I*64 + 16*1]
+ movdqa C, [rsp + I*64 + 16*2]
+ movdqa D, [rsp + I*64 + 16*3]
+
+ MOVPS [mh_digests_p + I*64 + 16*0], A
+ MOVPS [mh_digests_p + I*64 + 16*1], B
+ MOVPS [mh_digests_p + I*64 + 16*2], C
+ MOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c
new file mode 100644
index 000000000..4d09abf1d
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c
@@ -0,0 +1,102 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef FINALIZE_FUNCTION
+#include <stdlib.h> // For NULL
+#include "mh_sha1_murmur3_x64_128_internal.h"
+
+#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_base
+#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_base
+#define FINALIZE_FUNCTION_SLVER
+#endif
+
+#define MURMUR_BLOCK_FUNCTION murmur3_x64_128_block
+#define MURMUR_TAIL_FUNCTION murmur3_x64_128_tail
+
+int FINALIZE_FUNCTION(struct mh_sha1_murmur3_x64_128_ctx *ctx, void *mh_sha1_digest,
+ void *murmur3_x64_128_digest)
+{
+ uint8_t *partial_block_buffer, *murmur_tail_data;
+ uint64_t partial_block_len, total_len;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint8_t *aligned_frame_buffer;
+
+ if (ctx == NULL)
+ return MH_SHA1_MURMUR3_CTX_ERROR_NULL;
+
+ total_len = ctx->total_length;
+ partial_block_len = total_len % MH_SHA1_BLOCK_SIZE;
+ partial_block_buffer = ctx->partial_block_buffer;
+
+	// Calculate murmur3 first, because mh_sha1 will modify the partial_block_buffer
+	// (partial_block_buffer = n murmur3 blocks plus 1 murmur3 tail)
+ murmur_tail_data =
+ partial_block_buffer + partial_block_len - partial_block_len % MUR_BLOCK_SIZE;
+ MURMUR_BLOCK_FUNCTION(partial_block_buffer, partial_block_len / MUR_BLOCK_SIZE,
+ ctx->murmur3_x64_128_digest);
+ MURMUR_TAIL_FUNCTION(murmur_tail_data, total_len, ctx->murmur3_x64_128_digest);
+
+ /* mh_sha1 final */
+ aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+
+ MH_SHA1_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha1_segs_digests,
+ aligned_frame_buffer, ctx->mh_sha1_digest);
+
+ /* Output the digests of murmur3 and mh_sha1 */
+ if (mh_sha1_digest != NULL) {
+ ((uint32_t *) mh_sha1_digest)[0] = ctx->mh_sha1_digest[0];
+ ((uint32_t *) mh_sha1_digest)[1] = ctx->mh_sha1_digest[1];
+ ((uint32_t *) mh_sha1_digest)[2] = ctx->mh_sha1_digest[2];
+ ((uint32_t *) mh_sha1_digest)[3] = ctx->mh_sha1_digest[3];
+ ((uint32_t *) mh_sha1_digest)[4] = ctx->mh_sha1_digest[4];
+ }
+
+ if (murmur3_x64_128_digest != NULL) {
+ ((uint32_t *) murmur3_x64_128_digest)[0] = ctx->murmur3_x64_128_digest[0];
+ ((uint32_t *) murmur3_x64_128_digest)[1] = ctx->murmur3_x64_128_digest[1];
+ ((uint32_t *) murmur3_x64_128_digest)[2] = ctx->murmur3_x64_128_digest[2];
+ ((uint32_t *) murmur3_x64_128_digest)[3] = ctx->murmur3_x64_128_digest[3];
+ }
+
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+}
+
+#ifdef FINALIZE_FUNCTION_SLVER
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+ // Version info
+struct slver mh_sha1_murmur3_x64_128_finalize_base_slver_0000025b;
+struct slver mh_sha1_murmur3_x64_128_finalize_base_slver = { 0x025b, 0x00, 0x00 };
+#endif
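A minimal usage sketch of the surrounding public API (the function names and digest-size constants are taken from the update/finalize/test sources in this series; buffer, len and seed are placeholders):

    struct mh_sha1_murmur3_x64_128_ctx ctx;
    uint32_t sha1_digest[SHA1_DIGEST_WORDS];
    uint32_t murmur_digest[MURMUR3_x64_128_DIGEST_WORDS];

    mh_sha1_murmur3_x64_128_init(&ctx, seed);           /* seed feeds the murmur3 state only */
    mh_sha1_murmur3_x64_128_update(&ctx, buffer, len);  /* may be called repeatedly          */
    mh_sha1_murmur3_x64_128_finalize(&ctx, sha1_digest, murmur_digest);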
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
new file mode 100644
index 000000000..e77837347
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
@@ -0,0 +1,202 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MH_SHA1_MURMUR3_X64_128_INTERNAL_H_
+#define _MH_SHA1_MURMUR3_X64_128_INTERNAL_H_
+
+/**
+ * @file mh_sha1_murmur3_x64_128_internal.h
+ * @brief mh_sha1_murmur3_x64_128 internal function prototypes and macros
+ *
+ * Interface for mh_sha1_murmur3_x64_128 internal functions
+ *
+ */
+#include <stdint.h>
+#include "mh_sha1_internal.h"
+#include "mh_sha1_murmur3_x64_128.h"
+
+#ifdef __cplusplus
+ extern "C" {
+#endif
+
+#ifdef _MSC_VER
+# define inline __inline
+#endif
+
+ /*******************************************************************
+ * mh_sha1_murmur3_x64_128 API internal function prototypes
+ * Multiple versions of Update and Finalize functions are supplied which use
+ * multiple versions of block and tail process subfunctions.
+ ******************************************************************/
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+	// Each function needs an individual C or ASM file because they are performance-critical.
+	// They will be called by mh_sha1_murmur3_x64_128_update_XXX.
+ void mh_sha1_murmur3_x64_128_block (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_base (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires SSE
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_sse (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires AVX
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_avx (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires AVX2
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_avx2 (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+
+ /**
+ * @brief Calculate blocks whose size is MH_SHA1_BLOCK_SIZE*N
+ *
+ * @requires AVX512
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests 16 segments digests
+ * @param frame_buffer Pointer to buffer which is a temp working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+ void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data,
+ uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+ uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t num_blocks);
+ /*******************************************************************
+ * murmur hash API
+ ******************************************************************/
+
+ /**
+ * @brief Calculate the murmur digest of blocks whose size is 16*N.
+ * @param input_data Pointer to input data to be processed
+ * @param num_blocks The number of 16-byte blocks.
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @returns none
+ *
+ */
+ void murmur3_x64_128_block(const uint8_t * input_data, uint32_t num_blocks,
+ uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]);
+
+ /**
+ * @brief Process the tail, which is less than 16 bytes.
+ * @param tail_buffer Pointer to input data to be processed
+ * @param total_len The total length of the input_data
+ * @param digests Murmur3 digest
+ * @returns none
+ *
+ */
+ void murmur3_x64_128_tail(const uint8_t * tail_buffer, uint32_t total_len,
+ uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
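As a rough sketch of how the two murmur helpers above compose over a whole buffer (the wrapper name is hypothetical; digests is assumed to already hold the seeded running state, and MUR_BLOCK_SIZE is the 16-byte murmur block size used by the finalize path):

    void murmur3_x64_128_whole_buffer(const uint8_t *buf, uint32_t len,
                                      uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS])
    {
            uint32_t num_blocks = len / MUR_BLOCK_SIZE;                  /* full 16-byte blocks */
            murmur3_x64_128_block(buf, num_blocks, digests);             /* bulk blocks         */
            murmur3_x64_128_tail(buf + num_blocks * MUR_BLOCK_SIZE,      /* remaining <16 bytes */
                                 len, digests);
    }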
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
new file mode 100644
index 000000000..6f9e54cdd
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
@@ -0,0 +1,76 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%include "reg_sizes.asm"
+%include "multibinary.asm"
+
+%ifidn __OUTPUT_FORMAT__, elf32
+ [bits 32]
+%else
+ default rel
+ [bits 64]
+
+ extern mh_sha1_murmur3_x64_128_update_sse
+ extern mh_sha1_murmur3_x64_128_update_avx
+ extern mh_sha1_murmur3_x64_128_update_avx2
+ extern mh_sha1_murmur3_x64_128_finalize_sse
+ extern mh_sha1_murmur3_x64_128_finalize_avx
+ extern mh_sha1_murmur3_x64_128_finalize_avx2
+
+ %ifdef HAVE_AS_KNOWS_AVX512
+ extern mh_sha1_murmur3_x64_128_update_avx512
+ extern mh_sha1_murmur3_x64_128_finalize_avx512
+ %endif
+
+%endif
+
+extern mh_sha1_murmur3_x64_128_update_base
+extern mh_sha1_murmur3_x64_128_finalize_base
+
+mbin_interface mh_sha1_murmur3_x64_128_update
+mbin_interface mh_sha1_murmur3_x64_128_finalize
+
+%ifidn __OUTPUT_FORMAT__, elf64
+
+ %ifdef HAVE_AS_KNOWS_AVX512
+ mbin_dispatch_init6 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base, mh_sha1_murmur3_x64_128_update_sse, mh_sha1_murmur3_x64_128_update_avx, mh_sha1_murmur3_x64_128_update_avx2, mh_sha1_murmur3_x64_128_update_avx512
+ mbin_dispatch_init6 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base, mh_sha1_murmur3_x64_128_finalize_sse, mh_sha1_murmur3_x64_128_finalize_avx, mh_sha1_murmur3_x64_128_finalize_avx2, mh_sha1_murmur3_x64_128_finalize_avx512
+ %else
+ mbin_dispatch_init5 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base, mh_sha1_murmur3_x64_128_update_sse, mh_sha1_murmur3_x64_128_update_avx, mh_sha1_murmur3_x64_128_update_avx2
+ mbin_dispatch_init5 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base, mh_sha1_murmur3_x64_128_finalize_sse, mh_sha1_murmur3_x64_128_finalize_avx, mh_sha1_murmur3_x64_128_finalize_avx2
+ %endif
+
+%else
+ mbin_dispatch_init2 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base
+ mbin_dispatch_init2 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base
+%endif
+
+;;; func core, ver, snum
+slversion mh_sha1_murmur3_x64_128_update, 00, 02, 0252
+slversion mh_sha1_murmur3_x64_128_finalize, 00, 02, 0253
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c
new file mode 100644
index 000000000..77ebb964e
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c
@@ -0,0 +1,206 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1_murmur3_x64_128.h"
+#include "test.h"
+
+//#define CACHED_TEST
+#ifdef CACHED_TEST
+// Loop many times over same
+# define TEST_LEN 16*1024
+# define TEST_LOOPS 20000
+# define TEST_TYPE_STR "_warm"
+#else
+// Uncached test. Pull from large mem base.
+# define TEST_LEN 32*1024*1024
+# define TEST_LOOPS 100
+# define TEST_TYPE_STR "_cold"
+#endif
+
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+#define TEST_MEM TEST_LEN
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \
+	    printf("The stitch function failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+
+extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * murmur3_x64_128_digest);
+
+void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest)
+{
+ mh_sha1_ref(buffer, len, mh_sha1_digest);
+ murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest);
+
+ return;
+}
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS],
+ uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+ int murmur3_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) {
+ if (murmur3_test[i] != murmur3_base[i])
+ murmur3_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+ printf("ref: ");
+ dump((char *)hash_test, 20);
+ }
+ if (murmur3_fail) {
+ printf("murmur3 fail test\n");
+ printf("base: ");
+ dump((char *)murmur3_base, 16);
+ printf("ref: ");
+ dump((char *)murmur3_test, 16);
+ }
+
+ return mh_sha1_fail + murmur3_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int i, fail = 0;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS];
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS],
+ murmur3_base[MURMUR3_x64_128_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL;
+ struct perf start, stop;
+
+ printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n");
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+		printf("malloc failed, test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ // mh_sha1_murmur3 base version
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base);
+ perf_start(&start);
+ for (i = 0; i < TEST_LOOPS / 10; i++) {
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base,
+ murmur3_base);
+ }
+ perf_stop(&stop);
+ printf("mh_sha1_murmur3_x64_128_base" TEST_TYPE_STR ": ");
+ perf_print(stop, start, (long long)TEST_MEM * i);
+
+ //Update feature test
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ perf_start(&start);
+ for (i = 0; i < TEST_LOOPS; i++) {
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+ }
+ perf_stop(&stop);
+ printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": ");
+ perf_print(stop, start, (long long)TEST_MEM * i);
+
+ // Check results
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", TEST_LEN);
+ return -1;
+ }
+
+ if (fail)
+ printf("Test failed function test%d\n", fail);
+ else
+ printf("Pass func check\n");
+
+ return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c
new file mode 100644
index 000000000..22ab6d1f9
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c
@@ -0,0 +1,248 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1_murmur3_x64_128.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE)
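+// FUNC_TOKEN pastes the (possibly empty) MH_SHA1_FUNC_TYPE suffix onto the update/finalize
+// names, so this one test source can be rebuilt against different implementations
+// (for instance a _base-suffixed variant).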
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \
+ printf("The stitch function is failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+
+extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * murmur3_x64_128_digest);
+
+void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest)
+{
+ mh_sha1_ref(buffer, len, mh_sha1_digest);
+ murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest);
+
+ return;
+}
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS],
+ uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+ int murmur3_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) {
+ if (murmur3_test[i] != murmur3_base[i])
+ murmur3_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+ printf("ref: ");
+ dump((char *)hash_test, 20);
+ }
+ if (murmur3_fail) {
+ printf("murmur3 fail test\n");
+ printf("base: ");
+ dump((char *)murmur3_base, 16);
+ printf("ref: ");
+ dump((char *)murmur3_test, 16);
+ }
+
+ return mh_sha1_fail + murmur3_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS];
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS],
+ murmur3_base[MURMUR3_x64_128_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ int size, offset;
+ struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL;
+
+ printf(" " xstr(TEST_UPDATE_FUNCTION) "_test:");
+
+ srand(TEST_SEED);
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("fail rand1 test\n");
+ return -1;
+ } else
+ putchar('.');
+
+ // Test various size messages
+ for (size = TEST_LEN; size >= 0; size--) {
+
+ // Fill with rand data
+ rand_buffer(buff, size);
+
+ mh_sha1_murmur3_x64_128_base(buff, size, TEST_SEED, hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size=%d\n", size);
+ return -1;
+ }
+
+ if ((size & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various buffer offsets and sizes
+ printf("offset tests");
+ for (size = TEST_LEN - 256; size > 256; size -= 11) {
+ for (offset = 0; offset < 256; offset++) {
+ mh_sha1_murmur3_x64_128_base(buff + offset, size, TEST_SEED,
+ hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail =
+ compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size=%d offset=%d\n", size, offset);
+ return -1;
+ }
+
+ }
+ if ((size & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Run efence tests
+ printf("efence tests");
+ for (size = TEST_SIZE; size > 0; size--) {
+ offset = TEST_LEN - size;
+ mh_sha1_murmur3_x64_128_base(buff + offset, size, TEST_SEED,
+ hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size=%d offset=%d\n", size, offset);
+ return -1;
+ }
+
+ if ((size & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail");
+
+ return fail;
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c
new file mode 100644
index 000000000..0e7a3970d
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c
@@ -0,0 +1,107 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef UPDATE_FUNCTION
+#include "mh_sha1_murmur3_x64_128_internal.h"
+#include <string.h>
+
+#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_base
+#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_base
+#define UPDATE_FUNCTION_SLVER
+#endif
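+
+/*
+ * When an arch-specific wrapper defines UPDATE_FUNCTION and BLOCK_FUNCTION
+ * before including this file, the generic update body below is reused with
+ * that wrapper's block routine instead of the base one.
+ */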
+
+int UPDATE_FUNCTION(struct mh_sha1_murmur3_x64_128_ctx *ctx, const void *buffer, uint32_t len)
+{
+
+ uint8_t *partial_block_buffer;
+ uint64_t partial_block_len;
+ uint64_t num_blocks;
+ uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+ uint8_t *aligned_frame_buffer;
+ uint32_t *murmur3_x64_128_digest;
+ const uint8_t *input_data = (const uint8_t *)buffer;
+
+ if (ctx == NULL)
+ return MH_SHA1_MURMUR3_CTX_ERROR_NULL;
+
+ if (len == 0)
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+
+ partial_block_len = ctx->total_length % MH_SHA1_BLOCK_SIZE;
+ partial_block_buffer = ctx->partial_block_buffer;
+ aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+ mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+ murmur3_x64_128_digest = ctx->murmur3_x64_128_digest;
+
+ ctx->total_length += len;
+ // Not enough input data for a full mh_sha1 block
+ if (len + partial_block_len < MH_SHA1_BLOCK_SIZE) {
+ memcpy(partial_block_buffer + partial_block_len, input_data, len);
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+ }
+ // mh_sha1 calculation for the previous partial block
+ if (partial_block_len != 0) {
+ memcpy(partial_block_buffer + partial_block_len, input_data,
+ MH_SHA1_BLOCK_SIZE - partial_block_len);
+ // process the single completed block
+ BLOCK_FUNCTION(partial_block_buffer, mh_sha1_segs_digests,
+ aligned_frame_buffer, murmur3_x64_128_digest, 1);
+ input_data += MH_SHA1_BLOCK_SIZE - partial_block_len;
+ len -= MH_SHA1_BLOCK_SIZE - partial_block_len;
+ memset(partial_block_buffer, 0, MH_SHA1_BLOCK_SIZE);
+ }
+ // Calculate mh_sha1 for the current blocks
+ num_blocks = len / MH_SHA1_BLOCK_SIZE;
+ if (num_blocks > 0) {
+ // process num_blocks full blocks
+ BLOCK_FUNCTION(input_data, mh_sha1_segs_digests, aligned_frame_buffer,
+ murmur3_x64_128_digest, num_blocks);
+ len -= num_blocks * MH_SHA1_BLOCK_SIZE;
+ input_data += num_blocks * MH_SHA1_BLOCK_SIZE;
+ }
+ // Store the partial block
+ if (len != 0) {
+ memcpy(partial_block_buffer, input_data, len);
+ }
+
+ return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+
+}
+
+#ifdef UPDATE_FUNCTION_SLVER
+struct slver {
+ uint16_t snum;
+ uint8_t ver;
+ uint8_t core;
+};
+
+ // Version info
+struct slver mh_sha1_murmur3_x64_128_update_base_slver_0000025a;
+struct slver mh_sha1_murmur3_x64_128_update_base_slver = { 0x025a, 0x00, 0x00 };
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c
new file mode 100644
index 000000000..6ae888e21
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c
@@ -0,0 +1,272 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "mh_sha1_murmur3_x64_128.h"
+
+#define TEST_LEN 16*1024
+#define TEST_SIZE 8*1024
+#define TEST_MEM TEST_LEN
+#ifndef TEST_SEED
+# define TEST_SEED 0x1234
+#endif
+
+#define str(s) #s
+#define xstr(s) str(s)
+
+#define _FUNC_TOKEN(func, type) func##type
+#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)
+
+#ifndef MH_SHA1_FUNC_TYPE
+#define MH_SHA1_FUNC_TYPE
+#endif
+
+#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE)
+#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE)
+
+#define CHECK_RETURN(state) do{ \
+ if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \
+ printf("The stitch function is failed.\n"); \
+ return 1; \
+ } \
+ }while(0)
+
+extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
+
+extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * murmur3_x64_128_digest);
+
+void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest)
+{
+ mh_sha1_ref(buffer, len, mh_sha1_digest);
+ murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest);
+
+ return;
+}
+
+// Generates pseudo-random data
+void rand_buffer(uint8_t * buf, long buffer_size)
+{
+ long i;
+ for (i = 0; i < buffer_size; i++)
+ buf[i] = rand();
+}
+
+void dump(char *buf, int len)
+{
+ int i;
+ for (i = 0; i < len;) {
+ printf(" %2x", 0xff & buf[i++]);
+ if (i % 20 == 0)
+ printf("\n");
+ }
+ if (i % 20 != 0)
+ printf("\n");
+}
+
+int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS],
+ uint32_t hash_test[SHA1_DIGEST_WORDS],
+ uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS],
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ int i;
+ int mh_sha1_fail = 0;
+ int murmur3_fail = 0;
+
+ for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
+ if (hash_test[i] != hash_base[i])
+ mh_sha1_fail++;
+ }
+
+ for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) {
+ if (murmur3_test[i] != murmur3_base[i])
+ murmur3_fail++;
+ }
+
+ if (mh_sha1_fail) {
+ printf("mh_sha1 fail test\n");
+ printf("base: ");
+ dump((char *)hash_base, 20);
+ printf("ref: ");
+ dump((char *)hash_test, 20);
+ }
+ if (murmur3_fail) {
+ printf("murmur3 fail test\n");
+ printf("base: ");
+ dump((char *)murmur3_base, 16);
+ printf("ref: ");
+ dump((char *)murmur3_test, 16);
+ }
+
+ return mh_sha1_fail + murmur3_fail;
+}
+
+int main(int argc, char *argv[])
+{
+ int fail = 0, i;
+ uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS];
+ uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS],
+ murmur3_base[MURMUR3_x64_128_DIGEST_WORDS];
+ uint8_t *buff = NULL;
+ int update_count;
+ int size1, size2, offset, addr_offset;
+ struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL;
+ uint8_t *mem_addr = NULL;
+
+ printf(" " xstr(TEST_UPDATE_FUNCTION) "_test:");
+
+ srand(TEST_SEED);
+
+ buff = malloc(TEST_LEN);
+ update_ctx = malloc(sizeof(*update_ctx));
+
+ if (buff == NULL || update_ctx == NULL) {
+ printf("malloc failed test aborted\n");
+ return -1;
+ }
+ // Rand test1
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base);
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("fail rand1 test\n");
+ return -1;
+ } else
+ putchar('.');
+
+ // Test various message sizes using two updates.
+ printf("\n various message sizes, two-update tests");
+ for (size1 = TEST_LEN; size1 >= 0; size1--) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base,
+ murmur3_base);
+
+ // subsequent update
+ size2 = TEST_LEN - size1; // size2 is the remaining length, generally different from size1
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size1=%d\n", size1);
+ return -1;
+ }
+
+ if ((size2 & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various update counts
+ printf("\n various update count tests");
+ for (update_count = 1; update_count <= TEST_LEN; update_count++) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base,
+ murmur3_base);
+
+ // subsequent update
+ size1 = TEST_LEN / update_count;
+ size2 = TEST_LEN - size1 * (update_count - 1); // size2 is the final chunk, generally different from size1
+
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ for (i = 1, offset = 0; i < update_count; i++) {
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1));
+ offset += size1;
+ }
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail size1=%d\n", size1);
+ return -1;
+ }
+
+ if ((size2 & 0xff) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ // Test various start addresses of ctx.
+ printf("\n various start addresses of ctx test");
+ free(update_ctx);
+ mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10);
+ if (mem_addr == NULL) {
+ printf("malloc failed, test aborted\n");
+ return -1;
+ }
+ for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) {
+
+ // Fill with rand data
+ rand_buffer(buff, TEST_LEN);
+
+ mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base,
+ murmur3_base);
+
+ // an unaligned offset
+ update_ctx = (struct mh_sha1_murmur3_x64_128_ctx *)(mem_addr + addr_offset);
+ CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED));
+ CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
+ CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test));
+
+ fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test);
+
+ if (fail) {
+ printf("Fail addr_offset=%d\n", addr_offset);
+ return -1;
+ }
+
+ if ((addr_offset & 0xf) == 0) {
+ putchar('.');
+ fflush(0);
+ }
+ }
+
+ printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail");
+
+ return fail;
+
+}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c
new file mode 100644
index 000000000..f5fe30a83
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c
@@ -0,0 +1,85 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <stdlib.h> // for NULL
+#include "murmur3_x64_128_internal.c"
+
+#if (__GNUC__ >= 11)
+# define OPT_FIX2 __attribute__ ((optimize(1)))
+#else
+# define OPT_FIX2
+#endif
+
+/*******************************************************************
+ * Single API which can calculate murmur3
+ ******************************************************************/
+/**
+ * @brief Get the digest of murmur3_x64_128 through a single API.
+ *
+ * Uses murmur3_x64_128_block and murmur3_x64_128_tail; intended for
+ * testing the murmur3_x64_128 digest.
+ *
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param murmur_seed Seed as an initial digest of murmur3
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns none
+ *
+ */
+void OPT_FIX2 murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+ uint32_t * murmur3_x64_128_digest)
+{
+ uint64_t *murmur3_x64_128_hash;
+ uint32_t murmur3_x64_128_hash_dword[4];
+ uint8_t *tail_buffer;
+ const uint8_t *input_data = (const uint8_t *)buffer;
+
+ // Initialize the murmur3 hash state from the seed
+ murmur3_x64_128_hash = (uint64_t *) murmur3_x64_128_hash_dword;
+ murmur3_x64_128_hash[0] = murmur_seed;
+ murmur3_x64_128_hash[1] = murmur_seed;
+
+ // Process all full 16-byte blocks
+ murmur3_x64_128_block((uint8_t *) input_data, len / MUR_BLOCK_SIZE,
+ murmur3_x64_128_hash_dword);
+
+ // Process the tail bytes and finalize
+ tail_buffer = (uint8_t *) input_data + len - len % MUR_BLOCK_SIZE;
+ murmur3_x64_128_tail(tail_buffer, len, murmur3_x64_128_hash_dword);
+
+ // output the digests
+ if (murmur3_x64_128_digest != NULL) {
+ murmur3_x64_128_digest[0] = murmur3_x64_128_hash_dword[0];
+ murmur3_x64_128_digest[1] = murmur3_x64_128_hash_dword[1];
+ murmur3_x64_128_digest[2] = murmur3_x64_128_hash_dword[2];
+ murmur3_x64_128_digest[3] = murmur3_x64_128_hash_dword[3];
+ }
+
+ return;
+}
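+
+/*
+ * Minimal usage sketch (hypothetical caller):
+ *
+ *	uint32_t digest[MURMUR3_x64_128_DIGEST_WORDS];
+ *	murmur3_x64_128(buf, buf_len, 0x1234, digest); // digest now holds the 128-bit hash
+ */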
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
new file mode 100644
index 000000000..67eabd0c4
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
@@ -0,0 +1,138 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "mh_sha1_murmur3_x64_128_internal.h"
+#include <stdlib.h> // for NULL
+
+/* murmur3_x64_128 constants */
+// Rotation amounts (in bits) for the circular rotates
+#define MUR_SH1 31
+#define MUR_SH2 33
+#define MUR_SH3 27
+#define MUR_SH4 31
+#define MUR_SH5 33
+
+#define MUR_MUL 5
+#define MUR_ADD1 0x52dce729
+#define MUR_ADD2 0x38495ab5
+
+#define MUR_CON1 0x87c37b91114253d5LLU
+#define MUR_CON2 0x4cf5ad432745937fLLU
+
+#define MUR_FMUL1 0xff51afd7ed558ccdLLU
+#define MUR_FMUL2 0xc4ceb9fe1a85ec53LLU
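+// MUR_CON1/MUR_CON2 and MUR_FMUL1/MUR_FMUL2 are the standard MurmurHash3 x64_128
+// block-mix and fmix64 finalization constants.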
+
+/* murmur3_x64_128 inline functions */
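+// blockmix64: per-lane key mix - multiply by conA, rotate left by 'shift' bits, multiply by conB.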
+static inline uint64_t blockmix64(uint64_t data, uint64_t conA, uint64_t conB, uint64_t shift)
+{
+ data *= conA;
+ data = (data << shift) | (data >> (64 - shift));
+ data *= conB;
+ return data;
+}
+
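+// hashmix64: fold a mixed data word into hashA, rotate, add hashB, then scale by MUR_MUL and add a constant.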
+static inline uint64_t hashmix64(uint64_t hashA, uint64_t hashB, uint64_t data, uint64_t add,
+ uint64_t shift)
+{
+ hashA ^= data;
+ hashA = (hashA << shift) | (hashA >> (64 - shift));
+ hashA += hashB;
+ hashA = hashA * MUR_MUL + add;
+ return hashA;
+}
+
+void murmur3_x64_128_block(const uint8_t * input_data, uint32_t num_blocks,
+ uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ uint64_t data1, data2;
+ const uint64_t *input_qword = (const uint64_t *) input_data;
+ uint64_t *hash = (uint64_t *) digests;
+ uint32_t i = 0;
+
+ while (i < num_blocks) {
+ data1 = input_qword[i * 2];
+ data2 = input_qword[i * 2 + 1];
+ data1 = blockmix64(data1, MUR_CON1, MUR_CON2, MUR_SH1);
+ data2 = blockmix64(data2, MUR_CON2, MUR_CON1, MUR_SH2);
+ hash[0] = hashmix64(hash[0], hash[1], data1, MUR_ADD1, MUR_SH3);
+ hash[1] = hashmix64(hash[1], hash[0], data2, MUR_ADD2, MUR_SH4);
+ i++;
+ }
+
+ return;
+}
+
+void murmur3_x64_128_tail(const uint8_t * tail_buffer, uint32_t total_len,
+ uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS])
+{
+ uint64_t data1, data2;
+ uint64_t *hash = (uint64_t *) digests;
+ uint64_t tail_len = total_len % 16;
+ const uint8_t *tail = tail_buffer;
+
+ union {
+ uint64_t hash[2];
+ uint8_t hashB[16];
+ } hashU;
+
+ // Copy the tail bytes into a zero-padded 16-byte block
+ hashU.hash[0] = hashU.hash[1] = 0;
+
+ while (tail_len-- > 0)
+ hashU.hashB[tail_len] = tail[tail_len];
+
+ data1 = hashU.hash[0];
+ data2 = hashU.hash[1];
+
+ data1 = blockmix64(data1, MUR_CON1, MUR_CON2, MUR_SH1);
+ data2 = blockmix64(data2, MUR_CON2, MUR_CON1, MUR_SH2);
+
+ hash[0] ^= total_len ^ data1;
+ hash[1] ^= total_len ^ data2;
+
+ hash[0] += hash[1];
+ hash[1] += hash[0];
+
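+ // Finalization: MurmurHash3's fmix64 xor-shift/multiply mix, applied to each 64-bit half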
+ hash[0] ^= hash[0] >> MUR_SH5;
+ hash[0] *= MUR_FMUL1;
+ hash[0] ^= hash[0] >> MUR_SH5;
+ hash[0] *= MUR_FMUL2;
+ hash[0] ^= hash[0] >> MUR_SH5;
+
+ hash[1] ^= hash[1] >> MUR_SH5;
+ hash[1] *= MUR_FMUL1;
+ hash[1] ^= hash[1] >> MUR_SH5;
+ hash[1] *= MUR_FMUL2;
+ hash[1] ^= hash[1] >> MUR_SH5;
+
+ hash[0] += hash[1];
+ hash[1] += hash[0];
+
+ return;
+}