diff options
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128')
16 files changed, 4273 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am new file mode 100644 index 000000000..98cd59efc --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am @@ -0,0 +1,71 @@ +######################################################################## +# Copyright(c) 2011-2016 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+########################################################################
+
+# Build fragment for the stitched mh_sha1 + murmur3_x64_128 unit.
+# It only appends to variables (AM_CFLAGS, lsrc, other_src, extern_hdrs,
+# unit_tests, perf_tests) that are not defined in this fragment.
+
+#Requires unit mh_sha1
+AM_CFLAGS += -I mh_sha1
+
+# Murmur3 block routine used by the stitched code.
+lsrc_murmur = mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
+
+# Stitched sources: the C entry points, the update/finalize templates and the
+# per-ISA block kernels plus the multibinary dispatcher.
+lsrc_stitch = mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
+
+# AVX-512 sources are always listed here; the C file guards its own contents
+# with HAVE_AS_KNOWS_AVX512.
+lsrc_stitch += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm
+
+lsrc += $(lsrc_murmur) \
+		$(lsrc_stitch)
+
+# murmur3_x64_128.c is listed here rather than in lsrc; the test and perf
+# programs below link it explicitly through their *_LDADD lines.
+other_src += include/reg_sizes.asm \
+		include/multibinary.asm \
+		include/test.h \
+		mh_sha1/mh_sha1_internal.h \
+		mh_sha1_murmur3_x64_128/murmur3_x64_128.c \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
+
+extern_hdrs += include/mh_sha1_murmur3_x64_128.h
+
+unit_tests += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test \
+		mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test
+
+perf_tests += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf
+
+
+# Each test/perf program depends on, and links against, the mh_sha1 reference
+# object and the standalone murmur3 object in addition to libisal_crypto.la.
+mh_sha1_murmur3_x64_128_test: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_test_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
+
+mh_sha1_murmur3_x64_128_update_test: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_update_test_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
+
+mh_sha1_murmur3_x64_128_perf: mh_sha1_ref.o murmur3_x64_128.o
+mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_perf_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la
diff --git
a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c new file mode 100644 index 000000000..12cb3644d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c @@ -0,0 +1,151 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <string.h> +#include "mh_sha1_murmur3_x64_128_internal.h" + +int mh_sha1_murmur3_x64_128_init(struct mh_sha1_murmur3_x64_128_ctx *ctx, uint64_t murmur_seed) +{ + uint64_t *murmur3_x64_128_hash; + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint32_t i; + + if (ctx == NULL) + return MH_SHA1_MURMUR3_CTX_ERROR_NULL; + + memset(ctx, 0, sizeof(*ctx)); + + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + for (i = 0; i < HASH_SEGS; i++) { + mh_sha1_segs_digests[0][i] = MH_SHA1_H0; + mh_sha1_segs_digests[1][i] = MH_SHA1_H1; + mh_sha1_segs_digests[2][i] = MH_SHA1_H2; + mh_sha1_segs_digests[3][i] = MH_SHA1_H3; + mh_sha1_segs_digests[4][i] = MH_SHA1_H4; + } + + murmur3_x64_128_hash = (uint64_t *) ctx->murmur3_x64_128_digest; + murmur3_x64_128_hash[0] = murmur_seed; + murmur3_x64_128_hash[1] = murmur_seed; + + return MH_SHA1_MURMUR3_CTX_ERROR_NONE; +} + +void mh_sha1_murmur3_x64_128_block_base(const uint8_t * input_data, + uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], + uint32_t + murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t num_blocks) +{ + + mh_sha1_block_base(input_data, mh_sha1_digests, frame_buffer, num_blocks); + + murmur3_x64_128_block(input_data, + num_blocks * MH_SHA1_BLOCK_SIZE / MUR_BLOCK_SIZE, + murmur3_x64_128_digests); + + return; +} + +/***************mh_sha1_murmur3_x64_128_update***********/ +// mh_sha1_murmur3_x64_128_update_sse.c +#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_sse +#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_sse +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +// mh_sha1_murmur3_x64_128_update_avx.c +#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx +#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef 
UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +// mh_sha1_murmur3_x64_128_update_avx2.c +#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx2 +#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx2 +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +/***************mh_sha1_murmur3_x64_128_finalize***********/ +// mh_sha1_murmur3_x64_128_finalize_sse.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_sse +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_sse +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION + +// mh_sha1_murmur3_x64_128_finalize_avx.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION + +// mh_sha1_murmur3_x64_128_finalize_avx2.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx2 +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx2 +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION + +/***************version info***********/ + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// Version info +struct slver mh_sha1_murmur3_x64_128_init_slver_00000251; +struct slver mh_sha1_murmur3_x64_128_init_slver = { 0x0251, 0x00, 0x00 }; + +// mh_sha1_murmur3_x64_128_update version info +struct slver mh_sha1_murmur3_x64_128_update_sse_slver_00000254; +struct slver mh_sha1_murmur3_x64_128_update_sse_slver = { 0x0254, 0x00, 0x00 }; + +struct slver mh_sha1_murmur3_x64_128_update_avx_slver_02000256; +struct slver mh_sha1_murmur3_x64_128_update_avx_slver = { 0x0256, 0x00, 0x02 }; + +struct slver mh_sha1_murmur3_x64_128_update_avx2_slver_04000258; +struct slver mh_sha1_murmur3_x64_128_update_avx2_slver = { 0x0258, 0x00, 0x04 }; + +// mh_sha1_murmur3_x64_128_finalize version info +struct slver 
mh_sha1_murmur3_x64_128_finalize_sse_slver_00000255; +struct slver mh_sha1_murmur3_x64_128_finalize_sse_slver = { 0x0255, 0x00, 0x00 }; + +struct slver mh_sha1_murmur3_x64_128_finalize_avx_slver_02000257; +struct slver mh_sha1_murmur3_x64_128_finalize_avx_slver = { 0x0257, 0x00, 0x02 }; + +struct slver mh_sha1_murmur3_x64_128_finalize_avx2_slver_04000259; +struct slver mh_sha1_murmur3_x64_128_finalize_avx2_slver = { 0x0259, 0x00, 0x04 }; diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c new file mode 100644 index 000000000..e380a8795 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c @@ -0,0 +1,67 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include <string.h>
+#include "mh_sha1_murmur3_x64_128_internal.h"
+
+// Everything in this file is compiled only when HAVE_AS_KNOWS_AVX512 is
+// defined; otherwise the translation unit is empty apart from the includes.
+#ifdef HAVE_AS_KNOWS_AVX512
+
+/***************mh_sha1_murmur3_x64_128_update***********/
+// mh_sha1_murmur3_x64_128_update_avx512.c
+// Instantiate the shared update source under the AVX-512 names.  The
+// template is not visible here; presumably it is written in terms of
+// UPDATE_FUNCTION and BLOCK_FUNCTION.
+#define UPDATE_FUNCTION	mh_sha1_murmur3_x64_128_update_avx512
+#define BLOCK_FUNCTION	mh_sha1_murmur3_x64_128_block_avx512
+#include "mh_sha1_murmur3_x64_128_update_base.c"
+#undef UPDATE_FUNCTION
+#undef BLOCK_FUNCTION
+
+/***************mh_sha1_murmur3_x64_128_finalize***********/
+// mh_sha1_murmur3_x64_128_finalize_avx512.c
+// Same pattern for the finalize source, with the AVX-512 mh_sha1 tail routine.
+#define FINALIZE_FUNCTION	mh_sha1_murmur3_x64_128_finalize_avx512
+#define MH_SHA1_TAIL_FUNCTION	mh_sha1_tail_avx512
+#include "mh_sha1_murmur3_x64_128_finalize_base.c"
+#undef FINALIZE_FUNCTION
+#undef MH_SHA1_TAIL_FUNCTION
+
+/***************version info***********/
+
+// Version record; initialisers below are given in the order { snum, ver, core }.
+struct slver {
+	uint16_t snum;
+	uint8_t ver;
+	uint8_t core;
+};
+
+// mh_sha1_murmur3_x64_128_update version info
+// (the uninitialised twin's name ends in core/ver/snum as hex: 06 00 025c)
+struct slver mh_sha1_murmur3_x64_128_update_avx512_slver_0600025c;
+struct slver mh_sha1_murmur3_x64_128_update_avx512_slver = { 0x025c, 0x00, 0x06 };
+
+// mh_sha1_murmur3_x64_128_finalize version info
+struct slver mh_sha1_murmur3_x64_128_finalize_avx512_slver_0600025d;
+struct slver mh_sha1_murmur3_x64_128_finalize_avx512_slver = { 0x025d, 0x00, 0x06 };
+
+#endif // HAVE_AS_KNOWS_AVX512
diff --git
a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm new file mode 100644 index 000000000..ca35e6961 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm @@ -0,0 +1,702 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using AVX
+;;
+
+%include "reg_sizes.asm"
+default rel
+
+;; Magic functions defined in FIPS 180-1
+;;
+;; All four MAGIC_Fx macros take the same five arguments so that they can be
+;; passed interchangeably to the SHA1_STEP_* macros; the result is left in F.
+;;
+; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D)))
+; T is accepted but not used by this variant.
+%macro MAGIC_F0 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+	vpxor	%%regF, %%regC,%%regD
+	vpand	%%regF, %%regF,%%regB
+	vpxor	%%regF, %%regF,%%regD
+%endmacro
+
+; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D)
+; T is accepted but not used by this variant.
+%macro MAGIC_F1 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+	vpxor	%%regF,%%regD,%%regC
+	vpxor	%%regF,%%regF,%%regB
+%endmacro
+
+; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D))
+; Evaluated as ((B | C) & D) | (B & C); T is clobbered as scratch.
+%macro MAGIC_F2 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+	vpor	%%regF,%%regB,%%regC
+	vpand	%%regT,%%regB,%%regC
+	vpand	%%regF,%%regF,%%regD
+	vpor	%%regF,%%regF,%%regT
+%endmacro
+
+; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D)
+; Simply forwards to MAGIC_F1.
+%macro MAGIC_F3 5
+%define %%regF %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regT %5
+	MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
+%endmacro
+
+; PROLD reg, imm, tmp
+; Rotate every dword of reg left by imm bits, in place; tmp is clobbered.
+%macro PROLD 3
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+	vpsrld	%%tmp, %%reg, (32-(%%imm))
+	vpslld	%%reg, %%reg, %%imm
+	vpor	%%reg, %%reg, %%tmp
+%endmacro
+
+; non-destructive
+; PROLD_nd reg, imm, tmp, src
+; reg = src rotated left by imm bits per dword; src is left unchanged and
+; tmp is clobbered.
+%macro PROLD_nd 4
+%define %%reg %1
+%define %%imm %2
+%define %%tmp %3
+%define %%src %4
+	vpsrld	%%tmp, %%src, (32-(%%imm))
+	vpslld	%%reg, %%src, %%imm
+	vpor	%%reg, %%reg, %%tmp
+%endmacro
+
+; SHA1_STEP_00_15 A,B,C,D,E, T,F, memW, immCNT, MAGIC, data
+; One step using a message word read directly from the frame buffer:
+;   E += immCNT + [data + memW*16] + rol(A,5) + MAGIC(B,C,D);  B = rol(B,30)
+; F is first used as the scratch register of PROLD_nd and then receives the
+; MAGIC result; T holds rol(A,5) and is reused as scratch afterwards.
+%macro SHA1_STEP_00_15 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+	vpaddd	%%regE, %%regE,%%immCNT
+	vpaddd	%%regE, %%regE,[%%data + (%%memW * 16)]
+	PROLD_nd	%%regT,5, %%regF,%%regA
+	vpaddd	%%regE, %%regE,%%regT
+	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+	PROLD	%%regB,30, %%regT
+	vpaddd	%%regE, %%regE,%%regF
+%endmacro
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; SHA1_STEP_16_79 A,B,C,D,E, T,F, memW, immCNT, MAGIC, data
+; One step that first extends the message schedule.  On entry W16 and W15
+; hold the words 16 and 15 steps back; W14 is loaded here.  The new word
+;   rol(W16 ^ W14 ^ [data + ((memW-8)&15)*16] ^ [data + ((memW-3)&15)*16], 1)
+; is stored to slot (memW & 15) of the 16-entry window at data and added to
+; E, followed by the same A/B/E update as SHA1_STEP_00_15.
+; W14/W15/W16 and ROTATE_W are defined further down in this file; ROTATE_W
+; renames them at assembly time so the next step sees the right words.
+%macro SHA1_STEP_16_79 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+	vpaddd	%%regE, %%regE,%%immCNT
+
+	vmovdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
+	vpxor	W16, W16, W14
+	vpxor	W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+	vpxor	W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+	vpsrld	%%regF, W16, (32-1)
+	vpslld	W16, W16, 1
+	vpor	%%regF, %%regF, W16
+	ROTATE_W
+
+	vmovdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
+	vpaddd	%%regE, %%regE,%%regF
+
+	PROLD_nd	%%regT,5, %%regF, %%regA
+	vpaddd	%%regE, %%regE,%%regT
+	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+	PROLD	%%regB,30, %%regT
+	vpaddd	%%regE,%%regE,%%regF
+%endmacro
+
+;; Insert murmur's instructions into this macro.
+;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 256Byte.
+;; So insert 1 murmur block into every 4 SHA1_STEP_16_79.
+; SHA1_STEP_16_79(J) pastes J onto the name, selecting one of the four
+; stitched variants below.  Each variant contains the same vector
+; instructions as the plain SHA1_STEP_16_79 macro, with one quarter of a
+; murmur3 block (general-purpose register instructions) interleaved.
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J
+
+; Variant 0 - murmur part: load the next 16 input bytes into
+; mur_data1/mur_data2, then mur_data1 *= c1 and mur_data2 *= c2.
+%macro SHA1_STEP_16_79_0 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+	vpaddd	%%regE, %%regE,%%immCNT
+
+	vmovdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
+	vpxor	W16, W16, W14
+	vpxor	W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+	vpxor	W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+	vpsrld	%%regF, W16, (32-1)
+	mov	mur_data1, [mur_in_p]
+	mov	mur_data2, [mur_in_p + 8]
+	vpslld	W16, W16, 1
+	vpor	%%regF, %%regF, W16
+	ROTATE_W
+
+	vmovdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
+	imul	mur_data1, mur_c1_r
+	vpaddd	%%regE, %%regE,%%regF
+
+	PROLD_nd	%%regT,5, %%regF, %%regA
+	vpaddd	%%regE, %%regE,%%regT
+	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+	imul	mur_data2, mur_c2_r
+	PROLD	%%regB,30, %%regT
+	vpaddd	%%regE,%%regE,%%regF
+%endmacro
+
+; Variant 1 - murmur part: rotate mur_data1 left by R1 and mur_data2 left by
+; R2, then mur_data1 *= c2 and mur_data2 *= c1; advance mur_in_p by 16 bytes.
+%macro SHA1_STEP_16_79_1 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+	vpaddd	%%regE, %%regE,%%immCNT
+	rol	mur_data1, R1
+	vmovdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
+	vpxor	W16, W16, W14
+	vpxor	W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+	vpxor	W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+
+	vpsrld	%%regF, W16, (32-1)
+	vpslld	W16, W16, 1
+	rol	mur_data2, R2
+	vpor	%%regF, %%regF, W16
+	ROTATE_W
+
+	vmovdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
+	imul	mur_data1, mur_c2_r
+	vpaddd	%%regE, %%regE,%%regF
+
+	PROLD_nd	%%regT,5, %%regF, %%regA
+	vpaddd	%%regE, %%regE,%%regT
+	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+	imul	mur_data2, mur_c1_r
+	PROLD	%%regB,30, %%regT
+	add	mur_in_p, 16
+	vpaddd	%%regE,%%regE,%%regF
+%endmacro
+
+; Variant 2 - murmur part: mur_hash1 ^= mur_data1; rotate left by R3;
+; mur_hash1 += mur_hash2; mur_hash1 = mur_hash1*5 + N1 (done with one lea).
+%macro SHA1_STEP_16_79_2 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+	vpaddd	%%regE, %%regE,%%immCNT
+
+	vmovdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
+	xor	mur_hash1, mur_data1
+	vpxor	W16, W16, W14
+	vpxor	W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+	vpxor	W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+	rol	mur_hash1, R3
+	vpsrld	%%regF, W16, (32-1)
+	vpslld	W16, W16, 1
+	vpor	%%regF, %%regF, W16
+	ROTATE_W
+
+	vmovdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
+	add	mur_hash1, mur_hash2
+	vpaddd	%%regE, %%regE,%%regF
+
+	PROLD_nd	%%regT,5, %%regF, %%regA
+	lea	mur_hash1, [mur_hash1 + mur_hash1*4 + N1]
+	vpaddd	%%regE, %%regE,%%regT
+	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+	PROLD	%%regB,30, %%regT
+	vpaddd	%%regE,%%regE,%%regF
+%endmacro
+
+; Variant 3 - murmur part: mur_hash2 ^= mur_data2; rotate left by R4;
+; mur_hash2 += mur_hash1; mur_hash2 = mur_hash2*5 + N2 (done with one lea).
+%macro SHA1_STEP_16_79_3 11
+%define %%regA %1
+%define %%regB %2
+%define %%regC %3
+%define %%regD %4
+%define %%regE %5
+%define %%regT %6
+%define %%regF %7
+%define %%memW %8
+%define %%immCNT %9
+%define %%MAGIC %10
+%define %%data %11
+	vpaddd	%%regE, %%regE,%%immCNT
+
+	vmovdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
+	xor	mur_hash2, mur_data2
+	vpxor	W16, W16, W14
+	vpxor	W16, W16, [%%data + ((%%memW - 8) & 15) * 16]
+	vpxor	W16, W16, [%%data + ((%%memW - 3) & 15) * 16]
+	rol	mur_hash2, R4
+	vpsrld	%%regF, W16, (32-1)
+	vpslld	W16, W16, 1
+	vpor	%%regF, %%regF, W16
+	ROTATE_W
+
+	vmovdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
+	add	mur_hash2, mur_hash1
+	vpaddd	%%regE, %%regE,%%regF
+
+	PROLD_nd	%%regT,5, %%regF, %%regA
+	vpaddd	%%regE, %%regE,%%regT
+	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D)
+	PROLD	%%regB,30, %%regT
+	lea	mur_hash2, [mur_hash2 + mur_hash2*4 + N2]
+	vpaddd	%%regE,%%regE,%%regF
+%endmacro
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Calling-convention glue.  The elf64 branch and the fallback branch map
+; arg0..arg5, tmp1..tmp8 and "return" onto registers and provide
+; FUNC_SAVE / FUNC_RESTORE for the registers this file must preserve.
+; The function takes five arguments, so arg5 is free to use as scratch.
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8d
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ ; Push the six preserved registers; FUNC_RESTORE pops them in reverse order.
+ %macro FUNC_SAVE 0
+	push	r12
+	push	r13
+	push	r14
+	push	r15
+	push	rbx
+	push	rbp
+ %endmacro
+ %macro FUNC_RESTORE 0
+	pop	rbp
+	pop	rbx
+	pop	r15
+	pop	r14
+	pop	r13
+	pop	r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10d
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define tmp7 rbx ; must be saved and restored
+ %define tmp8 rbp ; must be saved and restored
+ %define return rax
+
+ ; Save area: ten xmm slots followed by the general-purpose slots.
+ %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
+ %define PS 8
+ ; arg(x): stack slot of argument x, addressed relative to rsp after
+ ; alloc_stack has run.
+ %define arg(x) [rsp + stack_size + PS + PS*x]
+ ; proc_frame, alloc_stack, save_xmm128, save_reg, end_prolog and
+ ; endproc_frame are not defined in this file; presumably they come from the
+ ; assembler or from reg_sizes.asm.
+ %define func(x) proc_frame x
+ %macro FUNC_SAVE 0
+	alloc_stack	stack_size
+	save_xmm128	xmm6, 0*16
+	save_xmm128	xmm7, 1*16
+	save_xmm128	xmm8, 2*16
+	save_xmm128	xmm9, 3*16
+	save_xmm128	xmm10, 4*16
+	save_xmm128	xmm11, 5*16
+	save_xmm128	xmm12, 6*16
+	save_xmm128	xmm13, 7*16
+	save_xmm128	xmm14, 8*16
+	save_xmm128	xmm15, 9*16
+	save_reg	r12, 10*16 + 0*8
+	save_reg	r13, 10*16 + 1*8
+	save_reg	r14, 10*16 + 2*8
+	save_reg	r15, 10*16 + 3*8
+	save_reg	rdi, 10*16 + 4*8
+	save_reg	rsi, 10*16 + 5*8
+	save_reg	rbx, 10*16 + 6*8
+	save_reg	rbp, 10*16 + 7*8
+	end_prolog
+	; the fifth argument arrives on the stack in this convention
+	mov	arg4, arg(4)
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+	movdqa	xmm6, [rsp + 0*16]
+	movdqa	xmm7, [rsp + 1*16]
+	movdqa	xmm8, [rsp + 2*16]
+	movdqa	xmm9, [rsp + 3*16]
+	movdqa	xmm10, [rsp + 4*16]
+	movdqa	xmm11, [rsp + 5*16]
+	movdqa	xmm12, [rsp + 6*16]
+	movdqa	xmm13, [rsp + 7*16]
+	movdqa	xmm14, [rsp + 8*16]
+	movdqa	xmm15, [rsp + 9*16]
+	mov	r12, [rsp + 10*16 + 0*8]
+	mov	r13, [rsp + 10*16 + 1*8]
+	mov	r14, [rsp + 10*16 + 2*8]
+	mov	r15, [rsp + 10*16 + 3*8]
+	mov	rdi, [rsp + 10*16 + 4*8]
+	mov	rsi, [rsp + 10*16 + 5*8]
+	mov	rbx, [rsp + 10*16 + 6*8]
+	mov	rbp, [rsp + 10*16 + 7*8]
+	add	rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg4
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables of murmur3
+%define mur_in_p tmp2
+%define mur_digest_p arg3
+%define mur_hash1 tmp3
+%define mur_hash2 tmp4
+%define mur_data1 tmp5
+%define mur_data2 return
+%define mur_c1_r tmp6
+%define mur_c2_r arg5
+; constants of murmur3_x64_128
+; R1..R4 are the rotate counts and N1/N2 the additive constants used by the
+; stitched step macros; C1/C2 are loaded into mur_c1_r/mur_c2_r.  M is not
+; referenced in the visible part of this file (the multiply by 5 is done as
+; x + x*4 inside lea).  QWORD() is not defined in this file.
+%define R1 31
+%define R2 33
+%define R3 27
+%define R4 31
+%define M 5
+%define N1 0x52dce729;DWORD
+%define N2 0x38495ab5;DWORD
+%define C1 QWORD(0x87c37b91114253d5)
+%define C2 QWORD(0x4cf5ad432745937f)
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp7
+%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovups
+
+; SHA-1 working variables, one dword lane per segment.
+%define A xmm0
+%define B xmm1
+%define C xmm2
+%define D xmm3
+%define E xmm4
+%define F xmm5 ; tmp
+%define G xmm6 ; tmp
+
+%define TMP G
+%define FUN F
+%define K xmm7
+
+; Copies of A..E taken before the 80 steps and added back afterwards.
+%define AA xmm8
+%define BB xmm9
+%define CC xmm10
+%define DD xmm11
+%define EE xmm12
+
+; T0..T5 alias xmm6..xmm11 (the same registers as G, K, AA..DD).
+%define T0 xmm6
+%define T1 xmm7
+%define T2 xmm8
+%define T3 xmm9
+%define T4 xmm10
+%define T5 xmm11
+
+; Assembly-time renaming: after ROTATE_ARGS the name A refers to the register
+; previously called E, B to the old A, C to the old B, and so on.
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%define W14 xmm13
+%define W15 xmm14
+%define W16 xmm15
+
+; Assembly-time renaming of the message-schedule registers: W16 takes over
+; the old W15, W15 the old W14, and W14 the old W16.
+%macro ROTATE_W 0
+%xdefine TMP_ W16
+%xdefine W16 W15
+%xdefine W15 W14
+%xdefine W14 TMP_
+%endm
+
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+align 32
+;void mh_sha1_murmur3_x64_128_block_avx (const uint8_t * input_data,
+; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+; uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, include segments digests(uint32_t digests[5][16])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
+; arg 3 pointer to murmur3 digest
+; arg 4 number of 1KB blocks
+;
+; Outline: copy the 5 x 64-byte segment digests to an aligned stack frame,
+; then for every 1024-byte block byte-swap the input into the frame buffer
+; and run the 80 SHA-1 steps four times (four segments per pass), with the
+; murmur3 block instructions stitched into steps 16..79.  Finally store the
+; murmur3 state and copy the segment digests back.
+global mh_sha1_murmur3_x64_128_block_avx:function internal
+func(mh_sha1_murmur3_x64_128_block_avx)
+	FUNC_SAVE
+	; save rsp
+	mov	RSP_SAVE, rsp
+
+	; NOTE(review): loops is a 32-bit register and jle is a signed
+	; comparison, so a count with bit 31 set also takes this early exit -
+	; confirm callers never pass such values.
+	cmp	loops, 0
+	jle	.return
+
+	; leave enough space to store segs_digests
+	sub	rsp, FRAMESZ
+	; align rsp to 16 Bytes needed by avx
+	and	rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+	VMOVPS	A, [mh_digests_p + I*64 + 16*0]
+	VMOVPS	B, [mh_digests_p + I*64 + 16*1]
+	VMOVPS	C, [mh_digests_p + I*64 + 16*2]
+	VMOVPS	D, [mh_digests_p + I*64 + 16*3]
+
+	vmovdqa	[rsp + I*64 + 16*0], A
+	vmovdqa	[rsp + I*64 + 16*1], B
+	vmovdqa	[rsp + I*64 + 16*2], C
+	vmovdqa	[rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+	;init murmur variables
+	mov	mur_in_p, mh_in_p ;different steps between murmur and mh_sha1
+	;load murmur hash digests and multiplier
+	mov	mur_hash1, [mur_digest_p]
+	mov	mur_hash2, [mur_digest_p + 8]
+	mov	mur_c1_r, C1
+	mov	mur_c2_r, C2
+
+.block_loop:
+	;transform to big-endian data and store on aligned_frame
+	vmovdqa	F, [PSHUFFLE_BYTE_FLIP_MASK]
+	;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+	; Each 64-byte input row I is split into four 16-byte pieces; piece k
+	; is byte-swapped and stored at row I of the k-th 256-byte group of
+	; the frame buffer.
+ %assign I 0
+ %rep 16
+	VMOVPS	T0,[mh_in_p + I*64+0*16]
+	VMOVPS	T1,[mh_in_p + I*64+1*16]
+	VMOVPS	T2,[mh_in_p + I*64+2*16]
+	VMOVPS	T3,[mh_in_p + I*64+3*16]
+
+	vpshufb	T0, F
+	vmovdqa	[mh_data_p +(I)*16 +0*256],T0
+	vpshufb	T1, F
+	vmovdqa	[mh_data_p +(I)*16 +1*256],T1
+	vpshufb	T2, F
+	vmovdqa	[mh_data_p +(I)*16 +2*256],T2
+	vpshufb	T3, F
+	vmovdqa	[mh_data_p +(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+	; mh_segs is the byte offset of the current group of four segments
+	; inside each 64-byte digest row; it takes the values 0, 16, 32, 48.
+	mov	mh_segs, 0 ;start from the first 4 segments
+ .segs_loop:
+	;; Initialize digests
+	vmovdqa	A, [rsp + 0*64 + mh_segs]
+	vmovdqa	B, [rsp + 1*64 + mh_segs]
+	vmovdqa	C, [rsp + 2*64 + mh_segs]
+	vmovdqa	D, [rsp + 3*64 + mh_segs]
+	vmovdqa	E, [rsp + 4*64 + mh_segs]
+
+	vmovdqa	AA, A
+	vmovdqa	BB, B
+	vmovdqa	CC, C
+	vmovdqa	DD, D
+	vmovdqa	EE, E
+;;
+;; perform 0-79 steps
+;;
+	vmovdqa	K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+	SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+	ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+	; prime the schedule registers with the words of rounds 0 and 1
+	vmovdqa	W16, [mh_data_p + ((16 - 16) & 15) * 16]
+	vmovdqa	W15, [mh_data_p + ((16 - 15) & 15) * 16]
+	; From here on J = I % 4 picks the stitched variant, so every four
+	; consecutive steps carry one complete murmur3 block.
+ %rep 4
+ %assign J (I % 4)
+	SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+	ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 20...39
+	vmovdqa	K, [K20_39]
+ %rep 20
+ %assign J (I % 4)
+	SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+	ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+	vmovdqa	K, [K40_59]
+ %rep 20
+ %assign J (I % 4)
+	SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+	ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 60...79
+	vmovdqa	K, [K60_79]
+ %rep 20
+ %assign J (I % 4)
+	SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+	ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+	; add the saved input state back in
+	vpaddd	A, AA
+	vpaddd	B, BB
+	vpaddd	C, CC
+	vpaddd	D, DD
+	vpaddd	E, EE
+
+	; write out digests
+	vmovdqa	[rsp + 0*64 + mh_segs], A
+	vmovdqa	[rsp + 1*64 + mh_segs], B
+	vmovdqa	[rsp + 2*64 + mh_segs], C
+	vmovdqa	[rsp + 3*64 + mh_segs], D
+	vmovdqa	[rsp + 4*64 + mh_segs], E
+
+	; next 256-byte group of the frame buffer / next four segments;
+	; loop while mh_segs < 64
+	add	mh_data_p, 256
+	add	mh_segs, 16
+	cmp	mh_segs, 64
+	jc	.segs_loop
+
+	; rewind the frame buffer pointer and step to the next input block
+	sub	mh_data_p, (1024)
+	add	mh_in_p, (1024)
+	sub	loops, 1
+	jne	.block_loop
+
+	;store murmur-hash digest
+	mov	[mur_digest_p], mur_hash1
+	mov	[mur_digest_p + 8], mur_hash2
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+	vmovdqa	A, [rsp + I*64 + 16*0]
+	vmovdqa	B, [rsp + I*64 + 16*1]
+	vmovdqa	C, [rsp + I*64 + 16*2]
+	vmovdqa	D, [rsp + I*64 + 16*3]
+
+	VMOVPS	[mh_digests_p + I*64 + 16*0], A
+	VMOVPS	[mh_digests_p + I*64 + 16*1], B
+	VMOVPS	[mh_digests_p + I*64 + 16*2], C
+	VMOVPS	[mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+	mov	rsp, RSP_SAVE ; restore rsp
+
+.return:
+	FUNC_RESTORE
+	ret
+
+endproc_frame
+
+section .data align=16
+ +align 16 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm new file mode 100644 index 000000000..c7e9a889b --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm @@ -0,0 +1,649 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX2 +;; + +%include "reg_sizes.asm" +default rel + +;; Magic functions defined in FIPS 180-1 +;; +;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF, %%regC,%%regD + vpand %%regF, %%regF,%%regB + vpxor %%regF, %%regF,%%regD +%endmacro + +;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF,%%regD,%%regC + vpxor %%regF,%%regF,%%regB +%endmacro + + + +;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpor %%regF,%%regB,%%regC + vpand %%regT,%%regB,%%regC + vpand %%regF,%%regF,%%regD + vpor %%regF,%%regF,%%regT +%endmacro + +;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D), forwards to MAGIC_F1 +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp ;; rotate each dword of reg left by imm, in place; tmp is clobbered +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpsrld %%tmp, %%reg, 
(32-%%imm) + vpslld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; PROLD_nd reg, imm, tmp, src ;; non-destructive form: reg = each dword of src rotated left by imm; tmp is clobbered +%macro PROLD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpsrld %%tmp, %%src, (32-%%imm) + vpslld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + vpaddd %%regE, %%regE,[%%data + (%%memW * 32)] + PROLD_nd %%regT,5, %%regF,%%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE, %%regE,%%regF +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32] + + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +;; Insert murmur's instructions into this macro. +;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 512Byte. 
+;; So insert 1 murmur block (16 bytes) into every 2 SHA1_STEP_16_79: variant _0 loads and pre-mixes the two qwords, variant _1 folds them into the hash and advances mur_in_p. +%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J + +%macro SHA1_STEP_16_79_0 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32] + mov mur_data1, [mur_in_p] + mov mur_data2, [mur_in_p + 8] + + vpsrld %%regF, W16, (32-1) + imul mur_data1, mur_c1_r + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + imul mur_data2, mur_c2_r + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF + rol mur_data1, R1 + vpaddd %%regE, %%regE,%%regF + rol mur_data2, R2 + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + imul mur_data1, mur_c2_r + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + imul mur_data2, mur_c1_r + vpaddd %%regE,%%regE,%%regF +%endmacro + + +%macro SHA1_STEP_16_79_1 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + xor mur_hash1, mur_data1 + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32] + rol mur_hash1, R3 + vpxor W16, W16, W14 + add mur_hash1, mur_hash2 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32] + lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1] + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + xor mur_hash2, mur_data2 + vpor %%regF, %%regF, W16 + rol mur_hash2, R4 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF + vpaddd %%regE, %%regE,%%regF + add mur_hash2, mur_hash1 + 
PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2] + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + add mur_in_p, 16 + vpaddd %%regE,%%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8d + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + %endmacro + %macro FUNC_RESTORE 0 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10d + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8 + %define PS 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + 
save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + save_reg rbx, 10*16 + 6*8 + save_reg rbp, 10*16 + 7*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + mov rbx, [rsp + 10*16 + 6*8] + mov rbp, [rsp + 10*16 + 7*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg4 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables of murmur3 +%define mur_in_p tmp2 +%define mur_digest_p arg3 +%define mur_hash1 tmp3 +%define mur_hash2 tmp4 +%define mur_data1 tmp5 +%define mur_data2 return +%define mur_c1_r tmp6 +%define mur_c2_r arg5 +; constants of murmur3_x64_128 +%define R1 31 +%define R2 33 +%define R3 27 +%define R4 31 +%define M 5 +%define N1 0x52dce729;DWORD +%define N2 0x38495ab5;DWORD +%define C1 QWORD(0x87c37b91114253d5) +%define C2 QWORD(0x4cf5ad432745937f) +;variables used for storing segs_digests on the stack +%define RSP_SAVE tmp7 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS + +%define pref tmp8 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define A ymm0 +%define B ymm1 +%define C ymm2 +%define D ymm3 +%define E ymm4 + +%define F ymm5 +%define T0 ymm6 +%define T1 ymm7 +%define T2 ymm8 +%define T3 ymm9 +%define T4 ymm10 +%define T5 ymm11 +%define T6 ymm12 +%define T7 ymm13 +%define T8 ymm14 +%define T9 ymm15 + +%define AA ymm5 +%define BB ymm6 +%define CC ymm7 +%define DD ymm8 +%define EE ymm9 +%define TMP ymm10 +%define FUN ymm11 +%define K ymm12 +%define W14 ymm13 +%define W15 ymm14 +%define W16 ymm15 + + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep | + +align 32 +;void mh_sha1_murmur3_x64_128_block_avx2 (const uint8_t * input_data, +; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], +; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], +; uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[5][16]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. 
+; arg 3 pointer to murmur3 digest +; arg 4 number of 1KB blocks +; +global mh_sha1_murmur3_x64_128_block_avx2:function internal +func(mh_sha1_murmur3_x64_128_block_avx2) + FUNC_SAVE + + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 32 Bytes needed by avx2 + and rsp, ~0x1F + + %assign I 0 ; copy segs_digests into stack + %rep 2 + VMOVPS A, [mh_digests_p + I*32*5 + 32*0] + VMOVPS B, [mh_digests_p + I*32*5 + 32*1] + VMOVPS C, [mh_digests_p + I*32*5 + 32*2] + VMOVPS D, [mh_digests_p + I*32*5 + 32*3] + VMOVPS E, [mh_digests_p + I*32*5 + 32*4] + + vmovdqa [rsp + I*32*5 + 32*0], A + vmovdqa [rsp + I*32*5 + 32*1], B + vmovdqa [rsp + I*32*5 + 32*2], C + vmovdqa [rsp + I*32*5 + 32*3], D + vmovdqa [rsp + I*32*5 + 32*4], E + %assign I (I+1) + %endrep + + ;init murmur variables + mov mur_in_p, mh_in_p ;different steps between murmur and mh_sha1 + ;load murmur hash digests and multiplier + mov mur_hash1, [mur_digest_p] + mov mur_hash2, [mur_digest_p + 8] + mov mur_c1_r, C1 + mov mur_c2_r, C2 + +.block_loop: + ;transform to big-endian data and store on aligned_frame + vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*5 to DWORD*8_SEGS*5*2 +%assign I 0 +%rep 16 + VMOVPS T0,[mh_in_p + I*64+0*32] + VMOVPS T1,[mh_in_p + I*64+1*32] + + vpshufb T0, T0, F + vmovdqa [mh_data_p +I*32+0*512],T0 + vpshufb T1, T1, F + vmovdqa [mh_data_p +I*32+1*512],T1 +%assign I (I+1) +%endrep + + mov mh_segs, 0 ;start from the first 8 segments + mov pref, 1024 ;avoid prefetching repeatedly + .segs_loop: + ;; Initialize digests + vmovdqa A, [rsp + 0*64 + mh_segs] + vmovdqa B, [rsp + 1*64 + mh_segs] + vmovdqa C, [rsp + 2*64 + mh_segs] + vmovdqa D, [rsp + 3*64 + mh_segs] + vmovdqa E, [rsp + 4*64 + mh_segs] + + vmovdqa AA, A + vmovdqa BB, B + vmovdqa CC, C + vmovdqa DD, D + vmovdqa EE, E +;; +;; perform 0-79 steps +;; + vmovdqa K, [K00_19] +;; do rounds 0...15 + %assign I 0 + %rep 16 
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS +%assign I (I+1) +%endrep + +;; do rounds 16...19 + vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 32] + vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 32] + %rep 4 + %assign J (I % 2) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] + PREFETCH_X [mh_in_p + pref+128*1] +;; do rounds 20...39 + vmovdqa K, [K20_39] + %rep 20 + %assign J (I % 2) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep +;; do rounds 40...59 + vmovdqa K, [K40_59] + %rep 20 + %assign J (I % 2) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*2] + PREFETCH_X [mh_in_p + pref+128*3] +;; do rounds 60...79 + vmovdqa K, [K60_79] + %rep 20 + %assign J (I % 2) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + + vpaddd A,A, AA + vpaddd B,B, BB + vpaddd C,C, CC + vpaddd D,D, DD + vpaddd E,E, EE + + ; write out digests + vmovdqa [rsp + 0*64 + mh_segs], A + vmovdqa [rsp + 1*64 + mh_segs], B + vmovdqa [rsp + 2*64 + mh_segs], C + vmovdqa [rsp + 3*64 + mh_segs], D + vmovdqa [rsp + 4*64 + mh_segs], E + + add pref, 512 + + add mh_data_p, 512 + add mh_segs, 32 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + ;store murmur-hash digest + mov [mur_digest_p], mur_hash1 + mov [mur_digest_p + 8], mur_hash2 + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 2 + vmovdqa A, [rsp + I*32*5 + 32*0] + vmovdqa B, [rsp + I*32*5 + 32*1] + vmovdqa C, [rsp + I*32*5 + 32*2] + vmovdqa D, [rsp + I*32*5 + 32*3] + vmovdqa E, [rsp + I*32*5 + 32*4] + + VMOVPS [mh_digests_p + I*32*5 + 32*0], A + VMOVPS [mh_digests_p + I*32*5 + 32*1], B + VMOVPS [mh_digests_p + I*32*5 + 32*2], C + 
VMOVPS [mh_digests_p + I*32*5 + 32*3], D + VMOVPS [mh_digests_p + I*32*5 + 32*4], E + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=32 + +align 32 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + dq 0x0405060700010203, 0x0c0d0e0f08090a0b +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 + dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm new file mode 100644 index 000000000..8cc84959e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm @@ -0,0 +1,500 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX-512 +;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 +default rel + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovdqu64 +;SIMD variables definition +%define A zmm0 +%define B zmm1 +%define C zmm2 +%define D zmm3 +%define E zmm4 +%define HH0 zmm5 +%define HH1 zmm6 +%define HH2 zmm7 +%define HH3 zmm8 +%define HH4 zmm9 +%define KT zmm10 +%define XTMP0 zmm11 +%define XTMP1 zmm12 +%define SHUF_MASK zmm13 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;using extra 16 ZMM registers to hold the byte-swapped (big-endian) input data +%define W0 zmm16 +%define W1 zmm17 +%define W2 zmm18 +%define W3 zmm19 +%define W4 zmm20 +%define W5 zmm21 +%define W6 zmm22 +%define W7 zmm23 +%define W8 zmm24 +%define W9 zmm25 +%define W10 zmm26 +%define W11 zmm27 +%define W12 zmm28 +%define W13 zmm29 +%define W14 zmm30 +%define W15 zmm31 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;macros definition +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine 
A TMP_ +%endm + +%macro PROCESS_LOOP 2 +%define %%WT %1 +%define %%F_IMMED %2 + + ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt + ; E=D, D=C, C=ROTL_30(B), B=A, A=T + + ; Ft + ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D) + ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D + ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D) + + vmovdqa32 XTMP1, B ; Copy B + vpaddd E, E, %%WT ; E = E + Wt + vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D) + vpaddd E, E, KT ; E = E + Wt + Kt + vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A) + vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt + vprold B, B, 30 ; B = ROTL_30(B) + vpaddd E, E, XTMP0 ; E = T + + ROTATE_ARGS +%endmacro + +;; Insert murmur's instructions into this macro. +;; Every section_loop of mh_sha1 calls PROCESS_LOOP 80 and +;; MSG_SCHED_ROUND_16_79 64 times and processes 1024 Bytes. +;; So insert 1 murmur block (16 bytes) into each of the 64 stitched rounds. +%macro PROCESS_LOOP_MUR 2 +%define %%WT %1 +%define %%F_IMMED %2 + + ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt + ; E=D, D=C, C=ROTL_30(B), B=A, A=T + + ; Ft + ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D) + ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D + ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D) + + mov mur_data1, [mur_in_p] + mov mur_data2, [mur_in_p + 8] + vmovdqa32 XTMP1, B ; Copy B + imul mur_data1, mur_c1_r + imul mur_data2, mur_c2_r + vpaddd E, E, %%WT ; E = E + Wt + rol mur_data1, R1 + rol mur_data2, R2 + vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D) + imul mur_data1, mur_c2_r + imul mur_data2, mur_c1_r + vpaddd E, E, KT ; E = E + Wt + Kt + xor mur_hash1, mur_data1 + add mur_in_p, 16 + vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A) + rol mur_hash1, R3 + vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt + add mur_hash1, mur_hash2 + vprold B, B, 30 ; B = ROTL_30(B) + lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1] + vpaddd E, E, XTMP0 ; E = T + xor mur_hash2, mur_data2 + + ROTATE_ARGS +%endmacro + +%macro MSG_SCHED_ROUND_16_79_MUR 4 +%define %%WT %1 +%define %%WTp2 %2 +%define %%WTp8 %3 +%define %%WTp13 %4 + ; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ 
Wt-16) + ; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt) + vpternlogd %%WT, %%WTp2, %%WTp8, 0x96 + rol mur_hash2, R4 + vpxord %%WT, %%WT, %%WTp13 + add mur_hash2, mur_hash1 + lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2] + vprold %%WT, %%WT, 1 +%endmacro + +%define APPEND(a,b) a %+ b +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8d + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + %endmacro + %macro FUNC_RESTORE 0 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10d + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8 + %define PS 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + ; remove unwind info macros + %define func(x) x: + %macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa 
[rsp + 0*16], xmm6 + movdqa [rsp + 1*16], xmm7 + movdqa [rsp + 2*16], xmm8 + movdqa [rsp + 3*16], xmm9 + movdqa [rsp + 4*16], xmm10 + movdqa [rsp + 5*16], xmm11 + movdqa [rsp + 6*16], xmm12 + movdqa [rsp + 7*16], xmm13 + movdqa [rsp + 8*16], xmm14 + movdqa [rsp + 9*16], xmm15 + mov [rsp + 10*16 + 0*8], r12 + mov [rsp + 10*16 + 1*8], r13 + mov [rsp + 10*16 + 2*8], r14 + mov [rsp + 10*16 + 3*8], r15 + mov [rsp + 10*16 + 4*8], rdi + mov [rsp + 10*16 + 5*8], rsi + mov [rsp + 10*16 + 6*8], rbx + mov [rsp + 10*16 + 7*8], rbp + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + mov rbx, [rsp + 10*16 + 6*8] + mov rbp, [rsp + 10*16 + 7*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg4 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables of murmur3 +%define mur_in_p tmp2 +%define mur_digest_p arg3 +%define mur_hash1 tmp3 +%define mur_hash2 tmp4 +%define mur_data1 tmp5 +%define mur_data2 return +%define mur_c1_r tmp6 +%define mur_c2_r arg5 +; constants of murmur3_x64_128 +%define R1 31 +%define R2 33 +%define R3 27 +%define R4 31 +%define M 5 +%define N1 0x52dce729;DWORD +%define N2 0x38495ab5;DWORD +%define C1 QWORD(0x87c37b91114253d5) +%define C2 QWORD(0x4cf5ad432745937f) +;holds the caller's rsp while the stack pointer is realigned to 64 bytes +%define RSP_SAVE tmp7 + +%define pref tmp8 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro + +;init 
hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep | + +[bits 64] +section .text +align 32 + +;void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data, +; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], +; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], +; uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[5][16]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. +; arg 3 pointer to murmur3 digest +; arg 4 number of 1KB blocks +; +global mh_sha1_murmur3_x64_128_block_avx512 +func(mh_sha1_murmur3_x64_128_block_avx512) + FUNC_SAVE + + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; align rsp to 64 Bytes needed by avx512 + and rsp, ~0x3f + + ; copy segs_digests into registers. 
+ VMOVPS HH0, [mh_digests_p + 64*0] + VMOVPS HH1, [mh_digests_p + 64*1] + VMOVPS HH2, [mh_digests_p + 64*2] + VMOVPS HH3, [mh_digests_p + 64*3] + VMOVPS HH4, [mh_digests_p + 64*4] + ;a mask used to transform to big-endian data + vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK] + + ;init murmur variables + mov mur_in_p, mh_in_p ;different steps between murmur and mh_sha1 + ;load murmur hash digests and multiplier + mov mur_hash1, [mur_digest_p] + mov mur_hash2, [mur_digest_p + 8] + mov mur_c1_r, C1 + mov mur_c2_r, C2 + +.block_loop: + ;transform to big-endian data and store on aligned_frame + ;using extra 16 ZMM registers instead of stack +%assign I 0 +%rep 8 +%assign J (I+1) + VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64] + VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64] + + vpshufb APPEND(W,I), APPEND(W,I), SHUF_MASK + vpshufb APPEND(W,J), APPEND(W,J), SHUF_MASK +%assign I (I+2) +%endrep + + vmovdqa64 A, HH0 + vmovdqa64 B, HH1 + vmovdqa64 C, HH2 + vmovdqa64 D, HH3 + vmovdqa64 E, HH4 + + vmovdqa32 KT, [K00_19] +%assign I 0xCA +%assign J 0 +%assign K 2 +%assign L 8 +%assign M 13 +%assign N 0 +%rep 80 + %if N < 64 ; stitching 64 times + PROCESS_LOOP_MUR APPEND(W,J), I + MSG_SCHED_ROUND_16_79_MUR APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M) + %else ; 64 <= N < 80, without stitching + PROCESS_LOOP APPEND(W,J), I + %endif + %if N = 19 + vmovdqa32 KT, [K20_39] + %assign I 0x96 + %elif N = 39 + vmovdqa32 KT, [K40_59] + %assign I 0xE8 + %elif N = 59 + vmovdqa32 KT, [K60_79] + %assign I 0x96 + %endif + %if N % 20 = 19 + PREFETCH_X [mh_in_p + 1024+128*(N / 20)] + PREFETCH_X [mh_in_p + 1024+128*(N / 20 +1)] + %endif +%assign J ((J+1)% 16) +%assign K ((K+1)% 16) +%assign L ((L+1)% 16) +%assign M ((M+1)% 16) +%assign N (N+1) +%endrep + + ; Add old digest + vpaddd HH0,A, HH0 + vpaddd HH1,B, HH1 + vpaddd HH2,C, HH2 + vpaddd HH3,D, HH3 + vpaddd HH4,E, HH4 + + add mh_in_p, 1024 + sub loops, 1 + jne .block_loop + + ;store murmur-hash digest + mov [mur_digest_p], mur_hash1 + mov 
[mur_digest_p + 8], mur_hash2 + + ; copy segs_digests to mh_digests_p + VMOVPS [mh_digests_p + 64*0], HH0 + VMOVPS [mh_digests_p + 64*1], HH1 + VMOVPS [mh_digests_p + 64*2], HH2 + VMOVPS [mh_digests_p + 64*3], HH3 + VMOVPS [mh_digests_p + 64*4], HH4 + + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + + +section .data align=64 + +align 64 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + +K20_39: dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + +K40_59: dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + +K60_79: dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_sha1_murmur3_x64_128_block_avx512 +no_sha1_murmur3_x64_128_block_avx512: +%endif +%endif ; HAVE_AS_KNOWS_AVX512 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm new file mode 100644 index 000000000..174a8518f --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm @@ -0,0 +1,698 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; 
Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;; code to compute 16 SHA1 using SSE
;; Each xmm register holds one 32-bit SHA1 state word for 4 segments, so every
;; macro below performs the same scalar SHA1 operation on 4 lanes at once.
;;

%include "reg_sizes.asm"
default rel

;; Magic functions defined in FIPS 180-1
;;
; macro MAGIC_F0 F,B,C,D,T   ;; F = (D ^ (B & (C ^ D)))
; "Choose" function used for rounds 0..19. T is accepted for a uniform
; 5-argument signature but is not touched by this macro.
%macro MAGIC_F0 5
%define %%regF %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regT %5
	movdqa	%%regF,%%regC
	pxor	%%regF,%%regD
	pand	%%regF,%%regB
	pxor	%%regF,%%regD
%endmacro

; macro MAGIC_F1 F,B,C,D,T   ;; F = (B ^ C ^ D)
; "Parity" function used for rounds 20..39. T is unused.
%macro MAGIC_F1 5
%define %%regF %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regT %5
	movdqa	%%regF,%%regD
	pxor	%%regF,%%regC
	pxor	%%regF,%%regB
%endmacro

; macro MAGIC_F2 F,B,C,D,T   ;; F = ((B & C) | (B & D) | (C & D))
; "Majority" function used for rounds 40..59, computed as
; ((B | C) & D) | (B & C); T is clobbered as scratch.
%macro MAGIC_F2 5
%define %%regF %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regT %5
	movdqa	%%regF,%%regB
	movdqa	%%regT,%%regB
	por	%%regF,%%regC
	pand	%%regT,%%regC
	pand	%%regF,%%regD
	por	%%regF,%%regT
%endmacro

; macro MAGIC_F3 F,B,C,D,T   ;; F = (B ^ C ^ D)
; Rounds 60..79 reuse the parity function, so this simply forwards to MAGIC_F1.
%macro MAGIC_F3 5
%define %%regF %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regT %5
	MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT
%endmacro

; PROLD reg, imm, tmp
; Rotate every 32-bit lane of reg left by imm bits (SSE has no packed rotate,
; so it is built from two shifts and an OR). tmp is clobbered.
%macro PROLD 3
%define %%reg %1
%define %%imm %2
%define %%tmp %3
	movdqa	%%tmp, %%reg
	pslld	%%reg, %%imm
	psrld	%%tmp, (32-%%imm)
	por	%%reg, %%tmp
%endmacro

; SHA1_STEP_00_15 A,B,C,D,E, T,F, memW, immCNT, MAGIC, data
; One SHA1 round for rounds 0..15, where the message word is read directly
; from memory at [data + memW*16]:
;     E += K + W[memW] + ROL(A,5) + MAGIC(B,C,D)
;     B  = ROL(B,30)
; immCNT is the register holding the round constant K; T and F are scratch.
; The a..e role rotation is not done here; callers invoke ROTATE_ARGS afterwards.
%macro SHA1_STEP_00_15 11
%define %%regA %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regE %5
%define %%regT %6
%define %%regF %7
%define %%memW %8
%define %%immCNT %9
%define %%MAGIC %10
%define %%data %11
	paddd	%%regE,%%immCNT
	paddd	%%regE,[%%data + (%%memW * 16)]
	movdqa	%%regT,%%regA
	PROLD	%%regT,5, %%regF
	paddd	%%regE,%%regT
	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT	;; FUN = MAGIC_Fi(B,C,D)
	PROLD	%%regB,30, %%regT
	paddd	%%regE,%%regF
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; SHA1_STEP_16_79 A,B,C,D,E, T,F, memW, immCNT, MAGIC, data
; One SHA1 round for rounds 16..79 without any murmur3 work mixed in.
; The message schedule is kept in a 16-entry ring at [data], indexed
; modulo 16, with the W[t-16] and W[t-15] values cached in W16/W15:
;     W[t] = ROL(W[t-16] ^ W[t-14] ^ W[t-8] ^ W[t-3], 1)
;     E   += K + W[t] + ROL(A,5) + MAGIC(B,C,D)
;     B    = ROL(B,30)
; W14 is loaded here; ROTATE_W then renames W14/W15/W16 so the cached
; values line up for the next round.
%macro SHA1_STEP_16_79 11
%define %%regA %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regE %5
%define %%regT %6
%define %%regF %7
%define %%memW %8
%define %%immCNT %9
%define %%MAGIC %10
%define %%data %11
	paddd	%%regE,%%immCNT
	movdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
	pxor	W16, W14
	pxor	W16, [%%data + ((%%memW - 8) & 15) * 16]
	pxor	W16, [%%data + ((%%memW - 3) & 15) * 16]
	; rotate the XOR result left by 1: F = (W16 >> 31) | (W16 << 1)
	movdqa	%%regF, W16
	pslld	W16, 1
	psrld	%%regF, (32-1)
	por	%%regF, W16
	ROTATE_W

	; store the new W[t] back into the ring slot it replaces
	movdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
	paddd	%%regE,%%regF
	movdqa	%%regT,%%regA
	PROLD	%%regT,5, %%regF
	paddd	%%regE,%%regT
	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT	;; FUN = MAGIC_Fi(B,C,D)
	PROLD	%%regB,30, %%regT
	paddd	%%regE,%%regF
%endmacro

;; Insert murmur's instructions into this macro.
;; Every segs_loop iteration of mh_sha1 runs SHA1_STEP_16_79 64 times and
;; covers 256 bytes of input, while one murmur3 block is 16 bytes.
;; So one murmur block is spread over every 4 SHA1_STEP_16_79 rounds.
; SHA1_STEP_16_79(J) expands to the macro name SHA1_STEP_16_79_<J>, so the
; caller can select one of the four stitched variants with J = round % 4.
%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J

; Variant 0: SHA1 round + first quarter of one murmur3 block:
;     k1 = in[0]; k2 = in[1]; k1 *= c1; k2 *= c2
; The SHA1 part is instruction-for-instruction the same as SHA1_STEP_16_79;
; the scalar murmur instructions are placed between the SIMD ones.
%macro SHA1_STEP_16_79_0 11
%define %%regA %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regE %5
%define %%regT %6
%define %%regF %7
%define %%memW %8
%define %%immCNT %9
%define %%MAGIC %10
%define %%data %11

	paddd	%%regE,%%immCNT
	movdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
	pxor	W16, W14
	pxor	W16, [%%data + ((%%memW - 8) & 15) * 16]
	pxor	W16, [%%data + ((%%memW - 3) & 15) * 16]
	movdqa	%%regF, W16
	mov	mur_data1, [mur_in_p]		; murmur: k1 = first 8 input bytes
	mov	mur_data2, [mur_in_p + 8]	; murmur: k2 = next 8 input bytes
	pslld	W16, 1
	psrld	%%regF, (32-1)
	por	%%regF, W16

	ROTATE_W

	movdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
	imul	mur_data1, mur_c1_r		; murmur: k1 *= c1
	paddd	%%regE,%%regF
	movdqa	%%regT,%%regA
	PROLD	%%regT,5, %%regF
	paddd	%%regE,%%regT
	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT	;; FUN = MAGIC_Fi(B,C,D)
	imul	mur_data2, mur_c2_r		; murmur: k2 *= c2
	PROLD	%%regB,30, %%regT
	paddd	%%regE,%%regF
%endmacro

; Variant 1: SHA1 round + second quarter of the murmur3 block:
;     k1 = ROL(k1,R1) * c2; k2 = ROL(k2,R2) * c1; advance input pointer by 16
%macro SHA1_STEP_16_79_1 11
%define %%regA %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regE %5
%define %%regT %6
%define %%regF %7
%define %%memW %8
%define %%immCNT %9
%define %%MAGIC %10
%define %%data %11
	paddd	%%regE,%%immCNT
	rol	mur_data1, R1			; murmur: k1 = ROL(k1, R1)
	movdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
	pxor	W16, W14
	pxor	W16, [%%data + ((%%memW - 8) & 15) * 16]
	pxor	W16, [%%data + ((%%memW - 3) & 15) * 16]
	movdqa	%%regF, W16
	pslld	W16, 1
	rol	mur_data2, R2			; murmur: k2 = ROL(k2, R2)
	psrld	%%regF, (32-1)
	por	%%regF, W16

	ROTATE_W

	movdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
	imul	mur_data1, mur_c2_r		; murmur: k1 *= c2
	paddd	%%regE,%%regF
	movdqa	%%regT,%%regA
	PROLD	%%regT,5, %%regF
	paddd	%%regE,%%regT
	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT	;; FUN = MAGIC_Fi(B,C,D)
	imul	mur_data2, mur_c1_r		; murmur: k2 *= c1
	PROLD	%%regB,30, %%regT
	add	mur_in_p, 16			; murmur: next 16-byte block
	paddd	%%regE,%%regF
%endmacro

; Variant 2: SHA1 round + third quarter of the murmur3 block:
;     h1 ^= k1; h1 = ROL(h1,R3); h1 += h2; h1 = h1*5 + N1
; The multiply by 5 is done by the lea (h1 + h1*4).
%macro SHA1_STEP_16_79_2 11
%define %%regA %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regE %5
%define %%regT %6
%define %%regF %7
%define %%memW %8
%define %%immCNT %9
%define %%MAGIC %10
%define %%data %11
	paddd	%%regE,%%immCNT
	movdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
	xor	mur_hash1, mur_data1		; murmur: h1 ^= k1
	pxor	W16, W14
	pxor	W16, [%%data + ((%%memW - 8) & 15) * 16]
	pxor	W16, [%%data + ((%%memW - 3) & 15) * 16]
	rol	mur_hash1, R3			; murmur: h1 = ROL(h1, R3)
	movdqa	%%regF, W16
	pslld	W16, 1
	psrld	%%regF, (32-1)
	por	%%regF, W16

	ROTATE_W

	movdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
	add	mur_hash1, mur_hash2		; murmur: h1 += h2
	paddd	%%regE,%%regF
	movdqa	%%regT,%%regA
	PROLD	%%regT,5, %%regF
	lea	mur_hash1, [mur_hash1 + mur_hash1*4 + N1]	; murmur: h1 = h1*5 + N1
	paddd	%%regE,%%regT
	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT	;; FUN = MAGIC_Fi(B,C,D)
	PROLD	%%regB,30, %%regT
	paddd	%%regE,%%regF
%endmacro

; Variant 3: SHA1 round + last quarter of the murmur3 block:
;     h2 ^= k2; h2 = ROL(h2,R4); h2 += h1; h2 = h2*5 + N2
%macro SHA1_STEP_16_79_3 11
%define %%regA %1
%define %%regB %2
%define %%regC %3
%define %%regD %4
%define %%regE %5
%define %%regT %6
%define %%regF %7
%define %%memW %8
%define %%immCNT %9
%define %%MAGIC %10
%define %%data %11
	paddd	%%regE,%%immCNT
	movdqa	W14, [%%data + ((%%memW - 14) & 15) * 16]
	xor	mur_hash2, mur_data2		; murmur: h2 ^= k2
	pxor	W16, W14
	pxor	W16, [%%data + ((%%memW - 8) & 15) * 16]
	pxor	W16, [%%data + ((%%memW - 3) & 15) * 16]
	rol	mur_hash2, R4			; murmur: h2 = ROL(h2, R4)
	movdqa	%%regF, W16
	pslld	W16, 1
	psrld	%%regF, (32-1)
	por	%%regF, W16

	ROTATE_W

	movdqa	[%%data + ((%%memW - 0) & 15) * 16],%%regF
	add	mur_hash2, mur_hash1		; murmur: h2 += h1
	paddd	%%regE,%%regF
	movdqa	%%regT,%%regA
	PROLD	%%regT,5, %%regF
	paddd	%%regE,%%regT
	%%MAGIC	%%regF,%%regB,%%regC,%%regD,%%regT	;; FUN = MAGIC_Fi(B,C,D)
	PROLD	%%regB,30, %%regT
	lea	mur_hash2, [mur_hash2 + mur_hash2*4 + N2]	; murmur: h2 = h2*5 + N2
	paddd	%%regE,%%regF
%endmacro
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Calling-convention mapping: argN/tmpN are symbolic names for the registers
; used by each ABI, so the function body below is written once.
%ifidn __OUTPUT_FORMAT__, elf64
 ; Linux
 %define arg0 rdi
 %define arg1 rsi
 %define arg2 rdx
 %define arg3 rcx

 %define arg4 r8d
 %define arg5 r9

 %define tmp1 r10
 %define tmp2 r11
 %define tmp3 r12 ; must be saved and restored
 %define tmp4 r13 ; must be saved and restored
 %define tmp5 r14 ; must be saved and restored
 %define tmp6 r15 ; must be saved and restored
 %define tmp7 rbx ; must be saved and restored
 %define tmp8 rbp ; must be saved and restored
 %define return rax

 %define func(x) x:
 ; Prologue/epilogue: push and pop the callee-saved registers named above.
 %macro FUNC_SAVE 0
	push	r12
	push	r13
	push	r14
	push	r15
	push	rbx
	push	rbp
 %endmacro
 %macro FUNC_RESTORE 0
	pop	rbp
	pop	rbx
	pop	r15
	pop	r14
	pop	r13
	pop	r12
 %endmacro
%else
 ; Windows
 %define arg0 rcx
 %define arg1 rdx
 %define arg2 r8
 %define arg3 r9

 ; arg4 is not passed in a register here; FUNC_SAVE loads it from the
 ; stack into r10d. arg5 is only used as a scratch register name.
 %define arg4 r10d
 %define arg5 r11
 %define tmp1 r12 ; must be saved and restored
 %define tmp2 r13 ; must be saved and restored
 %define tmp3 r14 ; must be saved and restored
 %define tmp4 r15 ; must be saved and restored
 %define tmp5 rdi ; must be saved and restored
 %define tmp6 rsi ; must be saved and restored
 %define tmp7 rbx ; must be saved and restored
 %define tmp8 rbp ; must be saved and restored
 %define return rax

 ; save area: 10 xmm registers (16 bytes each) followed by 8-byte slots
 %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8
 %define PS 8
 ; address of the x-th argument after alloc_stack
 ; (skips the save area and one PS-sized slot, presumably the return address)
 %define arg(x) [rsp + stack_size + PS + PS*x]
 %define func(x) proc_frame x
 %macro FUNC_SAVE 0
	alloc_stack	stack_size
	save_xmm128	xmm6, 0*16
	save_xmm128	xmm7, 1*16
	save_xmm128	xmm8, 2*16
	save_xmm128	xmm9, 3*16
	save_xmm128	xmm10, 4*16
	save_xmm128	xmm11, 5*16
	save_xmm128	xmm12, 6*16
	save_xmm128	xmm13, 7*16
	save_xmm128	xmm14, 8*16
	save_xmm128	xmm15, 9*16
	save_reg	r12, 10*16 + 0*8
	save_reg	r13, 10*16 + 1*8
	save_reg	r14, 10*16 + 2*8
	save_reg	r15, 10*16 + 3*8
	save_reg	rdi, 10*16 + 4*8
	save_reg	rsi, 10*16 + 5*8
	save_reg	rbx, 10*16 + 6*8
	save_reg	rbp, 10*16 + 7*8
	end_prolog
	mov	arg4, arg(4)		; fetch the 5th argument (num_blocks) from the stack
 %endmacro

 %macro FUNC_RESTORE 0
	movdqa	xmm6, [rsp + 0*16]
	movdqa	xmm7, [rsp + 1*16]
	movdqa	xmm8, [rsp + 2*16]
	movdqa	xmm9, [rsp + 3*16]
	movdqa	xmm10, [rsp + 4*16]
	movdqa	xmm11, [rsp + 5*16]
	movdqa	xmm12, [rsp + 6*16]
	movdqa	xmm13, [rsp + 7*16]
	movdqa	xmm14, [rsp + 8*16]
	movdqa	xmm15, [rsp + 9*16]
	mov	r12, [rsp + 10*16 + 0*8]
	mov	r13, [rsp + 10*16 + 1*8]
	mov	r14, [rsp + 10*16 + 2*8]
	mov	r15, [rsp + 10*16 + 3*8]
	mov	rdi, [rsp + 10*16 + 4*8]
	mov	rsi, [rsp + 10*16 + 5*8]
	mov	rbx, [rsp + 10*16 + 6*8]
	mov	rbp, [rsp + 10*16 + 7*8]
	add	rsp, stack_size
 %endmacro
%endif
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define loops arg4			; remaining 1KB blocks (32-bit register)
;variables of mh_sha1
%define mh_in_p arg0			; input cursor, advanced by 1024 per block
%define mh_digests_p arg1		; caller's 5 x 16 dword interim digests
%define mh_data_p arg2			; frame buffer holding byte-swapped input
%define mh_segs tmp1			; byte offset of the current 4-segment group
;variables of murmur3
%define mur_in_p tmp2			; murmur input cursor, advanced by 16 per murmur block
%define mur_digest_p arg3
%define mur_hash1 tmp3
%define mur_hash2 tmp4
%define mur_data1 tmp5
%define mur_data2 return
%define mur_c1_r tmp6
%define mur_c2_r arg5
; constants of murmur3_x64_128
%define R1 31
%define R2 33
%define R3 27
%define R4 31
; NOTE(review): M is not referenced in this file's visible code; the
; multiply by 5 is done with lea (x + x*4) in the stitched macros.
%define M 5
%define N1 0x52dce729;DWORD
%define N2 0x38495ab5;DWORD
%define C1 QWORD(0x87c37b91114253d5)
%define C2 QWORD(0x4cf5ad432745937f)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;variables used by storing segs_digests on stack
%define RSP_SAVE tmp7
%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
%define MOVPS movups			; unaligned move for caller-owned buffers

; SHA1 working variables a..e, each holding 4 segments
%define A xmm0
%define B xmm1
%define C xmm2
%define D xmm3
%define E xmm4
%define F xmm5 ; tmp
%define G xmm6 ; tmp

%define TMP G
%define FUN F
%define K xmm7

; digest values at the start of the 80 rounds, added back afterwards
%define AA xmm8
%define BB xmm9
%define CC xmm10
%define DD xmm11
%define EE xmm12

; scratch registers for the byte-swap pass; they alias G/K/AA..DD, which
; are only loaded after that pass has finished
%define T0 xmm6
%define T1 xmm7
%define T2 xmm8
%define T3 xmm9
%define T4 xmm10
%define T5 xmm11

; Rename the a..e registers after each round instead of moving data:
; new A = old E, B = old A, C = old B, D = old C, E = old D.
%macro ROTATE_ARGS 0
%xdefine TMP_ E
%xdefine E D
%xdefine D C
%xdefine C B
%xdefine B A
%xdefine A TMP_
%endm

%define W14 xmm13
%define W15 xmm14
%define W16 xmm15

; Rename the cached message-schedule registers after each round:
; new W16 = old W15, new W15 = old W14, and the old W16 register becomes
; the scratch W14 that the next round reloads.
%macro ROTATE_W 0
%xdefine TMP_ W16
%xdefine W16 W15
%xdefine W15 W14
%xdefine W14 TMP_
%endm


;init hash digests
; segs_digests:low addr-> high_addr
; a  | b  | c  | ...| p  | (16)
; h0 | h0 | h0 | ...| h0 |    | Aa| Ab | Ac |...| Ap |
; h1 | h1 | h1 | ...| h1 |    | Ba| Bb | Bc |...| Bp |
; ....
; h4 | h4 | h4 | ...| h4 |    | Ea| Eb | Ec |...| Ep |

align 32
;void mh_sha1_murmur3_x64_128_block_sse (const uint8_t * input_data,
;				uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
;				uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
;				uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
;				uint32_t num_blocks);
; arg 0 pointer to input data
; arg 1 pointer to digests, include segments digests(uint32_t digests[5][16])
; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
;       (written with movdqa, so it must be 16-byte aligned)
; arg 3 pointer to murmur3 digest
; arg 4 number of 1KB blocks
;
; Processes num_blocks 1KB blocks: the 16 SHA1 segment digests and the
; murmur3 state are both updated in place.
global mh_sha1_murmur3_x64_128_block_sse:function internal
func(mh_sha1_murmur3_x64_128_block_sse)
	FUNC_SAVE
	; save rsp
	mov	RSP_SAVE, rsp

	; NOTE(review): jle is a signed test on the 32-bit count, so a count
	; with the top bit set also takes the early return, not only zero.
	cmp	loops, 0
	jle	.return

	; leave enough space to store segs_digests
	sub	rsp, FRAMESZ
	; align rsp to 16 Bytes needed by sse
	and	rsp, ~0x0F

 %assign I 0				; copy segs_digests into stack
 %rep 5
	MOVPS	A, [mh_digests_p + I*64 + 16*0]
	MOVPS	B, [mh_digests_p + I*64 + 16*1]
	MOVPS	C, [mh_digests_p + I*64 + 16*2]
	MOVPS	D, [mh_digests_p + I*64 + 16*3]

	movdqa	[rsp + I*64 + 16*0], A
	movdqa	[rsp + I*64 + 16*1], B
	movdqa	[rsp + I*64 + 16*2], C
	movdqa	[rsp + I*64 + 16*3], D
 %assign I (I+1)
 %endrep

	;init murmur variables
	mov	mur_in_p, mh_in_p	;different steps between murmur and mh_sha1
	;load murmur hash digests and multiplier
	mov	mur_hash1, [mur_digest_p]
	mov	mur_hash2, [mur_digest_p + 8]
	mov	mur_c1_r, C1
	mov	mur_c2_r, C2

.block_loop:
	;transform to big-endian data and store on aligned_frame
	movdqa	F, [PSHUFFLE_BYTE_FLIP_MASK]
	;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
	; Each 64-byte input row is split into four 16-byte pieces; piece k of
	; row I is stored at k*256 + I*16, so every 4-segment group gets a
	; contiguous 256-byte, 16-entry message schedule.
 %assign I 0
 %rep 16
	MOVPS	T0,[mh_in_p+I*64+0*16]
	MOVPS	T1,[mh_in_p+I*64+1*16]
	MOVPS	T2,[mh_in_p+I*64+2*16]
	MOVPS	T3,[mh_in_p+I*64+3*16]

	pshufb	T0, F
	movdqa	[mh_data_p+(I)*16 +0*256],T0
	pshufb	T1, F
	movdqa	[mh_data_p+(I)*16 +1*256],T1
	pshufb	T2, F
	movdqa	[mh_data_p+(I)*16 +2*256],T2
	pshufb	T3, F
	movdqa	[mh_data_p+(I)*16 +3*256],T3
 %assign I (I+1)
 %endrep

	mov	mh_segs, 0		;start from the first 4 segments
 .segs_loop:
	;; Initialize digests
	movdqa	A, [rsp + 0*64 + mh_segs]
	movdqa	B, [rsp + 1*64 + mh_segs]
	movdqa	C, [rsp + 2*64 + mh_segs]
	movdqa	D, [rsp + 3*64 + mh_segs]
	movdqa	E, [rsp + 4*64 + mh_segs]

	movdqa	AA, A
	movdqa	BB, B
	movdqa	CC, C
	movdqa	DD, D
	movdqa	EE, E
;;
;; perform 0-79 steps
;;
	movdqa	K, [K00_19]
;; do rounds 0...15
 %assign I 0
 %rep 16
	SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
	ROTATE_ARGS
 %assign I (I+1)
 %endrep

;; do rounds 16...19
	; prime the W[t-16] / W[t-15] cache registers for round 16
	movdqa	W16, [mh_data_p + ((16 - 16) & 15) * 16]
	movdqa	W15, [mh_data_p + ((16 - 15) & 15) * 16]
 %rep 4
 %assign J (I % 4)			; selects the stitched murmur variant
	SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
	ROTATE_ARGS
 %assign I (I+1)
 %endrep

;; do rounds 20...39
	movdqa	K, [K20_39]
 %rep 20
 %assign J (I % 4)
	SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
	ROTATE_ARGS
 %assign I (I+1)
 %endrep

;; do rounds 40...59
	movdqa	K, [K40_59]
 %rep 20
 %assign J (I % 4)
	SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
	ROTATE_ARGS
 %assign I (I+1)
 %endrep

;; do rounds 60...79
	movdqa	K, [K60_79]
 %rep 20
 %assign J (I % 4)
	SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
	ROTATE_ARGS
 %assign I (I+1)
 %endrep

	; add the digest values saved before the 80 rounds
	paddd	A, AA
	paddd	B, BB
	paddd	C, CC
	paddd	D, DD
	paddd	E, EE

	; write out digests
	movdqa	[rsp + 0*64 + mh_segs], A
	movdqa	[rsp + 1*64 + mh_segs], B
	movdqa	[rsp + 2*64 + mh_segs], C
	movdqa	[rsp + 3*64 + mh_segs], D
	movdqa	[rsp + 4*64 + mh_segs], E

	; next group of 4 segments: 256 bytes further in the frame buffer,
	; 16 bytes further in each digest row
	add	mh_data_p, 256
	add	mh_segs, 16
	cmp	mh_segs, 64
	jc	.segs_loop

	; rewind the frame buffer pointer and move on to the next 1KB block
	sub	mh_data_p, (1024)
	add	mh_in_p, (1024)
	sub	loops, 1
	jne	.block_loop

	;store murmur-hash digest
	mov	[mur_digest_p], mur_hash1
	mov	[mur_digest_p + 8], mur_hash2

 %assign I 0				; copy segs_digests back to mh_digests_p
 %rep 5
	movdqa	A, [rsp + I*64 + 16*0]
	movdqa	B, [rsp + I*64 + 16*1]
	movdqa	C, [rsp + I*64 + 16*2]
	movdqa	D, [rsp + I*64 + 16*3]

	MOVPS	[mh_digests_p + I*64 + 16*0], A
	MOVPS	[mh_digests_p + I*64 + 16*1], B
	MOVPS	[mh_digests_p + I*64 + 16*2], C
	MOVPS	[mh_digests_p + I*64 + 16*3], D
 %assign I (I+1)
 %endrep
	mov	rsp, RSP_SAVE		; restore rsp

.return:
	FUNC_RESTORE
	ret

endproc_frame

section .data align=16

align 16
; pshufb mask that reverses the byte order inside every 32-bit lane
PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b

; SHA1 round constants, one per group of 20 rounds, replicated to all 4 lanes
K00_19:	dq 0x5A8279995A827999, 0x5A8279995A827999
K20_39:	dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
K40_59:	dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
K60_79:	dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c
new file mode 100644
index 000000000..6eb998257
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c
@@ -0,0 +1,102 @@
/**********************************************************************
  Copyright(c) 2011-2016 Intel Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#ifndef FINALIZE_FUNCTION +#include <stdlib.h> // For NULL +#include "mh_sha1_murmur3_x64_128_internal.h" + +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_base +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_base +#define FINALIZE_FUNCTION_SLVER +#endif + +#define MURMUR_BLOCK_FUNCTION murmur3_x64_128_block +#define MURMUR_TAIL_FUNCTION murmur3_x64_128_tail + +int FINALIZE_FUNCTION(struct mh_sha1_murmur3_x64_128_ctx *ctx, void *mh_sha1_digest, + void *murmur3_x64_128_digest) +{ + uint8_t *partial_block_buffer, *murmur_tail_data; + uint64_t partial_block_len, total_len; + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint8_t *aligned_frame_buffer; + + if (ctx == NULL) + return MH_SHA1_MURMUR3_CTX_ERROR_NULL; + + total_len = ctx->total_length; + partial_block_len = total_len % MH_SHA1_BLOCK_SIZE; + partial_block_buffer = ctx->partial_block_buffer; + + // Calculate murmur3 firstly + // because mh_sha1 will change the partial_block_buffer + // ( partial_block_buffer = n murmur3 blocks and 1 murmur3 tail) + murmur_tail_data = + partial_block_buffer + partial_block_len - partial_block_len % MUR_BLOCK_SIZE; + MURMUR_BLOCK_FUNCTION(partial_block_buffer, partial_block_len / MUR_BLOCK_SIZE, + ctx->murmur3_x64_128_digest); + MURMUR_TAIL_FUNCTION(murmur_tail_data, total_len, ctx->murmur3_x64_128_digest); + + /* mh_sha1 final */ + aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer); + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + + MH_SHA1_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha1_segs_digests, + aligned_frame_buffer, ctx->mh_sha1_digest); + + /* Output the digests of murmur3 and mh_sha1 */ + if (mh_sha1_digest != NULL) { + ((uint32_t *) mh_sha1_digest)[0] = ctx->mh_sha1_digest[0]; + ((uint32_t *) mh_sha1_digest)[1] = ctx->mh_sha1_digest[1]; + ((uint32_t *) mh_sha1_digest)[2] = ctx->mh_sha1_digest[2]; + ((uint32_t *) mh_sha1_digest)[3] = 
ctx->mh_sha1_digest[3]; + ((uint32_t *) mh_sha1_digest)[4] = ctx->mh_sha1_digest[4]; + } + + if (murmur3_x64_128_digest != NULL) { + ((uint32_t *) murmur3_x64_128_digest)[0] = ctx->murmur3_x64_128_digest[0]; + ((uint32_t *) murmur3_x64_128_digest)[1] = ctx->murmur3_x64_128_digest[1]; + ((uint32_t *) murmur3_x64_128_digest)[2] = ctx->murmur3_x64_128_digest[2]; + ((uint32_t *) murmur3_x64_128_digest)[3] = ctx->murmur3_x64_128_digest[3]; + } + + return MH_SHA1_MURMUR3_CTX_ERROR_NONE; +} + +#ifdef FINALIZE_FUNCTION_SLVER +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + + // Version info +struct slver mh_sha1_murmur3_x64_128_finalize_base_slver_0000025b; +struct slver mh_sha1_murmur3_x64_128_finalize_base_slver = { 0x025b, 0x00, 0x00 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h new file mode 100644 index 000000000..bb16c58d6 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h @@ -0,0 +1,202 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#ifndef _MH_SHA1_MURMUR3_X64_128_INTERNAL_H_
#define _MH_SHA1_MURMUR3_X64_128_INTERNAL_H_

/**
 *  @file mh_sha1_murmur3_x64_128_internal.h
 *  @brief mh_sha1_murmur3_x64_128 internal function prototypes and macros
 *
 *  Interface for mh_sha1_murmur3_x64_128 internal functions
 *
 */
#include <stdint.h>
#include "mh_sha1_internal.h"
#include "mh_sha1_murmur3_x64_128.h"

#ifdef __cplusplus
 extern "C" {
#endif

#ifdef _MSC_VER
# define inline __inline
#endif

 /*******************************************************************
  * mh_sha1_murmur3_x64_128 API internal function prototypes
  * Multiple versions of the Update and Finalize functions are supplied,
  * each built on a matching version of the block and tail subfunctions.
  ******************************************************************/

 /**
  * @brief Process num_blocks blocks of MH_SHA1_BLOCK_SIZE bytes each,
  *        updating both the mh_sha1 segment digests and the murmur3 digest.
  *
  * This function determines what instruction sets are enabled and selects the
  * appropriate version at runtime.
  *
  * @param input_data Pointer to input data to be processed
  * @param mh_sha1_digests 16 segments digests
  * @param frame_buffer Pointer to buffer which is a temp working area
  * @param murmur3_x64_128_digests Murmur3 digest
  * @param num_blocks The number of blocks.
  * @returns none
  *
  */
 // Each function needs an individual C or ASM file because the choice has a large performance impact.
 // They are called by mh_sha1_murmur3_x64_128_update_XXX.
 void mh_sha1_murmur3_x64_128_block (const uint8_t * input_data,
			uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
			uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
			uint32_t num_blocks);

 /**
  * @brief Process num_blocks blocks of MH_SHA1_BLOCK_SIZE bytes each
  *        (base version, no instruction-set requirement stated).
  *
  * @param input_data Pointer to input data to be processed
  * @param mh_sha1_digests 16 segments digests
  * @param frame_buffer Pointer to buffer which is a temp working area
  * @param murmur3_x64_128_digests Murmur3 digest
  * @param num_blocks The number of blocks.
  * @returns none
  *
  */
 void mh_sha1_murmur3_x64_128_block_base (const uint8_t * input_data,
			uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
			uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
			uint32_t num_blocks);

 /**
  * @brief Process num_blocks blocks of MH_SHA1_BLOCK_SIZE bytes each
  *
  * @requires SSE
  *
  * @param input_data Pointer to input data to be processed
  * @param mh_sha1_digests 16 segments digests
  * @param frame_buffer Pointer to buffer which is a temp working area
  * @param murmur3_x64_128_digests Murmur3 digest
  * @param num_blocks The number of blocks.
  * @returns none
  *
  */
 void mh_sha1_murmur3_x64_128_block_sse (const uint8_t * input_data,
			uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
			uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
			uint32_t num_blocks);

 /**
  * @brief Process num_blocks blocks of MH_SHA1_BLOCK_SIZE bytes each
  *
  * @requires AVX
  *
  * @param input_data Pointer to input data to be processed
  * @param mh_sha1_digests 16 segments digests
  * @param frame_buffer Pointer to buffer which is a temp working area
  * @param murmur3_x64_128_digests Murmur3 digest
  * @param num_blocks The number of blocks.
  * @returns none
  *
  */
 void mh_sha1_murmur3_x64_128_block_avx (const uint8_t * input_data,
			uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
			uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
			uint32_t num_blocks);

 /**
  * @brief Process num_blocks blocks of MH_SHA1_BLOCK_SIZE bytes each
  *
  * @requires AVX2
  *
  * @param input_data Pointer to input data to be processed
  * @param mh_sha1_digests 16 segments digests
  * @param frame_buffer Pointer to buffer which is a temp working area
  * @param murmur3_x64_128_digests Murmur3 digest
  * @param num_blocks The number of blocks.
  * @returns none
  *
  */
 void mh_sha1_murmur3_x64_128_block_avx2 (const uint8_t * input_data,
			uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
			uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
			uint32_t num_blocks);

 /**
  * @brief Process num_blocks blocks of MH_SHA1_BLOCK_SIZE bytes each
  *
  * @requires AVX512
  *
  * @param input_data Pointer to input data to be processed
  * @param mh_sha1_digests 16 segments digests
  * @param frame_buffer Pointer to buffer which is a temp working area
  * @param murmur3_x64_128_digests Murmur3 digest
  * @param num_blocks The number of blocks.
  * @returns none
  *
  */
 void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data,
			uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
			uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
			uint32_t num_blocks);
 /*******************************************************************
  * murmur hash API
  ******************************************************************/

 /**
  * @brief Calculate the murmur digest of num_blocks blocks of 16 bytes each.
  * @param input_data Pointer to input data to be processed
  * @param num_blocks The number of 16-byte blocks.
  * @param digests Murmur3 digest, updated in place
  * @returns none
  *
  */
 void murmur3_x64_128_block(const uint8_t * input_data, uint32_t num_blocks,
			uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]);

 /**
  * @brief Do the tail process for the final data, which is less than 16 bytes.
  * @param tail_buffer Pointer to input data to be processed
  * @param total_len The total length of the input_data (a 32-bit value here)
  * @param digests Murmur3 digest
  * @returns none
  *
  */
 void murmur3_x64_128_tail(const uint8_t * tail_buffer, uint32_t total_len,
			uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]);

#ifdef __cplusplus
}
#endif

#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
new file mode 100644
index 000000000..96502c32c
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
@@ -0,0 +1,82 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; WRT_OPT is "wrt ..plt" for elf64 output and empty for every other format.
; It is not referenced in this file; presumably the multibinary.asm macros
; use it -- TODO confirm.
%ifidn __OUTPUT_FORMAT__, elf64
%define WRT_OPT		wrt ..plt
%else
%define WRT_OPT
%endif

%include "reg_sizes.asm"
%include "multibinary.asm"

; The ISA-specific implementations are only declared for non-elf32 output;
; elf32 builds see the base (portable C) versions only.
%ifidn __OUTPUT_FORMAT__, elf32
 [bits 32]
%else
 default rel
 [bits 64]

 extern mh_sha1_murmur3_x64_128_update_sse
 extern mh_sha1_murmur3_x64_128_update_avx
 extern mh_sha1_murmur3_x64_128_update_avx2
 extern mh_sha1_murmur3_x64_128_finalize_sse
 extern mh_sha1_murmur3_x64_128_finalize_avx
 extern mh_sha1_murmur3_x64_128_finalize_avx2

 ; AVX-512 variants exist only when the assembler supports AVX-512
 %ifdef HAVE_AS_KNOWS_AVX512
 extern mh_sha1_murmur3_x64_128_update_avx512
 extern mh_sha1_murmur3_x64_128_finalize_avx512
 %endif

%endif

extern mh_sha1_murmur3_x64_128_update_base
extern mh_sha1_murmur3_x64_128_finalize_base

; Public entry points; the mbin_* macros come from the included
; multibinary.asm (not shown here).
mbin_interface mh_sha1_murmur3_x64_128_update
mbin_interface mh_sha1_murmur3_x64_128_finalize

; Candidate lists per entry point. Only elf64 output registers the
; sse/avx/avx2(/avx512) variants; every other output format registers the
; base implementation alone.
; NOTE(review): the externs above are declared for all non-elf32 formats,
; but only elf64 dispatches to them -- confirm this is intended for win64.
%ifidn __OUTPUT_FORMAT__, elf64

 %ifdef HAVE_AS_KNOWS_AVX512
 mbin_dispatch_init6 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base, mh_sha1_murmur3_x64_128_update_sse, mh_sha1_murmur3_x64_128_update_avx, mh_sha1_murmur3_x64_128_update_avx2, mh_sha1_murmur3_x64_128_update_avx512
 mbin_dispatch_init6 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base, mh_sha1_murmur3_x64_128_finalize_sse, mh_sha1_murmur3_x64_128_finalize_avx, mh_sha1_murmur3_x64_128_finalize_avx2, mh_sha1_murmur3_x64_128_finalize_avx512
 %else
 mbin_dispatch_init5 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base, mh_sha1_murmur3_x64_128_update_sse, mh_sha1_murmur3_x64_128_update_avx, mh_sha1_murmur3_x64_128_update_avx2
 mbin_dispatch_init5 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base, mh_sha1_murmur3_x64_128_finalize_sse, mh_sha1_murmur3_x64_128_finalize_avx, mh_sha1_murmur3_x64_128_finalize_avx2
 %endif

%else
 mbin_dispatch_init2 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base
 mbin_dispatch_init2 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base
%endif

;;; func core, ver, snum
slversion mh_sha1_murmur3_x64_128_update, 00, 02, 0252
slversion mh_sha1_murmur3_x64_128_finalize, 00, 02, 0253
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c
new file mode 100644
index 000000000..8a17fdfd7
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c
@@ -0,0 +1,206 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1_murmur3_x64_128.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Loop many times over same +# define TEST_LEN 16*1024 +# define TEST_LOOPS 20000 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. +# define TEST_LEN 32*1024*1024 +# define TEST_LOOPS 100 +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif +#define TEST_MEM TEST_LEN + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \ + printf("The stitch function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest); + +extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * murmur3_x64_128_digest); + +void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * mh_sha1_digest, 
uint32_t * murmur3_x64_128_digest) +{ + mh_sha1_ref(buffer, len, mh_sha1_digest); + murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest); + + return; +} + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS], + uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + int murmur3_fail = 0; + + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) { + if (murmur3_test[i] != murmur3_base[i]) + murmur3_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + if (murmur3_fail) { + printf("murmur3 fail test\n"); + printf("base: "); + dump((char *)murmur3_base, 16); + printf("ref: "); + dump((char *)murmur3_test, 16); + } + + return mh_sha1_fail + murmur3_fail; +} + +int main(int argc, char *argv[]) +{ + int i, fail = 0; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS]; + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS], + murmur3_base[MURMUR3_x64_128_DIGEST_WORDS]; + uint8_t *buff = NULL; + struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL; + struct perf start, stop; + + printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n"); + + buff = malloc(TEST_LEN); + update_ctx = malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + // 
mh_sha1_murmur3 base version + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base); + perf_start(&start); + for (i = 0; i < TEST_LOOPS / 10; i++) { + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, + murmur3_base); + } + perf_stop(&stop); + printf("mh_sha1_murmur3_x64_128_base" TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_MEM * i); + + //Update feature test + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + perf_start(&start); + for (i = 0; i < TEST_LOOPS; i++) { + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + } + perf_stop(&stop); + printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_MEM * i); + + // Check results + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size=%d\n", TEST_LEN); + return -1; + } + + if (fail) + printf("Test failed function test%d\n", fail); + else + printf("Pass func check\n"); + + return fail; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c new file mode 100644 index 000000000..a2ea8ce92 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c @@ -0,0 +1,248 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1_murmur3_x64_128.h" + +#define TEST_LEN 16*1024 +#define TEST_SIZE 8*1024 +#define TEST_MEM TEST_LEN +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \ + printf("The stitch function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest); + +extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * murmur3_x64_128_digest); + +void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest) +{ + mh_sha1_ref(buffer, len, mh_sha1_digest); + murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest); + + return; +} + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS], + uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + int murmur3_fail = 0; + + for (i = 0; i < 
SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) { + if (murmur3_test[i] != murmur3_base[i]) + murmur3_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + if (murmur3_fail) { + printf("murmur3 fail test\n"); + printf("base: "); + dump((char *)murmur3_base, 16); + printf("ref: "); + dump((char *)murmur3_test, 16); + } + + return mh_sha1_fail + murmur3_fail; +} + +int main(int argc, char *argv[]) +{ + int fail = 0; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS]; + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS], + murmur3_base[MURMUR3_x64_128_DIGEST_WORDS]; + uint8_t *buff = NULL; + int size, offset; + struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL; + + printf(" " xstr(TEST_UPDATE_FUNCTION) "_test:"); + + srand(TEST_SEED); + + buff = malloc(TEST_LEN); + update_ctx = malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("fail rand1 test\n"); + return -1; + } else + putchar('.'); + + // Test various size messages + for (size = TEST_LEN; size >= 0; size--) { + + // Fill with rand data + rand_buffer(buff, size); + + mh_sha1_murmur3_x64_128_base(buff, size, TEST_SEED, hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, 
buff, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size=%d\n", size); + return -1; + } + + if ((size & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // Test various buffer offsets and sizes + printf("offset tests"); + for (size = TEST_LEN - 256; size > 256; size -= 11) { + for (offset = 0; offset < 256; offset++) { + mh_sha1_murmur3_x64_128_base(buff + offset, size, TEST_SEED, + hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = + compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size=%d offset=%d\n", size, offset); + return -1; + } + + } + if ((size & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + // Run efence tests + printf("efence tests"); + for (size = TEST_SIZE; size > 0; size--) { + offset = TEST_LEN - size; + mh_sha1_murmur3_x64_128_base(buff + offset, size, TEST_SEED, + hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size=%d offset=%d\n", size, offset); + return -1; + } + + if ((size & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? 
"Pass" : "Fail"); + + return fail; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c new file mode 100644 index 000000000..e8d21ac26 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c @@ -0,0 +1,107 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef UPDATE_FUNCTION +#include "mh_sha1_murmur3_x64_128_internal.h" +#include <string.h> + +#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_base +#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_base +#define UPDATE_FUNCTION_SLVER +#endif + +int UPDATE_FUNCTION(struct mh_sha1_murmur3_x64_128_ctx *ctx, const void *buffer, uint32_t len) +{ + + uint8_t *partial_block_buffer; + uint64_t partial_block_len; + uint64_t num_blocks; + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint8_t *aligned_frame_buffer; + uint32_t *murmur3_x64_128_digest; + const uint8_t *input_data = (const uint8_t *)buffer; + + if (ctx == NULL) + return MH_SHA1_MURMUR3_CTX_ERROR_NULL; + + if (len == 0) + return MH_SHA1_MURMUR3_CTX_ERROR_NONE; + + partial_block_len = ctx->total_length % MH_SHA1_BLOCK_SIZE; + partial_block_buffer = ctx->partial_block_buffer; + aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer); + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + murmur3_x64_128_digest = ctx->murmur3_x64_128_digest; + + ctx->total_length += len; + // No enough input data for mh_sha1 calculation + if (len + partial_block_len < MH_SHA1_BLOCK_SIZE) { + memcpy(partial_block_buffer + partial_block_len, input_data, len); + return MH_SHA1_MURMUR3_CTX_ERROR_NONE; + } + // mh_sha1 calculation for the previous partial block + if (partial_block_len != 0) { + 
memcpy(partial_block_buffer + partial_block_len, input_data, + MH_SHA1_BLOCK_SIZE - partial_block_len); + //do one_block process + BLOCK_FUNCTION(partial_block_buffer, mh_sha1_segs_digests, + aligned_frame_buffer, murmur3_x64_128_digest, 1); + input_data += MH_SHA1_BLOCK_SIZE - partial_block_len; + len -= MH_SHA1_BLOCK_SIZE - partial_block_len; + memset(partial_block_buffer, 0, MH_SHA1_BLOCK_SIZE); + } + // Calculate mh_sha1 for the current blocks + num_blocks = len / MH_SHA1_BLOCK_SIZE; + if (num_blocks > 0) { + //do num_blocks process + BLOCK_FUNCTION(input_data, mh_sha1_segs_digests, aligned_frame_buffer, + murmur3_x64_128_digest, num_blocks); + len -= num_blocks * MH_SHA1_BLOCK_SIZE; + input_data += num_blocks * MH_SHA1_BLOCK_SIZE; + } + // Store the partial block + if (len != 0) { + memcpy(partial_block_buffer, input_data, len); + } + + return MH_SHA1_MURMUR3_CTX_ERROR_NONE; + +} + +#ifdef UPDATE_FUNCTION_SLVER +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + + // Version info +struct slver mh_sha1_murmur3_x64_128_update_base_slver_0000025a; +struct slver mh_sha1_murmur3_x64_128_update_base_slver = { 0x025a, 0x00, 0x00 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c new file mode 100644 index 000000000..853e330a4 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c @@ -0,0 +1,272 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1_murmur3_x64_128.h" + +#define TEST_LEN 16*1024 +#define TEST_SIZE 8*1024 +#define TEST_MEM TEST_LEN +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \ + printf("The stitch function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest); + +extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * murmur3_x64_128_digest); + +void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest) +{ + mh_sha1_ref(buffer, len, mh_sha1_digest); + murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest); + + return; +} + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS], + uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + int murmur3_fail = 0; + + for (i = 0; i < 
SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) { + if (murmur3_test[i] != murmur3_base[i]) + murmur3_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + if (murmur3_fail) { + printf("murmur3 fail test\n"); + printf("base: "); + dump((char *)murmur3_base, 16); + printf("ref: "); + dump((char *)murmur3_test, 16); + } + + return mh_sha1_fail + murmur3_fail; +} + +int main(int argc, char *argv[]) +{ + int fail = 0, i; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS]; + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS], + murmur3_base[MURMUR3_x64_128_DIGEST_WORDS]; + uint8_t *buff = NULL; + int update_count; + int size1, size2, offset, addr_offset; + struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL; + uint8_t *mem_addr = NULL; + + printf(" " xstr(TEST_UPDATE_FUNCTION) "_test:"); + + srand(TEST_SEED); + + buff = malloc(TEST_LEN); + update_ctx = malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("fail rand1 test\n"); + return -1; + } else + putchar('.'); + + // Test various size messages by update twice. 
+ printf("\n various size messages by update twice tests"); + for (size1 = TEST_LEN; size1 >= 0; size1--) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, + murmur3_base); + + // subsequent update + size2 = TEST_LEN - size1; // size2 is different with the former + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size1=%d\n", size1); + return -1; + } + + if ((size2 & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // Test various update count + printf("\n various update count tests"); + for (update_count = 1; update_count <= TEST_LEN; update_count++) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, + murmur3_base); + + // subsequent update + size1 = TEST_LEN / update_count; + size2 = TEST_LEN - size1 * (update_count - 1); // size2 is different with the former + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + for (i = 1, offset = 0; i < update_count; i++) { + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1)); + offset += size1; + } + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size1=%d\n", size1); + return -1; + } + + if ((size2 & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // test various start address of ctx. 
+ printf("\n various start address of ctx test"); + free(update_ctx); + mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10); + for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, + murmur3_base); + + // a unaligned offset + update_ctx = (struct mh_sha1_murmur3_x64_128_ctx *)(mem_addr + addr_offset); + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail addr_offset=%d\n", addr_offset); + return -1; + } + + if ((addr_offset & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail"); + + return fail; + +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c new file mode 100644 index 000000000..75c3d90b5 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c @@ -0,0 +1,78 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdlib.h> // for NULL +#include "murmur3_x64_128_internal.c" +/******************************************************************* + * Single API which can calculate murmur3 + ******************************************************************/ +/** + * @brief Get the digest of murmur3_x64_128 through a single API. + * + * Using murmur3_x64_128_block and murmur3_x64_128_tail. + * Used to test the murmur3_x64_128 digest. 
+ * + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param murmur_seed Seed as an initial digest of murmur3 + * @param murmur3_x64_128_digest The digest of murmur3_x64_128 + * @returns none + * + */ +void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * murmur3_x64_128_digest) +{ + uint64_t *murmur3_x64_128_hash; + uint32_t murmur3_x64_128_hash_dword[4]; + uint8_t *tail_buffer; + const uint8_t *input_data = (const uint8_t *)buffer; + + // Initiate murmur3 + murmur3_x64_128_hash = (uint64_t *) murmur3_x64_128_hash_dword; + murmur3_x64_128_hash[0] = murmur_seed; + murmur3_x64_128_hash[1] = murmur_seed; + + // process bodies + murmur3_x64_128_block((uint8_t *) input_data, len / MUR_BLOCK_SIZE, + murmur3_x64_128_hash_dword); + + // process finalize + tail_buffer = (uint8_t *) input_data + len - len % MUR_BLOCK_SIZE; + murmur3_x64_128_tail(tail_buffer, len, murmur3_x64_128_hash_dword); + + // output the digests + if (murmur3_x64_128_digest != NULL) { + murmur3_x64_128_digest[0] = murmur3_x64_128_hash_dword[0]; + murmur3_x64_128_digest[1] = murmur3_x64_128_hash_dword[1]; + murmur3_x64_128_digest[2] = murmur3_x64_128_hash_dword[2]; + murmur3_x64_128_digest[3] = murmur3_x64_128_hash_dword[3]; + } + + return; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c new file mode 100644 index 000000000..6aab002ef --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c @@ -0,0 +1,138 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include "mh_sha1_murmur3_x64_128_internal.h" +#include <stdlib.h> // for NULL + +/* murmur3_x64_128 constants */ +// Shift bits of circle rotate +#define MUR_SH1 31 +#define MUR_SH2 33 +#define MUR_SH3 27 +#define MUR_SH4 31 +#define MUR_SH5 33 + +#define MUR_MUL 5 +#define MUR_ADD1 0x52dce729 +#define MUR_ADD2 0x38495ab5 + +#define MUR_CON1 0x87c37b91114253d5LLU +#define MUR_CON2 0x4cf5ad432745937fLLU + +#define MUR_FMUL1 0xff51afd7ed558ccdLLU +#define MUR_FMUL2 0xc4ceb9fe1a85ec53LLU + +/* murmur3_x64_128 inline functions */ +static inline uint64_t blockmix64(uint64_t data, uint64_t conA, uint64_t conB, uint64_t shift) +{ + data *= conA; + data = (data << shift) | (data >> (64 - shift)); + data *= conB; + return data; +} + +static inline uint64_t hashmix64(uint64_t hashA, uint64_t hashB, uint64_t data, uint64_t add, + uint64_t shift) +{ + hashA ^= data; + hashA = (hashA << shift) | (hashA >> (64 - shift)); + hashA += hashB; + hashA = hashA * MUR_MUL + add; + return hashA; +} + +void murmur3_x64_128_block(const uint8_t * input_data, uint32_t num_blocks, + uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]) +{ + uint64_t data1, data2; + uint64_t *input_qword = (uint64_t *) input_data; + uint64_t *hash = (uint64_t *) digests; + uint32_t i = 0; + + while (i < num_blocks) { + data1 = input_qword[i * 2]; + data2 = input_qword[i * 2 + 1]; + data1 = blockmix64(data1, MUR_CON1, MUR_CON2, MUR_SH1); + data2 = blockmix64(data2, MUR_CON2, MUR_CON1, MUR_SH2); + hash[0] = hashmix64(hash[0], hash[1], data1, MUR_ADD1, MUR_SH3); + hash[1] = hashmix64(hash[1], hash[0], data2, MUR_ADD2, MUR_SH4); + i++; + } + + return; +} + +void murmur3_x64_128_tail(const uint8_t * tail_buffer, uint32_t total_len, + uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]) +{ + uint64_t data1, data2; + uint64_t *hash = (uint64_t *) digests; + uint64_t tail_len = total_len % 16; + uint8_t *tail = (uint8_t *) tail_buffer; + + union { + 
uint64_t hash[2]; + uint8_t hashB[16]; + } hashU; + + // tail + hashU.hash[0] = hashU.hash[1] = 0; + + while (tail_len-- > 0) + hashU.hashB[tail_len] = tail[tail_len]; + + data1 = hashU.hash[0]; + data2 = hashU.hash[1]; + + data1 = blockmix64(data1, MUR_CON1, MUR_CON2, MUR_SH1); + data2 = blockmix64(data2, MUR_CON2, MUR_CON1, MUR_SH2); + + hash[0] ^= total_len ^ data1; + hash[1] ^= total_len ^ data2; + + hash[0] += hash[1]; + hash[1] += hash[0]; + + hash[0] ^= hash[0] >> MUR_SH5; + hash[0] *= MUR_FMUL1; + hash[0] ^= hash[0] >> MUR_SH5; + hash[0] *= MUR_FMUL2; + hash[0] ^= hash[0] >> MUR_SH5; + + hash[1] ^= hash[1] >> MUR_SH5; + hash[1] *= MUR_FMUL1; + hash[1] ^= hash[1] >> MUR_SH5; + hash[1] *= MUR_FMUL2; + hash[1] ^= hash[1] >> MUR_SH5; + + hash[0] += hash[1]; + hash[1] += hash[0]; + + return; +} |