Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128')
25 files changed, 5617 insertions, 0 deletions
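The series below vendors ISA-L crypto's stitched mh_sha1 + murmur3_x64_128 interface: a single pass over the input updates a multi-hash SHA-1 digest and a MurmurHash3_x64_128 digest together, with SSE/AVX/AVX2/AVX-512 and aarch64 (SHA-1 Crypto Extension, ASIMD) block kernels selected at run time. A minimal caller sketch, assuming the public interface declared in include/mh_sha1_murmur3_x64_128.h as added in this series (the seed value and buffer contents are illustrative):

    #include <stdint.h>
    #include <string.h>
    #include "mh_sha1_murmur3_x64_128.h"

    int main(void)
    {
            struct mh_sha1_murmur3_x64_128_ctx ctx;
            uint32_t sha1_digest[5];    /* SHA1_DIGEST_WORDS = 5 words, 20 bytes */
            uint32_t murmur_digest[4];  /* MURMUR3_x64_128_DIGEST_WORDS = 4 words, 16 bytes */
            uint8_t data[1024];

            memset(data, 0x5a, sizeof(data));

            /* the 64-bit seed initializes both murmur3 hash lanes */
            if (mh_sha1_murmur3_x64_128_init(&ctx, 0x1234) != MH_SHA1_MURMUR3_CTX_ERROR_NONE)
                    return 1;
            /* update may be called repeatedly with arbitrary lengths */
            if (mh_sha1_murmur3_x64_128_update(&ctx, data, sizeof(data)) != MH_SHA1_MURMUR3_CTX_ERROR_NONE)
                    return 1;
            /* one finalize yields both digests */
            return mh_sha1_murmur3_x64_128_finalize(&ctx, sha1_digest, murmur_digest);
    }
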
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am new file mode 100644 index 000000000..e6ea6784c --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/Makefile.am @@ -0,0 +1,89 @@ +######################################################################## +# Copyright(c) 2011-2016 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +lsrc_murmur = mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c + +lsrc_stitch = mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm + +lsrc_stitch += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm + +lsrc_x86_64 += $(lsrc_murmur) \ + $(lsrc_stitch) + +lsrc_x86_32 += $(lsrc_x86_64) + +lsrc_aarch64 += $(lsrc_murmur) \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c \ + mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c \ + mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c \ + mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S \ + mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c \ + mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S \ + mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S + +lsrc_base_aliases += $(lsrc_murmur) \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c + +other_src += include/reg_sizes.asm \ + include/multibinary.asm \ + include/test.h \ + mh_sha1/mh_sha1_internal.h \ + mh_sha1_murmur3_x64_128/murmur3_x64_128.c \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h + +src_include += -I $(srcdir)/mh_sha1_murmur3_x64_128 + +extern_hdrs += include/mh_sha1_murmur3_x64_128.h + +unit_tests += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test \ + mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test + +perf_tests += mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf + + +mh_sha1_murmur3_x64_128_test: mh_sha1_ref.o murmur3_x64_128.o +mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_test_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la + +mh_sha1_murmur3_x64_128_update_test: mh_sha1_ref.o murmur3_x64_128.o +mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_update_test_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la + +mh_sha1_murmur3_x64_128_perf: mh_sha1_ref.o murmur3_x64_128.o +mh_sha1_murmur3_x64_128_mh_sha1_murmur3_x64_128_perf_LDADD = mh_sha1/mh_sha1_ref.lo mh_sha1_murmur3_x64_128/murmur3_x64_128.lo libisal_crypto.la diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c new file mode 100644 index 000000000..e6993703a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_dispatcher.c @@ -0,0 +1,53 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(mh_sha1_murmur3_x64_128_update) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(mh_sha1_murmur3_update_ce); + + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(mh_sha1_murmur3_update_asimd); + + return PROVIDER_BASIC(mh_sha1_murmur3_x64_128_update); +} + +DEFINE_INTERFACE_DISPATCHER(mh_sha1_murmur3_x64_128_finalize) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(mh_sha1_murmur3_finalize_ce); + + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(mh_sha1_murmur3_finalize_asimd); + + return PROVIDER_BASIC(mh_sha1_murmur3_x64_128_finalize); +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_internal.h new file mode 100644 index 000000000..22b33cbd2 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_aarch64_internal.h @@ -0,0 +1,91 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MH_SHA1_MURMUR3_AARCH64_INTERNAL_H_ +#define _MH_SHA1_MURMUR3_AARCH64_INTERNAL_H_ + +/** + * @file mh_sha1_murmur3_aarch64_internal.h + * @brief mh_sha1_murmur3_aarch64 internal function prototypes and macros + * + * Interface for mh_sha1_murmur3_aarch64 internal functions + * + */ +#include <stdint.h> +#include "mh_sha1_murmur3_x64_128_internal.h" + +#ifdef __cplusplus +extern "C" { +#endif + + /** + * @brief Calculate blocks which size is MH_SHA1_BLOCK_SIZE*N + * + * @requires Crypto Extension + * + * @param input_data Pointer to input data to be processed + * @param mh_sha1_digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param murmur3_x64_128_digests Murmur3 digest + * @param num_blocks The number of blocks. + * @returns none + * + */ +void mh_sha1_murmur3_block_ce(const uint8_t * input_data, + uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], + uint32_t + murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t num_blocks); + + /** + * @brief Calculate blocks which size is MH_SHA1_BLOCK_SIZE*N + * + * @requires ASIMD + * + * @param input_data Pointer to input data to be processed + * @param mh_sha1_digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param murmur3_x64_128_digests Murmur3 digest + * @param num_blocks The number of blocks. + * @returns none + * + */ +void mh_sha1_murmur3_block_asimd(const uint8_t * input_data, + uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], + uint32_t + murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t num_blocks); + + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c new file mode 100644 index 000000000..9cac8504e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_asimd.c @@ -0,0 +1,54 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <string.h> +#include "mh_sha1_murmur3_aarch64_internal.h" + +extern void mh_sha1_tail_asimd(uint8_t * partial_buffer, uint32_t total_len, + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t * frame_buffer, + uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + +extern void mh_sha1_block_asimd(const uint8_t * input_data, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); + +// mh_sha1_murmur3_update_asimd.c +#define UPDATE_FUNCTION mh_sha1_murmur3_update_asimd +#define BLOCK_FUNCTION mh_sha1_murmur3_block_asimd +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +// mh_sha1_murmur3_finalize_asimd.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_finalize_asimd +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_asimd +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S new file mode 100644 index 000000000..575129f36 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_asimd.S @@ -0,0 +1,224 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + .arch armv8-a + +#include "sha1_asimd_common.S" +.macro sha1_step_16_79_interleave0 windex:req + // interleaving murmur3 operation + .if (\windex % 4) == 0 + ldp mur_data1, mur_data2, [mur_data], #16 + .endif + .if (\windex % 4) == 1 + /* rotate left by 31 bits */ + ror mur_data1, mur_data1, #64-31 + /* rotate left by 33 bits */ + ror mur_data2, mur_data2, #64-33 + .endif + .if (\windex % 4) == 2 + eor mur_hash1, mur_hash1, mur_data1 + /* rotate left by 27 bits */ + ror mur_hash1, mur_hash1, #64-27 + .endif + .if (\windex % 4) == 3 + eor mur_hash2, mur_hash2, mur_data2 + /* rotate left by 31 bits */ + ror mur_hash2, mur_hash2, #64-31 + .endif +.endm + +.macro sha1_step_16_79_interleave1 windex:req + // interleaving murmur3 operation + .if (\windex % 4) == 0 + mul mur_data1, mur_data1, mur_c1 + mul mur_data2, mur_data2, mur_c2 + .endif + .if (\windex % 4) == 1 + mul mur_data1, mur_data1, mur_c2 + mul mur_data2, mur_data2, mur_c1 + .endif + .if (\windex % 4) == 2 + add mur_hash1, mur_hash1, mur_hash2 + //mur_hash1 = mur_hash1 * 5 + N1 + add mur_hash1, mur_hash1, mur_hash1, LSL #2 + add mur_hash1, mur_n1, mur_hash1 + .endif + .if (\windex % 4) == 3 + add mur_hash2, mur_hash2, mur_hash1 + // mur_hash2 = mur_hash2 * 5 + N2 + add mur_hash2, mur_hash2, mur_hash2, LSL #2 + add mur_hash2, mur_n2, mur_hash2 + .endif +.endm + +.macro load_x4_word idx:req + ld1 {WORD\idx\().16b},[segs_ptr] + add segs_ptr,segs_ptr,#64 +.endm + +/* + * void mh_sha1_murmur3_block_asimd (const uint8_t * input_data, + * uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], + * uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], + * uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], + * uint32_t num_blocks); + * arg 0 pointer to input data + * arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) + * arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. + * arg 3 pointer to murmur3 digest + * arg 4 number of 1KB blocks + */ + + input_data .req x0 + sha1_digest .req x1 + data_buf .req x2 + mur_digest .req x3 + num_blocks .req w4 + + src .req x5 + dst .req x6 + offs .req x7 + mh_segs .req x8 + tmp .req x9 + tmpw .req w9 + segs_ptr .req x10 + mur_hash1 .req x11 + mur_hash2 .req x12 + mur_c1 .req x13 + mur_c2 .req x14 + mur_data1 .req x19 + mur_data2 .req x20 + mur_data .req x21 + mur_n1 .req x22 + mur_n1_w .req w22 + mur_n2 .req x23 + mur_n2_w .req w23 + block_ctr .req w24 + + .global mh_sha1_murmur3_block_asimd + .type mh_sha1_murmur3_block_asimd, %function +mh_sha1_murmur3_block_asimd: + cmp num_blocks, #0 + beq .return + sha1_asimd_save_stack + stp x19, x20, [sp, -48]! 
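+	// x19-x24 hold the murmur3 inputs/state (mur_data*, mur_n*) and the block
+	// counter across the SHA-1 loop; they are callee-saved under AAPCS64, so
+	// spill them in addition to the d8-d15 saved by sha1_asimd_save_stack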
+ stp x21, x22, [sp, 16] + stp x23, x24, [sp, 32] + + mov mur_data, input_data + ldr mur_hash1, [mur_digest] + ldr mur_hash2, [mur_digest, 8] + adr mur_c1, C1 + ldr mur_c1, [mur_c1] + adr mur_c2, C2 + ldr mur_c2, [mur_c2] + adr tmp, N1 + ldr mur_n1_w, [tmp] + adr tmp, N2 + ldr mur_n2_w, [tmp] + + mov mh_segs, #0 +.seg_loops: + add segs_ptr,input_data,mh_segs + mov offs, #64 + add src, sha1_digest, mh_segs + ld1 {VA.4S}, [src], offs + ld1 {VB.4S}, [src], offs + ld1 {VC.4S}, [src], offs + ld1 {VD.4S}, [src], offs + ld1 {VE.4S}, [src], offs + mov block_ctr,num_blocks + +.block_loop: + sha1_single + subs block_ctr, block_ctr, 1 + bne .block_loop + + mov offs, #64 + add dst, sha1_digest, mh_segs + st1 {VA.4S}, [dst], offs + st1 {VB.4S}, [dst], offs + st1 {VC.4S}, [dst], offs + st1 {VD.4S}, [dst], offs + st1 {VE.4S}, [dst], offs + + add mh_segs, mh_segs, #16 + cmp mh_segs, #64 + bne .seg_loops + + /* save murmur-hash digest */ + str mur_hash1, [mur_digest], #8 + str mur_hash2, [mur_digest] + + ldp x21, x22, [sp, 16] + ldp x23, x24, [sp, 32] + ldp x19, x20, [sp], 48 + sha1_asimd_restore_stack +.return: + ret + + .size mh_sha1_murmur3_block_asimd, .-mh_sha1_murmur3_block_asimd + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 +N1: + .word 0x52dce729 + .word 0x52dce729 + .word 0x52dce729 + .word 0x52dce729 +N2: + .word 0x38495ab5 + .word 0x38495ab5 + .word 0x38495ab5 + .word 0x38495ab5 +C1: + .dword 0x87c37b91114253d5 + .dword 0x87c37b91114253d5 +C2: + .dword 0x4cf5ad432745937f + .dword 0x4cf5ad432745937f diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S new file mode 100644 index 000000000..7f4256e20 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_block_ce.S @@ -0,0 +1,482 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + .arch armv8-a+crypto + .text + .align 2 + .p2align 3,,7 + +/* +Macros +*/ + +.macro declare_var_vector_reg name:req,reg:req + \name\()_q .req q\reg + \name\()_v .req v\reg + \name\()_s .req s\reg +.endm + + + +/* +Variable list +*/ + + declare_var_vector_reg lane0_msg_0, 0 + declare_var_vector_reg lane1_msg_0, 1 + declare_var_vector_reg lane2_msg_0, 2 + declare_var_vector_reg lane3_msg_0, 3 + declare_var_vector_reg lane0_msg_1, 4 + declare_var_vector_reg lane1_msg_1, 5 + declare_var_vector_reg lane2_msg_1, 6 + declare_var_vector_reg lane3_msg_1, 7 + declare_var_vector_reg lane0_msg_2, 8 + declare_var_vector_reg lane1_msg_2, 9 + declare_var_vector_reg lane2_msg_2,10 + declare_var_vector_reg lane3_msg_2,11 + declare_var_vector_reg lane0_msg_3,12 + declare_var_vector_reg lane1_msg_3,13 + declare_var_vector_reg lane2_msg_3,14 + declare_var_vector_reg lane3_msg_3,15 + + declare_var_vector_reg lane0_abcd ,16 + declare_var_vector_reg lane1_abcd ,17 + declare_var_vector_reg lane2_abcd ,18 + declare_var_vector_reg lane3_abcd ,19 + declare_var_vector_reg lane0_tmp0 ,20 + declare_var_vector_reg lane1_tmp0 ,21 + declare_var_vector_reg lane2_tmp0 ,22 + declare_var_vector_reg lane3_tmp0 ,23 + declare_var_vector_reg lane0_tmp1 ,24 + declare_var_vector_reg lane1_tmp1 ,25 + declare_var_vector_reg lane2_tmp1 ,26 + declare_var_vector_reg lane3_tmp1 ,27 + + + declare_var_vector_reg e0 ,28 + declare_var_vector_reg e1 ,29 + declare_var_vector_reg key ,30 + declare_var_vector_reg tmp ,31 + + key_adr .req x5 + msg_adr .req x6 + block_cnt .req x7 + offs .req x8 + mur_n1 .req x9 + mur_n1_w .req w9 + mur_n2 .req x10 + mur_n2_w .req w10 + mur_hash1 .req x11 + mur_hash2 .req x12 + mur_c1 .req x13 + mur_c2 .req x14 + mur_data1 .req x15 + + digest_adr .req x16 + tmp0_adr .req x17 + tmp1_adr .req x18 + mur_data2 .req x19 + mur_data .req x20 + +.macro murmur3_00 + ldp mur_data1, mur_data2, [mur_data], #16 + mul mur_data1, mur_data1, mur_c1 + mul mur_data2, mur_data2, mur_c2 +.endm + +.macro murmur3_01 + /* rotate left by 31 bits */ + ror mur_data1, mur_data1, #64-31 + /* rotate left by 33 bits */ + ror mur_data2, mur_data2, #64-33 + mul mur_data1, mur_data1, mur_c2 + mul mur_data2, mur_data2, mur_c1 +.endm + +.macro murmur3_02 + eor mur_hash1, mur_hash1, mur_data1 + /* rotate left by 27 bits */ + ror mur_hash1, mur_hash1, #64-27 + add mur_hash1, mur_hash1, mur_hash2 + // mur_hash1 = mur_hash1 * 5 + N1 + add mur_hash1, mur_hash1, mur_hash1, LSL #2 + add mur_hash1, mur_n1, mur_hash1 +.endm + +.macro murmur3_03 + eor mur_hash2, mur_hash2, mur_data2 + /* rotate left by 31 bits */ + ror mur_hash2, mur_hash2, #64-31 + add mur_hash2, mur_hash2, mur_hash1 + // mur_hash2 = mur_hash2 * 5 + N2 + add mur_hash2, mur_hash2, mur_hash2, LSL #2 + add mur_hash2, mur_n2, mur_hash2 +.endm + +/** + * maros for round 4-67 + * the code execute 16 times per block, allowing the inserted murmur3 operation to process 256 bytes +*/ +.macro sha1_4_rounds 
inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req + sha1h lane0_\tmp0\()_s, lane0_\abcd\()_s + sha1h lane1_\tmp0\()_s, lane1_\abcd\()_s + sha1h lane2_\tmp0\()_s, lane2_\abcd\()_s + sha1h lane3_\tmp0\()_s, lane3_\abcd\()_s + mov \e0\()_v.S[0],lane0_\tmp0\()_v.S[0] + mov \e0\()_v.S[1],lane1_\tmp0\()_v.S[0] + mov \e0\()_v.S[2],lane2_\tmp0\()_v.S[0] + mov \e0\()_v.S[3],lane3_\tmp0\()_v.S[0] + mov lane0_\tmp0\()_v.S[0],\e1\()_v.S[0] + mov lane1_\tmp0\()_v.S[0],\e1\()_v.S[1] + mov lane2_\tmp0\()_v.S[0],\e1\()_v.S[2] + mov lane3_\tmp0\()_v.S[0],\e1\()_v.S[3] + \inst lane0_\abcd\()_q,lane0_\tmp0\()_s,lane0_\tmp1\()_v.4s + murmur3_00 + \inst lane1_\abcd\()_q,lane1_\tmp0\()_s,lane1_\tmp1\()_v.4s + murmur3_01 + \inst lane2_\abcd\()_q,lane2_\tmp0\()_s,lane2_\tmp1\()_v.4s + murmur3_02 + \inst lane3_\abcd\()_q,lane3_\tmp0\()_s,lane3_\tmp1\()_v.4s + murmur3_03 + ld1 {lane0_\tmp0\()_v.4s-lane3_\tmp0\()_v.4s},[\tmp0\()_adr] + add lane0_\tmp1\()_v.4s,lane0_\msg3\()_v.4s,key_v.4s + add lane1_\tmp1\()_v.4s,lane1_\msg3\()_v.4s,key_v.4s + add lane2_\tmp1\()_v.4s,lane2_\msg3\()_v.4s,key_v.4s + add lane3_\tmp1\()_v.4s,lane3_\msg3\()_v.4s,key_v.4s + st1 {lane0_\tmp1\()_v.4s-lane3_\tmp1\()_v.4s},[\tmp1\()_adr] + sha1su1 lane0_\msg0\()_v.4s,lane0_\msg3\()_v.4s + sha1su1 lane1_\msg0\()_v.4s,lane1_\msg3\()_v.4s + sha1su1 lane2_\msg0\()_v.4s,lane2_\msg3\()_v.4s + sha1su1 lane3_\msg0\()_v.4s,lane3_\msg3\()_v.4s + sha1su0 lane0_\msg1\()_v.4s,lane0_\msg2\()_v.4s,lane0_\msg3\()_v.4s + sha1su0 lane1_\msg1\()_v.4s,lane1_\msg2\()_v.4s,lane1_\msg3\()_v.4s + sha1su0 lane2_\msg1\()_v.4s,lane2_\msg2\()_v.4s,lane2_\msg3\()_v.4s + sha1su0 lane3_\msg1\()_v.4s,lane3_\msg2\()_v.4s,lane3_\msg3\()_v.4s +.endm + + +/* + * void mh_sha1_murmur3_block_ce (const uint8_t * input_data, + * uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], + * uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], + * uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], + * uint32_t num_blocks); + * arg 0 pointer to input data + * arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) + * arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. + * arg 3 pointer to murmur3 digest + * arg 4 number of 1KB blocks + */ + +/* +Arguements list +*/ + input_data .req x0 + digests .req x1 + frame_buffer .req x2 + mur_digest .req x3 + num_blocks .req w4 + + .global mh_sha1_murmur3_block_ce + .type mh_sha1_murmur3_block_ce, %function +mh_sha1_murmur3_block_ce: + // save temp vector registers + stp d8, d9, [sp, -80]! 
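+	// 80-byte frame: callee-saved d8-d15 (low halves of v8-v15) plus
+	// x19/x20, which carry murmur3 data across the hash loop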
+ + stp d10, d11, [sp, 16] + stp d12, d13, [sp, 32] + stp d14, d15, [sp, 48] + stp x19, x20, [sp, 64] + + mov mur_data, input_data + ldr mur_hash1, [mur_digest] + ldr mur_hash2, [mur_digest, 8] + adr mur_c1, C1 + ldr mur_c1, [mur_c1] + adr mur_c2, C2 + ldr mur_c2, [mur_c2] + adr tmp0_adr, N1 + ldr mur_n1_w, [tmp0_adr] + adr tmp0_adr, N2 + ldr mur_n2_w, [tmp0_adr] + + mov tmp0_adr,frame_buffer + add tmp1_adr,tmp0_adr,128 + + +start_loop: + mov block_cnt,0 + mov msg_adr,input_data +lane_loop: + mov offs,64 + adr key_adr,KEY_0 + // load msg 0 + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[3],[msg_adr],offs + + add digest_adr,digests,block_cnt + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs + ldr e0_q,[digest_adr] + + // load key_0 + ldr key_q,[key_adr] + + rev32 lane0_msg_0_v.16b,lane0_msg_0_v.16b + rev32 lane1_msg_0_v.16b,lane1_msg_0_v.16b + rev32 lane2_msg_0_v.16b,lane2_msg_0_v.16b + rev32 lane3_msg_0_v.16b,lane3_msg_0_v.16b + rev32 lane0_msg_1_v.16b,lane0_msg_1_v.16b + rev32 lane1_msg_1_v.16b,lane1_msg_1_v.16b + rev32 lane2_msg_1_v.16b,lane2_msg_1_v.16b + rev32 lane3_msg_1_v.16b,lane3_msg_1_v.16b + rev32 lane0_msg_2_v.16b,lane0_msg_2_v.16b + rev32 lane1_msg_2_v.16b,lane1_msg_2_v.16b + rev32 lane2_msg_2_v.16b,lane2_msg_2_v.16b + rev32 lane3_msg_2_v.16b,lane3_msg_2_v.16b + rev32 lane0_msg_3_v.16b,lane0_msg_3_v.16b + rev32 lane1_msg_3_v.16b,lane1_msg_3_v.16b + rev32 lane2_msg_3_v.16b,lane2_msg_3_v.16b + rev32 lane3_msg_3_v.16b,lane3_msg_3_v.16b + + add lane0_tmp1_v.4s,lane0_msg_1_v.4s,key_v.4s + add lane1_tmp1_v.4s,lane1_msg_1_v.4s,key_v.4s + add lane2_tmp1_v.4s,lane2_msg_1_v.4s,key_v.4s + add lane3_tmp1_v.4s,lane3_msg_1_v.4s,key_v.4s + st1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr] + + add lane0_tmp0_v.4s,lane0_msg_0_v.4s,key_v.4s + add lane1_tmp0_v.4s,lane1_msg_0_v.4s,key_v.4s + add lane2_tmp0_v.4s,lane2_msg_0_v.4s,key_v.4s + add lane3_tmp0_v.4s,lane3_msg_0_v.4s,key_v.4s + + /* rounds 0-3 */ + sha1h lane0_tmp1_s,lane0_abcd_s + sha1h lane1_tmp1_s,lane1_abcd_s + sha1h lane2_tmp1_s,lane2_abcd_s + sha1h lane3_tmp1_s,lane3_abcd_s + mov e1_v.S[0],lane0_tmp1_v.S[0] + mov e1_v.S[1],lane1_tmp1_v.S[0] + mov e1_v.S[2],lane2_tmp1_v.S[0] + mov e1_v.S[3],lane3_tmp1_v.S[0] + mov lane0_tmp1_v.S[0],e0_v.S[0] + mov lane1_tmp1_v.S[0],e0_v.S[1] + mov lane2_tmp1_v.S[0],e0_v.S[2] + mov lane3_tmp1_v.S[0],e0_v.S[3] + sha1c lane0_abcd_q,lane0_tmp1_s,lane0_tmp0_v.4s + sha1c lane1_abcd_q,lane1_tmp1_s,lane1_tmp0_v.4s + sha1c lane2_abcd_q,lane2_tmp1_s,lane2_tmp0_v.4s + sha1c 
lane3_abcd_q,lane3_tmp1_s,lane3_tmp0_v.4s + ld1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr] + add lane0_tmp0_v.4s,lane0_msg_2_v.4s,key_v.4s + sha1su0 lane0_msg_0_v.4s,lane0_msg_1_v.4s,lane0_msg_2_v.4s + add lane1_tmp0_v.4s,lane1_msg_2_v.4s,key_v.4s + sha1su0 lane1_msg_0_v.4s,lane1_msg_1_v.4s,lane1_msg_2_v.4s + add lane2_tmp0_v.4s,lane2_msg_2_v.4s,key_v.4s + sha1su0 lane2_msg_0_v.4s,lane2_msg_1_v.4s,lane2_msg_2_v.4s + add lane3_tmp0_v.4s,lane3_msg_2_v.4s,key_v.4s + sha1su0 lane3_msg_0_v.4s,lane3_msg_1_v.4s,lane3_msg_2_v.4s + st1 {lane0_tmp0_v.4s-lane3_tmp0_v.4s},[tmp0_adr] + + sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 4-7 */ + sha1_4_rounds sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + + + adr key_adr,KEY_1 + ldr key_q,[key_adr] + sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 /* rounds 12-15 */ + sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 20-23 */ + sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + + adr key_adr,KEY_2 + ldr key_q,[key_adr] + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 36-39 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + + adr key_adr,KEY_3 + ldr key_q,[key_adr] + sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 52-55 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + + // msg2 and msg1 are free + mov lane0_msg_2_v.S[0],e1_v.S[0] + mov lane1_msg_2_v.S[0],e1_v.S[1] + mov lane2_msg_2_v.S[0],e1_v.S[2] + mov lane3_msg_2_v.S[0],e1_v.S[3] + + /* rounds 68-71 */ + sha1h lane0_msg_1_s,lane0_abcd_s + sha1h lane1_msg_1_s,lane1_abcd_s + sha1h lane2_msg_1_s,lane2_abcd_s + sha1h lane3_msg_1_s,lane3_abcd_s + sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s + sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s + sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s + sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s + add lane0_tmp1_v.4s,lane0_msg_3_v.4s,key_v.4s + add lane1_tmp1_v.4s,lane1_msg_3_v.4s,key_v.4s + add lane2_tmp1_v.4s,lane2_msg_3_v.4s,key_v.4s + add lane3_tmp1_v.4s,lane3_msg_3_v.4s,key_v.4s + sha1su1 lane0_msg_0_v.4s,lane0_msg_3_v.4s + sha1su1 lane1_msg_0_v.4s,lane1_msg_3_v.4s + sha1su1 lane2_msg_0_v.4s,lane2_msg_3_v.4s + sha1su1 lane3_msg_0_v.4s,lane3_msg_3_v.4s + + /* rounds 72-75 */ + sha1h lane0_msg_2_s,lane0_abcd_s + sha1h lane1_msg_2_s,lane1_abcd_s + sha1h lane2_msg_2_s,lane2_abcd_s + sha1h lane3_msg_2_s,lane3_abcd_s + sha1p lane0_abcd_q,lane0_msg_1_s,lane0_tmp0_v.4s + sha1p lane1_abcd_q,lane1_msg_1_s,lane1_tmp0_v.4s + sha1p lane2_abcd_q,lane2_msg_1_s,lane2_tmp0_v.4s + sha1p lane3_abcd_q,lane3_msg_1_s,lane3_tmp0_v.4s + + /* rounds 76-79 */ + sha1h lane0_msg_1_s,lane0_abcd_s + sha1h lane1_msg_1_s,lane1_abcd_s + sha1h lane2_msg_1_s,lane2_abcd_s + sha1h lane3_msg_1_s,lane3_abcd_s + sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s + sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s + sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s + sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s + add digest_adr,digests,block_cnt 
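+	// feed-forward: reload the previous segment digests, accumulate the
+	// new working state, then store the updated digests back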
+ ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[digest_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[digest_adr] + + add lane0_abcd_v.4S,lane0_abcd_v.4S,lane0_msg_0_v.4S + add lane1_abcd_v.4S,lane1_abcd_v.4S,lane1_msg_0_v.4S + add lane2_abcd_v.4S,lane2_abcd_v.4S,lane2_msg_0_v.4S + add lane3_abcd_v.4S,lane3_abcd_v.4S,lane3_msg_0_v.4S + + add lane0_msg_1_v.4S,lane0_msg_1_v.4S,lane0_msg_3_v.4S + add lane1_msg_1_v.4S,lane1_msg_1_v.4S,lane1_msg_3_v.4S + add lane2_msg_1_v.4S,lane2_msg_1_v.4S,lane2_msg_3_v.4S + add lane3_msg_1_v.4S,lane3_msg_1_v.4S,lane3_msg_3_v.4S + + add digest_adr,digests,block_cnt + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs + st4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[digest_adr] + + add block_cnt,block_cnt,16 + cmp block_cnt,64 + add msg_adr,input_data,block_cnt + add digest_adr,digests,block_cnt + bcc lane_loop + + subs num_blocks,num_blocks,1 + add input_data,input_data,1024 + bhi start_loop + + /* save murmur-hash digest */ + str mur_hash1, [mur_digest], #8 + str mur_hash2, [mur_digest] + +exit_func: + // restore temp register + ldp d10, d11, [sp, 16] + ldp d12, d13, [sp, 32] + ldp d14, d15, [sp, 48] + ldp x19, x20, [sp, 64] + ldp d8, d9, [sp], 80 + ret + + .size mh_sha1_murmur3_block_ce, .-mh_sha1_murmur3_block_ce + .section .rodata.cst16,"aM",@progbits,16 + .align 4 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + +N1: + .word 0x52dce729 + .word 0x52dce729 + .word 0x52dce729 + .word 0x52dce729 +N2: + .word 0x38495ab5 + .word 0x38495ab5 + .word 0x38495ab5 + .word 0x38495ab5 + +C1: + .dword 0x87c37b91114253d5 + .dword 0x87c37b91114253d5 +C2: + .dword 0x4cf5ad432745937f + .dword 0x4cf5ad432745937f diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c new file mode 100644 index 000000000..4da674fba --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_ce.c @@ -0,0 +1,54 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <string.h> +#include "mh_sha1_murmur3_aarch64_internal.h" + +extern void mh_sha1_tail_ce(uint8_t * partial_buffer, uint32_t total_len, + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t * frame_buffer, + uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + +extern void mh_sha1_block_ce(const uint8_t * input_data, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); + +// mh_sha1_murmur3_update_ce.c +#define UPDATE_FUNCTION mh_sha1_murmur3_update_ce +#define BLOCK_FUNCTION mh_sha1_murmur3_block_ce +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +// mh_sha1_murmur3_finalize_ce.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_finalize_ce +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_ce +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S new file mode 100644 index 000000000..051a6157e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/mh_sha1_murmur3_multibinary.S @@ -0,0 +1,34 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +#include "aarch64_multibinary.h" + +mbin_interface mh_sha1_murmur3_x64_128_update +mbin_interface mh_sha1_murmur3_x64_128_finalize diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S new file mode 100644 index 000000000..ccc66f41a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/aarch64/sha1_asimd_common.S @@ -0,0 +1,271 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + +// macro F = (D ^ (B & (C ^ D))) +.macro FUNC_F0 + eor VF.16b, VC.16b, VD.16b + and VF.16b, VB.16b, VF.16b + eor VF.16b, VD.16b, VF.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F1 + eor VF.16b, VB.16b, VC.16b + eor VF.16b, VF.16b, VD.16b +.endm + +// F = ((B & C) | (B & D) | (C & D)) +.macro FUNC_F2 + and vT0.16b, VB.16b, VC.16b + and vT1.16b, VB.16b, VD.16b + and vT2.16b, VC.16b, VD.16b + orr VF.16b, vT0.16b, vT1.16b + orr VF.16b, VF.16b, vT2.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F3 + FUNC_F1 +.endm + +.altmacro +.macro load_next_word windex + .if \windex < 16 + load_x4_word \windex + .endif +.endm + +// FUNC_F0 is merged into STEP_00_15 for efficiency +.macro SHA1_STEP_00_15_F0 windex:req + rev32 WORD\windex\().16b,WORD\windex\().16b + next_word=\windex+1 + load_next_word %next_word + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, VA.4s, 32 - 5 + add VE.4s, VE.4s, VK.4s + sli VT.4s, VA.4s, 5 + eor VF.16b, VC.16b, VD.16b + add VE.4s, VE.4s, WORD\windex\().4s + and VF.16b, VB.16b, VF.16b + add VE.4s, VE.4s, VT.4s + eor VF.16b, VD.16b, VF.16b + ushr VT.4s, VB.4s, 32 - 30 + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + +.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req + eor vT0.16b,\reg_3\().16b,\reg_8\().16b + eor VT.16b,\reg_14\().16b,\reg_16\().16b + sha1_step_16_79_interleave0 \windex + eor vT0.16b,vT0.16b,VT.16b + sha1_step_16_79_interleave1 \windex + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, vT0.4s, 32 - 1 + add VE.4s, VE.4s, VK.4s + ushr vT1.4s, VA.4s, 32 - 5 + sli VT.4s, vT0.4s, 1 + add VE.4s, VE.4s, VT.4s + sli vT1.4s, VA.4s, 5 + mov \reg_16\().16b,VT.16b + add VE.4s, VE.4s, vT1.4s + ushr VT.4s, VB.4s, 32 - 30 + \func_f + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + + VA .req v0 + VB .req v1 + VC .req v2 + VD .req v3 + VE .req v4 + VT .req v5 + VF .req v6 + VK .req v7 + WORD0 .req v8 + WORD1 .req v9 + WORD2 .req v10 + WORD3 .req v11 + WORD4 .req v12 + WORD5 .req v13 + WORD6 .req v14 + WORD7 .req v15 + WORD8 .req v16 + WORD9 .req v17 + WORD10 .req v18 + WORD11 .req v19 + WORD12 .req v20 + WORD13 .req v21 + WORD14 .req v22 + WORD15 .req v23 + vT0 .req v24 + vT1 .req v25 + vT2 .req v26 + vAA .req v27 + vBB .req v28 + vCC .req v29 + vDD .req v30 + vEE .req v31 + TT .req v0 + sha1key_adr .req x15 + +.macro SWAP_STATES + // shifted VB is held in VT after each step + .unreq TT + TT .req VE + .unreq VE + VE .req VD + .unreq VD + VD .req VC + .unreq VC + VC .req VT + .unreq VT + VT .req VB + .unreq VB + VB .req VA + .unreq VA + VA .req TT +.endm + +.altmacro +.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req + SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\() +.endm + +.macro exec_step windex:req + .if \windex <= 15 + SHA1_STEP_00_15_F0 windex + .else + idx14=((\windex - 14) & 15) + idx8=((\windex - 8) & 15) + idx3=((\windex - 3) & 15) + idx16=(\windex & 15) + .if \windex <= 19 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 20 && \windex <= 39 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 40 && \windex <= 59 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 60 && \windex <= 79 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16 + .endif + .endif + + SWAP_STATES + + .if \windex == 
79 + // after 80 steps, the registers ABCDET has shifted from + // its orignal order of 012345 to 341520 + // have to swap back for both compile- and run-time correctness + mov v0.16b,v3.16b + .unreq VA + VA .req v0 + + mov vT0.16b,v2.16b + mov v2.16b,v1.16b + mov v1.16b,v4.16b + .unreq VB + VB .req v1 + .unreq VC + VC .req v2 + + mov v3.16b,v5.16b + .unreq VD + VD .req v3 + + mov v4.16b,vT0.16b + .unreq VE + VE .req v4 + + .unreq VT + VT .req v5 + .endif +.endm + +.macro exec_steps idx:req,more:vararg + exec_step \idx + .ifnb \more + exec_steps \more + .endif +.endm + +.macro sha1_single + load_x4_word 0 + + mov vAA.16B, VA.16B + mov vBB.16B, VB.16B + mov vCC.16B, VC.16B + mov vDD.16B, VD.16B + mov vEE.16B, VE.16B + + adr sha1key_adr, KEY_0 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 + + // 20 ~ 39 + adr sha1key_adr, KEY_1 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 + + // 40 ~ 59 + adr sha1key_adr, KEY_2 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 + + // 60 ~ 79 + adr sha1key_adr, KEY_3 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 + + add VA.4s, vAA.4s, VA.4s + add VB.4s, vBB.4s, VB.4s + add VC.4s, vCC.4s, VC.4s + add VD.4s, vDD.4s, VD.4s + add VE.4s, vEE.4s, VE.4s +.endm + +.macro sha1_asimd_save_stack + stp d8,d9,[sp, -64]! + stp d10,d11,[sp, 16] + stp d12,d13,[sp, 32] + stp d14,d15,[sp, 48] +.endm + +.macro sha1_asimd_restore_stack + ldp d10,d11,[sp, 16] + ldp d12,d13,[sp, 32] + ldp d14,d15,[sp, 48] + ldp d8,d9,[sp],64 +.endm diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c new file mode 100644 index 000000000..518adb797 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128.c @@ -0,0 +1,154 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <string.h> +#include "mh_sha1_murmur3_x64_128_internal.h" + +int mh_sha1_murmur3_x64_128_init(struct mh_sha1_murmur3_x64_128_ctx *ctx, uint64_t murmur_seed) +{ + uint64_t *murmur3_x64_128_hash; + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint32_t i; + + if (ctx == NULL) + return MH_SHA1_MURMUR3_CTX_ERROR_NULL; + + memset(ctx, 0, sizeof(*ctx)); + + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + for (i = 0; i < HASH_SEGS; i++) { + mh_sha1_segs_digests[0][i] = MH_SHA1_H0; + mh_sha1_segs_digests[1][i] = MH_SHA1_H1; + mh_sha1_segs_digests[2][i] = MH_SHA1_H2; + mh_sha1_segs_digests[3][i] = MH_SHA1_H3; + mh_sha1_segs_digests[4][i] = MH_SHA1_H4; + } + + murmur3_x64_128_hash = (uint64_t *) ctx->murmur3_x64_128_digest; + murmur3_x64_128_hash[0] = murmur_seed; + murmur3_x64_128_hash[1] = murmur_seed; + + return MH_SHA1_MURMUR3_CTX_ERROR_NONE; +} + +void mh_sha1_murmur3_x64_128_block_base(const uint8_t * input_data, + uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], + uint32_t + murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t num_blocks) +{ + + mh_sha1_block_base(input_data, mh_sha1_digests, frame_buffer, num_blocks); + + murmur3_x64_128_block(input_data, + num_blocks * MH_SHA1_BLOCK_SIZE / MUR_BLOCK_SIZE, + murmur3_x64_128_digests); + + return; +} + +#if (!defined(NOARCH)) && (defined(__i386__) || defined(__x86_64__) \ + || defined( _M_X64) || defined(_M_IX86)) +/***************mh_sha1_murmur3_x64_128_update***********/ +// mh_sha1_murmur3_x64_128_update_sse.c +#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_sse +#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_sse +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +// mh_sha1_murmur3_x64_128_update_avx.c +#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx +#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +// mh_sha1_murmur3_x64_128_update_avx2.c +#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx2 +#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx2 +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +/***************mh_sha1_murmur3_x64_128_finalize***********/ +// mh_sha1_murmur3_x64_128_finalize_sse.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_sse +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_sse +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION + +// mh_sha1_murmur3_x64_128_finalize_avx.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION + +// 
mh_sha1_murmur3_x64_128_finalize_avx2.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx2 +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx2 +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION + +/***************version info***********/ + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// Version info +struct slver mh_sha1_murmur3_x64_128_init_slver_00000251; +struct slver mh_sha1_murmur3_x64_128_init_slver = { 0x0251, 0x00, 0x00 }; + +// mh_sha1_murmur3_x64_128_update version info +struct slver mh_sha1_murmur3_x64_128_update_sse_slver_00000254; +struct slver mh_sha1_murmur3_x64_128_update_sse_slver = { 0x0254, 0x00, 0x00 }; + +struct slver mh_sha1_murmur3_x64_128_update_avx_slver_02000256; +struct slver mh_sha1_murmur3_x64_128_update_avx_slver = { 0x0256, 0x00, 0x02 }; + +struct slver mh_sha1_murmur3_x64_128_update_avx2_slver_04000258; +struct slver mh_sha1_murmur3_x64_128_update_avx2_slver = { 0x0258, 0x00, 0x04 }; + +// mh_sha1_murmur3_x64_128_finalize version info +struct slver mh_sha1_murmur3_x64_128_finalize_sse_slver_00000255; +struct slver mh_sha1_murmur3_x64_128_finalize_sse_slver = { 0x0255, 0x00, 0x00 }; + +struct slver mh_sha1_murmur3_x64_128_finalize_avx_slver_02000257; +struct slver mh_sha1_murmur3_x64_128_finalize_avx_slver = { 0x0257, 0x00, 0x02 }; + +struct slver mh_sha1_murmur3_x64_128_finalize_avx2_slver_04000259; +struct slver mh_sha1_murmur3_x64_128_finalize_avx2_slver = { 0x0259, 0x00, 0x04 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c new file mode 100644 index 000000000..fbef1ac13 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_avx512.c @@ -0,0 +1,67 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <string.h> +#include "mh_sha1_murmur3_x64_128_internal.h" + +#ifdef HAVE_AS_KNOWS_AVX512 + +/***************mh_sha1_murmur3_x64_128_update***********/ +// mh_sha1_murmur3_x64_128_update_avx512.c +#define UPDATE_FUNCTION mh_sha1_murmur3_x64_128_update_avx512 +#define BLOCK_FUNCTION mh_sha1_murmur3_x64_128_block_avx512 +#include "mh_sha1_murmur3_x64_128_update_base.c" +#undef UPDATE_FUNCTION +#undef BLOCK_FUNCTION + +/***************mh_sha1_murmur3_x64_128_finalize***********/ +// mh_sha1_murmur3_x64_128_finalize_avx512.c +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_avx512 +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx512 +#include "mh_sha1_murmur3_x64_128_finalize_base.c" +#undef FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION + +/***************version info***********/ + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// mh_sha1_murmur3_x64_128_update version info +struct slver mh_sha1_murmur3_x64_128_update_avx512_slver_0600025c; +struct slver mh_sha1_murmur3_x64_128_update_avx512_slver = { 0x025c, 0x00, 0x06 }; + +// mh_sha1_murmur3_x64_128_finalize version info +struct slver mh_sha1_murmur3_x64_128_finalize_avx512_slver_0600025d; +struct slver mh_sha1_murmur3_x64_128_finalize_avx512_slver = { 0x025d, 0x00, 0x06 }; + +#endif // HAVE_AS_KNOWS_AVX512 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c new file mode 100644 index 000000000..28f15086d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_base_aliases.c @@ -0,0 +1,43 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include "mh_sha1_murmur3_x64_128_internal.h" +#include <string.h> +int mh_sha1_murmur3_x64_128_update(struct mh_sha1_murmur3_x64_128_ctx *ctx, const void *buffer, + uint32_t len) +{ + return mh_sha1_murmur3_x64_128_update_base(ctx, buffer, len); + +} + +int mh_sha1_murmur3_x64_128_finalize(struct mh_sha1_murmur3_x64_128_ctx *ctx, + void *mh_sha1_digest, void *murmur3_x64_128_digest) +{ + return mh_sha1_murmur3_x64_128_finalize_base(ctx, mh_sha1_digest, + murmur3_x64_128_digest); +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm new file mode 100644 index 000000000..4611494e0 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx.asm @@ -0,0 +1,706 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;; Magic functions defined in FIPS 180-1 +;; +; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF, %%regC,%%regD + vpand %%regF, %%regF,%%regB + vpxor %%regF, %%regF,%%regD +%endmacro + +; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF,%%regD,%%regC + vpxor %%regF,%%regF,%%regB +%endmacro + +; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpor %%regF,%%regB,%%regC + vpand %%regT,%%regB,%%regC + vpand %%regF,%%regF,%%regD + vpor %%regF,%%regF,%%regT +%endmacro + +; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpsrld %%tmp, %%reg, (32-(%%imm)) + vpslld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; non-destructive +; PROLD_nd reg, imm, tmp, src +%macro PROLD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpsrld %%tmp, %%src, (32-(%%imm)) + vpslld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + vpaddd %%regE, %%regE,[%%data + (%%memW * 16)] + PROLD_nd %%regT,5, %%regF,%%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE, %%regE,%%regF +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16] + + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +;; Insert murmur's instructions into this macro. +;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 256Byte. +;; So insert 1 murmur block into every 4 SHA1_STEP_16_79. 
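+;;
+;; For reference, the scalar murmur3_x64_128 round being interleaved here
+;; is (C1/C2, R1-R4, M and N1/N2 are defined with the other constants
+;; further down):
+;;   k1 = load64(in);  k2 = load64(in + 8);
+;;   k1 *= C1;  k1 = ROTL64(k1, R1);  k1 *= C2;  h1 ^= k1;
+;;   h1 = ROTL64(h1, R3) + h2;  h1 = h1 * M + N1;
+;;   k2 *= C2;  k2 = ROTL64(k2, R2);  k2 *= C1;  h2 ^= k2;
+;;   h2 = ROTL64(h2, R4) + h1;  h2 = h2 * M + N2;
+;; The four variants split these steps evenly: _0 loads k1/k2 and starts
+;; the multiplies, _1 rotates and finishes the key mixing, _2 folds k1
+;; into h1, and _3 folds k2 into h2. 64 steps per segs_loop pass / 4 * 16
+;; bytes = 256 bytes of murmur input; four passes cover each 1KB block.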
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J + +%macro SHA1_STEP_16_79_0 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16] + + vpsrld %%regF, W16, (32-1) + mov mur_data1, [mur_in_p] + mov mur_data2, [mur_in_p + 8] + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + imul mur_data1, mur_c1_r + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + imul mur_data2, mur_c2_r + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79_1 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + rol mur_data1, R1 + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16] + + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + rol mur_data2, R2 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + imul mur_data1, mur_c2_r + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + imul mur_data2, mur_c1_r + PROLD %%regB,30, %%regT + add mur_in_p, 16 + vpaddd %%regE,%%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79_2 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + xor mur_hash1, mur_data1 + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16] + rol mur_hash1, R3 + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + add mur_hash1, mur_hash2 + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1] + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79_3 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + xor mur_hash2, mur_data2 + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16] + rol mur_hash2, R4 + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW 
- 0) & 15) * 16],%%regF + add mur_hash2, mur_hash1 + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2] + vpaddd %%regE,%%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8d + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + %endmacro + %macro FUNC_RESTORE 0 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10d + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8 + %define PS 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + save_reg rbx, 10*16 + 6*8 + save_reg rbp, 10*16 + 7*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + mov rbx, [rsp + 10*16 + 6*8] + mov rbp, [rsp + 10*16 + 7*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg4 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables of murmur3 +%define mur_in_p tmp2 +%define mur_digest_p arg3 +%define mur_hash1 tmp3 +%define mur_hash2 tmp4 +%define mur_data1 tmp5 +%define mur_data2 return +%define 
mur_c1_r tmp6 +%define mur_c2_r arg5 +; constants of murmur3_x64_128 +%define R1 31 +%define R2 33 +%define R3 27 +%define R4 31 +%define M 5 +%define N1 0x52dce729;DWORD +%define N2 0x38495ab5;DWORD +%define C1 QWORD(0x87c37b91114253d5) +%define C2 QWORD(0x4cf5ad432745937f) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp7 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define A xmm0 +%define B xmm1 +%define C xmm2 +%define D xmm3 +%define E xmm4 +%define F xmm5 ; tmp +%define G xmm6 ; tmp + +%define TMP G +%define FUN F +%define K xmm7 + +%define AA xmm8 +%define BB xmm9 +%define CC xmm10 +%define DD xmm11 +%define EE xmm12 + +%define T0 xmm6 +%define T1 xmm7 +%define T2 xmm8 +%define T3 xmm9 +%define T4 xmm10 +%define T5 xmm11 + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%define W14 xmm13 +%define W15 xmm14 +%define W16 xmm15 + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep | + +align 32 +;void mh_sha1_murmur3_x64_128_block_avx (const uint8_t * input_data, +; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], +; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], +; uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. 
+; arg 3 pointer to murmur3 digest +; arg 4 number of 1KB blocks +; +mk_global mh_sha1_murmur3_x64_128_block_avx, function, internal +func(mh_sha1_murmur3_x64_128_block_avx) + endbranch + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 16 Bytes needed by avx + and rsp, ~0x0F + + %assign I 0 ; copy segs_digests into stack + %rep 5 + VMOVPS A, [mh_digests_p + I*64 + 16*0] + VMOVPS B, [mh_digests_p + I*64 + 16*1] + VMOVPS C, [mh_digests_p + I*64 + 16*2] + VMOVPS D, [mh_digests_p + I*64 + 16*3] + + vmovdqa [rsp + I*64 + 16*0], A + vmovdqa [rsp + I*64 + 16*1], B + vmovdqa [rsp + I*64 + 16*2], C + vmovdqa [rsp + I*64 + 16*3], D + %assign I (I+1) + %endrep + + ;init murmur variables + mov mur_in_p, mh_in_p ;different steps between murmur and mh_sha1 + ;load murmur hash digests and multiplier + mov mur_hash1, [mur_digest_p] + mov mur_hash2, [mur_digest_p + 8] + mov mur_c1_r, C1 + mov mur_c2_r, C2 + +.block_loop: + ;transform to big-endian data and store on aligned_frame + vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4 + %assign I 0 + %rep 16 + VMOVPS T0,[mh_in_p + I*64+0*16] + VMOVPS T1,[mh_in_p + I*64+1*16] + VMOVPS T2,[mh_in_p + I*64+2*16] + VMOVPS T3,[mh_in_p + I*64+3*16] + + vpshufb T0, F + vmovdqa [mh_data_p +(I)*16 +0*256],T0 + vpshufb T1, F + vmovdqa [mh_data_p +(I)*16 +1*256],T1 + vpshufb T2, F + vmovdqa [mh_data_p +(I)*16 +2*256],T2 + vpshufb T3, F + vmovdqa [mh_data_p +(I)*16 +3*256],T3 + %assign I (I+1) + %endrep + + mov mh_segs, 0 ;start from the first 4 segments + .segs_loop: + ;; Initialize digests + vmovdqa A, [rsp + 0*64 + mh_segs] + vmovdqa B, [rsp + 1*64 + mh_segs] + vmovdqa C, [rsp + 2*64 + mh_segs] + vmovdqa D, [rsp + 3*64 + mh_segs] + vmovdqa E, [rsp + 4*64 + mh_segs] + + vmovdqa AA, A + vmovdqa BB, B + vmovdqa CC, C + vmovdqa DD, D + vmovdqa EE, E +;; +;; perform 0-79 steps +;; + vmovdqa K, [K00_19] +;; do rounds 0...15 + %assign I 0 + %rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 16...19 + vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 16] + vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 16] + %rep 4 + %assign J (I % 4) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 20...39 + vmovdqa K, [K20_39] + %rep 20 + %assign J (I % 4) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 40...59 + vmovdqa K, [K40_59] + %rep 20 + %assign J (I % 4) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 60...79 + vmovdqa K, [K60_79] + %rep 20 + %assign J (I % 4) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + + vpaddd A, AA + vpaddd B, BB + vpaddd C, CC + vpaddd D, DD + vpaddd E, EE + + ; write out digests + vmovdqa [rsp + 0*64 + mh_segs], A + vmovdqa [rsp + 1*64 + mh_segs], B + vmovdqa [rsp + 2*64 + mh_segs], C + vmovdqa [rsp + 3*64 + mh_segs], D + vmovdqa [rsp + 4*64 + mh_segs], E + + add mh_data_p, 256 + add mh_segs, 16 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + ;store murmur-hash digest + mov [mur_digest_p], mur_hash1 + mov [mur_digest_p + 8], mur_hash2 + + %assign I 0 ; copy segs_digests back 
to mh_digests_p + %rep 5 + vmovdqa A, [rsp + I*64 + 16*0] + vmovdqa B, [rsp + I*64 + 16*1] + vmovdqa C, [rsp + I*64 + 16*2] + vmovdqa D, [rsp + I*64 + 16*3] + + VMOVPS [mh_digests_p + I*64 + 16*0], A + VMOVPS [mh_digests_p + I*64 + 16*1], B + VMOVPS [mh_digests_p + I*64 + 16*2], C + VMOVPS [mh_digests_p + I*64 + 16*3], D + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=16 + +align 16 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm new file mode 100644 index 000000000..3fb440bf1 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx2.asm @@ -0,0 +1,653 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX2 +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;; Magic functions defined in FIPS 180-1 +;; +;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF, %%regC,%%regD + vpand %%regF, %%regF,%%regB + vpxor %%regF, %%regF,%%regD +%endmacro + +;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF,%%regD,%%regC + vpxor %%regF,%%regF,%%regB +%endmacro + + + +;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpor %%regF,%%regB,%%regC + vpand %%regT,%%regB,%%regC + vpand %%regF,%%regF,%%regD + vpor %%regF,%%regF,%%regT +%endmacro + +;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpsrld %%tmp, %%reg, (32-%%imm) + vpslld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpsrld %%tmp, %%src, (32-%%imm) + vpslld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + vpaddd %%regE, %%regE,[%%data + (%%memW * 32)] + PROLD_nd %%regT,5, %%regF,%%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE, %%regE,%%regF +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32] + + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +;; Insert murmur's instructions into this macro. +;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 512Byte. +;; So insert 1 murmur block into every 2 SHA1_STEP_16_79. 
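+;;
+;; The scalar round is the one listed in the AVX version; here it is split
+;; across two variants instead of four: _0 performs all of the key mixing
+;; for k1 and k2 (load, multiply by C1/C2, ROTL64 by R1/R2, multiply by
+;; C2/C1), and _1 folds the mixed keys into h1 and h2 and advances
+;; mur_in_p. 64 steps per segs_loop pass / 2 * 16 bytes = 512 bytes of
+;; murmur input; two passes (8 segments each) cover each 1KB block.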
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J + +%macro SHA1_STEP_16_79_0 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32] + mov mur_data1, [mur_in_p] + mov mur_data2, [mur_in_p + 8] + + vpsrld %%regF, W16, (32-1) + imul mur_data1, mur_c1_r + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + imul mur_data2, mur_c2_r + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF + rol mur_data1, R1 + vpaddd %%regE, %%regE,%%regF + rol mur_data2, R2 + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + imul mur_data1, mur_c2_r + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + imul mur_data2, mur_c1_r + vpaddd %%regE,%%regE,%%regF +%endmacro + + +%macro SHA1_STEP_16_79_1 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + xor mur_hash1, mur_data1 + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32] + rol mur_hash1, R3 + vpxor W16, W16, W14 + add mur_hash1, mur_hash2 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32] + lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1] + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + xor mur_hash2, mur_data2 + vpor %%regF, %%regF, W16 + rol mur_hash2, R4 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF + vpaddd %%regE, %%regE,%%regF + add mur_hash2, mur_hash1 + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2] + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + add mur_in_p, 16 + vpaddd %%regE,%%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8d + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + %endmacro + %macro FUNC_RESTORE 0 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10d + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be 
saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8 + %define PS 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + save_reg rbx, 10*16 + 6*8 + save_reg rbp, 10*16 + 7*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + mov rbx, [rsp + 10*16 + 6*8] + mov rbp, [rsp + 10*16 + 7*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg4 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables of murmur3 +%define mur_in_p tmp2 +%define mur_digest_p arg3 +%define mur_hash1 tmp3 +%define mur_hash2 tmp4 +%define mur_data1 tmp5 +%define mur_data2 return +%define mur_c1_r tmp6 +%define mur_c2_r arg5 +; constants of murmur3_x64_128 +%define R1 31 +%define R2 33 +%define R3 27 +%define R4 31 +%define M 5 +%define N1 0x52dce729;DWORD +%define N2 0x38495ab5;DWORD +%define C1 QWORD(0x87c37b91114253d5) +%define C2 QWORD(0x4cf5ad432745937f) +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp7 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS + +%define pref tmp8 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define A ymm0 +%define B ymm1 +%define C ymm2 +%define D ymm3 +%define E ymm4 + +%define F ymm5 +%define T0 ymm6 +%define T1 ymm7 +%define T2 ymm8 +%define T3 ymm9 +%define T4 ymm10 +%define T5 ymm11 +%define T6 ymm12 +%define T7 ymm13 +%define T8 ymm14 +%define T9 ymm15 + +%define AA ymm5 +%define BB ymm6 +%define CC ymm7 +%define DD ymm8 +%define EE ymm9 +%define TMP ymm10 +%define FUN ymm11 +%define K ymm12 +%define W14 ymm13 +%define W15 ymm14 +%define W16 ymm15 + + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... 
+; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep | + +align 32 +;void mh_sha1_murmur3_x64_128_block_avx2 (const uint8_t * input_data, +; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], +; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], +; uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. +; arg 3 pointer to murmur3 digest +; arg 4 number of 1KB blocks +; +mk_global mh_sha1_murmur3_x64_128_block_avx2, function, internal +func(mh_sha1_murmur3_x64_128_block_avx2) + endbranch + FUNC_SAVE + + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 32 Bytes needed by avx2 + and rsp, ~0x1F + + %assign I 0 ; copy segs_digests into stack + %rep 2 + VMOVPS A, [mh_digests_p + I*32*5 + 32*0] + VMOVPS B, [mh_digests_p + I*32*5 + 32*1] + VMOVPS C, [mh_digests_p + I*32*5 + 32*2] + VMOVPS D, [mh_digests_p + I*32*5 + 32*3] + VMOVPS E, [mh_digests_p + I*32*5 + 32*4] + + vmovdqa [rsp + I*32*5 + 32*0], A + vmovdqa [rsp + I*32*5 + 32*1], B + vmovdqa [rsp + I*32*5 + 32*2], C + vmovdqa [rsp + I*32*5 + 32*3], D + vmovdqa [rsp + I*32*5 + 32*4], E + %assign I (I+1) + %endrep + + ;init murmur variables + mov mur_in_p, mh_in_p ;different steps between murmur and mh_sha1 + ;load murmur hash digests and multiplier + mov mur_hash1, [mur_digest_p] + mov mur_hash2, [mur_digest_p + 8] + mov mur_c1_r, C1 + mov mur_c2_r, C2 + +.block_loop: + ;transform to big-endian data and store on aligned_frame + vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*5 to DWORD*8_SEGS*5*2 +%assign I 0 +%rep 16 + VMOVPS T0,[mh_in_p + I*64+0*32] + VMOVPS T1,[mh_in_p + I*64+1*32] + + vpshufb T0, T0, F + vmovdqa [mh_data_p +I*32+0*512],T0 + vpshufb T1, T1, F + vmovdqa [mh_data_p +I*32+1*512],T1 +%assign I (I+1) +%endrep + + mov mh_segs, 0 ;start from the first 8 segments + mov pref, 1024 ;avoid prefetch repeadtedly + .segs_loop: + ;; Initialize digests + vmovdqa A, [rsp + 0*64 + mh_segs] + vmovdqa B, [rsp + 1*64 + mh_segs] + vmovdqa C, [rsp + 2*64 + mh_segs] + vmovdqa D, [rsp + 3*64 + mh_segs] + vmovdqa E, [rsp + 4*64 + mh_segs] + + vmovdqa AA, A + vmovdqa BB, B + vmovdqa CC, C + vmovdqa DD, D + vmovdqa EE, E +;; +;; perform 0-79 steps +;; + vmovdqa K, [K00_19] +;; do rounds 0...15 + %assign I 0 + %rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS +%assign I (I+1) +%endrep + +;; do rounds 16...19 + vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 32] + vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 32] + %rep 4 + %assign J (I % 2) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] + PREFETCH_X [mh_in_p + pref+128*1] +;; do rounds 20...39 + vmovdqa K, [K20_39] + %rep 20 + %assign J (I % 2) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep +;; do rounds 40...59 + vmovdqa K, [K40_59] + %rep 20 + %assign J (I % 2) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*2] + PREFETCH_X [mh_in_p + pref+128*3] +;; do rounds 60...79 + vmovdqa K, [K60_79] + %rep 20 + %assign J (I % 2) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, 
mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + + vpaddd A,A, AA + vpaddd B,B, BB + vpaddd C,C, CC + vpaddd D,D, DD + vpaddd E,E, EE + + ; write out digests + vmovdqa [rsp + 0*64 + mh_segs], A + vmovdqa [rsp + 1*64 + mh_segs], B + vmovdqa [rsp + 2*64 + mh_segs], C + vmovdqa [rsp + 3*64 + mh_segs], D + vmovdqa [rsp + 4*64 + mh_segs], E + + add pref, 512 + + add mh_data_p, 512 + add mh_segs, 32 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + ;store murmur-hash digest + mov [mur_digest_p], mur_hash1 + mov [mur_digest_p + 8], mur_hash2 + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 2 + vmovdqa A, [rsp + I*32*5 + 32*0] + vmovdqa B, [rsp + I*32*5 + 32*1] + vmovdqa C, [rsp + I*32*5 + 32*2] + vmovdqa D, [rsp + I*32*5 + 32*3] + vmovdqa E, [rsp + I*32*5 + 32*4] + + VMOVPS [mh_digests_p + I*32*5 + 32*0], A + VMOVPS [mh_digests_p + I*32*5 + 32*1], B + VMOVPS [mh_digests_p + I*32*5 + 32*2], C + VMOVPS [mh_digests_p + I*32*5 + 32*3], D + VMOVPS [mh_digests_p + I*32*5 + 32*4], E + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=32 + +align 32 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + dq 0x0405060700010203, 0x0c0d0e0f08090a0b +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 + dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm new file mode 100644 index 000000000..a5c157078 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_avx512.asm @@ -0,0 +1,504 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using AVX-512
+;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+[bits 64]
+default rel
+section .text
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovdqu64
+;SIMD variable definitions
+%define A zmm0
+%define B zmm1
+%define C zmm2
+%define D zmm3
+%define E zmm4
+%define HH0 zmm5
+%define HH1 zmm6
+%define HH2 zmm7
+%define HH3 zmm8
+%define HH4 zmm9
+%define KT zmm10
+%define XTMP0 zmm11
+%define XTMP1 zmm12
+%define SHUF_MASK zmm13
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;using extra 16 ZMM registers to hold the byte-swapped (big-endian) input data
+%define W0 zmm16
+%define W1 zmm17
+%define W2 zmm18
+%define W3 zmm19
+%define W4 zmm20
+%define W5 zmm21
+%define W6 zmm22
+%define W7 zmm23
+%define W8 zmm24
+%define W9 zmm25
+%define W10 zmm26
+%define W11 zmm27
+%define W12 zmm28
+%define W13 zmm29
+%define W14 zmm30
+%define W15 zmm31
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;macro definitions
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+%macro PROCESS_LOOP 2
+%define %%WT %1
+%define %%F_IMMED %2
+
+        ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
+        ; E=D, D=C, C=ROTL_30(B), B=A, A=T
+
+        ; Ft
+        ;  0-19          Ch(B,C,D) = (B&C) ^ (~B&D)
+        ; 20-39, 60-79   Parity(B,C,D) = B ^ C ^ D
+        ; 40-59          Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)
+
+        vmovdqa32 XTMP1, B                ; Copy B
+        vpaddd E, E, %%WT                 ; E = E + Wt
+        vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D)
+        vpaddd E, E, KT                   ; E = E + Wt + Kt
+        vprold XTMP0, A, 5                ; TMP0 = ROTL_5(A)
+        vpaddd E, E, XTMP1                ; E = Ft(B,C,D) + E + Kt + Wt
+        vprold B, B, 30                   ; B = ROTL_30(B)
+        vpaddd E, E, XTMP0                ; E = T
+
+        ROTATE_ARGS
+%endmacro
+
+;; Murmur3's instructions are stitched into the two macros below.
+;; Every section_loop of mh_sha1 calls PROCESS_LOOP 80 times and
+;; MSG_SCHED_ROUND_16_79 64 times, processing 1024 bytes, so one 16-byte
+;; murmur block is folded into each of the 64 stitched rounds.
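+;; PROCESS_LOOP_MUR carries the whole key-mixing half plus the h1 update
+;; (h1 = (ROTL64(h1 ^ k1, R3) + h2) * M + N1) and the h2 ^= k2 step;
+;; MSG_SCHED_ROUND_16_79_MUR finishes h2 with
+;; h2 = (ROTL64(h2, R4) + h1) * M + N2.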
+%macro PROCESS_LOOP_MUR 2 +%define %%WT %1 +%define %%F_IMMED %2 + + ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt + ; E=D, D=C, C=ROTL_30(B), B=A, A=T + + ; Ft + ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D) + ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D + ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D) + + mov mur_data1, [mur_in_p] + mov mur_data2, [mur_in_p + 8] + vmovdqa32 XTMP1, B ; Copy B + imul mur_data1, mur_c1_r + imul mur_data2, mur_c2_r + vpaddd E, E, %%WT ; E = E + Wt + rol mur_data1, R1 + rol mur_data2, R2 + vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D) + imul mur_data1, mur_c2_r + imul mur_data2, mur_c1_r + vpaddd E, E, KT ; E = E + Wt + Kt + xor mur_hash1, mur_data1 + add mur_in_p, 16 + vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A) + rol mur_hash1, R3 + vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt + add mur_hash1, mur_hash2 + vprold B, B, 30 ; B = ROTL_30(B) + lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1] + vpaddd E, E, XTMP0 ; E = T + xor mur_hash2, mur_data2 + + ROTATE_ARGS +%endmacro + +%macro MSG_SCHED_ROUND_16_79_MUR 4 +%define %%WT %1 +%define %%WTp2 %2 +%define %%WTp8 %3 +%define %%WTp13 %4 + ; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ Wt-16) + ; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt) + vpternlogd %%WT, %%WTp2, %%WTp8, 0x96 + rol mur_hash2, R4 + vpxord %%WT, %%WT, %%WTp13 + add mur_hash2, mur_hash1 + lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2] + vprold %%WT, %%WT, 1 +%endmacro + +%define APPEND(a,b) a %+ b +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8d + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + %endmacro + %macro FUNC_RESTORE 0 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10d + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8 + %define PS 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + ; remove unwind info macros + %define func(x) x: + %macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp + 0*16], xmm6 + movdqa [rsp + 1*16], xmm7 + movdqa [rsp + 2*16], xmm8 + movdqa [rsp + 3*16], xmm9 + movdqa [rsp + 4*16], xmm10 + movdqa [rsp + 5*16], xmm11 + movdqa [rsp + 6*16], xmm12 + movdqa [rsp + 7*16], xmm13 + movdqa [rsp + 8*16], xmm14 + movdqa [rsp + 9*16], xmm15 + mov [rsp + 10*16 + 0*8], r12 + mov [rsp + 10*16 + 1*8], r13 + mov [rsp + 10*16 + 2*8], r14 + mov [rsp + 10*16 + 3*8], r15 + mov 
[rsp + 10*16 + 4*8], rdi + mov [rsp + 10*16 + 5*8], rsi + mov [rsp + 10*16 + 6*8], rbx + mov [rsp + 10*16 + 7*8], rbp + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + mov rbx, [rsp + 10*16 + 6*8] + mov rbp, [rsp + 10*16 + 7*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg4 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables of murmur3 +%define mur_in_p tmp2 +%define mur_digest_p arg3 +%define mur_hash1 tmp3 +%define mur_hash2 tmp4 +%define mur_data1 tmp5 +%define mur_data2 return +%define mur_c1_r tmp6 +%define mur_c2_r arg5 +; constants of murmur3_x64_128 +%define R1 31 +%define R2 33 +%define R3 27 +%define R4 31 +%define M 5 +%define N1 0x52dce729;DWORD +%define N2 0x38495ab5;DWORD +%define C1 QWORD(0x87c37b91114253d5) +%define C2 QWORD(0x4cf5ad432745937f) +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp7 + +%define pref tmp8 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep | + +[bits 64] +section .text +align 32 + +;void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data, +; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], +; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], +; uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. +; arg 3 pointer to murmur3 digest +; arg 4 number of 1KB blocks +; +global mh_sha1_murmur3_x64_128_block_avx512 +func(mh_sha1_murmur3_x64_128_block_avx512) + endbranch + FUNC_SAVE + + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; align rsp to 64 Bytes needed by avx512 + and rsp, ~0x3f + + ; copy segs_digests into registers. 
+ VMOVPS HH0, [mh_digests_p + 64*0] + VMOVPS HH1, [mh_digests_p + 64*1] + VMOVPS HH2, [mh_digests_p + 64*2] + VMOVPS HH3, [mh_digests_p + 64*3] + VMOVPS HH4, [mh_digests_p + 64*4] + ;a mask used to transform to big-endian data + vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK] + + ;init murmur variables + mov mur_in_p, mh_in_p ;different steps between murmur and mh_sha1 + ;load murmur hash digests and multiplier + mov mur_hash1, [mur_digest_p] + mov mur_hash2, [mur_digest_p + 8] + mov mur_c1_r, C1 + mov mur_c2_r, C2 + +.block_loop: + ;transform to big-endian data and store on aligned_frame + ;using extra 16 ZMM registers instead of stack +%assign I 0 +%rep 8 +%assign J (I+1) + VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64] + VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64] + + vpshufb APPEND(W,I), APPEND(W,I), SHUF_MASK + vpshufb APPEND(W,J), APPEND(W,J), SHUF_MASK +%assign I (I+2) +%endrep + + vmovdqa64 A, HH0 + vmovdqa64 B, HH1 + vmovdqa64 C, HH2 + vmovdqa64 D, HH3 + vmovdqa64 E, HH4 + + vmovdqa32 KT, [K00_19] +%assign I 0xCA +%assign J 0 +%assign K 2 +%assign L 8 +%assign M 13 +%assign N 0 +%rep 80 + %if N < 64 ; stitching 64 times + PROCESS_LOOP_MUR APPEND(W,J), I + MSG_SCHED_ROUND_16_79_MUR APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M) + %else ; 64 <= N < 80, without stitching + PROCESS_LOOP APPEND(W,J), I + %endif + %if N = 19 + vmovdqa32 KT, [K20_39] + %assign I 0x96 + %elif N = 39 + vmovdqa32 KT, [K40_59] + %assign I 0xE8 + %elif N = 59 + vmovdqa32 KT, [K60_79] + %assign I 0x96 + %endif + %if N % 20 = 19 + PREFETCH_X [mh_in_p + 1024+128*(N / 20)] + PREFETCH_X [mh_in_p + 1024+128*(N / 20 +1)] + %endif +%assign J ((J+1)% 16) +%assign K ((K+1)% 16) +%assign L ((L+1)% 16) +%assign M ((M+1)% 16) +%assign N (N+1) +%endrep + + ; Add old digest + vpaddd HH0,A, HH0 + vpaddd HH1,B, HH1 + vpaddd HH2,C, HH2 + vpaddd HH3,D, HH3 + vpaddd HH4,E, HH4 + + add mh_in_p, 1024 + sub loops, 1 + jne .block_loop + + ;store murmur-hash digest + mov [mur_digest_p], mur_hash1 + mov [mur_digest_p + 8], mur_hash2 + + ; copy segs_digests to mh_digests_p + VMOVPS [mh_digests_p + 64*0], HH0 + VMOVPS [mh_digests_p + 64*1], HH1 + VMOVPS [mh_digests_p + 64*2], HH2 + VMOVPS [mh_digests_p + 64*3], HH3 + VMOVPS [mh_digests_p + 64*4], HH4 + + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + + +section .data align=64 + +align 64 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + +K20_39: dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + +K40_59: dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + +K60_79: dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_sha1_murmur3_x64_128_block_avx512 +no_sha1_murmur3_x64_128_block_avx512: +%endif +%endif ; HAVE_AS_KNOWS_AVX512 diff --git 
a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm new file mode 100644 index 000000000..ebd1b8b49 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_block_sse.asm @@ -0,0 +1,702 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using SSE +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;; Magic functions defined in FIPS 180-1 +;; +; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regC + pxor %%regF,%%regD + pand %%regF,%%regB + pxor %%regF,%%regD +%endmacro + +; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regD + pxor %%regF,%%regC + pxor %%regF,%%regB +%endmacro + +; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regB + movdqa %%regT,%%regB + por %%regF,%%regC + pand %%regT,%%regC + pand %%regF,%%regD + por %%regF,%%regT +%endmacro + +; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + movdqa %%tmp, %%reg + pslld %%reg, %%imm + psrld %%tmp, (32-%%imm) + por %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + paddd %%regE,[%%data + (%%memW * 16)] + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + paddd %%regE,%%regF +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + movdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + pxor W16, W14 + pxor W16, [%%data + ((%%memW - 8) & 15) * 16] + pxor W16, [%%data + ((%%memW - 3) & 15) * 16] + movdqa %%regF, W16 + pslld W16, 1 + psrld %%regF, (32-1) + por %%regF, W16 + ROTATE_W + + movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + paddd %%regE,%%regF + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + paddd %%regE,%%regF +%endmacro + +;; Insert murmur's instructions into this macro. +;; Every section_loop of mh_sha1 calls SHA1_STEP_16_79 64 times and processes 256Byte. +;; So insert 1 murmur block into every 4 SHA1_STEP_16_79. 
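+;;
+;; The split matches the AVX version: _0 loads k1/k2 and starts the
+;; multiplies, _1 rotates (R1/R2), finishes the key mixing and advances
+;; mur_in_p, _2 folds k1 into h1 (ROTL64 by R3, add h2, * M + N1), and
+;; _3 folds k2 into h2 (ROTL64 by R4, add h1, * M + N2); again 256 bytes
+;; of murmur input per segs_loop pass, four passes per 1KB block.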
+%define SHA1_STEP_16_79(J) SHA1_STEP_16_79_ %+ J + +%macro SHA1_STEP_16_79_0 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + + paddd %%regE,%%immCNT + movdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + pxor W16, W14 + pxor W16, [%%data + ((%%memW - 8) & 15) * 16] + pxor W16, [%%data + ((%%memW - 3) & 15) * 16] + movdqa %%regF, W16 + mov mur_data1, [mur_in_p] + mov mur_data2, [mur_in_p + 8] + pslld W16, 1 + psrld %%regF, (32-1) + por %%regF, W16 + + ROTATE_W + + movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + imul mur_data1, mur_c1_r + paddd %%regE,%%regF + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + imul mur_data2, mur_c2_r + PROLD %%regB,30, %%regT + paddd %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79_1 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + rol mur_data1, R1 + movdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + pxor W16, W14 + pxor W16, [%%data + ((%%memW - 8) & 15) * 16] + pxor W16, [%%data + ((%%memW - 3) & 15) * 16] + movdqa %%regF, W16 + pslld W16, 1 + rol mur_data2, R2 + psrld %%regF, (32-1) + por %%regF, W16 + + ROTATE_W + + movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + imul mur_data1, mur_c2_r + paddd %%regE,%%regF + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + imul mur_data2, mur_c1_r + PROLD %%regB,30, %%regT + add mur_in_p, 16 + paddd %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79_2 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + movdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + xor mur_hash1, mur_data1 + pxor W16, W14 + pxor W16, [%%data + ((%%memW - 8) & 15) * 16] + pxor W16, [%%data + ((%%memW - 3) & 15) * 16] + rol mur_hash1, R3 + movdqa %%regF, W16 + pslld W16, 1 + psrld %%regF, (32-1) + por %%regF, W16 + + ROTATE_W + + movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + add mur_hash1, mur_hash2 + paddd %%regE,%%regF + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + lea mur_hash1, [mur_hash1 + mur_hash1*4 + N1] + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + paddd %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79_3 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + movdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + xor mur_hash2, mur_data2 + pxor W16, W14 + pxor W16, [%%data + ((%%memW - 8) & 15) * 16] + pxor W16, [%%data + ((%%memW - 3) & 15) * 16] + rol mur_hash2, R4 + movdqa %%regF, W16 + pslld W16, 1 + psrld %%regF, (32-1) + por %%regF, W16 + + ROTATE_W + + movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + add mur_hash2, mur_hash1 + paddd %%regE,%%regF + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC 
%%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + lea mur_hash2, [mur_hash2 + mur_hash2*4 + N2] + paddd %%regE,%%regF +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8d + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + %endmacro + %macro FUNC_RESTORE 0 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10d + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define tmp7 rbx ; must be saved and restored + %define tmp8 rbp ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 9*8 ; must be an odd multiple of 8 + %define PS 8 + %define arg(x) [rsp + stack_size + PS + PS*x] + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + save_reg rbx, 10*16 + 6*8 + save_reg rbp, 10*16 + 7*8 + end_prolog + mov arg4, arg(4) + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + mov rbx, [rsp + 10*16 + 6*8] + mov rbp, [rsp + 10*16 + 7*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg4 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables of murmur3 +%define mur_in_p tmp2 +%define mur_digest_p arg3 +%define mur_hash1 tmp3 +%define mur_hash2 tmp4 +%define mur_data1 tmp5 +%define mur_data2 return +%define mur_c1_r tmp6 +%define mur_c2_r arg5 +; constants of murmur3_x64_128 +%define R1 31 +%define R2 33 +%define R3 27 +%define R4 31 +%define M 5 +%define N1 0x52dce729;DWORD 
+%define N2 0x38495ab5;DWORD +%define C1 QWORD(0x87c37b91114253d5) +%define C2 QWORD(0x4cf5ad432745937f) +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp7 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define MOVPS movups + +%define A xmm0 +%define B xmm1 +%define C xmm2 +%define D xmm3 +%define E xmm4 +%define F xmm5 ; tmp +%define G xmm6 ; tmp + +%define TMP G +%define FUN F +%define K xmm7 + +%define AA xmm8 +%define BB xmm9 +%define CC xmm10 +%define DD xmm11 +%define EE xmm12 + +%define T0 xmm6 +%define T1 xmm7 +%define T2 xmm8 +%define T3 xmm9 +%define T4 xmm10 +%define T5 xmm11 + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%define W14 xmm13 +%define W15 xmm14 +%define W16 xmm15 + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep | + +align 32 +;void mh_sha1_murmur3_x64_128_block_sse (const uint8_t * input_data, +; uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], +; uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS], +; uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. +; arg 3 pointer to murmur3 digest +; arg 4 number of 1KB blocks +; +mk_global mh_sha1_murmur3_x64_128_block_sse, function, internal +func(mh_sha1_murmur3_x64_128_block_sse) + endbranch + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 16 Bytes needed by sse + and rsp, ~0x0F + + %assign I 0 ; copy segs_digests into stack + %rep 5 + MOVPS A, [mh_digests_p + I*64 + 16*0] + MOVPS B, [mh_digests_p + I*64 + 16*1] + MOVPS C, [mh_digests_p + I*64 + 16*2] + MOVPS D, [mh_digests_p + I*64 + 16*3] + + movdqa [rsp + I*64 + 16*0], A + movdqa [rsp + I*64 + 16*1], B + movdqa [rsp + I*64 + 16*2], C + movdqa [rsp + I*64 + 16*3], D + %assign I (I+1) + %endrep + + ;init murmur variables + mov mur_in_p, mh_in_p ;different steps between murmur and mh_sha1 + ;load murmur hash digests and multiplier + mov mur_hash1, [mur_digest_p] + mov mur_hash2, [mur_digest_p + 8] + mov mur_c1_r, C1 + mov mur_c2_r, C2 + +.block_loop: + ;transform to big-endian data and store on aligned_frame + movdqa F, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4 + %assign I 0 + %rep 16 + MOVPS T0,[mh_in_p+I*64+0*16] + MOVPS T1,[mh_in_p+I*64+1*16] + MOVPS T2,[mh_in_p+I*64+2*16] + MOVPS T3,[mh_in_p+I*64+3*16] + + pshufb T0, F + movdqa [mh_data_p+(I)*16 +0*256],T0 + pshufb T1, F + movdqa [mh_data_p+(I)*16 +1*256],T1 + pshufb T2, F + movdqa [mh_data_p+(I)*16 +2*256],T2 + pshufb T3, F + movdqa [mh_data_p+(I)*16 +3*256],T3 + %assign I (I+1) + %endrep + + mov mh_segs, 0 ;start from the first 4 segments + .segs_loop: + ;; Initialize digests + movdqa A, [rsp + 0*64 + mh_segs] + movdqa B, [rsp + 1*64 + mh_segs] + movdqa C, [rsp + 2*64 + mh_segs] + movdqa D, [rsp 
+ 3*64 + mh_segs] + movdqa E, [rsp + 4*64 + mh_segs] + + movdqa AA, A + movdqa BB, B + movdqa CC, C + movdqa DD, D + movdqa EE, E +;; +;; perform 0-79 steps +;; + movdqa K, [K00_19] +;; do rounds 0...15 + %assign I 0 + %rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 16...19 + movdqa W16, [mh_data_p + ((16 - 16) & 15) * 16] + movdqa W15, [mh_data_p + ((16 - 15) & 15) * 16] + %rep 4 + %assign J (I % 4) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 20...39 + movdqa K, [K20_39] + %rep 20 + %assign J (I % 4) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 40...59 + movdqa K, [K40_59] + %rep 20 + %assign J (I % 4) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 60...79 + movdqa K, [K60_79] + %rep 20 + %assign J (I % 4) + SHA1_STEP_16_79(J) A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + + paddd A, AA + paddd B, BB + paddd C, CC + paddd D, DD + paddd E, EE + + ; write out digests + movdqa [rsp + 0*64 + mh_segs], A + movdqa [rsp + 1*64 + mh_segs], B + movdqa [rsp + 2*64 + mh_segs], C + movdqa [rsp + 3*64 + mh_segs], D + movdqa [rsp + 4*64 + mh_segs], E + + add mh_data_p, 256 + add mh_segs, 16 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + ;store murmur-hash digest + mov [mur_digest_p], mur_hash1 + mov [mur_digest_p + 8], mur_hash2 + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 5 + movdqa A, [rsp + I*64 + 16*0] + movdqa B, [rsp + I*64 + 16*1] + movdqa C, [rsp + I*64 + 16*2] + movdqa D, [rsp + I*64 + 16*3] + + MOVPS [mh_digests_p + I*64 + 16*0], A + MOVPS [mh_digests_p + I*64 + 16*1], B + MOVPS [mh_digests_p + I*64 + 16*2], C + MOVPS [mh_digests_p + I*64 + 16*3], D + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=16 + +align 16 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c new file mode 100644 index 000000000..4d09abf1d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_finalize_base.c @@ -0,0 +1,102 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef FINALIZE_FUNCTION +#include <stdlib.h> // For NULL +#include "mh_sha1_murmur3_x64_128_internal.h" + +#define FINALIZE_FUNCTION mh_sha1_murmur3_x64_128_finalize_base +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_base +#define FINALIZE_FUNCTION_SLVER +#endif + +#define MURMUR_BLOCK_FUNCTION murmur3_x64_128_block +#define MURMUR_TAIL_FUNCTION murmur3_x64_128_tail + +int FINALIZE_FUNCTION(struct mh_sha1_murmur3_x64_128_ctx *ctx, void *mh_sha1_digest, + void *murmur3_x64_128_digest) +{ + uint8_t *partial_block_buffer, *murmur_tail_data; + uint64_t partial_block_len, total_len; + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint8_t *aligned_frame_buffer; + + if (ctx == NULL) + return MH_SHA1_MURMUR3_CTX_ERROR_NULL; + + total_len = ctx->total_length; + partial_block_len = total_len % MH_SHA1_BLOCK_SIZE; + partial_block_buffer = ctx->partial_block_buffer; + + // Calculate murmur3 firstly + // because mh_sha1 will change the partial_block_buffer + // ( partial_block_buffer = n murmur3 blocks and 1 murmur3 tail) + murmur_tail_data = + partial_block_buffer + partial_block_len - partial_block_len % MUR_BLOCK_SIZE; + MURMUR_BLOCK_FUNCTION(partial_block_buffer, partial_block_len / MUR_BLOCK_SIZE, + ctx->murmur3_x64_128_digest); + MURMUR_TAIL_FUNCTION(murmur_tail_data, total_len, ctx->murmur3_x64_128_digest); + + /* mh_sha1 final */ + aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer); + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + + MH_SHA1_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha1_segs_digests, + aligned_frame_buffer, ctx->mh_sha1_digest); + + /* Output the digests of murmur3 and mh_sha1 */ + if (mh_sha1_digest != NULL) { + ((uint32_t *) mh_sha1_digest)[0] = ctx->mh_sha1_digest[0]; + ((uint32_t *) mh_sha1_digest)[1] = ctx->mh_sha1_digest[1]; + ((uint32_t *) mh_sha1_digest)[2] = ctx->mh_sha1_digest[2]; + ((uint32_t *) mh_sha1_digest)[3] = ctx->mh_sha1_digest[3]; + ((uint32_t *) mh_sha1_digest)[4] = ctx->mh_sha1_digest[4]; + } + + if (murmur3_x64_128_digest != NULL) { + ((uint32_t *) murmur3_x64_128_digest)[0] = ctx->murmur3_x64_128_digest[0]; + ((uint32_t *) murmur3_x64_128_digest)[1] = ctx->murmur3_x64_128_digest[1]; + ((uint32_t *) murmur3_x64_128_digest)[2] = ctx->murmur3_x64_128_digest[2]; + ((uint32_t *) murmur3_x64_128_digest)[3] = ctx->murmur3_x64_128_digest[3]; + } + + return MH_SHA1_MURMUR3_CTX_ERROR_NONE; +} + +#ifdef FINALIZE_FUNCTION_SLVER +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t 
core;
+};
+
+// Version info
+struct slver mh_sha1_murmur3_x64_128_finalize_base_slver_0000025b;
+struct slver mh_sha1_murmur3_x64_128_finalize_base_slver = { 0x025b, 0x00, 0x00 };
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
new file mode 100644
index 000000000..e77837347
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_internal.h
@@ -0,0 +1,202 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MH_SHA1_MURMUR3_X64_128_INTERNAL_H_
+#define _MH_SHA1_MURMUR3_X64_128_INTERNAL_H_
+
+/**
+ *  @file mh_sha1_murmur3_x64_128_internal.h
+ *  @brief mh_sha1_murmur3_x64_128 internal function prototypes and macros
+ *
+ *  Interface for mh_sha1_murmur3_x64_128 internal functions
+ *
+ */
+#include <stdint.h>
+#include "mh_sha1_internal.h"
+#include "mh_sha1_murmur3_x64_128.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _MSC_VER
+#define inline __inline
+#endif
+
+/*******************************************************************
+ * mh_sha1_murmur3_x64_128 API internal function prototypes
+ * Multiple versions of the update and finalize functions are supplied;
+ * each one is built on a matching version of the block and tail
+ * processing subfunctions.
+ ******************************************************************/
+
+/**
+ * @brief Calculate blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * This function determines which instruction sets are enabled and selects
+ * the appropriate version at runtime.
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests Digests of the 16 segments
+ * @param frame_buffer Pointer to a buffer used as a temporary working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+// Each block function is kept in its own C or ASM file because these
+// functions dominate performance. They are called by the corresponding
+// mh_sha1_murmur3_x64_128_update_XXX functions.
+void mh_sha1_murmur3_x64_128_block (const uint8_t * input_data,
+				    uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+				    uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+				    uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+				    uint32_t num_blocks);
+
+/**
+ * @brief Calculate blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests Digests of the 16 segments
+ * @param frame_buffer Pointer to a buffer used as a temporary working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+void mh_sha1_murmur3_x64_128_block_base (const uint8_t * input_data,
+					 uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+					 uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+					 uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+					 uint32_t num_blocks);
+
+/**
+ * @brief Calculate blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires SSE
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests Digests of the 16 segments
+ * @param frame_buffer Pointer to a buffer used as a temporary working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+void mh_sha1_murmur3_x64_128_block_sse (const uint8_t * input_data,
+					uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+					uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+					uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+					uint32_t num_blocks);
+
+/**
+ * @brief Calculate blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests Digests of the 16 segments
+ * @param frame_buffer Pointer to a buffer used as a temporary working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+void mh_sha1_murmur3_x64_128_block_avx (const uint8_t * input_data,
+					uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+					uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+					uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+					uint32_t num_blocks);
+
+/**
+ * @brief Calculate blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX2
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests Digests of the 16 segments
+ * @param frame_buffer Pointer to a buffer used as a temporary working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+void mh_sha1_murmur3_x64_128_block_avx2 (const uint8_t * input_data,
+					 uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+					 uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+					 uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+					 uint32_t num_blocks);
+
+/**
+ * @brief Calculate blocks whose total size is MH_SHA1_BLOCK_SIZE*N.
+ *
+ * @requires AVX512
+ *
+ * @param input_data Pointer to input data to be processed
+ * @param mh_sha1_digests Digests of the 16 segments
+ * @param frame_buffer Pointer to a buffer used as a temporary working area
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @param num_blocks The number of blocks.
+ * @returns none
+ *
+ */
+void mh_sha1_murmur3_x64_128_block_avx512 (const uint8_t * input_data,
+					   uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+					   uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE],
+					   uint32_t murmur3_x64_128_digests[MURMUR3_x64_128_DIGEST_WORDS],
+					   uint32_t num_blocks);
+/*******************************************************************
+ * murmur hash API
+ ******************************************************************/
+
+/**
+ * @brief Calculate the murmur3 digest of blocks whose total size is 16*N bytes.
+ * @param input_data Pointer to input data to be processed
+ * @param num_blocks The number of 16-byte blocks.
+ * @param murmur3_x64_128_digests Murmur3 digest
+ * @returns none
+ *
+ */
+void murmur3_x64_128_block(const uint8_t * input_data, uint32_t num_blocks,
+			   uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]);
+
+/**
+ * @brief Process the tail, which is shorter than 16 bytes.
+ * @param tail_buffer Pointer to the tail data to be processed
+ * @param total_len The total length of the input data
+ * @param digests Murmur3 digest
+ * @returns none
+ *
+ */
+void murmur3_x64_128_tail(const uint8_t * tail_buffer, uint32_t total_len,
+			  uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS]);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
new file mode 100644
index 000000000..6f9e54cdd
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_multibinary.asm
@@ -0,0 +1,76 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+;   * Redistributions of source code must retain the above copyright
+;     notice, this list of conditions and the following disclaimer.
+;   * Redistributions in binary form must reproduce the above copyright
+;     notice, this list of conditions and the following disclaimer in
+;     the documentation and/or other materials provided with the
+;     distribution.
+;   * Neither the name of Intel Corporation nor the names of its
+;     contributors may be used to endorse or promote products derived
+;     from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%include "reg_sizes.asm" +%include "multibinary.asm" + +%ifidn __OUTPUT_FORMAT__, elf32 + [bits 32] +%else + default rel + [bits 64] + + extern mh_sha1_murmur3_x64_128_update_sse + extern mh_sha1_murmur3_x64_128_update_avx + extern mh_sha1_murmur3_x64_128_update_avx2 + extern mh_sha1_murmur3_x64_128_finalize_sse + extern mh_sha1_murmur3_x64_128_finalize_avx + extern mh_sha1_murmur3_x64_128_finalize_avx2 + + %ifdef HAVE_AS_KNOWS_AVX512 + extern mh_sha1_murmur3_x64_128_update_avx512 + extern mh_sha1_murmur3_x64_128_finalize_avx512 + %endif + +%endif + +extern mh_sha1_murmur3_x64_128_update_base +extern mh_sha1_murmur3_x64_128_finalize_base + +mbin_interface mh_sha1_murmur3_x64_128_update +mbin_interface mh_sha1_murmur3_x64_128_finalize + +%ifidn __OUTPUT_FORMAT__, elf64 + + %ifdef HAVE_AS_KNOWS_AVX512 + mbin_dispatch_init6 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base, mh_sha1_murmur3_x64_128_update_sse, mh_sha1_murmur3_x64_128_update_avx, mh_sha1_murmur3_x64_128_update_avx2, mh_sha1_murmur3_x64_128_update_avx512 + mbin_dispatch_init6 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base, mh_sha1_murmur3_x64_128_finalize_sse, mh_sha1_murmur3_x64_128_finalize_avx, mh_sha1_murmur3_x64_128_finalize_avx2, mh_sha1_murmur3_x64_128_finalize_avx512 + %else + mbin_dispatch_init5 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base, mh_sha1_murmur3_x64_128_update_sse, mh_sha1_murmur3_x64_128_update_avx, mh_sha1_murmur3_x64_128_update_avx2 + mbin_dispatch_init5 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base, mh_sha1_murmur3_x64_128_finalize_sse, mh_sha1_murmur3_x64_128_finalize_avx, mh_sha1_murmur3_x64_128_finalize_avx2 + %endif + +%else + mbin_dispatch_init2 mh_sha1_murmur3_x64_128_update, mh_sha1_murmur3_x64_128_update_base + mbin_dispatch_init2 mh_sha1_murmur3_x64_128_finalize, mh_sha1_murmur3_x64_128_finalize_base +%endif + +;;; func core, ver, snum +slversion mh_sha1_murmur3_x64_128_update, 00, 02, 0252 +slversion mh_sha1_murmur3_x64_128_finalize, 00, 02, 0253 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c new file mode 100644 index 000000000..77ebb964e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_perf.c @@ -0,0 +1,206 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1_murmur3_x64_128.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Loop many times over same +# define TEST_LEN 16*1024 +# define TEST_LOOPS 20000 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. +# define TEST_LEN 32*1024*1024 +# define TEST_LOOPS 100 +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif +#define TEST_MEM TEST_LEN + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \ + printf("The stitch function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest); + +extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * murmur3_x64_128_digest); + +void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest) +{ + mh_sha1_ref(buffer, len, mh_sha1_digest); + murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest); + + return; +} + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS], + uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + int murmur3_fail = 0; + + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) { + if (murmur3_test[i] != murmur3_base[i]) + murmur3_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + if (murmur3_fail) { + printf("murmur3 fail test\n"); + printf("base: "); + dump((char *)murmur3_base, 16); + printf("ref: "); + dump((char 
*)murmur3_test, 16); + } + + return mh_sha1_fail + murmur3_fail; +} + +int main(int argc, char *argv[]) +{ + int i, fail = 0; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS]; + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS], + murmur3_base[MURMUR3_x64_128_DIGEST_WORDS]; + uint8_t *buff = NULL; + struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL; + struct perf start, stop; + + printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n"); + + buff = malloc(TEST_LEN); + update_ctx = malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + // mh_sha1_murmur3 base version + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base); + perf_start(&start); + for (i = 0; i < TEST_LOOPS / 10; i++) { + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, + murmur3_base); + } + perf_stop(&stop); + printf("mh_sha1_murmur3_x64_128_base" TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_MEM * i); + + //Update feature test + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + perf_start(&start); + for (i = 0; i < TEST_LOOPS; i++) { + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + } + perf_stop(&stop); + printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_MEM * i); + + // Check results + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size=%d\n", TEST_LEN); + return -1; + } + + if (fail) + printf("Test failed function test%d\n", fail); + else + printf("Pass func check\n"); + + return fail; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c new file mode 100644 index 000000000..22ab6d1f9 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_test.c @@ -0,0 +1,248 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1_murmur3_x64_128.h" + +#define TEST_LEN 16*1024 +#define TEST_SIZE 8*1024 +#define TEST_MEM TEST_LEN +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \ + printf("The stitch function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest); + +extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * murmur3_x64_128_digest); + +void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest) +{ + mh_sha1_ref(buffer, len, mh_sha1_digest); + murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest); + + return; +} + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS], + uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + int murmur3_fail = 0; + + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) { + if (murmur3_test[i] != murmur3_base[i]) + murmur3_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + if (murmur3_fail) { + printf("murmur3 fail test\n"); + printf("base: "); + dump((char *)murmur3_base, 16); + printf("ref: "); + dump((char *)murmur3_test, 16); + } + + return mh_sha1_fail + murmur3_fail; +} + +int main(int argc, char *argv[]) +{ + int fail = 0; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS]; + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS], + murmur3_base[MURMUR3_x64_128_DIGEST_WORDS]; + uint8_t *buff = NULL; + int size, offset; + struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL; + + printf(" " xstr(TEST_UPDATE_FUNCTION) "_test:"); + + srand(TEST_SEED); + + buff = malloc(TEST_LEN); + update_ctx = 
malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("fail rand1 test\n"); + return -1; + } else + putchar('.'); + + // Test various size messages + for (size = TEST_LEN; size >= 0; size--) { + + // Fill with rand data + rand_buffer(buff, size); + + mh_sha1_murmur3_x64_128_base(buff, size, TEST_SEED, hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size=%d\n", size); + return -1; + } + + if ((size & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // Test various buffer offsets and sizes + printf("offset tests"); + for (size = TEST_LEN - 256; size > 256; size -= 11) { + for (offset = 0; offset < 256; offset++) { + mh_sha1_murmur3_x64_128_base(buff + offset, size, TEST_SEED, + hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = + compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size=%d offset=%d\n", size, offset); + return -1; + } + + } + if ((size & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + // Run efence tests + printf("efence tests"); + for (size = TEST_SIZE; size > 0; size--) { + offset = TEST_LEN - size; + mh_sha1_murmur3_x64_128_base(buff + offset, size, TEST_SEED, + hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size=%d offset=%d\n", size, offset); + return -1; + } + + if ((size & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail"); + + return fail; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c new file mode 100644 index 000000000..0e7a3970d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_base.c @@ -0,0 +1,107 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in
+   the documentation and/or other materials provided with the
+   distribution.
+ * Neither the name of Intel Corporation nor the names of its
+   contributors may be used to endorse or promote products derived
+   from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef UPDATE_FUNCTION
+#include "mh_sha1_murmur3_x64_128_internal.h"
+#include <string.h>
+
+#define UPDATE_FUNCTION	mh_sha1_murmur3_x64_128_update_base
+#define BLOCK_FUNCTION	mh_sha1_murmur3_x64_128_block_base
+#define UPDATE_FUNCTION_SLVER
+#endif
+
+int UPDATE_FUNCTION(struct mh_sha1_murmur3_x64_128_ctx *ctx, const void *buffer, uint32_t len)
+{
+	uint8_t *partial_block_buffer;
+	uint64_t partial_block_len;
+	uint64_t num_blocks;
+	uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
+	uint8_t *aligned_frame_buffer;
+	uint32_t *murmur3_x64_128_digest;
+	const uint8_t *input_data = (const uint8_t *)buffer;
+
+	if (ctx == NULL)
+		return MH_SHA1_MURMUR3_CTX_ERROR_NULL;
+
+	if (len == 0)
+		return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+
+	partial_block_len = ctx->total_length % MH_SHA1_BLOCK_SIZE;
+	partial_block_buffer = ctx->partial_block_buffer;
+	aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
+	mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;
+	murmur3_x64_128_digest = ctx->murmur3_x64_128_digest;
+
+	ctx->total_length += len;
+	// Not enough input data for a full block; just buffer it
+	if (len + partial_block_len < MH_SHA1_BLOCK_SIZE) {
+		memcpy(partial_block_buffer + partial_block_len, input_data, len);
+		return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+	}
+	// Complete the previous partial block and process it
+	if (partial_block_len != 0) {
+		memcpy(partial_block_buffer + partial_block_len, input_data,
+		       MH_SHA1_BLOCK_SIZE - partial_block_len);
+		// process the single completed block
+		BLOCK_FUNCTION(partial_block_buffer, mh_sha1_segs_digests,
+			       aligned_frame_buffer, murmur3_x64_128_digest, 1);
+		input_data += MH_SHA1_BLOCK_SIZE - partial_block_len;
+		len -= MH_SHA1_BLOCK_SIZE - partial_block_len;
+		memset(partial_block_buffer, 0, MH_SHA1_BLOCK_SIZE);
+	}
+	// Process the remaining whole blocks
+	num_blocks = len / MH_SHA1_BLOCK_SIZE;
+	if (num_blocks > 0) {
+		// process num_blocks blocks in one call
+		BLOCK_FUNCTION(input_data, mh_sha1_segs_digests, aligned_frame_buffer,
+			       murmur3_x64_128_digest, num_blocks);
+		len -= num_blocks * MH_SHA1_BLOCK_SIZE;
+		input_data += num_blocks * MH_SHA1_BLOCK_SIZE;
+	}
+	// Store the remaining partial block
+	if (len != 0) {
+		memcpy(partial_block_buffer, input_data, len);
+	}
+
+	return MH_SHA1_MURMUR3_CTX_ERROR_NONE;
+}
+
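+/* A minimal usage sketch of the streaming path above. Illustrative only:
+ * the helper name hash_in_two_pieces is hypothetical, while the
+ * init/update/finalize entry points and the error code are the ones
+ * exercised by the unit tests below. Feeding the input in two pieces
+ * forces the partial-block buffering that UPDATE_FUNCTION implements:
+ * the first call usually ends mid-block, and the second call first
+ * completes that block before processing whole blocks.
+ *
+ *	#include <stdint.h>
+ *	#include "mh_sha1_murmur3_x64_128.h"
+ *
+ *	static int hash_in_two_pieces(const uint8_t *data, uint32_t len, uint64_t seed,
+ *				      uint32_t sha1_digest[SHA1_DIGEST_WORDS],
+ *				      uint32_t murmur_digest[MURMUR3_x64_128_DIGEST_WORDS])
+ *	{
+ *		struct mh_sha1_murmur3_x64_128_ctx ctx;
+ *		uint32_t half = len / 2;
+ *
+ *		if (mh_sha1_murmur3_x64_128_init(&ctx, seed) != MH_SHA1_MURMUR3_CTX_ERROR_NONE)
+ *			return -1;
+ *		if (mh_sha1_murmur3_x64_128_update_base(&ctx, data, half)
+ *		    != MH_SHA1_MURMUR3_CTX_ERROR_NONE)
+ *			return -1;
+ *		if (mh_sha1_murmur3_x64_128_update_base(&ctx, data + half, len - half)
+ *		    != MH_SHA1_MURMUR3_CTX_ERROR_NONE)
+ *			return -1;
+ *		return mh_sha1_murmur3_x64_128_finalize_base(&ctx, sha1_digest, murmur_digest);
+ *	}
+ */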
+#ifdef UPDATE_FUNCTION_SLVER +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + + // Version info +struct slver mh_sha1_murmur3_x64_128_update_base_slver_0000025a; +struct slver mh_sha1_murmur3_x64_128_update_base_slver = { 0x025a, 0x00, 0x00 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c new file mode 100644 index 000000000..6ae888e21 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/mh_sha1_murmur3_x64_128_update_test.c @@ -0,0 +1,272 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1_murmur3_x64_128.h" + +#define TEST_LEN 16*1024 +#define TEST_SIZE 8*1024 +#define TEST_MEM TEST_LEN +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_murmur3_x64_128_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_MURMUR3_CTX_ERROR_NONE){ \ + printf("The stitch function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest); + +extern void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * murmur3_x64_128_digest); + +void mh_sha1_murmur3_x64_128_base(const void *buffer, uint32_t len, uint64_t murmur_seed, + uint32_t * mh_sha1_digest, uint32_t * murmur3_x64_128_digest) +{ + mh_sha1_ref(buffer, len, mh_sha1_digest); + murmur3_x64_128(buffer, len, murmur_seed, murmur3_x64_128_digest); + + return; +} + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS], + uint32_t murmur3_base[MURMUR3_x64_128_DIGEST_WORDS], + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + int murmur3_fail = 0; + + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + for (i = 0; i < MURMUR3_x64_128_DIGEST_WORDS; i++) { + if (murmur3_test[i] != murmur3_base[i]) + murmur3_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + if (murmur3_fail) { + printf("murmur3 fail test\n"); + printf("base: "); + dump((char *)murmur3_base, 16); + printf("ref: "); + dump((char *)murmur3_test, 16); + } + + return mh_sha1_fail + murmur3_fail; +} + +int main(int argc, char *argv[]) +{ + int fail = 0, i; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS]; + uint32_t murmur3_test[MURMUR3_x64_128_DIGEST_WORDS], + murmur3_base[MURMUR3_x64_128_DIGEST_WORDS]; + uint8_t *buff = NULL; + int update_count; + int size1, size2, offset, addr_offset; + struct mh_sha1_murmur3_x64_128_ctx *update_ctx = NULL; + uint8_t *mem_addr = NULL; + + printf(" " xstr(TEST_UPDATE_FUNCTION) "_test:"); + + srand(TEST_SEED); + + buff = malloc(TEST_LEN); + update_ctx = malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, murmur3_base); + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, 
murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("fail rand1 test\n"); + return -1; + } else + putchar('.'); + + // Test various size messages by update twice. + printf("\n various size messages by update twice tests"); + for (size1 = TEST_LEN; size1 >= 0; size1--) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, + murmur3_base); + + // subsequent update + size2 = TEST_LEN - size1; // size2 is different with the former + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size1=%d\n", size1); + return -1; + } + + if ((size2 & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // Test various update count + printf("\n various update count tests"); + for (update_count = 1; update_count <= TEST_LEN; update_count++) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, + murmur3_base); + + // subsequent update + size1 = TEST_LEN / update_count; + size2 = TEST_LEN - size1 * (update_count - 1); // size2 is different with the former + + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + for (i = 1, offset = 0; i < update_count; i++) { + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1)); + offset += size1; + } + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail size1=%d\n", size1); + return -1; + } + + if ((size2 & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // test various start address of ctx. + printf("\n various start address of ctx test"); + free(update_ctx); + mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10); + for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_murmur3_x64_128_base(buff, TEST_LEN, TEST_SEED, hash_base, + murmur3_base); + + // a unaligned offset + update_ctx = (struct mh_sha1_murmur3_x64_128_ctx *)(mem_addr + addr_offset); + CHECK_RETURN(mh_sha1_murmur3_x64_128_init(update_ctx, TEST_SEED)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test, murmur3_test)); + + fail = compare_digests(hash_base, hash_test, murmur3_base, murmur3_test); + + if (fail) { + printf("Fail addr_offset=%d\n", addr_offset); + return -1; + } + + if ((addr_offset & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? 
"Pass" : "Fail"); + + return fail; + +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c new file mode 100644 index 000000000..f5fe30a83 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128.c @@ -0,0 +1,85 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdlib.h> // for NULL +#include "murmur3_x64_128_internal.c" + +#if (__GNUC__ >= 11) +# define OPT_FIX2 __attribute__ ((optimize(1))) +#else +# define OPT_FIX2 +#endif + +/******************************************************************* + * Single API which can calculate murmur3 + ******************************************************************/ +/** + * @brief Get the digest of murmur3_x64_128 through a single API. + * + * Using murmur3_x64_128_block and murmur3_x64_128_tail. + * Used to test the murmur3_x64_128 digest. 
+
+/*******************************************************************
+ * Single API which can calculate murmur3
+ ******************************************************************/
+/**
+ * @brief Get the digest of murmur3_x64_128 through a single API.
+ *
+ * Uses murmur3_x64_128_block and murmur3_x64_128_tail; provided mainly
+ * for testing the murmur3_x64_128 digest.
+ *
+ * @param buffer Pointer to the buffer to be processed
+ * @param len Length of the buffer (in bytes) to be processed
+ * @param murmur_seed Seed used as the initial murmur3 digest
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns none
+ *
+ */
+void OPT_FIX2 murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
+			      uint32_t * murmur3_x64_128_digest)
+{
+	uint64_t *murmur3_x64_128_hash;
+	uint32_t murmur3_x64_128_hash_dword[4];
+	const uint8_t *tail_buffer;
+	const uint8_t *input_data = (const uint8_t *)buffer;
+
+	// Initiate murmur3: both 64-bit lanes start from the seed
+	murmur3_x64_128_hash = (uint64_t *) murmur3_x64_128_hash_dword;
+	murmur3_x64_128_hash[0] = murmur_seed;
+	murmur3_x64_128_hash[1] = murmur_seed;
+
+	// Process all whole 16-byte blocks
+	murmur3_x64_128_block(input_data, len / MUR_BLOCK_SIZE,
+			      murmur3_x64_128_hash_dword);
+
+	// Process the remaining tail bytes and finalize
+	tail_buffer = input_data + len - len % MUR_BLOCK_SIZE;
+	murmur3_x64_128_tail(tail_buffer, len, murmur3_x64_128_hash_dword);
+
+	// Output the digest
+	if (murmur3_x64_128_digest != NULL) {
+		murmur3_x64_128_digest[0] = murmur3_x64_128_hash_dword[0];
+		murmur3_x64_128_digest[1] = murmur3_x64_128_hash_dword[1];
+		murmur3_x64_128_digest[2] = murmur3_x64_128_hash_dword[2];
+		murmur3_x64_128_digest[3] = murmur3_x64_128_hash_dword[3];
+	}
+
+	return;
+}
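The single-call wrapper above makes a quick sanity check easy to sketch. Both expectations below follow from the code itself: with len == 0 and a zero seed, the block loop runs zero times and the tail path mixes an all-zero block into an all-zero state (blockmix64(0) is 0 and the finalization mix maps 0 to 0), so the digest must be all zeros. A minimal sketch, compiled against this file; the buffer contents are arbitrary:

#include <stdio.h>
#include <stdint.h>

// Prototype of the single-call API defined in murmur3_x64_128.c above.
void murmur3_x64_128(const void *buffer, uint32_t len, uint64_t murmur_seed,
		     uint32_t *murmur3_x64_128_digest);

int main(void)
{
	uint32_t digest[4];
	const uint8_t buf[20] = { 1, 2, 3 };	// one whole 16-byte block + 4 tail bytes

	// Empty input with a zero seed must give an all-zero 128-bit digest.
	murmur3_x64_128(buf, 0, 0, digest);
	printf("empty:   %08x %08x %08x %08x\n", digest[0], digest[1], digest[2], digest[3]);

	// 20 bytes exercise both the block path and the tail path.
	murmur3_x64_128(buf, (uint32_t) sizeof(buf), 0x1234, digest);
	printf("20-byte: %08x %08x %08x %08x\n", digest[0], digest[1], digest[2], digest[3]);
	return 0;
}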
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
new file mode 100644
index 000000000..67eabd0c4
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1_murmur3_x64_128/murmur3_x64_128_internal.c
@@ -0,0 +1,138 @@
+/**********************************************************************
+  Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+  Redistribution and use in source and binary forms, with or without
+  modification, are permitted provided that the following conditions
+  are met:
+    * Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    * Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in
+      the documentation and/or other materials provided with the
+      distribution.
+    * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#include "mh_sha1_murmur3_x64_128_internal.h"
+#include <stdlib.h>		// for NULL
+
+/* murmur3_x64_128 constants */
+// Rotation amounts (in bits) for the circular rotates
+#define MUR_SH1 31
+#define MUR_SH2 33
+#define MUR_SH3 27
+#define MUR_SH4 31
+#define MUR_SH5 33
+
+#define MUR_MUL 5
+#define MUR_ADD1 0x52dce729
+#define MUR_ADD2 0x38495ab5
+
+#define MUR_CON1 0x87c37b91114253d5LLU
+#define MUR_CON2 0x4cf5ad432745937fLLU
+
+#define MUR_FMUL1 0xff51afd7ed558ccdLLU
+#define MUR_FMUL2 0xc4ceb9fe1a85ec53LLU
+
+/* murmur3_x64_128 inline functions */
+// Input-lane mix: multiply, rotate left, multiply
+static inline uint64_t blockmix64(uint64_t data, uint64_t conA, uint64_t conB, uint64_t shift)
+{
+	data *= conA;
+	data = (data << shift) | (data >> (64 - shift));
+	data *= conB;
+	return data;
+}
+
+// State mix: xor in a mixed lane, rotate, add the other lane, then scale and add
+static inline uint64_t hashmix64(uint64_t hashA, uint64_t hashB, uint64_t data, uint64_t add,
+				 uint64_t shift)
+{
+	hashA ^= data;
+	hashA = (hashA << shift) | (hashA >> (64 - shift));
+	hashA += hashB;
+	hashA = hashA * MUR_MUL + add;
+	return hashA;
+}
+
+void murmur3_x64_128_block(const uint8_t * input_data, uint32_t num_blocks,
+			   uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS])
+{
+	uint64_t data1, data2;
+	const uint64_t *input_qword = (const uint64_t *)input_data;
+	uint64_t *hash = (uint64_t *) digests;
+	uint32_t i = 0;
+
+	while (i < num_blocks) {
+		data1 = input_qword[i * 2];
+		data2 = input_qword[i * 2 + 1];
+		data1 = blockmix64(data1, MUR_CON1, MUR_CON2, MUR_SH1);
+		data2 = blockmix64(data2, MUR_CON2, MUR_CON1, MUR_SH2);
+		hash[0] = hashmix64(hash[0], hash[1], data1, MUR_ADD1, MUR_SH3);
+		hash[1] = hashmix64(hash[1], hash[0], data2, MUR_ADD2, MUR_SH4);
+		i++;
+	}
+
+	return;
+}
+
+void murmur3_x64_128_tail(const uint8_t * tail_buffer, uint32_t total_len,
+			  uint32_t digests[MURMUR3_x64_128_DIGEST_WORDS])
+{
+	uint64_t data1, data2;
+	uint64_t *hash = (uint64_t *) digests;
+	uint64_t tail_len = total_len % 16;
+	const uint8_t *tail = tail_buffer;
+
+	union {
+		uint64_t hash[2];
+		uint8_t hashB[16];
+	} hashU;
+
+	// Copy the tail into a zero-padded 16-byte block
+	hashU.hash[0] = hashU.hash[1] = 0;
+
+	while (tail_len-- > 0)
+		hashU.hashB[tail_len] = tail[tail_len];
+
+	data1 = hashU.hash[0];
+	data2 = hashU.hash[1];
+
+	data1 = blockmix64(data1, MUR_CON1, MUR_CON2, MUR_SH1);
+	data2 = blockmix64(data2, MUR_CON2, MUR_CON1, MUR_SH2);
+
+	hash[0] ^= total_len ^ data1;
+	hash[1] ^= total_len ^ data2;
+
+	hash[0] += hash[1];
+	hash[1] += hash[0];
+
+	// Final avalanche (fmix) on both lanes
+	hash[0] ^= hash[0] >> MUR_SH5;
+	hash[0] *= MUR_FMUL1;
+	hash[0] ^= hash[0] >> MUR_SH5;
+	hash[0] *= MUR_FMUL2;
+	hash[0] ^= hash[0] >> MUR_SH5;
+
+	hash[1] ^= hash[1] >> MUR_SH5;
+	hash[1] *= MUR_FMUL1;
+	hash[1] ^= hash[1] >> MUR_SH5;
+	hash[1] *= MUR_FMUL2;
+	hash[1] ^= hash[1] >> MUR_SH5;
+
+	hash[0] += hash[1];
+	hash[1] += hash[0];
+
+	return;
+}
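A closing note on fidelity: the constants and rotation amounts in murmur3_x64_128_internal.c match Austin Appleby's reference MurmurHash3_x64_128 (MUR_CON1/MUR_CON2 are the reference c1/c2, MUR_SH1..MUR_SH4 the 31/33/27/31 rotations, and MUR_SH5 = 33 with MUR_FMUL1/MUR_FMUL2 is the reference fmix64 finalizer). The tail handling differs in form from the reference's switch fallthrough but not in result: the remaining bytes land in a zeroed 16-byte block, and a zero lane mixes to zero. Below, the finalizer pulled out as a standalone sketch; fmix64 is a hypothetical helper name for illustration, not part of this patch:

#include <stdint.h>
#include <stdio.h>

// Finalization mix, mirroring the MUR_SH5/MUR_FMUL1/MUR_FMUL2 steps above.
static uint64_t fmix64(uint64_t h)
{
	h ^= h >> 33;			// MUR_SH5
	h *= 0xff51afd7ed558ccdULL;	// MUR_FMUL1
	h ^= h >> 33;
	h *= 0xc4ceb9fe1a85ec53ULL;	// MUR_FMUL2
	h ^= h >> 33;
	return h;
}

int main(void)
{
	// fmix64(0) == 0 is what lets an all-zero tail leave a zero seed at zero.
	printf("fmix64(0) == 0: %s\n", fmix64(0) == 0 ? "yes" : "no");
	printf("fmix64(1) = %016llx\n", (unsigned long long)fmix64(1));
	return 0;
}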