From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am | 83 ++++ .../mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c | 55 +++ .../isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c | 53 +++ .../mh_sha1/aarch64/mh_sha1_block_asimd.S | 124 +++++ .../mh_sha1/aarch64/mh_sha1_block_ce.S | 384 ++++++++++++++++ .../isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c | 53 +++ .../mh_sha1/aarch64/mh_sha1_multibinary.S | 35 ++ .../mh_sha1/aarch64/sha1_asimd_common.S | 269 +++++++++++ src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c | 141 ++++++ .../isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c | 70 +++ .../isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c | 40 ++ .../isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm | 506 ++++++++++++++++++++ .../isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm | 508 +++++++++++++++++++++ .../isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm | 406 ++++++++++++++++ .../isa-l_crypto/mh_sha1/mh_sha1_block_base.c | 387 ++++++++++++++++ .../isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm | 498 ++++++++++++++++++++ .../isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c | 122 +++++ .../isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h | 308 +++++++++++++ .../isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm | 77 ++++ .../isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c | 180 ++++++++ .../isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c | 430 +++++++++++++++++ .../isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c | 217 +++++++++ .../isa-l_crypto/mh_sha1/mh_sha1_update_base.c | 110 +++++ .../isa-l_crypto/mh_sha1/mh_sha1_update_test.c | 240 ++++++++++ .../isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c | 204 +++++++++ 25 files changed, 5500 insertions(+) create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c create mode 100644 
src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c create mode 100644 src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c (limited to 'src/crypto/isa-l/isa-l_crypto/mh_sha1') diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am new file mode 100644 index 
000000000..696e9c57d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am @@ -0,0 +1,83 @@ +######################################################################## +# Copyright(c) 2011-2016 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+######################################################################## + +lsrc_mh_sha1_base = \ + mh_sha1/mh_sha1_block_base.c \ + mh_sha1/mh_sha1_finalize_base.c \ + mh_sha1/mh_sha1_update_base.c \ + mh_sha1/sha1_for_mh_sha1.c \ + mh_sha1/mh_sha1.c + +lsrc_x86_64 += \ + $(lsrc_mh_sha1_base) \ + mh_sha1/mh_sha1_multibinary.asm \ + mh_sha1/mh_sha1_block_sse.asm \ + mh_sha1/mh_sha1_block_avx.asm \ + mh_sha1/mh_sha1_block_avx2.asm \ + mh_sha1/mh_sha1_block_avx512.asm \ + mh_sha1/mh_sha1_avx512.c + +lsrc_x86_32 += $(lsrc_x86_64) + +lsrc_aarch64 += \ + $(lsrc_mh_sha1_base) \ + mh_sha1/aarch64/mh_sha1_multibinary.S \ + mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c \ + mh_sha1/aarch64/mh_sha1_block_asimd.S \ + mh_sha1/aarch64/mh_sha1_asimd.c \ + mh_sha1/aarch64/mh_sha1_block_ce.S \ + mh_sha1/aarch64/mh_sha1_ce.c + +lsrc_base_aliases += \ + $(lsrc_mh_sha1_base) \ + mh_sha1/mh_sha1_base_aliases.c + +other_src += mh_sha1/mh_sha1_ref.c \ + include/reg_sizes.asm \ + include/multibinary.asm \ + include/test.h \ + mh_sha1/mh_sha1_internal.h + +src_include += -I $(srcdir)/mh_sha1 + +extern_hdrs += include/mh_sha1.h + +check_tests += mh_sha1/mh_sha1_test +unit_tests += mh_sha1/mh_sha1_update_test + +perf_tests += mh_sha1/mh_sha1_perf + + +mh_sha1_test: mh_sha1_ref.o +mh_sha1_mh_sha1_test_LDADD = mh_sha1/mh_sha1_ref.lo libisal_crypto.la + +mh_sha1_update_test: mh_sha1_ref.o +mh_sha1_mh_sha1_update_test_LDADD = mh_sha1/mh_sha1_ref.lo libisal_crypto.la + +mh_sha1_mh_sha1_perf_LDADD = libisal_crypto.la diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c new file mode 100644 index 000000000..2ad8871fa --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_aarch64_dispatcher.c @@ -0,0 +1,55 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include <aarch64_multibinary.h> + +DEFINE_INTERFACE_DISPATCHER(mh_sha1_update) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(mh_sha1_update_ce); + + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(mh_sha1_update_asimd); + + return PROVIDER_BASIC(mh_sha1_update); + +} + +DEFINE_INTERFACE_DISPATCHER(mh_sha1_finalize) +{ + unsigned long auxval = getauxval(AT_HWCAP); + if (auxval & HWCAP_SHA1) + return PROVIDER_INFO(mh_sha1_finalize_ce); + + if (auxval & HWCAP_ASIMD) + return PROVIDER_INFO(mh_sha1_finalize_asimd); + + return PROVIDER_BASIC(mh_sha1_finalize); + +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c new file mode 100644 index 000000000..c913a64df --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_asimd.c @@ -0,0 +1,53 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include <string.h> +#include "mh_sha1_internal.h" + +void mh_sha1_block_asimd(const uint8_t * input_data, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +/***************mh_sha1_update***********/ +// mh_sha1_update_asimd.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_asimd +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_asimd +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************mh_sha1_finalize AND mh_sha1_tail***********/ +// mh_sha1_tail is used to calculate the last incomplete src data block +// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail +// mh_sha1_finalize_asimd.c and mh_sha1_tail_asimd.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_asimd +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_asimd +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_asimd +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S 
b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S new file mode 100644 index 000000000..22f716f27 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_asimd.S @@ -0,0 +1,124 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + .arch armv8-a + +#include "sha1_asimd_common.S" + +.macro load_x4_word idx:req + ld1 {WORD\idx\().16b},[segs_ptr] + add segs_ptr,segs_ptr,#64 +.endm + +/* + * void mh_sha1_block_asimd (const uint8_t * input_data, + * uint32_t mh_sha1_digests[SHA1_DIGEST_WORDS][HASH_SEGS], + * uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], + * uint32_t num_blocks); + * arg 0 pointer to input data + * arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) + * arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. + * arg 3 number of 1KB blocks + */ + + input_data .req x0 + sha1_digest .req x1 + data_buf .req x2 + num_blocks .req w3 + src .req x4 + dst .req x5 + offs .req x6 + mh_segs .req x7 + tmp .req x8 + segs_ptr .req x9 + block_ctr .req w10 + + .global mh_sha1_block_asimd + .type mh_sha1_block_asimd, %function +mh_sha1_block_asimd: + cmp num_blocks, #0 + beq .return + sha1_asimd_save_stack + + mov mh_segs, #0 +.seg_loops: + add segs_ptr,input_data,mh_segs + mov offs, #64 + add src, sha1_digest, mh_segs + ld1 {VA.4S}, [src], offs + ld1 {VB.4S}, [src], offs + ld1 {VC.4S}, [src], offs + ld1 {VD.4S}, [src], offs + ld1 {VE.4S}, [src], offs + mov block_ctr,num_blocks + +.block_loop: + sha1_single + subs block_ctr, block_ctr, 1 + bne .block_loop + + mov offs, #64 + add dst, sha1_digest, mh_segs + st1 {VA.4S}, [dst], offs + st1 {VB.4S}, [dst], offs + st1 {VC.4S}, [dst], offs + st1 {VD.4S}, [dst], offs + st1 {VE.4S}, [dst], offs + + add mh_segs, mh_segs, #16 + cmp mh_segs, #64 + bne .seg_loops + + sha1_asimd_restore_stack +.return: + ret + + .size mh_sha1_block_asimd, .-mh_sha1_block_asimd + .section .rodata.cst16,"aM",@progbits,16 + .align 16 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc 
+ .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S new file mode 100644 index 000000000..12d3c5df2 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_block_ce.S @@ -0,0 +1,384 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + .arch armv8-a+crypto + .text + .align 2 + .p2align 3,,7 + +/* +Macros +*/ + +.macro declare_var_vector_reg name:req,reg:req + \name\()_q .req q\reg + \name\()_v .req v\reg + \name\()_s .req s\reg +.endm + + + +/* +Variable list +*/ + + declare_var_vector_reg lane0_msg_0, 0 + declare_var_vector_reg lane1_msg_0, 1 + declare_var_vector_reg lane2_msg_0, 2 + declare_var_vector_reg lane3_msg_0, 3 + declare_var_vector_reg lane0_msg_1, 4 + declare_var_vector_reg lane1_msg_1, 5 + declare_var_vector_reg lane2_msg_1, 6 + declare_var_vector_reg lane3_msg_1, 7 + declare_var_vector_reg lane0_msg_2, 8 + declare_var_vector_reg lane1_msg_2, 9 + declare_var_vector_reg lane2_msg_2,10 + declare_var_vector_reg lane3_msg_2,11 + declare_var_vector_reg lane0_msg_3,12 + declare_var_vector_reg lane1_msg_3,13 + declare_var_vector_reg lane2_msg_3,14 + declare_var_vector_reg lane3_msg_3,15 + + declare_var_vector_reg lane0_abcd ,16 + declare_var_vector_reg lane1_abcd ,17 + declare_var_vector_reg lane2_abcd ,18 + declare_var_vector_reg lane3_abcd ,19 + declare_var_vector_reg lane0_tmp0 ,20 + declare_var_vector_reg lane1_tmp0 ,21 + declare_var_vector_reg lane2_tmp0 ,22 + declare_var_vector_reg lane3_tmp0 ,23 + declare_var_vector_reg lane0_tmp1 ,24 + declare_var_vector_reg lane1_tmp1 ,25 + declare_var_vector_reg lane2_tmp1 ,26 + declare_var_vector_reg lane3_tmp1 ,27 + + + declare_var_vector_reg e0 ,28 + declare_var_vector_reg e1 ,29 + declare_var_vector_reg key ,30 + declare_var_vector_reg tmp ,31 + + key_adr .req x4 + msg_adr .req x5 + block_cnt .req x6 + offs .req x7 + digest_adr .req x16 + tmp0_adr .req x17 + tmp1_adr .req x18 + +/** +maros for round 4-67 +*/ +.macro sha1_4_rounds inst:req,msg0:req,msg1:req,msg2:req,msg3:req,abcd:req,e0:req,tmp0:req,e1:req,tmp1:req + sha1h lane0_\tmp0\()_s, lane0_\abcd\()_s + sha1h lane1_\tmp0\()_s, lane1_\abcd\()_s + sha1h lane2_\tmp0\()_s, lane2_\abcd\()_s + sha1h lane3_\tmp0\()_s, 
lane3_\abcd\()_s + mov \e0\()_v.S[0],lane0_\tmp0\()_v.S[0] + mov \e0\()_v.S[1],lane1_\tmp0\()_v.S[0] + mov \e0\()_v.S[2],lane2_\tmp0\()_v.S[0] + mov \e0\()_v.S[3],lane3_\tmp0\()_v.S[0] + mov lane0_\tmp0\()_v.S[0],\e1\()_v.S[0] + mov lane1_\tmp0\()_v.S[0],\e1\()_v.S[1] + mov lane2_\tmp0\()_v.S[0],\e1\()_v.S[2] + mov lane3_\tmp0\()_v.S[0],\e1\()_v.S[3] + \inst lane0_\abcd\()_q,lane0_\tmp0\()_s,lane0_\tmp1\()_v.4s + \inst lane1_\abcd\()_q,lane1_\tmp0\()_s,lane1_\tmp1\()_v.4s + \inst lane2_\abcd\()_q,lane2_\tmp0\()_s,lane2_\tmp1\()_v.4s + \inst lane3_\abcd\()_q,lane3_\tmp0\()_s,lane3_\tmp1\()_v.4s + ld1 {lane0_\tmp0\()_v.4s-lane3_\tmp0\()_v.4s},[\tmp0\()_adr] + add lane0_\tmp1\()_v.4s,lane0_\msg3\()_v.4s,key_v.4s + add lane1_\tmp1\()_v.4s,lane1_\msg3\()_v.4s,key_v.4s + add lane2_\tmp1\()_v.4s,lane2_\msg3\()_v.4s,key_v.4s + add lane3_\tmp1\()_v.4s,lane3_\msg3\()_v.4s,key_v.4s + st1 {lane0_\tmp1\()_v.4s-lane3_\tmp1\()_v.4s},[\tmp1\()_adr] + sha1su1 lane0_\msg0\()_v.4s,lane0_\msg3\()_v.4s + sha1su1 lane1_\msg0\()_v.4s,lane1_\msg3\()_v.4s + sha1su1 lane2_\msg0\()_v.4s,lane2_\msg3\()_v.4s + sha1su1 lane3_\msg0\()_v.4s,lane3_\msg3\()_v.4s + sha1su0 lane0_\msg1\()_v.4s,lane0_\msg2\()_v.4s,lane0_\msg3\()_v.4s + sha1su0 lane1_\msg1\()_v.4s,lane1_\msg2\()_v.4s,lane1_\msg3\()_v.4s + sha1su0 lane2_\msg1\()_v.4s,lane2_\msg2\()_v.4s,lane2_\msg3\()_v.4s + sha1su0 lane3_\msg1\()_v.4s,lane3_\msg2\()_v.4s,lane3_\msg3\()_v.4s + +.endm + + +/* + void mh_sha1_block_ce(const uint8_t * input_data, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks) +*/ +/* +Arguements list +*/ + input_data .req x0 + digests .req x1 + frame_buffer .req x2 + num_blocks .req w3 + + .global mh_sha1_block_ce + .type mh_sha1_block_ce, %function +mh_sha1_block_ce: + //save temp vector registers + stp d8, d9, [sp, -128]! 
+ + stp d10, d11, [sp, 16] + stp d12, d13, [sp, 32] + stp d14, d15, [sp, 48] + mov tmp0_adr,frame_buffer + add tmp1_adr,tmp0_adr,128 + + +start_loop: + mov block_cnt,0 + mov msg_adr,input_data +lane_loop: + mov offs,64 + adr key_adr,KEY_0 + //load msg 0 + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_2_v.S-lane3_msg_2_v.S}[3],[msg_adr],offs + + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[1],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[2],[msg_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[3],[msg_adr],offs + + add digest_adr,digests,block_cnt + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs + ld4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs + ldr e0_q,[digest_adr] + + //load key_0 + ldr key_q,[key_adr] + + rev32 lane0_msg_0_v.16b,lane0_msg_0_v.16b + rev32 lane1_msg_0_v.16b,lane1_msg_0_v.16b + rev32 lane2_msg_0_v.16b,lane2_msg_0_v.16b + rev32 lane3_msg_0_v.16b,lane3_msg_0_v.16b + rev32 lane0_msg_1_v.16b,lane0_msg_1_v.16b + rev32 lane1_msg_1_v.16b,lane1_msg_1_v.16b + rev32 lane2_msg_1_v.16b,lane2_msg_1_v.16b + rev32 lane3_msg_1_v.16b,lane3_msg_1_v.16b + rev32 lane0_msg_2_v.16b,lane0_msg_2_v.16b + rev32 lane1_msg_2_v.16b,lane1_msg_2_v.16b + rev32 
lane2_msg_2_v.16b,lane2_msg_2_v.16b + rev32 lane3_msg_2_v.16b,lane3_msg_2_v.16b + rev32 lane0_msg_3_v.16b,lane0_msg_3_v.16b + rev32 lane1_msg_3_v.16b,lane1_msg_3_v.16b + rev32 lane2_msg_3_v.16b,lane2_msg_3_v.16b + rev32 lane3_msg_3_v.16b,lane3_msg_3_v.16b + + add lane0_tmp1_v.4s,lane0_msg_1_v.4s,key_v.4s + add lane1_tmp1_v.4s,lane1_msg_1_v.4s,key_v.4s + add lane2_tmp1_v.4s,lane2_msg_1_v.4s,key_v.4s + add lane3_tmp1_v.4s,lane3_msg_1_v.4s,key_v.4s + st1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr] + + add lane0_tmp0_v.4s,lane0_msg_0_v.4s,key_v.4s + add lane1_tmp0_v.4s,lane1_msg_0_v.4s,key_v.4s + add lane2_tmp0_v.4s,lane2_msg_0_v.4s,key_v.4s + add lane3_tmp0_v.4s,lane3_msg_0_v.4s,key_v.4s + + /* rounds 0-3 */ + sha1h lane0_tmp1_s,lane0_abcd_s + sha1h lane1_tmp1_s,lane1_abcd_s + sha1h lane2_tmp1_s,lane2_abcd_s + sha1h lane3_tmp1_s,lane3_abcd_s + mov e1_v.S[0],lane0_tmp1_v.S[0] + mov e1_v.S[1],lane1_tmp1_v.S[0] + mov e1_v.S[2],lane2_tmp1_v.S[0] + mov e1_v.S[3],lane3_tmp1_v.S[0] + mov lane0_tmp1_v.S[0],e0_v.S[0] + mov lane1_tmp1_v.S[0],e0_v.S[1] + mov lane2_tmp1_v.S[0],e0_v.S[2] + mov lane3_tmp1_v.S[0],e0_v.S[3] + sha1c lane0_abcd_q,lane0_tmp1_s,lane0_tmp0_v.4s + sha1c lane1_abcd_q,lane1_tmp1_s,lane1_tmp0_v.4s + sha1c lane2_abcd_q,lane2_tmp1_s,lane2_tmp0_v.4s + sha1c lane3_abcd_q,lane3_tmp1_s,lane3_tmp0_v.4s + ld1 {lane0_tmp1_v.4s-lane3_tmp1_v.4s},[tmp1_adr] + add lane0_tmp0_v.4s,lane0_msg_2_v.4s,key_v.4s + sha1su0 lane0_msg_0_v.4s,lane0_msg_1_v.4s,lane0_msg_2_v.4s + add lane1_tmp0_v.4s,lane1_msg_2_v.4s,key_v.4s + sha1su0 lane1_msg_0_v.4s,lane1_msg_1_v.4s,lane1_msg_2_v.4s + add lane2_tmp0_v.4s,lane2_msg_2_v.4s,key_v.4s + sha1su0 lane2_msg_0_v.4s,lane2_msg_1_v.4s,lane2_msg_2_v.4s + add lane3_tmp0_v.4s,lane3_msg_2_v.4s,key_v.4s + sha1su0 lane3_msg_0_v.4s,lane3_msg_1_v.4s,lane3_msg_2_v.4s + st1 {lane0_tmp0_v.4s-lane3_tmp0_v.4s},[tmp0_adr] + + sha1_4_rounds sha1c,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 4-7 */ + sha1_4_rounds 
sha1c,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + + + adr key_adr,KEY_1 + ldr key_q,[key_adr] + sha1_4_rounds sha1c,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 /* rounds 12-15 */ + sha1_4_rounds sha1c,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 20-23 */ + sha1_4_rounds sha1p,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + + adr key_adr,KEY_2 + ldr key_q,[key_adr] + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 36-39 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1m,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + sha1_4_rounds sha1m,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + + adr key_adr,KEY_3 + ldr key_q,[key_adr] + sha1_4_rounds sha1m,msg_0,msg_1,msg_2,msg_3,abcd,e0,tmp0,e1,tmp1 /* rounds 52-55 */ + sha1_4_rounds sha1m,msg_1,msg_2,msg_3,msg_0,abcd,e1,tmp1,e0,tmp0 + sha1_4_rounds sha1p,msg_2,msg_3,msg_0,msg_1,abcd,e0,tmp0,e1,tmp1 + sha1_4_rounds sha1p,msg_3,msg_0,msg_1,msg_2,abcd,e1,tmp1,e0,tmp0 + + //msg2 and msg1 are free + mov lane0_msg_2_v.S[0],e1_v.S[0] + mov lane1_msg_2_v.S[0],e1_v.S[1] + mov lane2_msg_2_v.S[0],e1_v.S[2] + mov lane3_msg_2_v.S[0],e1_v.S[3] + + /* rounds 68-71 */ + sha1h lane0_msg_1_s,lane0_abcd_s + sha1h lane1_msg_1_s,lane1_abcd_s + sha1h lane2_msg_1_s,lane2_abcd_s + sha1h lane3_msg_1_s,lane3_abcd_s + sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s + sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s + sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s + sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s + add lane0_tmp1_v.4s,lane0_msg_3_v.4s,key_v.4s + add lane1_tmp1_v.4s,lane1_msg_3_v.4s,key_v.4s + add lane2_tmp1_v.4s,lane2_msg_3_v.4s,key_v.4s + add lane3_tmp1_v.4s,lane3_msg_3_v.4s,key_v.4s + sha1su1 lane0_msg_0_v.4s,lane0_msg_3_v.4s + sha1su1 
lane1_msg_0_v.4s,lane1_msg_3_v.4s + sha1su1 lane2_msg_0_v.4s,lane2_msg_3_v.4s + sha1su1 lane3_msg_0_v.4s,lane3_msg_3_v.4s + + /* rounds 72-75 */ + sha1h lane0_msg_2_s,lane0_abcd_s + sha1h lane1_msg_2_s,lane1_abcd_s + sha1h lane2_msg_2_s,lane2_abcd_s + sha1h lane3_msg_2_s,lane3_abcd_s + sha1p lane0_abcd_q,lane0_msg_1_s,lane0_tmp0_v.4s + sha1p lane1_abcd_q,lane1_msg_1_s,lane1_tmp0_v.4s + sha1p lane2_abcd_q,lane2_msg_1_s,lane2_tmp0_v.4s + sha1p lane3_abcd_q,lane3_msg_1_s,lane3_tmp0_v.4s + + /* rounds 76-79 */ + sha1h lane0_msg_1_s,lane0_abcd_s + sha1h lane1_msg_1_s,lane1_abcd_s + sha1h lane2_msg_1_s,lane2_abcd_s + sha1h lane3_msg_1_s,lane3_abcd_s + sha1p lane0_abcd_q,lane0_msg_2_s,lane0_tmp1_v.4s + sha1p lane1_abcd_q,lane1_msg_2_s,lane1_tmp1_v.4s + sha1p lane2_abcd_q,lane2_msg_2_s,lane2_tmp1_v.4s + sha1p lane3_abcd_q,lane3_msg_2_s,lane3_tmp1_v.4s + add digest_adr,digests,block_cnt + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[0],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[1],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[2],[digest_adr],offs + ld4 {lane0_msg_0_v.S-lane3_msg_0_v.S}[3],[digest_adr],offs + ld4 {lane0_msg_3_v.S-lane3_msg_3_v.S}[0],[digest_adr] + + add lane0_abcd_v.4S,lane0_abcd_v.4S,lane0_msg_0_v.4S + add lane1_abcd_v.4S,lane1_abcd_v.4S,lane1_msg_0_v.4S + add lane2_abcd_v.4S,lane2_abcd_v.4S,lane2_msg_0_v.4S + add lane3_abcd_v.4S,lane3_abcd_v.4S,lane3_msg_0_v.4S + + add lane0_msg_1_v.4S,lane0_msg_1_v.4S,lane0_msg_3_v.4S + add lane1_msg_1_v.4S,lane1_msg_1_v.4S,lane1_msg_3_v.4S + add lane2_msg_1_v.4S,lane2_msg_1_v.4S,lane2_msg_3_v.4S + add lane3_msg_1_v.4S,lane3_msg_1_v.4S,lane3_msg_3_v.4S + + add digest_adr,digests,block_cnt + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[0],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[1],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[2],[digest_adr],offs + st4 {lane0_abcd_v.S-lane3_abcd_v.S}[3],[digest_adr],offs + st4 {lane0_msg_1_v.S-lane3_msg_1_v.S}[0],[digest_adr] + + add 
block_cnt,block_cnt,16 + cmp block_cnt,64 + add msg_adr,input_data,block_cnt + add digest_adr,digests,block_cnt + bcc lane_loop + + subs num_blocks,num_blocks,1 + add input_data,input_data,1024 + bhi start_loop +exit_func: + //restore temp register + ldp d10, d11, [sp, 16] + ldp d12, d13, [sp, 32] + ldp d14, d15, [sp, 48] + ldp d8, d9, [sp], 128 + ret + + .size mh_sha1_block_ce, .-mh_sha1_block_ce + .section .rodata.cst16,"aM",@progbits,16 + .align 4 +KEY_0: + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 + .word 0x5a827999 +KEY_1: + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 + .word 0x6ed9eba1 +KEY_2: + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc + .word 0x8f1bbcdc +KEY_3: + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 + .word 0xca62c1d6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c new file mode 100644 index 000000000..c35daeab0 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_ce.c @@ -0,0 +1,53 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#include +#include "mh_sha1_internal.h" + +void mh_sha1_block_ce(const uint8_t * input_data, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +/***************mh_sha1_update***********/ +// mh_sha1_update_ce.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_ce +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_ce +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************mh_sha1_finalize AND mh_sha1_tail***********/ +// mh_sha1_tail is used to calculate the last incomplete src data block +// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail +// mh_sha1_finalize_ce.c and mh_sha1_tail_ce.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_ce +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_ce +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_ce +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S 
b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S new file mode 100644 index 000000000..9a6d0caea --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/mh_sha1_multibinary.S @@ -0,0 +1,35 @@ +/********************************************************************** + Copyright(c) 2020 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + +#include "aarch64_multibinary.h" + + +mbin_interface mh_sha1_update +mbin_interface mh_sha1_finalize diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S new file mode 100644 index 000000000..c8b8dd982 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/aarch64/sha1_asimd_common.S @@ -0,0 +1,269 @@ +/********************************************************************** + Copyright(c) 2021 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + .arch armv8-a + +// macro F = (D ^ (B & (C ^ D))) +.macro FUNC_F0 + eor VF.16b, VC.16b, VD.16b + and VF.16b, VB.16b, VF.16b + eor VF.16b, VD.16b, VF.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F1 + eor VF.16b, VB.16b, VC.16b + eor VF.16b, VF.16b, VD.16b +.endm + +// F = ((B & C) | (B & D) | (C & D)) +.macro FUNC_F2 + and vT0.16b, VB.16b, VC.16b + and vT1.16b, VB.16b, VD.16b + and vT2.16b, VC.16b, VD.16b + orr VF.16b, vT0.16b, vT1.16b + orr VF.16b, VF.16b, vT2.16b +.endm + +// F = (B ^ C ^ D) +.macro FUNC_F3 + FUNC_F1 +.endm + +.altmacro +.macro load_next_word windex + .if \windex < 16 + load_x4_word \windex + .endif +.endm + +// FUNC_F0 is merged into STEP_00_15 for efficiency +.macro SHA1_STEP_00_15_F0 windex:req + rev32 WORD\windex\().16b,WORD\windex\().16b + next_word=\windex+1 + load_next_word %next_word + // e = (a leftrotate 5) + f + e + k + w[i] + ushr VT.4s, VA.4s, 32 - 5 + add VE.4s, VE.4s, VK.4s + sli VT.4s, VA.4s, 5 + eor VF.16b, VC.16b, VD.16b + add VE.4s, VE.4s, WORD\windex\().4s + and VF.16b, VB.16b, VF.16b + add VE.4s, VE.4s, VT.4s + eor VF.16b, VD.16b, VF.16b + ushr VT.4s, VB.4s, 32 - 30 + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + +.macro SHA1_STEP_16_79 windex:req,func_f:req,reg_3:req,reg_8:req,reg_14:req,reg_16:req + eor vT0.16b,\reg_3\().16b,\reg_8\().16b + eor VT.16b,\reg_14\().16b,\reg_16\().16b + eor vT0.16b,vT0.16b,VT.16b + // e = (a 
leftrotate 5) + f + e + k + w[i] + ushr VT.4s, vT0.4s, 32 - 1 + add VE.4s, VE.4s, VK.4s + ushr vT1.4s, VA.4s, 32 - 5 + sli VT.4s, vT0.4s, 1 + add VE.4s, VE.4s, VT.4s + sli vT1.4s, VA.4s, 5 + mov \reg_16\().16b,VT.16b + add VE.4s, VE.4s, vT1.4s + ushr VT.4s, VB.4s, 32 - 30 + \func_f + add VE.4s, VE.4s, VF.4s + sli VT.4s, VB.4s, 30 +.endm + + VA .req v0 + VB .req v1 + VC .req v2 + VD .req v3 + VE .req v4 + VT .req v5 + VF .req v6 + VK .req v7 + WORD0 .req v8 + WORD1 .req v9 + WORD2 .req v10 + WORD3 .req v11 + WORD4 .req v12 + WORD5 .req v13 + WORD6 .req v14 + WORD7 .req v15 + WORD8 .req v16 + WORD9 .req v17 + WORD10 .req v18 + WORD11 .req v19 + WORD12 .req v20 + WORD13 .req v21 + WORD14 .req v22 + WORD15 .req v23 + vT0 .req v24 + vT1 .req v25 + vT2 .req v26 + vAA .req v27 + vBB .req v28 + vCC .req v29 + vDD .req v30 + vEE .req v31 + TT .req v0 + sha1key_adr .req x15 + +.macro SWAP_STATES + // shifted VB is held in VT after each step + .unreq TT + TT .req VE + .unreq VE + VE .req VD + .unreq VD + VD .req VC + .unreq VC + VC .req VT + .unreq VT + VT .req VB + .unreq VB + VB .req VA + .unreq VA + VA .req TT +.endm + +.altmacro +.macro SHA1_STEP_16_79_WRAPPER windex:req,func_f:req,idx3:req,idx8:req,idx14:req,idx16:req + SHA1_STEP_16_79 \windex,\func_f,WORD\idx3\(),WORD\idx8\(),WORD\idx14\(),WORD\idx16\() +.endm + +.macro exec_step windex:req + .if \windex <= 15 + SHA1_STEP_00_15_F0 windex + .else + idx14=((\windex - 14) & 15) + idx8=((\windex - 8) & 15) + idx3=((\windex - 3) & 15) + idx16=(\windex & 15) + .if \windex <= 19 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F0,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 20 && \windex <= 39 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F1,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 40 && \windex <= 59 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F2,%idx3,%idx8,%idx14,%idx16 + .endif + .if \windex >= 60 && \windex <= 79 + SHA1_STEP_16_79_WRAPPER \windex,FUNC_F3,%idx3,%idx8,%idx14,%idx16 + .endif + .endif + + SWAP_STATES + + .if 
\windex == 79 + // after 80 steps, the registers ABCDET has shifted from + // its orignal order of 012345 to 341520 + // have to swap back for both compile- and run-time correctness + mov v0.16b,v3.16b + .unreq VA + VA .req v0 + + mov vT0.16b,v2.16b + mov v2.16b,v1.16b + mov v1.16b,v4.16b + .unreq VB + VB .req v1 + .unreq VC + VC .req v2 + + mov v3.16b,v5.16b + .unreq VD + VD .req v3 + + mov v4.16b,vT0.16b + .unreq VE + VE .req v4 + + .unreq VT + VT .req v5 + .endif +.endm + +.macro exec_steps idx:req,more:vararg + exec_step \idx + .ifnb \more + exec_steps \more + .endif +.endm + +.macro sha1_single + load_x4_word 0 + + mov vAA.16B, VA.16B + mov vBB.16B, VB.16B + mov vCC.16B, VC.16B + mov vDD.16B, VD.16B + mov vEE.16B, VE.16B + + adr sha1key_adr, KEY_0 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19 + + // 20 ~ 39 + adr sha1key_adr, KEY_1 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39 + + // 40 ~ 59 + adr sha1key_adr, KEY_2 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59 + + // 60 ~ 79 + adr sha1key_adr, KEY_3 + ld1 {VK.4s}, [sha1key_adr] + exec_steps 60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79 + + add VA.4s, vAA.4s, VA.4s + add VB.4s, vBB.4s, VB.4s + add VC.4s, vCC.4s, VC.4s + add VD.4s, vDD.4s, VD.4s + add VE.4s, vEE.4s, VE.4s +.endm + +.macro sha1_asimd_save_stack + stp d8,d9,[sp, -64]! 
+ stp d10,d11,[sp, 16] + stp d12,d13,[sp, 32] + stp d14,d15,[sp, 48] +.endm + +.macro sha1_asimd_restore_stack + ldp d10,d11,[sp, 16] + ldp d12,d13,[sp, 32] + ldp d14,d15,[sp, 48] + ldp d8,d9,[sp],64 +.endm diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c new file mode 100644 index 000000000..e5d8ad86d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c @@ -0,0 +1,141 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include "mh_sha1_internal.h" + +int mh_sha1_init(struct mh_sha1_ctx *ctx) +{ + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint32_t i; + + if (ctx == NULL) + return MH_SHA1_CTX_ERROR_NULL; + + memset(ctx, 0, sizeof(*ctx)); + + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + for (i = 0; i < HASH_SEGS; i++) { + mh_sha1_segs_digests[0][i] = MH_SHA1_H0; + mh_sha1_segs_digests[1][i] = MH_SHA1_H1; + mh_sha1_segs_digests[2][i] = MH_SHA1_H2; + mh_sha1_segs_digests[3][i] = MH_SHA1_H3; + mh_sha1_segs_digests[4][i] = MH_SHA1_H4; + } + + return MH_SHA1_CTX_ERROR_NONE; +} + +#if (!defined(NOARCH)) && (defined(__i386__) || defined(__x86_64__) \ + || defined( _M_X64) || defined(_M_IX86)) +/***************mh_sha1_update***********/ +// mh_sha1_update_sse.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_sse +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_sse +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +// mh_sha1_update_avx.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +// mh_sha1_update_avx2.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx2 +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx2 +#include "mh_sha1_update_base.c" +#undef 
MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************mh_sha1_finalize AND mh_sha1_tail***********/ +// mh_sha1_tail is used to calculate the last incomplete src data block +// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail + +// mh_sha1_finalize_sse.c and mh_sha1_tail_sse.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_sse +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_sse +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_sse +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +// mh_sha1_finalize_avx.c and mh_sha1_tail_avx.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +// mh_sha1_finalize_avx2.c and mh_sha1_tail_avx2.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx2 +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx2 +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx2 +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************version info***********/ + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; +// Version info +struct slver mh_sha1_init_slver_00000271; +struct slver mh_sha1_init_slver = { 0x0271, 0x00, 0x00 }; + +// mh_sha1_update version info +struct slver mh_sha1_update_sse_slver_00000274; +struct slver mh_sha1_update_sse_slver = { 0x0274, 0x00, 0x00 }; + +struct slver mh_sha1_update_avx_slver_02000276; +struct slver mh_sha1_update_avx_slver = { 0x0276, 0x00, 0x02 }; + +struct slver mh_sha1_update_avx2_slver_04000278; +struct slver mh_sha1_update_avx2_slver = { 0x0278, 0x00, 0x04 }; + +// mh_sha1_finalize version info +struct slver mh_sha1_finalize_sse_slver_00000275; +struct slver 
mh_sha1_finalize_sse_slver = { 0x0275, 0x00, 0x00 }; + +struct slver mh_sha1_finalize_avx_slver_02000277; +struct slver mh_sha1_finalize_avx_slver = { 0x0277, 0x00, 0x02 }; + +struct slver mh_sha1_finalize_avx2_slver_04000279; +struct slver mh_sha1_finalize_avx2_slver = { 0x0279, 0x00, 0x04 }; + +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c new file mode 100644 index 000000000..1305d048f --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c @@ -0,0 +1,70 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include "mh_sha1_internal.h" + +#ifdef HAVE_AS_KNOWS_AVX512 + +/***************mh_sha1_update***********/ +// mh_sha1_update_avx512.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx512 +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx512 +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************mh_sha1_finalize AND mh_sha1_tail***********/ +// mh_sha1_tail is used to calculate the last incomplete src data block +// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail +// mh_sha1_finalize_avx512.c and mh_sha1_tail_avx512.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx512 +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx512 +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx512 +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************version info***********/ +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// mh_sha1_update version info +struct slver mh_sha1_update_avx512_slver_0600027c; +struct slver mh_sha1_update_avx512_slver = { 0x027c, 0x00, 0x06 }; + +// mh_sha1_finalize version info +struct slver mh_sha1_finalize_avx512_slver_0600027d; +struct slver mh_sha1_finalize_avx512_slver = { 0x027d, 0x00, 0x06 }; + +#endif // HAVE_AS_KNOWS_AVX512 diff --git 
a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c new file mode 100644 index 000000000..18cd8161b --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_base_aliases.c @@ -0,0 +1,40 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ +#include "mh_sha1_internal.h" +#include +int mh_sha1_update(struct mh_sha1_ctx *ctx, const void *buffer, uint32_t len) +{ + return mh_sha1_update_base(ctx, buffer, len); + +} + +int mh_sha1_finalize(struct mh_sha1_ctx *ctx, void *mh_sha1_digest) +{ + return mh_sha1_finalize_base(ctx, mh_sha1_digest); +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm new file mode 100644 index 000000000..f4b5e76a0 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm @@ -0,0 +1,506 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;; Magic functions defined in FIPS 180-1 +;; +; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF, %%regC,%%regD + vpand %%regF, %%regF,%%regB + vpxor %%regF, %%regF,%%regD +%endmacro + +; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF,%%regD,%%regC + vpxor %%regF,%%regF,%%regB +%endmacro + +; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpor %%regF,%%regB,%%regC + vpand %%regT,%%regB,%%regC + vpand %%regF,%%regF,%%regD + vpor %%regF,%%regF,%%regT +%endmacro + +; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpsrld %%tmp, %%reg, (32-(%%imm)) + vpslld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + 
+; non-destructive +; PROLD_nd reg, imm, tmp, src +%macro PROLD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpsrld %%tmp, %%src, (32-(%%imm)) + vpslld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + vpaddd %%regE, %%regE,[%%data + (%%memW * 16)] + PROLD_nd %%regT,5, %%regF,%%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE, %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16] + + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + 
%define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define A xmm0 +%define B xmm1 +%define C xmm2 +%define D xmm3 +%define E xmm4 +%define F xmm5 ; tmp +%define G xmm6 ; tmp + +%define TMP G +%define FUN F +%define K xmm7 + +%define AA xmm8 +%define BB xmm9 +%define CC xmm10 +%define DD xmm11 +%define EE xmm12 + +%define T0 xmm6 +%define T1 xmm7 +%define T2 xmm8 +%define T3 xmm9 +%define T4 xmm10 +%define T5 xmm11 + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%define W14 xmm13 +%define W15 xmm14 +%define W16 xmm15 + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep | + +align 32 + +;void mh_sha1_block_avx(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. 
+; arg 3 number of 1KB blocks
+;
+; Hashes 'loops' 1KB blocks, updating the 16 interleaved SHA1 segment digests.
+; The 5x16 dword segment digests live in a 16-byte-aligned stack frame for the
+; duration of the call; each 1KB block is byte-swapped into mh_data_p and then
+; processed in four passes of four segments (xmm lanes).
+mk_global mh_sha1_block_avx, function, internal
+func(mh_sha1_block_avx)
+ endbranch
+ FUNC_SAVE
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 16 Bytes needed by avx
+ and rsp, ~0x0F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 5
+ VMOVPS A, [mh_digests_p + I*64 + 16*0]
+ VMOVPS B, [mh_digests_p + I*64 + 16*1]
+ VMOVPS C, [mh_digests_p + I*64 + 16*2]
+ VMOVPS D, [mh_digests_p + I*64 + 16*3]
+
+ vmovdqa [rsp + I*64 + 16*0], A
+ vmovdqa [rsp + I*64 + 16*1], B
+ vmovdqa [rsp + I*64 + 16*2], C
+ vmovdqa [rsp + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4
+ %assign I 0
+ %rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*16]
+ VMOVPS T1,[mh_in_p + I*64+1*16]
+ VMOVPS T2,[mh_in_p + I*64+2*16]
+ VMOVPS T3,[mh_in_p + I*64+3*16]
+
+ vpshufb T0, F
+ vmovdqa [mh_data_p +(I)*16 +0*256],T0
+ vpshufb T1, F
+ vmovdqa [mh_data_p +(I)*16 +1*256],T1
+ vpshufb T2, F
+ vmovdqa [mh_data_p +(I)*16 +2*256],T2
+ vpshufb T3, F
+ vmovdqa [mh_data_p +(I)*16 +3*256],T3
+ %assign I (I+1)
+ %endrep
+
+ mov mh_segs, 0 ;start from the first 4 segments
+ mov pref, 1024 ;prefetch distance: 1KB ahead, bumped each pass so lines are not prefetched repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ ; keep a copy of the input digests for the final Davies-Meyer add
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vmovdqa K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 16]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 16]
+ %rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+;; do rounds 20...39
+ vmovdqa K, [K20_39]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+;; do rounds 40...59
+ vmovdqa K, [K40_59]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 60...79
+ vmovdqa K, [K60_79]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ ; add the saved input digests back in (Davies-Meyer)
+ vpaddd A, AA
+ vpaddd B, BB
+ vpaddd C, CC
+ vpaddd D, DD
+ vpaddd E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 256
+ add mh_data_p, 256
+ add mh_segs, 16
+ cmp mh_segs, 64
+ jc .segs_loop ; loop while mh_segs < 64: four passes (mh_segs = 0,16,32,48)
+
+ ; rewind the frame buffer and advance to the next 1KB input block
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 5
+ vmovdqa A, [rsp + I*64 + 16*0]
+ vmovdqa B, [rsp + I*64 + 16*1]
+ vmovdqa C, [rsp + I*64 + 16*2]
+ vmovdqa D, [rsp + I*64 + 16*3]
+
+ VMOVPS [mh_digests_p + I*64 + 16*0], A
+ VMOVPS [mh_digests_p + I*64 + 16*1], B
+ VMOVPS [mh_digests_p + I*64 + 16*2], C
+ VMOVPS [mh_digests_p + I*64 + 16*3], D
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .data align=16
+
+align 16
+; mask for vpshufb: swaps the byte order of each dword lane (little- to big-endian)
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+
+; SHA1 round constants (FIPS 180), replicated across all dword lanes
+K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6
diff --git 
a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm new file mode 100644 index 000000000..fed35d83e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm @@ -0,0 +1,508 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX-2 +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;; Magic functions defined in FIPS 180-1 +;; +;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | ((~ B) & D) ) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpand %%regF, %%regB,%%regC + vpandn %%regT, %%regB,%%regD + vpor %%regF, %%regT,%%regF +%endmacro + +;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF,%%regD,%%regC + vpxor %%regF,%%regF,%%regB +%endmacro + + + +;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpor %%regF,%%regB,%%regC + vpand %%regT,%%regB,%%regC + vpand %%regF,%%regF,%%regD + vpor %%regF,%%regF,%%regT +%endmacro + +;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpsrld %%tmp, %%reg, (32-%%imm) + vpslld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpsrld %%tmp, %%src, (32-%%imm) + vpslld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 
+%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + vpaddd %%regE, %%regE,[%%data + (%%memW * 32)] + PROLD_nd %%regT,5, %%regF,%%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE, %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32] + + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + 
%define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define A ymm0 +%define B ymm1 +%define C ymm2 +%define D ymm3 +%define E ymm4 + +%define F ymm5 +%define T0 ymm6 +%define T1 ymm7 +%define T2 ymm8 +%define T3 ymm9 +%define T4 ymm10 +%define T5 ymm11 +%define T6 ymm12 +%define T7 ymm13 +%define T8 ymm14 +%define T9 ymm15 + +%define AA ymm5 +%define BB ymm6 +%define CC ymm7 +%define DD ymm8 +%define EE ymm9 +%define TMP ymm10 +%define FUN ymm11 +%define K ymm12 +%define W14 ymm13 +%define W15 ymm14 +%define W16 ymm15 + + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep | + +align 32 + +;void mh_sha1_block_avx2(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. 
+; arg 3 number of 1KB blocks
+;
+; Hashes 'loops' 1KB blocks, updating the 16 interleaved SHA1 segment digests.
+; The 5x16 dword segment digests live in a 32-byte-aligned stack frame for the
+; duration of the call; each 1KB block is byte-swapped into mh_data_p and then
+; processed in two passes of eight segments (ymm lanes).
+mk_global mh_sha1_block_avx2, function, internal
+func(mh_sha1_block_avx2)
+ endbranch
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ test loops, loops
+ jz .return
+
+ ; leave enough space to store segs_digests
+ sub rsp, FRAMESZ
+ ; align rsp to 32 Bytes needed by avx2
+ and rsp, ~0x1F
+
+ %assign I 0 ; copy segs_digests into stack
+ %rep 2
+ VMOVPS A, [mh_digests_p + I*32*5 + 32*0]
+ VMOVPS B, [mh_digests_p + I*32*5 + 32*1]
+ VMOVPS C, [mh_digests_p + I*32*5 + 32*2]
+ VMOVPS D, [mh_digests_p + I*32*5 + 32*3]
+ VMOVPS E, [mh_digests_p + I*32*5 + 32*4]
+
+ vmovdqa [rsp + I*32*5 + 32*0], A
+ vmovdqa [rsp + I*32*5 + 32*1], B
+ vmovdqa [rsp + I*32*5 + 32*2], C
+ vmovdqa [rsp + I*32*5 + 32*3], D
+ vmovdqa [rsp + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ vbroadcasti128 F, [PSHUFFLE_BYTE_FLIP_MASK]
+ ;transform input data from DWORD*16_SEGS*5 to DWORD*8_SEGS*5*2
+%assign I 0
+%rep 16
+ VMOVPS T0,[mh_in_p + I*64+0*32]
+ VMOVPS T1,[mh_in_p + I*64+1*32]
+
+ vpshufb T0, T0, F
+ vmovdqa [mh_data_p +I*32+0*512],T0
+ vpshufb T1, T1, F
+ vmovdqa [mh_data_p +I*32+1*512],T1
+%assign I (I+1)
+%endrep
+
+ xor mh_segs, mh_segs ;start from the first 8 segments
+ mov pref, 1024 ;prefetch distance: 1KB ahead, bumped each pass so lines are not prefetched repeatedly
+ .segs_loop:
+ ;; Initialize digests
+ vmovdqa A, [rsp + 0*64 + mh_segs]
+ vmovdqa B, [rsp + 1*64 + mh_segs]
+ vmovdqa C, [rsp + 2*64 + mh_segs]
+ vmovdqa D, [rsp + 3*64 + mh_segs]
+ vmovdqa E, [rsp + 4*64 + mh_segs]
+
+ ; keep a copy of the input digests for the final Davies-Meyer add
+ vmovdqa AA, A
+ vmovdqa BB, B
+ vmovdqa CC, C
+ vmovdqa DD, D
+ vmovdqa EE, E
+;;
+;; perform 0-79 steps
+;;
+ vpbroadcastq K, [K00_19]
+;; do rounds 0...15
+ %assign I 0
+ %rep 16
+ SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+%assign I (I+1)
+%endrep
+
+;; do rounds 16...19
+ vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 32]
+ vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 32]
+ %rep 4
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*0]
+ PREFETCH_X [mh_in_p + pref+128*1]
+;; do rounds 20...39
+ vpbroadcastq K, [K20_39]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+;; do rounds 40...59
+ vpbroadcastq K, [K40_59]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+ PREFETCH_X [mh_in_p + pref+128*2]
+ PREFETCH_X [mh_in_p + pref+128*3]
+;; do rounds 60...79
+ vpbroadcastq K, [K60_79]
+ %rep 20
+ SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p
+ ROTATE_ARGS
+ %assign I (I+1)
+ %endrep
+
+ ; add the saved input digests back in (Davies-Meyer)
+ vpaddd A,A, AA
+ vpaddd B,B, BB
+ vpaddd C,C, CC
+ vpaddd D,D, DD
+ vpaddd E,E, EE
+
+ ; write out digests
+ vmovdqa [rsp + 0*64 + mh_segs], A
+ vmovdqa [rsp + 1*64 + mh_segs], B
+ vmovdqa [rsp + 2*64 + mh_segs], C
+ vmovdqa [rsp + 3*64 + mh_segs], D
+ vmovdqa [rsp + 4*64 + mh_segs], E
+
+ add pref, 512
+
+ add mh_data_p, 512
+ add mh_segs, 32
+ cmp mh_segs, 64
+ jc .segs_loop ; loop while mh_segs < 64: two passes (mh_segs = 0,32)
+
+ ; rewind the frame buffer and advance to the next 1KB input block
+ sub mh_data_p, (1024)
+ add mh_in_p, (1024)
+ sub loops, 1
+ jne .block_loop
+
+
+ %assign I 0 ; copy segs_digests back to mh_digests_p
+ %rep 2
+ vmovdqa A, [rsp + I*32*5 + 32*0]
+ vmovdqa B, [rsp + I*32*5 + 32*1]
+ vmovdqa C, [rsp + I*32*5 + 32*2]
+ vmovdqa D, [rsp + I*32*5 + 32*3]
+ vmovdqa E, [rsp + I*32*5 + 32*4]
+
+ VMOVPS [mh_digests_p + I*32*5 + 32*0], A
+ VMOVPS [mh_digests_p + I*32*5 + 32*1], B
+ VMOVPS [mh_digests_p + I*32*5 + 32*2], C
+ VMOVPS [mh_digests_p + I*32*5 + 32*3], D
+ VMOVPS [mh_digests_p + I*32*5 + 32*4], E
+ %assign I (I+1)
+ %endrep
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+endproc_frame
+
+section .rodata align=32
+
+align 32
+; mask for vpshufb (broadcast to both 128-bit halves via vbroadcasti128):
+; swaps the byte order of each dword lane
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b
+; SHA1 round constants (FIPS 180); one qword each, broadcast with vpbroadcastq
+K00_19: dq 0x5A8279995A827999
+K20_39: dq 0x6ED9EBA16ED9EBA1
+K40_59: dq 0x8F1BBCDC8F1BBCDC
+K60_79: dq 0xCA62C1D6CA62C1D6
diff --git 
a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm
new file mode 100644
index 000000000..a72c21661
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm
@@ -0,0 +1,406 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+;; code to compute 16 SHA1 using AVX-512
+;;
+
+%include "reg_sizes.asm"
+
+%ifdef HAVE_AS_KNOWS_AVX512
+
+[bits 64]
+default rel
+section .text
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define VMOVPS vmovdqu64
+;SIMD variables definition
+%define A zmm0
+%define B zmm1
+%define C zmm2
+%define D zmm3
+%define E zmm4
+%define HH0 zmm5
+%define HH1 zmm6
+%define HH2 zmm7
+%define HH3 zmm8
+%define HH4 zmm9
+%define KT zmm10
+%define XTMP0 zmm11
+%define XTMP1 zmm12
+%define SHUF_MASK zmm13
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;using extra 16 ZMM registers to place the inverse input data
+%define W0 zmm16
+%define W1 zmm17
+%define W2 zmm18
+%define W3 zmm19
+%define W4 zmm20
+%define W5 zmm21
+%define W6 zmm22
+%define W7 zmm23
+%define W8 zmm24
+%define W9 zmm25
+%define W10 zmm26
+%define W11 zmm27
+%define W12 zmm28
+%define W13 zmm29
+%define W14 zmm30
+%define W15 zmm31
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;macros definition
+%macro ROTATE_ARGS 0
+%xdefine TMP_ E
+%xdefine E D
+%xdefine D C
+%xdefine C B
+%xdefine B A
+%xdefine A TMP_
+%endm
+
+; One SHA1 round for all 16 segment lanes; the Ft boolean function is
+; selected by the vpternlogd truth-table immediate %%F_IMMED.
+%macro PROCESS_LOOP 2
+%define %%WT %1
+%define %%F_IMMED %2
+
+ ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt
+ ; E=D, D=C, C=ROTL_30(B), B=A, A=T
+
+ ; Ft
+ ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D)
+ ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D
+ ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D)
+
+ vmovdqa32 XTMP1, B ; Copy B
+ vpaddd E, E, %%WT ; E = E + Wt
+ vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D)
+ vpaddd E, E, KT ; E = E + Wt + Kt
+ vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A)
+ vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt
+ vprold B, B, 30 ; B = ROTL_30(B)
+ vpaddd E, E, XTMP0 ; E = T
+
+ ROTATE_ARGS
+%endmacro
+
+; In-place message schedule update; 0x96 is the 3-way XOR truth table.
+%macro MSG_SCHED_ROUND_16_79 4
+%define %%WT %1
+%define %%WTp2 %2
+%define %%WTp8 %3
+%define %%WTp13 %4
+ ; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ Wt-16)
+ ; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt)
+ vpternlogd %%WT, %%WTp2, %%WTp8, 0x96
+ vpxord %%WT, %%WT, %%WTp13
+ vprold %%WT, %%WT, 1
+%endmacro
+
+%define APPEND(a,b) a %+ b
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifidn __OUTPUT_FORMAT__, elf64
+ ; Linux
+ %define arg0 rdi
+ %define arg1 rsi
+ %define arg2 rdx
+ %define arg3 rcx
+
+ %define arg4 r8
+ %define arg5 r9
+
+ %define tmp1 r10
+ %define tmp2 r11
+ %define tmp3 r12 ; must be saved and restored
+ %define tmp4 r13 ; must be saved and restored
+ %define tmp5 r14 ; must be saved and restored
+ %define tmp6 r15 ; must be saved and restored
+ %define return rax
+
+ %define func(x) x:
+ %macro FUNC_SAVE 0
+ push r12
+ push r13
+ push r14
+ push r15
+ %endmacro
+ %macro FUNC_RESTORE 0
+ pop r15
+ pop r14
+ pop r13
+ pop r12
+ %endmacro
+%else
+ ; Windows
+ %define arg0 rcx
+ %define arg1 rdx
+ %define arg2 r8
+ %define arg3 r9
+
+ %define arg4 r10
+ %define arg5 r11
+ %define tmp1 r12 ; must be saved and restored
+ %define tmp2 r13 ; must be saved and restored
+ %define tmp3 r14 ; must be saved and restored
+ %define tmp4 r15 ; must be saved and restored
+ %define tmp5 rdi ; must be saved and restored
+ %define tmp6 rsi ; must be saved and restored
+ %define return rax
+
+ %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8
+ ; remove unwind info macros
+ %define func(x) x: endbranch
+ %macro FUNC_SAVE 0
+ sub rsp, stack_size
+ movdqa [rsp + 0*16], xmm6
+ movdqa [rsp + 1*16], xmm7
+ movdqa [rsp + 2*16], xmm8
+ movdqa [rsp + 3*16], xmm9
+ movdqa [rsp + 4*16], xmm10
+ movdqa [rsp + 5*16], xmm11
+ movdqa [rsp + 6*16], xmm12
+ movdqa [rsp + 7*16], xmm13
+ movdqa [rsp + 8*16], xmm14
+ movdqa [rsp + 9*16], xmm15
+ mov [rsp + 10*16 + 0*8], r12
+ mov [rsp + 10*16 + 1*8], r13
+ mov [rsp + 10*16 + 2*8], r14
+ mov [rsp + 10*16 + 3*8], r15
+ mov [rsp + 10*16 + 4*8], rdi
+ mov [rsp + 10*16 + 5*8], rsi
+ %endmacro
+
+ %macro FUNC_RESTORE 0
+ movdqa xmm6, [rsp + 0*16]
+ movdqa xmm7, [rsp + 1*16]
+ movdqa xmm8, [rsp + 2*16]
+ movdqa xmm9, [rsp + 3*16]
+ movdqa xmm10, [rsp + 4*16]
+ movdqa xmm11, [rsp + 5*16]
+ movdqa xmm12, [rsp + 6*16]
+ movdqa xmm13, [rsp + 7*16]
+ movdqa xmm14, [rsp + 8*16]
+ movdqa xmm15, [rsp + 9*16]
+ mov r12, [rsp + 10*16 + 0*8]
+ mov r13, [rsp + 10*16 + 1*8]
+ mov r14, [rsp + 10*16 + 2*8]
+ mov r15, [rsp + 10*16 + 3*8]
+ mov rdi, [rsp + 10*16 + 4*8]
+ mov rsi, [rsp + 10*16 + 5*8]
+ add rsp, stack_size
+ %endmacro
+%endif
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%define loops arg3
+;variables of mh_sha1
+%define mh_in_p arg0
+%define mh_digests_p arg1
+%define mh_data_p arg2
+%define mh_segs tmp1
+;variables used by storing segs_digests on stack
+%define RSP_SAVE tmp2
+
+%define pref tmp3
+%macro PREFETCH_X 1
+%define %%mem %1
+ prefetchnta %%mem
+%endmacro
+
+;init hash digests
+; segs_digests:low addr-> high_addr
+; a | b | c | ...| p | (16)
+; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap |
+; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp |
+; ....
+; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep |
+
+[bits 64]
+section .text
+align 32
+
+;void mh_sha1_block_avx512(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
+; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);
+; arg 0 pointer to input data
+; arg 1 pointer to digests, include segments digests(uint32_t digests[5][16])
+; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data.
+; arg 3 number of 1KB blocks
+;
+global mh_sha1_block_avx512
+func(mh_sha1_block_avx512)
+ endbranch
+ FUNC_SAVE
+
+ ; save rsp
+ mov RSP_SAVE, rsp
+
+ cmp loops, 0
+ jle .return
+
+ ; align rsp to 64 Bytes needed by avx512
+ and rsp, ~0x3f
+
+ ; copy segs_digests into registers.
+ VMOVPS HH0, [mh_digests_p + 64*0]
+ VMOVPS HH1, [mh_digests_p + 64*1]
+ VMOVPS HH2, [mh_digests_p + 64*2]
+ VMOVPS HH3, [mh_digests_p + 64*3]
+ VMOVPS HH4, [mh_digests_p + 64*4]
+ ;a mask used to transform to big-endian data
+ vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK]
+
+.block_loop:
+ ;transform to big-endian data and store on aligned_frame
+ ;using extra 16 ZMM registers instead of stack
+%assign I 0
+%rep 8
+%assign J (I+1)
+ VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64]
+ VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64]
+
+ vpshufb APPEND(W,I), APPEND(W,I), SHUF_MASK
+ vpshufb APPEND(W,J), APPEND(W,J), SHUF_MASK
+%assign I (I+2)
+%endrep
+
+ ; working state starts from the running digests
+ vmovdqa64 A, HH0
+ vmovdqa64 B, HH1
+ vmovdqa64 C, HH2
+ vmovdqa64 D, HH3
+ vmovdqa64 E, HH4
+
+ vmovdqa32 KT, [K00_19]
+; fully unrolled 80 rounds; I is the vpternlogd immediate for Ft
+; (0xCA = Ch, 0x96 = Parity/3-way XOR, 0xE8 = Maj — see Ft table above),
+; J/K/L/M index the rotating 16-entry message schedule W0..W15
+%assign I 0xCA
+%assign J 0
+%assign K 2
+%assign L 8
+%assign M 13
+%assign N 0
+%rep 80
+ PROCESS_LOOP APPEND(W,J), I
+ %if N < 64
+ MSG_SCHED_ROUND_16_79 APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M)
+ %endif
+ %if N = 19
+ vmovdqa32 KT, [K20_39]
+ %assign I 0x96
+ %elif N = 39
+ vmovdqa32 KT, [K40_59]
+ %assign I 0xE8
+ %elif N = 59
+ vmovdqa32 KT, [K60_79]
+ %assign I 0x96
+ %endif
+ %if N % 10 = 9
+ PREFETCH_X [mh_in_p + 1024+128*(N / 10)]
+ %endif
+%assign J ((J+1)% 16)
+%assign K ((K+1)% 16)
+%assign L ((L+1)% 16)
+%assign M ((M+1)% 16)
+%assign N (N+1)
+%endrep
+
+ ; Add old digest
+ vpaddd HH0,A, HH0
+ vpaddd HH1,B, HH1
+ vpaddd HH2,C, HH2
+ vpaddd HH3,D, HH3
+ vpaddd HH4,E, HH4
+
+ add mh_in_p, 1024
+ sub loops, 1
+ jne .block_loop
+
+ ; copy segs_digests to mh_digests_p
+ VMOVPS [mh_digests_p + 64*0], HH0
+ VMOVPS [mh_digests_p + 64*1], HH1
+ VMOVPS [mh_digests_p + 64*2], HH2
+ VMOVPS [mh_digests_p + 64*3], HH3
+ VMOVPS [mh_digests_p + 64*4], HH4
+
+ mov rsp, RSP_SAVE ; restore rsp
+
+.return:
+ FUNC_RESTORE
+ ret
+
+
+section .data align=64
+
+align 64
+; mask for vpshufb: swaps the byte order of each dword lane, replicated to 512 bits
+PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+ dq 0x0405060700010203
+ dq 0x0c0d0e0f08090a0b
+
+; SHA1 round constants (FIPS 180), replicated across all 16 dword lanes
+K00_19: dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+ dq 0x5A8279995A827999
+
+K20_39: dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+ dq 0x6ED9EBA16ED9EBA1
+
+K40_59: dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+ dq 0x8F1BBCDC8F1BBCDC
+
+K60_79: dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+ dq 0xCA62C1D6CA62C1D6
+
+%else
+%ifidn __OUTPUT_FORMAT__, win64
+global no_mh_sha1_block_avx512
+no_mh_sha1_block_avx512:
+%endif
+%endif ; HAVE_AS_KNOWS_AVX512
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c
new file mode 100644
index 000000000..402c9741a
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c
@@ -0,0 +1,387 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "mh_sha1_internal.h" +#include + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +// Base multi-hash SHA1 Functions +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +#define store_w(s, i, w, ww) (w[i][s] = to_be32(ww[i*HASH_SEGS+s])) // only used for step 0 ~ 15 +#define update_w(s, i, w) (w[i&15][s] = rol32(w[(i-3)&15][s]^w[(i-8)&15][s]^w[(i-14)&15][s]^w[(i-16)&15][s], 1)) // used for step > 15 +#define update_e_1(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F1(b[s],c[s],d[s]) + K_00_19 + w[i&15][s]) +#define update_e_2(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F2(b[s],c[s],d[s]) + K_20_39 + w[i&15][s]) +#define update_e_3(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F3(b[s],c[s],d[s]) + K_40_59 + w[i&15][s]) +#define 
update_e_4(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F4(b[s],c[s],d[s]) + K_60_79 + w[i&15][s]) +#define update_b(s, b) (b[s] = rol32(b[s],30)) + +#define STORE_W(i, w, ww) \ + store_w(0, i, w, ww); \ + store_w(1, i, w, ww); \ + store_w(2, i, w, ww); \ + store_w(3, i, w, ww); \ + store_w(4, i, w, ww); \ + store_w(5, i, w, ww); \ + store_w(6, i, w, ww); \ + store_w(7, i, w, ww); \ + store_w(8, i, w, ww); \ + store_w(9, i, w, ww); \ + store_w(10, i, w, ww); \ + store_w(11, i, w, ww); \ + store_w(12, i, w, ww); \ + store_w(13, i, w, ww); \ + store_w(14, i, w, ww); \ + store_w(15, i, w, ww) + +#define UPDATE_W(i, w) \ + update_w(0, i, w); \ + update_w(1, i, w); \ + update_w(2, i, w); \ + update_w(3, i, w); \ + update_w(4, i, w); \ + update_w(5, i, w); \ + update_w(6, i, w); \ + update_w(7, i, w); \ + update_w(8, i, w); \ + update_w(9, i, w); \ + update_w(10, i, w); \ + update_w(11, i, w); \ + update_w(12, i, w); \ + update_w(13, i, w); \ + update_w(14, i, w); \ + update_w(15, i, w) + +#define UPDATE_E1(a, b, c, d, e, i, w) \ + update_e_1(0, a, b, c, d, e, i, w); \ + update_e_1(1, a, b, c, d, e, i, w); \ + update_e_1(2, a, b, c, d, e, i, w); \ + update_e_1(3, a, b, c, d, e, i, w); \ + update_e_1(4, a, b, c, d, e, i, w); \ + update_e_1(5, a, b, c, d, e, i, w); \ + update_e_1(6, a, b, c, d, e, i, w); \ + update_e_1(7, a, b, c, d, e, i, w); \ + update_e_1(8, a, b, c, d, e, i, w); \ + update_e_1(9, a, b, c, d, e, i, w); \ + update_e_1(10, a, b, c, d, e, i, w); \ + update_e_1(11, a, b, c, d, e, i, w); \ + update_e_1(12, a, b, c, d, e, i, w); \ + update_e_1(13, a, b, c, d, e, i, w); \ + update_e_1(14, a, b, c, d, e, i, w); \ + update_e_1(15, a, b, c, d, e, i, w) + +#define UPDATE_E2(a, b, c, d, e, i, w) \ + update_e_2(0, a, b, c, d, e, i, w); \ + update_e_2(1, a, b, c, d, e, i, w); \ + update_e_2(2, a, b, c, d, e, i, w); \ + update_e_2(3, a, b, c, d, e, i, w); \ + update_e_2(4, a, b, c, d, e, i, w); \ + update_e_2(5, a, b, c, d, e, i, w); \ + update_e_2(6, a, b, c, d, 
e, i, w); \ + update_e_2(7, a, b, c, d, e, i, w); \ + update_e_2(8, a, b, c, d, e, i, w); \ + update_e_2(9, a, b, c, d, e, i, w); \ + update_e_2(10, a, b, c, d, e, i, w); \ + update_e_2(11, a, b, c, d, e, i, w); \ + update_e_2(12, a, b, c, d, e, i, w); \ + update_e_2(13, a, b, c, d, e, i, w); \ + update_e_2(14, a, b, c, d, e, i, w); \ + update_e_2(15, a, b, c, d, e, i, w) + +#define UPDATE_E3(a, b, c, d, e, i, w) \ + update_e_3(0, a, b, c, d, e, i, w); \ + update_e_3(1, a, b, c, d, e, i, w); \ + update_e_3(2, a, b, c, d, e, i, w); \ + update_e_3(3, a, b, c, d, e, i, w); \ + update_e_3(4, a, b, c, d, e, i, w); \ + update_e_3(5, a, b, c, d, e, i, w); \ + update_e_3(6, a, b, c, d, e, i, w); \ + update_e_3(7, a, b, c, d, e, i, w); \ + update_e_3(8, a, b, c, d, e, i, w); \ + update_e_3(9, a, b, c, d, e, i, w); \ + update_e_3(10, a, b, c, d, e, i, w); \ + update_e_3(11, a, b, c, d, e, i, w); \ + update_e_3(12, a, b, c, d, e, i, w); \ + update_e_3(13, a, b, c, d, e, i, w); \ + update_e_3(14, a, b, c, d, e, i, w); \ + update_e_3(15, a, b, c, d, e, i, w) + +#define UPDATE_E4(a, b, c, d, e, i, w) \ + update_e_4(0, a, b, c, d, e, i, w); \ + update_e_4(1, a, b, c, d, e, i, w); \ + update_e_4(2, a, b, c, d, e, i, w); \ + update_e_4(3, a, b, c, d, e, i, w); \ + update_e_4(4, a, b, c, d, e, i, w); \ + update_e_4(5, a, b, c, d, e, i, w); \ + update_e_4(6, a, b, c, d, e, i, w); \ + update_e_4(7, a, b, c, d, e, i, w); \ + update_e_4(8, a, b, c, d, e, i, w); \ + update_e_4(9, a, b, c, d, e, i, w); \ + update_e_4(10, a, b, c, d, e, i, w); \ + update_e_4(11, a, b, c, d, e, i, w); \ + update_e_4(12, a, b, c, d, e, i, w); \ + update_e_4(13, a, b, c, d, e, i, w); \ + update_e_4(14, a, b, c, d, e, i, w); \ + update_e_4(15, a, b, c, d, e, i, w) + +#define UPDATE_B(b) \ + update_b(0, b); \ + update_b(1, b); \ + update_b(2, b); \ + update_b(3, b); \ + update_b(4, b); \ + update_b(5, b); \ + update_b(6, b); \ + update_b(7, b); \ + update_b(8, b); \ + update_b(9, b); \ + update_b(10, b); \ + 
update_b(11, b); \ + update_b(12, b); \ + update_b(13, b); \ + update_b(14, b); \ + update_b(15, b) + +static inline void step00_15(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS], + uint32_t * ww) +{ + STORE_W(i, w, ww); + UPDATE_E1(a, b, c, d, e, i, w); + UPDATE_B(b); +} + +static inline void step16_19(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS]) +{ + UPDATE_W(i, w); + UPDATE_E1(a, b, c, d, e, i, w); + UPDATE_B(b); + +} + +static inline void step20_39(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS]) +{ + UPDATE_W(i, w); + UPDATE_E2(a, b, c, d, e, i, w); + UPDATE_B(b); +} + +static inline void step40_59(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS]) +{ + UPDATE_W(i, w); + UPDATE_E3(a, b, c, d, e, i, w); + UPDATE_B(b); +} + +static inline void step60_79(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS]) +{ + UPDATE_W(i, w); + UPDATE_E4(a, b, c, d, e, i, w); + UPDATE_B(b); +} + +static inline void init_abcde(uint32_t * xx, uint32_t n, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS]) +{ + xx[0] = digests[n][0]; + xx[1] = digests[n][1]; + xx[2] = digests[n][2]; + xx[3] = digests[n][3]; + xx[4] = digests[n][4]; + xx[5] = digests[n][5]; + xx[6] = digests[n][6]; + xx[7] = digests[n][7]; + xx[8] = digests[n][8]; + xx[9] = digests[n][9]; + xx[10] = digests[n][10]; + xx[11] = digests[n][11]; + xx[12] = digests[n][12]; + xx[13] = digests[n][13]; + xx[14] = digests[n][14]; + xx[15] = digests[n][15]; +} + +static inline void add_abcde(uint32_t * xx, uint32_t n, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS]) +{ + digests[n][0] += xx[0]; + digests[n][1] += xx[1]; + digests[n][2] += xx[2]; + digests[n][3] += xx[3]; + digests[n][4] += xx[4]; + digests[n][5] += xx[5]; + digests[n][6] += xx[6]; + 
digests[n][7] += xx[7]; + digests[n][8] += xx[8]; + digests[n][9] += xx[9]; + digests[n][10] += xx[10]; + digests[n][11] += xx[11]; + digests[n][12] += xx[12]; + digests[n][13] += xx[13]; + digests[n][14] += xx[14]; + digests[n][15] += xx[15]; +} + +/* + * API to perform 0-79 steps of the multi-hash algorithm for + * a single block of data. The caller is responsible for ensuring + * a full block of data input. + * + * Argument: + * input - the pointer to the data + * digest - the space to hold the digests for all segments. + * + * Return: + * N/A + */ +void mh_sha1_single(const uint8_t * input, uint32_t(*digests)[HASH_SEGS], + uint8_t * frame_buffer) +{ + uint32_t aa[HASH_SEGS], bb[HASH_SEGS], cc[HASH_SEGS], dd[HASH_SEGS], ee[HASH_SEGS]; + uint32_t *ww = (uint32_t *) input; + uint32_t(*w)[HASH_SEGS]; + + w = (uint32_t(*)[HASH_SEGS]) frame_buffer; + + init_abcde(aa, 0, digests); + init_abcde(bb, 1, digests); + init_abcde(cc, 2, digests); + init_abcde(dd, 3, digests); + init_abcde(ee, 4, digests); + + step00_15(0, aa, bb, cc, dd, ee, w, ww); + step00_15(1, ee, aa, bb, cc, dd, w, ww); + step00_15(2, dd, ee, aa, bb, cc, w, ww); + step00_15(3, cc, dd, ee, aa, bb, w, ww); + step00_15(4, bb, cc, dd, ee, aa, w, ww); + step00_15(5, aa, bb, cc, dd, ee, w, ww); + step00_15(6, ee, aa, bb, cc, dd, w, ww); + step00_15(7, dd, ee, aa, bb, cc, w, ww); + step00_15(8, cc, dd, ee, aa, bb, w, ww); + step00_15(9, bb, cc, dd, ee, aa, w, ww); + step00_15(10, aa, bb, cc, dd, ee, w, ww); + step00_15(11, ee, aa, bb, cc, dd, w, ww); + step00_15(12, dd, ee, aa, bb, cc, w, ww); + step00_15(13, cc, dd, ee, aa, bb, w, ww); + step00_15(14, bb, cc, dd, ee, aa, w, ww); + step00_15(15, aa, bb, cc, dd, ee, w, ww); + + step16_19(16, ee, aa, bb, cc, dd, w); + step16_19(17, dd, ee, aa, bb, cc, w); + step16_19(18, cc, dd, ee, aa, bb, w); + step16_19(19, bb, cc, dd, ee, aa, w); + + step20_39(20, aa, bb, cc, dd, ee, w); + step20_39(21, ee, aa, bb, cc, dd, w); + step20_39(22, dd, ee, aa, bb, cc, w); + 
step20_39(23, cc, dd, ee, aa, bb, w); + step20_39(24, bb, cc, dd, ee, aa, w); + step20_39(25, aa, bb, cc, dd, ee, w); + step20_39(26, ee, aa, bb, cc, dd, w); + step20_39(27, dd, ee, aa, bb, cc, w); + step20_39(28, cc, dd, ee, aa, bb, w); + step20_39(29, bb, cc, dd, ee, aa, w); + step20_39(30, aa, bb, cc, dd, ee, w); + step20_39(31, ee, aa, bb, cc, dd, w); + step20_39(32, dd, ee, aa, bb, cc, w); + step20_39(33, cc, dd, ee, aa, bb, w); + step20_39(34, bb, cc, dd, ee, aa, w); + step20_39(35, aa, bb, cc, dd, ee, w); + step20_39(36, ee, aa, bb, cc, dd, w); + step20_39(37, dd, ee, aa, bb, cc, w); + step20_39(38, cc, dd, ee, aa, bb, w); + step20_39(39, bb, cc, dd, ee, aa, w); + + step40_59(40, aa, bb, cc, dd, ee, w); + step40_59(41, ee, aa, bb, cc, dd, w); + step40_59(42, dd, ee, aa, bb, cc, w); + step40_59(43, cc, dd, ee, aa, bb, w); + step40_59(44, bb, cc, dd, ee, aa, w); + step40_59(45, aa, bb, cc, dd, ee, w); + step40_59(46, ee, aa, bb, cc, dd, w); + step40_59(47, dd, ee, aa, bb, cc, w); + step40_59(48, cc, dd, ee, aa, bb, w); + step40_59(49, bb, cc, dd, ee, aa, w); + step40_59(50, aa, bb, cc, dd, ee, w); + step40_59(51, ee, aa, bb, cc, dd, w); + step40_59(52, dd, ee, aa, bb, cc, w); + step40_59(53, cc, dd, ee, aa, bb, w); + step40_59(54, bb, cc, dd, ee, aa, w); + step40_59(55, aa, bb, cc, dd, ee, w); + step40_59(56, ee, aa, bb, cc, dd, w); + step40_59(57, dd, ee, aa, bb, cc, w); + step40_59(58, cc, dd, ee, aa, bb, w); + step40_59(59, bb, cc, dd, ee, aa, w); + + step60_79(60, aa, bb, cc, dd, ee, w); + step60_79(61, ee, aa, bb, cc, dd, w); + step60_79(62, dd, ee, aa, bb, cc, w); + step60_79(63, cc, dd, ee, aa, bb, w); + step60_79(64, bb, cc, dd, ee, aa, w); + step60_79(65, aa, bb, cc, dd, ee, w); + step60_79(66, ee, aa, bb, cc, dd, w); + step60_79(67, dd, ee, aa, bb, cc, w); + step60_79(68, cc, dd, ee, aa, bb, w); + step60_79(69, bb, cc, dd, ee, aa, w); + step60_79(70, aa, bb, cc, dd, ee, w); + step60_79(71, ee, aa, bb, cc, dd, w); + step60_79(72, dd, ee, aa, bb, cc, 
w); + step60_79(73, cc, dd, ee, aa, bb, w); + step60_79(74, bb, cc, dd, ee, aa, w); + step60_79(75, aa, bb, cc, dd, ee, w); + step60_79(76, ee, aa, bb, cc, dd, w); + step60_79(77, dd, ee, aa, bb, cc, w); + step60_79(78, cc, dd, ee, aa, bb, w); + step60_79(79, bb, cc, dd, ee, aa, w); + + add_abcde(aa, 0, digests); + add_abcde(bb, 1, digests); + add_abcde(cc, 2, digests); + add_abcde(dd, 3, digests); + add_abcde(ee, 4, digests); +} + +void mh_sha1_block_base(const uint8_t * input_data, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks) +{ + uint32_t i; + + for (i = 0; i < num_blocks; i++) { + mh_sha1_single(input_data, digests, frame_buffer); + input_data += MH_SHA1_BLOCK_SIZE; + } + + return; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm new file mode 100644 index 000000000..3d75d1649 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm @@ -0,0 +1,498 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. 
+; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using SSE +;; + +%include "reg_sizes.asm" + +[bits 64] +default rel +section .text + +;; Magic functions defined in FIPS 180-1 +;; +; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regC + pxor %%regF,%%regD + pand %%regF,%%regB + pxor %%regF,%%regD +%endmacro + +; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regD + pxor %%regF,%%regC + pxor %%regF,%%regB +%endmacro + +; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regB + movdqa %%regT,%%regB + por %%regF,%%regC + pand %%regT,%%regC + pand %%regF,%%regD + por %%regF,%%regT +%endmacro + +; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 
+%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + movdqa %%tmp, %%reg + pslld %%reg, %%imm + psrld %%tmp, (32-%%imm) + por %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + paddd %%regE,[%%data + (%%memW * 16)] + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + paddd %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + movdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + pxor W16, W14 + pxor W16, [%%data + ((%%memW - 8) & 15) * 16] + pxor W16, [%%data + ((%%memW - 3) & 15) * 16] + movdqa %%regF, W16 + pslld W16, 1 + psrld %%regF, (32-1) + por %%regF, W16 + ROTATE_W + + movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + paddd %%regE,%%regF + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + paddd %%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define 
tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define MOVPS movups + +%define A xmm0 +%define B xmm1 +%define C xmm2 +%define D xmm3 +%define E xmm4 +%define F xmm5 ; tmp +%define G xmm6 ; tmp + +%define TMP G +%define FUN F +%define K xmm7 + +%define AA xmm8 +%define BB xmm9 +%define CC xmm10 +%define DD xmm11 +%define EE xmm12 + +%define T0 xmm6 +%define T1 xmm7 +%define T2 xmm8 +%define T3 xmm9 +%define T4 xmm10 +%define T5 xmm11 + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%define W14 xmm13 +%define W15 xmm14 +%define W16 xmm15 + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h4 | h4 | h4 | ...| h4 | | Ea| Eb | Ec |...| Ep | + +align 32 + +;void mh_sha1_block_sse(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. 
+; arg 3 number of 1KB blocks +; +mk_global mh_sha1_block_sse, function, internal +func(mh_sha1_block_sse) + endbranch + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 16 Bytes needed by sse + and rsp, ~0x0F + + %assign I 0 ; copy segs_digests into stack + %rep 5 + MOVPS A, [mh_digests_p + I*64 + 16*0] + MOVPS B, [mh_digests_p + I*64 + 16*1] + MOVPS C, [mh_digests_p + I*64 + 16*2] + MOVPS D, [mh_digests_p + I*64 + 16*3] + + movdqa [rsp + I*64 + 16*0], A + movdqa [rsp + I*64 + 16*1], B + movdqa [rsp + I*64 + 16*2], C + movdqa [rsp + I*64 + 16*3], D + %assign I (I+1) + %endrep + +.block_loop: + ;transform to big-endian data and store on aligned_frame + movdqa F, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4 + %assign I 0 + %rep 16 + MOVPS T0,[mh_in_p + I*64+0*16] + MOVPS T1,[mh_in_p + I*64+1*16] + MOVPS T2,[mh_in_p + I*64+2*16] + MOVPS T3,[mh_in_p + I*64+3*16] + + pshufb T0, F + movdqa [mh_data_p +(I)*16 +0*256],T0 + pshufb T1, F + movdqa [mh_data_p +(I)*16 +1*256],T1 + pshufb T2, F + movdqa [mh_data_p +(I)*16 +2*256],T2 + pshufb T3, F + movdqa [mh_data_p +(I)*16 +3*256],T3 + %assign I (I+1) + %endrep + + mov mh_segs, 0 ;start from the first 4 segments + mov pref, 1024 ;avoid prefetch repeadtedly + .segs_loop: + ;; Initialize digests + movdqa A, [rsp + 0*64 + mh_segs] + movdqa B, [rsp + 1*64 + mh_segs] + movdqa C, [rsp + 2*64 + mh_segs] + movdqa D, [rsp + 3*64 + mh_segs] + movdqa E, [rsp + 4*64 + mh_segs] + + movdqa AA, A + movdqa BB, B + movdqa CC, C + movdqa DD, D + movdqa EE, E +;; +;; perform 0-79 steps +;; + movdqa K, [K00_19] +;; do rounds 0...15 + %assign I 0 + %rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 16...19 + movdqa W16, [mh_data_p + ((16 - 16) & 15) * 16] + movdqa W15, [mh_data_p + ((16 - 15) & 15) * 16] + %rep 4 + 
SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] +;; do rounds 20...39 + movdqa K, [K20_39] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 40...59 + movdqa K, [K40_59] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*1] +;; do rounds 60...79 + movdqa K, [K60_79] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + + paddd A, AA + paddd B, BB + paddd C, CC + paddd D, DD + paddd E, EE + + ; write out digests + movdqa [rsp + 0*64 + mh_segs], A + movdqa [rsp + 1*64 + mh_segs], B + movdqa [rsp + 2*64 + mh_segs], C + movdqa [rsp + 3*64 + mh_segs], D + movdqa [rsp + 4*64 + mh_segs], E + + add pref, 256 + add mh_data_p, 256 + add mh_segs, 16 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 5 + movdqa A, [rsp + I*64 + 16*0] + movdqa B, [rsp + I*64 + 16*1] + movdqa C, [rsp + I*64 + 16*2] + movdqa D, [rsp + I*64 + 16*3] + + MOVPS [mh_digests_p + I*64 + 16*0], A + MOVPS [mh_digests_p + I*64 + 16*1], B + MOVPS [mh_digests_p + I*64 + 16*2], C + MOVPS [mh_digests_p + I*64 + 16*3], D + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=16 + +align 16 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c 
b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c new file mode 100644 index 000000000..3058aaa87 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c @@ -0,0 +1,122 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/* + * mh_sha1_finalize_base.c contains the prototypes of mh_sha1_finalize_XXX + * and mh_sha1_tail_XXX. 
Default definitions are base type which generates + * mh_sha1_finalize_base and mh_sha1_tail_base. Other types are generated + * through different predefined macros by mh_sha1.c. + * mh_sha1_tail is used to calculate the last incomplete block of input + * data. mh_sha1_finalize is the mh_sha1_ctx wrapper of mh_sha1_tail. + */ +#ifndef MH_SHA1_FINALIZE_FUNCTION +#include +#include "mh_sha1_internal.h" + +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_base +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_base +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_base +#define MH_SHA1_FINALIZE_SLVER +#endif + +void MH_SHA1_TAIL_FUNCTION(uint8_t * partial_buffer, uint32_t total_len, + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], uint8_t * frame_buffer, + uint32_t digests[SHA1_DIGEST_WORDS]) +{ + uint64_t partial_buffer_len, len_in_bit; + + partial_buffer_len = total_len % MH_SHA1_BLOCK_SIZE; + + // Padding the first block + partial_buffer[partial_buffer_len] = 0x80; + partial_buffer_len++; + memset(partial_buffer + partial_buffer_len, 0, + MH_SHA1_BLOCK_SIZE - partial_buffer_len); + + // Calculate the first block without total_length if padding needs 2 block + if (partial_buffer_len > (MH_SHA1_BLOCK_SIZE - 8)) { + MH_SHA1_BLOCK_FUNCTION(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1); + //Padding the second block + memset(partial_buffer, 0, MH_SHA1_BLOCK_SIZE); + } + //Padding the block + len_in_bit = to_be64((uint64_t) total_len * 8); + *(uint64_t *) (partial_buffer + MH_SHA1_BLOCK_SIZE - 8) = len_in_bit; + MH_SHA1_BLOCK_FUNCTION(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1); + + //Calculate multi-hash SHA1 digests (segment digests as input message) + sha1_for_mh_sha1((uint8_t *) mh_sha1_segs_digests, digests, + 4 * SHA1_DIGEST_WORDS * HASH_SEGS); + + return; +} + +int MH_SHA1_FINALIZE_FUNCTION(struct mh_sha1_ctx *ctx, void *mh_sha1_digest) +{ + uint8_t *partial_block_buffer; + uint64_t total_len; + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint8_t 
*aligned_frame_buffer; + + if (ctx == NULL) + return MH_SHA1_CTX_ERROR_NULL; + + total_len = ctx->total_length; + partial_block_buffer = ctx->partial_block_buffer; + + /* mh_sha1 tail */ + aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer); + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + + MH_SHA1_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha1_segs_digests, + aligned_frame_buffer, ctx->mh_sha1_digest); + + /* Output the digests of mh_sha1 */ + if (mh_sha1_digest != NULL) { + ((uint32_t *) mh_sha1_digest)[0] = ctx->mh_sha1_digest[0]; + ((uint32_t *) mh_sha1_digest)[1] = ctx->mh_sha1_digest[1]; + ((uint32_t *) mh_sha1_digest)[2] = ctx->mh_sha1_digest[2]; + ((uint32_t *) mh_sha1_digest)[3] = ctx->mh_sha1_digest[3]; + ((uint32_t *) mh_sha1_digest)[4] = ctx->mh_sha1_digest[4]; + } + + return MH_SHA1_CTX_ERROR_NONE; +} + +#ifdef MH_SHA1_FINALIZE_SLVER +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// Version info +struct slver mh_sha1_finalize_base_slver_0000027b; +struct slver mh_sha1_finalize_base_slver = { 0x027b, 0x00, 0x00 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h new file mode 100644 index 000000000..81823048e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h @@ -0,0 +1,308 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#ifndef _MH_SHA1_INTERNAL_H_ +#define _MH_SHA1_INTERNAL_H_ + +/** + * @file mh_sha1_internal.h + * @brief mh_sha1 internal function prototypes and macros + * + * Interface for mh_sha1 internal functions + * + */ +#include +#include "mh_sha1.h" +#include "endian_helper.h" + +#ifdef __cplusplus + extern "C" { +#endif + +#ifdef _MSC_VER +# define inline __inline +#endif + + // 64byte pointer align +#define ALIGN_64(pointer) ( ((uint64_t)(pointer) + 0x3F)&(~0x3F) ) + + /******************************************************************* + *mh_sha1 constants and macros + ******************************************************************/ + /* mh_sha1 constants */ +#define MH_SHA1_H0 0x67452301UL +#define MH_SHA1_H1 0xefcdab89UL +#define MH_SHA1_H2 0x98badcfeUL +#define MH_SHA1_H3 0x10325476UL +#define MH_SHA1_H4 0xc3d2e1f0UL + +#define K_00_19 0x5a827999UL +#define K_20_39 0x6ed9eba1UL +#define K_40_59 0x8f1bbcdcUL +#define K_60_79 0xca62c1d6UL + + /* mh_sha1 macros */ +#define F1(b,c,d) (d ^ (b & (c ^ d))) +#define F2(b,c,d) (b ^ c ^ d) +#define F3(b,c,d) ((b & c) | (d & (b | c))) +#define F4(b,c,d) (b ^ c ^ d) + +#define rol32(x, r) (((x)<<(r)) ^ ((x)>>(32-(r)))) + + /******************************************************************* + * SHA1 API internal function prototypes + ******************************************************************/ + + /** + * @brief Performs complete SHA1 algorithm. + * + * @param input Pointer to buffer containing the input message. + * @param digest Pointer to digest to update. + * @param len Length of buffer. 
+ * @returns None + */ + void sha1_for_mh_sha1(const uint8_t * input_data, uint32_t * digest, const uint32_t len); + + /******************************************************************* + * mh_sha1 API internal function prototypes + * Multiple versions of Update and Finalize functions are supplied which use + * multiple versions of block and tail process subfunctions. + ******************************************************************/ + + /** + * @brief Tail process for multi-hash sha1. + * + * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE. + * It will output the final SHA1 digest based on mh_sha1_segs_digests. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha1_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @returns none + * + */ + void mh_sha1_tail(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha1. + * + * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE. + * It will output the final SHA1 digest based on mh_sha1_segs_digests. + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha1_segs_digests The digests of all 16 segments . 
/**
 * @brief Tail process for multi-hash sha1.
 *
 * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE.
 * It will output the final SHA1 digest based on mh_sha1_segs_digests.
 *
 * @param  partial_buffer Pointer to the start addr of remainder
 * @param  total_len The total length of all sections of input data.
 * @param  mh_sha1_segs_digests The digests of all 16 segments.
 * @param  frame_buffer Pointer to buffer which is a temp working area
 * @param  mh_sha1_digest mh_sha1 digest
 * @returns none
 */
 void mh_sha1_tail_base(uint8_t *partial_buffer, uint32_t total_len,
			uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
			uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);

/**
 * @brief Tail process for multi-hash sha1.
 *
 * Same contract as mh_sha1_tail_base.
 *
 * @requires SSE
 */
 void mh_sha1_tail_sse(uint8_t *partial_buffer, uint32_t total_len,
		       uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
		       uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);

/**
 * @brief Tail process for multi-hash sha1.
 *
 * Same contract as mh_sha1_tail_base.
 *
 * @requires AVX
 */
 void mh_sha1_tail_avx(uint8_t *partial_buffer, uint32_t total_len,
		       uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
		       uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);

/**
 * @brief Tail process for multi-hash sha1.
 *
 * Same contract as mh_sha1_tail_base.
 *
 * @requires AVX2
 */
 void mh_sha1_tail_avx2(uint8_t *partial_buffer, uint32_t total_len,
			uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
			uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);

/**
 * @brief Tail process for multi-hash sha1.
 *
 * Same contract as mh_sha1_tail_base.
 *
 * @requires AVX512
 */
 void mh_sha1_tail_avx512(uint8_t *partial_buffer, uint32_t total_len,
			  uint32_t (*mh_sha1_segs_digests)[HASH_SEGS],
			  uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]);

/**
 * @brief Calculate mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
 *
 * This function determines what instruction sets are enabled and selects the
 * appropriate version at runtime.
 *
 * @param  input_data Pointer to input data to be processed
 * @param  digests 16 segments digests
 * @param  frame_buffer Pointer to buffer which is a temp working area
 * @param  num_blocks The number of blocks.
 * @returns none
 */
 void mh_sha1_block(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
		    uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);

/**
 * @brief Calculate mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
 *
 * Portable C implementation; same contract as mh_sha1_block.
 */
 void mh_sha1_block_base(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			 uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);

/**
 * @brief Calculate mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
 *
 * Same contract as mh_sha1_block.
 *
 * @requires SSE
 */
 void mh_sha1_block_sse(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);

/**
 * @brief Calculate mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
 *
 * Same contract as mh_sha1_block.
 *
 * @requires AVX
 */
 void mh_sha1_block_avx(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);

/**
 * @brief Calculate mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
 *
 * Same contract as mh_sha1_block.
 *
 * @requires AVX2
 */
 void mh_sha1_block_avx2(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			 uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);

/**
 * @brief Calculate mh_sha1 digest of blocks whose size is MH_SHA1_BLOCK_SIZE*N.
 *
 * Same contract as mh_sha1_block.
 *
 * @requires AVX512
 */
 void mh_sha1_block_avx512(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS],
			   uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks);

#ifdef __cplusplus
}
#endif

#endif
; * Neither the name of Intel Corporation nor the names of its
;   contributors may be used to endorse or promote products derived
;   from this software without specific prior written permission.
;
; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

; Runtime CPU dispatch for the public mh_sha1_update/mh_sha1_finalize
; entry points: mbin_interface declares the indirect symbols and the
; mbin_dispatch_init* macros (from multibinary.asm) pick the fastest
; available implementation on first call.

%include "reg_sizes.asm"
%include "multibinary.asm"

%ifidn __OUTPUT_FORMAT__, elf32
 [bits 32]
%else
 default rel
 [bits 64]

 ; SIMD implementations exist only in 64-bit builds.
 extern mh_sha1_update_sse
 extern mh_sha1_update_avx
 extern mh_sha1_update_avx2
 extern mh_sha1_finalize_sse
 extern mh_sha1_finalize_avx
 extern mh_sha1_finalize_avx2

 ; AVX512 variants are built only when the assembler supports AVX512.
 %ifdef HAVE_AS_KNOWS_AVX512
  extern mh_sha1_update_avx512
  extern mh_sha1_finalize_avx512
 %endif

%endif

; Portable C fallbacks, available in every build.
extern mh_sha1_update_base
extern mh_sha1_finalize_base

; Declare the public, dispatched symbols.
mbin_interface mh_sha1_update
mbin_interface mh_sha1_finalize

%ifidn __OUTPUT_FORMAT__, elf64

 ; 64-bit ELF: dispatch across base/sse/avx/avx2 (and avx512 when built).
 %ifdef HAVE_AS_KNOWS_AVX512
  mbin_dispatch_init6 mh_sha1_update, mh_sha1_update_base, mh_sha1_update_sse, mh_sha1_update_avx, mh_sha1_update_avx2, mh_sha1_update_avx512
  mbin_dispatch_init6 mh_sha1_finalize, mh_sha1_finalize_base, mh_sha1_finalize_sse, mh_sha1_finalize_avx, mh_sha1_finalize_avx2, mh_sha1_finalize_avx512
 %else
  mbin_dispatch_init5 mh_sha1_update, mh_sha1_update_base, mh_sha1_update_sse, mh_sha1_update_avx, mh_sha1_update_avx2
  mbin_dispatch_init5 mh_sha1_finalize, mh_sha1_finalize_base, mh_sha1_finalize_sse, mh_sha1_finalize_avx, mh_sha1_finalize_avx2
 %endif

%else
 ; Other output formats fall back to the base implementation only.
 mbin_dispatch_init2 mh_sha1_update, mh_sha1_update_base
 mbin_dispatch_init2 mh_sha1_finalize, mh_sha1_finalize_base
%endif

;;; func				core, ver, snum
slversion mh_sha1_update,		00,   02,  0272
slversion mh_sha1_finalize,		00,   02,  0273
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include +#include "mh_sha1.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Loop many times over same +# define TEST_LEN 16*1024 +# define TEST_LOOPS 20000 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. +# define TEST_LEN 32*1024*1024 +# define TEST_LOOPS 100 +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif +#define TEST_MEM TEST_LEN + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_CTX_ERROR_NONE){ \ + printf("The mh_sha1 function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t 
hash_test[SHA1_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + + return mh_sha1_fail; +} + +int main(int argc, char *argv[]) +{ + int i, fail = 0; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS]; + uint8_t *buff = NULL; + struct mh_sha1_ctx *update_ctx_test = NULL, *update_ctx_base = NULL; + struct perf start, stop; + + printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n"); + + buff = malloc(TEST_LEN); + update_ctx_test = malloc(sizeof(*update_ctx_test)); + update_ctx_base = malloc(sizeof(*update_ctx_base)); + + if (buff == NULL || update_ctx_base == NULL || update_ctx_test == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + // mh_sha1 base version + mh_sha1_init(update_ctx_base); + mh_sha1_update_base(update_ctx_base, buff, TEST_LEN); + mh_sha1_finalize_base(update_ctx_base, hash_base); + + perf_start(&start); + for (i = 0; i < TEST_LOOPS / 10; i++) { + mh_sha1_init(update_ctx_base); + mh_sha1_update_base(update_ctx_base, buff, TEST_LEN); + mh_sha1_finalize_base(update_ctx_base, hash_base); + } + perf_stop(&stop); + printf("mh_sha1_update_base" TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_MEM * i); + + //Update feature test + CHECK_RETURN(mh_sha1_init(update_ctx_test)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test)); + + perf_start(&start); + for (i = 0; i < TEST_LOOPS; i++) { + CHECK_RETURN(mh_sha1_init(update_ctx_test)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test)); + } + perf_stop(&stop); + 
printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_MEM * i); + + // Check results + fail = compare_digests(hash_base, hash_test); + + if (fail) { + printf("Fail size=%d\n", TEST_LEN); + return -1; + } + + if (fail) + printf("Test failed function test%d\n", fail); + else + printf("Pass func check\n"); + + return fail; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c new file mode 100644 index 000000000..71caba50e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c @@ -0,0 +1,430 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include +#include "mh_sha1_internal.h" + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + // Macros and sub-functions which already exist in source code file + // (sha1_for_mh_sha1.c) is part of ISA-L library as internal functions. + // The reason why writing them twice is the linking issue caused by + // mh_sha1_ref(). mh_sha1_ref() needs these macros and sub-functions + // without linking ISA-L library. So mh_sha1_ref() includes them in + // order to contain essential sub-functions in its own object file. 
+//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +#if (__GNUC__ >= 11) +# define OPT_FIX __attribute__ ((noipa)) +#else +# define OPT_FIX +#endif + +#define W(x) w[(x) & 15] + +#define step00_19(i,a,b,c,d,e) \ + if (i>15) W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + else W(i) = to_be32(ww[i]); \ + e += rol32(a,5) + F1(b,c,d) + 0x5A827999 + W(i); \ + b = rol32(b,30) + +#define step20_39(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F2(b,c,d) + 0x6ED9EBA1 + W(i); \ + b = rol32(b,30) + +#define step40_59(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F3(b,c,d) + 0x8F1BBCDC + W(i); \ + b = rol32(b,30) + +#define step60_79(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F4(b,c,d) + 0xCA62C1D6 + W(i); \ + b = rol32(b,30) + +static void OPT_FIX sha1_single_for_mh_sha1_ref(const uint8_t * data, uint32_t digest[]) +{ + uint32_t a, b, c, d, e; + uint32_t w[16] = { 0 }; + uint32_t *ww = (uint32_t *) data; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + + step00_19(0, a, b, c, d, e); + step00_19(1, e, a, b, c, d); + step00_19(2, d, e, a, b, c); + step00_19(3, c, d, e, a, b); + step00_19(4, b, c, d, e, a); + step00_19(5, a, b, c, d, e); + step00_19(6, e, a, b, c, d); + step00_19(7, d, e, a, b, c); + step00_19(8, c, d, e, a, b); + step00_19(9, b, c, d, e, a); + step00_19(10, a, b, c, d, e); + step00_19(11, e, a, b, c, d); + step00_19(12, d, e, a, b, c); + step00_19(13, c, d, e, a, b); + step00_19(14, b, c, d, e, a); + step00_19(15, a, b, c, d, e); + step00_19(16, e, a, b, c, d); + step00_19(17, d, e, a, b, c); + step00_19(18, c, d, e, a, b); + step00_19(19, b, c, d, e, a); + + step20_39(20, a, b, c, d, e); + step20_39(21, e, a, b, c, d); + step20_39(22, d, e, a, b, c); + step20_39(23, c, d, e, a, b); + 
step20_39(24, b, c, d, e, a); + step20_39(25, a, b, c, d, e); + step20_39(26, e, a, b, c, d); + step20_39(27, d, e, a, b, c); + step20_39(28, c, d, e, a, b); + step20_39(29, b, c, d, e, a); + step20_39(30, a, b, c, d, e); + step20_39(31, e, a, b, c, d); + step20_39(32, d, e, a, b, c); + step20_39(33, c, d, e, a, b); + step20_39(34, b, c, d, e, a); + step20_39(35, a, b, c, d, e); + step20_39(36, e, a, b, c, d); + step20_39(37, d, e, a, b, c); + step20_39(38, c, d, e, a, b); + step20_39(39, b, c, d, e, a); + + step40_59(40, a, b, c, d, e); + step40_59(41, e, a, b, c, d); + step40_59(42, d, e, a, b, c); + step40_59(43, c, d, e, a, b); + step40_59(44, b, c, d, e, a); + step40_59(45, a, b, c, d, e); + step40_59(46, e, a, b, c, d); + step40_59(47, d, e, a, b, c); + step40_59(48, c, d, e, a, b); + step40_59(49, b, c, d, e, a); + step40_59(50, a, b, c, d, e); + step40_59(51, e, a, b, c, d); + step40_59(52, d, e, a, b, c); + step40_59(53, c, d, e, a, b); + step40_59(54, b, c, d, e, a); + step40_59(55, a, b, c, d, e); + step40_59(56, e, a, b, c, d); + step40_59(57, d, e, a, b, c); + step40_59(58, c, d, e, a, b); + step40_59(59, b, c, d, e, a); + + step60_79(60, a, b, c, d, e); + step60_79(61, e, a, b, c, d); + step60_79(62, d, e, a, b, c); + step60_79(63, c, d, e, a, b); + step60_79(64, b, c, d, e, a); + step60_79(65, a, b, c, d, e); + step60_79(66, e, a, b, c, d); + step60_79(67, d, e, a, b, c); + step60_79(68, c, d, e, a, b); + step60_79(69, b, c, d, e, a); + step60_79(70, a, b, c, d, e); + step60_79(71, e, a, b, c, d); + step60_79(72, d, e, a, b, c); + step60_79(73, c, d, e, a, b); + step60_79(74, b, c, d, e, a); + step60_79(75, a, b, c, d, e); + step60_79(76, e, a, b, c, d); + step60_79(77, d, e, a, b, c); + step60_79(78, c, d, e, a, b); + step60_79(79, b, c, d, e, a); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; +} + +void sha1_for_mh_sha1_ref(const uint8_t * input_data, uint32_t * digest, const uint32_t len) +{ + uint32_t 
i, j; + uint8_t buf[2 * SHA1_BLOCK_SIZE]; + + digest[0] = MH_SHA1_H0; + digest[1] = MH_SHA1_H1; + digest[2] = MH_SHA1_H2; + digest[3] = MH_SHA1_H3; + digest[4] = MH_SHA1_H4; + + i = len; + while (i >= SHA1_BLOCK_SIZE) { + sha1_single_for_mh_sha1_ref(input_data, digest); + input_data += SHA1_BLOCK_SIZE; + i -= SHA1_BLOCK_SIZE; + } + + memcpy(buf, input_data, i); + buf[i++] = 0x80; + for (j = i; j < ((2 * SHA1_BLOCK_SIZE) - 8); j++) + buf[j] = 0; + + if (i > SHA1_BLOCK_SIZE - 8) + i = 2 * SHA1_BLOCK_SIZE; + else + i = SHA1_BLOCK_SIZE; + + *(uint64_t *) (buf + i - 8) = to_be64((uint64_t) len * 8); + + sha1_single_for_mh_sha1_ref(buf, digest); + if (i == (2 * SHA1_BLOCK_SIZE)) + sha1_single_for_mh_sha1_ref(buf + SHA1_BLOCK_SIZE, digest); +} + +/* + * buffer to rearrange one segment data from one block. + * + * Layout of new_data: + * segment + * ------------------------- + * w0 | w1 | ... | w15 + * + */ +static inline void transform_input_single(uint32_t * new_data, uint32_t * input, + uint32_t segment) +{ + new_data[16 * segment + 0] = input[16 * 0 + segment]; + new_data[16 * segment + 1] = input[16 * 1 + segment]; + new_data[16 * segment + 2] = input[16 * 2 + segment]; + new_data[16 * segment + 3] = input[16 * 3 + segment]; + new_data[16 * segment + 4] = input[16 * 4 + segment]; + new_data[16 * segment + 5] = input[16 * 5 + segment]; + new_data[16 * segment + 6] = input[16 * 6 + segment]; + new_data[16 * segment + 7] = input[16 * 7 + segment]; + new_data[16 * segment + 8] = input[16 * 8 + segment]; + new_data[16 * segment + 9] = input[16 * 9 + segment]; + new_data[16 * segment + 10] = input[16 * 10 + segment]; + new_data[16 * segment + 11] = input[16 * 11 + segment]; + new_data[16 * segment + 12] = input[16 * 12 + segment]; + new_data[16 * segment + 13] = input[16 * 13 + segment]; + new_data[16 * segment + 14] = input[16 * 14 + segment]; + new_data[16 * segment + 15] = input[16 * 15 + segment]; +} + +// Adapt parameters to sha1_single_for_mh_sha1_ref +#define 
sha1_update_one_seg(data, digest) \ + sha1_single_for_mh_sha1_ref((const uint8_t *)(data), (uint32_t *)(digest)) + +/* + * buffer to Rearrange all segments data from one block. + * + * Layout of new_data: + * segment + * ------------------------- + * seg0: | w0 | w1 | ... | w15 + * seg1: | w0 | w1 | ... | w15 + * seg2: | w0 | w1 | ... | w15 + * .... + * seg15: | w0 | w1 | ... | w15 + * + */ +static inline void transform_input(uint32_t * new_data, uint32_t * input, uint32_t block) +{ + uint32_t *current_input = input + block * MH_SHA1_BLOCK_SIZE / 4; + + transform_input_single(new_data, current_input, 0); + transform_input_single(new_data, current_input, 1); + transform_input_single(new_data, current_input, 2); + transform_input_single(new_data, current_input, 3); + transform_input_single(new_data, current_input, 4); + transform_input_single(new_data, current_input, 5); + transform_input_single(new_data, current_input, 6); + transform_input_single(new_data, current_input, 7); + transform_input_single(new_data, current_input, 8); + transform_input_single(new_data, current_input, 9); + transform_input_single(new_data, current_input, 10); + transform_input_single(new_data, current_input, 11); + transform_input_single(new_data, current_input, 12); + transform_input_single(new_data, current_input, 13); + transform_input_single(new_data, current_input, 14); + transform_input_single(new_data, current_input, 15); + +} + +/* + * buffer to Calculate all segments' digests from one block. + * + * Layout of seg_digest: + * segment + * ------------------------- + * seg0: | H0 | H1 | ... | H4 + * seg1: | H0 | H1 | ... | H4 + * seg2: | H0 | H1 | ... | H4 + * .... + * seg15: | H0 | H1 | ... 
| H4 + * + */ +static inline void sha1_update_all_segs(uint32_t * new_data, + uint32_t(*mh_sha1_seg_digests)[SHA1_DIGEST_WORDS]) +{ + sha1_update_one_seg(&(new_data)[16 * 0], mh_sha1_seg_digests[0]); + sha1_update_one_seg(&(new_data)[16 * 1], mh_sha1_seg_digests[1]); + sha1_update_one_seg(&(new_data)[16 * 2], mh_sha1_seg_digests[2]); + sha1_update_one_seg(&(new_data)[16 * 3], mh_sha1_seg_digests[3]); + sha1_update_one_seg(&(new_data)[16 * 4], mh_sha1_seg_digests[4]); + sha1_update_one_seg(&(new_data)[16 * 5], mh_sha1_seg_digests[5]); + sha1_update_one_seg(&(new_data)[16 * 6], mh_sha1_seg_digests[6]); + sha1_update_one_seg(&(new_data)[16 * 7], mh_sha1_seg_digests[7]); + sha1_update_one_seg(&(new_data)[16 * 8], mh_sha1_seg_digests[8]); + sha1_update_one_seg(&(new_data)[16 * 9], mh_sha1_seg_digests[9]); + sha1_update_one_seg(&(new_data)[16 * 10], mh_sha1_seg_digests[10]); + sha1_update_one_seg(&(new_data)[16 * 11], mh_sha1_seg_digests[11]); + sha1_update_one_seg(&(new_data)[16 * 12], mh_sha1_seg_digests[12]); + sha1_update_one_seg(&(new_data)[16 * 13], mh_sha1_seg_digests[13]); + sha1_update_one_seg(&(new_data)[16 * 14], mh_sha1_seg_digests[14]); + sha1_update_one_seg(&(new_data)[16 * 15], mh_sha1_seg_digests[15]); +} + +void mh_sha1_block_ref(const uint8_t * input_data, uint32_t(*digests)[HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks) +{ + uint32_t i, j; + uint32_t *temp_buffer = (uint32_t *) frame_buffer; + uint32_t(*trans_digests)[SHA1_DIGEST_WORDS]; + + trans_digests = (uint32_t(*)[SHA1_DIGEST_WORDS]) digests; + + // Re-structure seg_digests from 5*16 to 16*5 + for (j = 0; j < HASH_SEGS; j++) { + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + temp_buffer[j * SHA1_DIGEST_WORDS + i] = digests[i][j]; + } + } + memcpy(trans_digests, temp_buffer, 4 * SHA1_DIGEST_WORDS * HASH_SEGS); + + // Calculate digests for all segments, leveraging sha1 API + for (i = 0; i < num_blocks; i++) { + transform_input(temp_buffer, (uint32_t *) input_data, i); + 
sha1_update_all_segs(temp_buffer, trans_digests); + } + + // Re-structure seg_digests from 16*5 to 5*16 + for (j = 0; j < HASH_SEGS; j++) { + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + temp_buffer[i * HASH_SEGS + j] = trans_digests[j][i]; + } + } + memcpy(digests, temp_buffer, 4 * SHA1_DIGEST_WORDS * HASH_SEGS); + + return; +} + +void mh_sha1_tail_ref(uint8_t * partial_buffer, uint32_t total_len, + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], uint8_t * frame_buffer, + uint32_t digests[SHA1_DIGEST_WORDS]) +{ + uint64_t partial_buffer_len, len_in_bit; + + partial_buffer_len = total_len % MH_SHA1_BLOCK_SIZE; + + // Padding the first block + partial_buffer[partial_buffer_len] = 0x80; + partial_buffer_len++; + memset(partial_buffer + partial_buffer_len, 0, + MH_SHA1_BLOCK_SIZE - partial_buffer_len); + + // Calculate the first block without total_length if padding needs 2 block + if (partial_buffer_len > (MH_SHA1_BLOCK_SIZE - 8)) { + mh_sha1_block_ref(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1); + //Padding the second block + memset(partial_buffer, 0, MH_SHA1_BLOCK_SIZE); + } + //Padding the block + len_in_bit = to_be64((uint64_t) total_len * 8); + *(uint64_t *) (partial_buffer + MH_SHA1_BLOCK_SIZE - 8) = len_in_bit; + mh_sha1_block_ref(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1); + + //Calculate multi-hash SHA1 digests (segment digests as input message) + sha1_for_mh_sha1_ref((uint8_t *) mh_sha1_segs_digests, digests, + 4 * SHA1_DIGEST_WORDS * HASH_SEGS); + + return; +} + +void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest) +{ + uint64_t total_len; + uint64_t num_blocks; + uint32_t mh_sha1_segs_digests[SHA1_DIGEST_WORDS][HASH_SEGS]; + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE]; + uint8_t partial_block_buffer[MH_SHA1_BLOCK_SIZE * 2]; + uint32_t mh_sha1_hash_dword[SHA1_DIGEST_WORDS]; + uint32_t i; + const uint8_t *input_data = (const uint8_t *)buffer; + + /* Initialize digests of all segments */ + for (i = 0; i < HASH_SEGS; 
i++) { + mh_sha1_segs_digests[0][i] = MH_SHA1_H0; + mh_sha1_segs_digests[1][i] = MH_SHA1_H1; + mh_sha1_segs_digests[2][i] = MH_SHA1_H2; + mh_sha1_segs_digests[3][i] = MH_SHA1_H3; + mh_sha1_segs_digests[4][i] = MH_SHA1_H4; + } + + total_len = len; + + // Calculate blocks + num_blocks = len / MH_SHA1_BLOCK_SIZE; + if (num_blocks > 0) { + //do num_blocks process + mh_sha1_block_ref(input_data, mh_sha1_segs_digests, frame_buffer, num_blocks); + len -= num_blocks * MH_SHA1_BLOCK_SIZE; + input_data += num_blocks * MH_SHA1_BLOCK_SIZE; + } + // Store the partial block + if (len != 0) { + memcpy(partial_block_buffer, input_data, len); + } + + /* Finalize */ + mh_sha1_tail_ref(partial_block_buffer, total_len, mh_sha1_segs_digests, + frame_buffer, mh_sha1_hash_dword); + + // Output the digests of mh_sha1 + if (mh_sha1_digest != NULL) { + mh_sha1_digest[0] = mh_sha1_hash_dword[0]; + mh_sha1_digest[1] = mh_sha1_hash_dword[1]; + mh_sha1_digest[2] = mh_sha1_hash_dword[2]; + mh_sha1_digest[3] = mh_sha1_hash_dword[3]; + mh_sha1_digest[4] = mh_sha1_hash_dword[4]; + } + + return; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c new file mode 100644 index 000000000..792c4452b --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c @@ -0,0 +1,217 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
 * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

/*
 * mh_sha1_test.c: single-shot functional test for the selected
 * mh_sha1_update/mh_sha1_finalize variant against the mh_sha1_ref()
 * reference: random data, all message sizes 0..TEST_LEN, varied buffer
 * offsets, and end-of-buffer ("efence") placements.
 */

#include <stdio.h>		/* NOTE(review): <...> header names were lost in patch extraction; */
#include <stdlib.h>		/* stdio.h/stdlib.h inferred from printf/malloc/rand use -- confirm upstream */
#include "mh_sha1.h"

#define TEST_LEN  16*1024	/* size of the random test buffer */
#define TEST_SIZE 8*1024	/* span used by the efence sweep */
#define TEST_MEM  TEST_LEN
#ifndef TEST_SEED
# define TEST_SEED 0x1234	/* deterministic srand() seed for reproducible runs */
#endif

#define str(s) #s
#define xstr(s) str(s)		/* expand-then-stringize, for printing the function name */

/* Token-paste the variant suffix (e.g. _avx512) onto the API names. */
#define _FUNC_TOKEN(func, type) func##type
#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)

#ifndef MH_SHA1_FUNC_TYPE
#define MH_SHA1_FUNC_TYPE	/* empty suffix selects the multibinary dispatcher */
#endif

#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE)
#define TEST_FINAL_FUNCTION  FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE)

/* NOTE(review): failure here exits main() with 1 while the digest-mismatch
 * paths below return -1 -- inconsistent process exit codes; upstream style. */
#define CHECK_RETURN(state) do{ \
	if((state) != MH_SHA1_CTX_ERROR_NONE){ \
		printf("The mh_sha1 function is failed.\n"); \
		return 1; \
	} \
	}while(0)

extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);
#define MH_SHA1_REF mh_sha1_ref

// Generates pseudo-random data
void rand_buffer(uint8_t * buf, long buffer_size)
{
	long i;
	for (i = 0; i < buffer_size; i++)
		buf[i] = rand();
}

/* Hex-dump len bytes, 20 per row (one SHA-1 digest per row).
 * " %2x" space-pads single-digit bytes; matches upstream output format. */
void dump(char *buf, int len)
{
	int i;
	for (i = 0; i < len;) {
		printf(" %2x", 0xff & buf[i++]);
		if (i % 20 == 0)
			printf("\n");
	}
	if (i % 20 != 0)
		printf("\n");
}

/* Word-wise digest comparison; returns the number of mismatched words
 * (0 == digests equal) and dumps both digests on mismatch. */
int compare_digests(uint32_t hash_ref[SHA1_DIGEST_WORDS],
		    uint32_t hash_test[SHA1_DIGEST_WORDS])
{
	int i;
	int mh_sha1_fail = 0;

	for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
		if (hash_test[i] != hash_ref[i])
			mh_sha1_fail++;
	}

	if (mh_sha1_fail) {
		printf("mh_sha1 fail test\n");
		printf("ref: ");
		dump((char *)hash_ref, 20);
		printf("test: ");
		dump((char *)hash_test, 20);
	}

	return mh_sha1_fail;
}

/* Test driver: each scenario hashes with the reference and the variant
 * under test, then compares digests. Returns 0 on full pass.
 * buff/update_ctx are intentionally not freed -- process exit reclaims them. */
int main(int argc, char *argv[])
{
	int fail = 0;
	uint32_t hash_test[SHA1_DIGEST_WORDS], hash_ref[SHA1_DIGEST_WORDS];
	uint8_t *buff = NULL;
	int size, offset;
	struct mh_sha1_ctx *update_ctx = NULL;

	printf(xstr(TEST_UPDATE_FUNCTION) "_test:\n");

	srand(TEST_SEED);

	buff = malloc(TEST_LEN);
	update_ctx = malloc(sizeof(*update_ctx));

	if (buff == NULL || update_ctx == NULL) {
		printf("malloc failed test aborted\n");
		return -1;
	}
	// Rand test1
	rand_buffer(buff, TEST_LEN);

	MH_SHA1_REF(buff, TEST_LEN, hash_ref);
	CHECK_RETURN(mh_sha1_init(update_ctx));
	CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
	CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));

	fail = compare_digests(hash_ref, hash_test);

	if (fail) {
		printf("fail rand1 test\n");
		return -1;
	} else
		putchar('.');

	// Test various size messages
	for (size = TEST_LEN; size >= 0; size--) {

		// Fill with rand data
		rand_buffer(buff, size);

		MH_SHA1_REF(buff, size, hash_ref);
		CHECK_RETURN(mh_sha1_init(update_ctx));
		CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size));
		CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));

		fail = compare_digests(hash_ref, hash_test);

		if (fail) {
			printf("Fail size=%d\n", size);
			return -1;
		}

		if ((size & 0xff) == 0) {	/* progress dot every 256 sizes */
			putchar('.');
			fflush(0);
		}
	}

	// Test various buffer offsets and sizes
	printf("offset tests");
	for (size = TEST_LEN - 256; size > 256; size -= 11) {
		for (offset = 0; offset < 256; offset++) {
			MH_SHA1_REF(buff + offset, size, hash_ref);

			CHECK_RETURN(mh_sha1_init(update_ctx));
			CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
			CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));

			fail = compare_digests(hash_ref, hash_test);

			if (fail) {
				printf("Fail size=%d\n", size);
				return -1;
			}

		}
		if ((size & 0xf) == 0) {
			putchar('.');
			fflush(0);
		}
	}

	// Run efence tests: place the message flush against the end of the
	// buffer so any read past [buff+offset, buff+TEST_LEN) is detectable.
	printf("efence tests");
	for (size = TEST_SIZE; size > 0; size--) {
		offset = TEST_LEN - size;

		MH_SHA1_REF(buff + offset, size, hash_ref);

		CHECK_RETURN(mh_sha1_init(update_ctx));
		CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size));
		CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));

		fail = compare_digests(hash_ref, hash_test);

		if (fail) {
			printf("Fail size=%d\n", size);
			return -1;
		}

		if ((size & 0xf) == 0) {
			putchar('.');
			fflush(0);
		}
	}

	printf(xstr(TEST_UPDATE_FUNCTION) "_test:");
	printf(" %s\n", fail == 0 ? "Pass" : "Fail");

	return fail;
}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c
new file mode 100644
index 000000000..4af220299
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c
@@ -0,0 +1,110 @@
/**********************************************************************
  Copyright(c) 2011-2016 Intel Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/* + * mh_sha1_update_base.c contains the prototype of mh_sha1_update_XXX. + * Default definitions are base type which generates mh_sha1_update_base. + * Other types are generated through different predefined macros by mh_sha1.c. 
 */
#ifndef MH_SHA1_UPDATE_FUNCTION
#include "mh_sha1_internal.h"
#include <string.h>	/* NOTE(review): header name lost in patch extraction;
			 * memcpy/memset below require string.h -- confirm upstream */

#define MH_SHA1_UPDATE_FUNCTION	mh_sha1_update_base
#define MH_SHA1_BLOCK_FUNCTION	mh_sha1_block_base
#define MH_SHA1_UPDATE_SLVER
#endif

/*
 * Streaming update for the multi-hash SHA1 context.
 *
 * Input is accumulated in ctx->partial_block_buffer until a whole
 * MH_SHA1_BLOCK_SIZE block exists; complete blocks are handed to
 * MH_SHA1_BLOCK_FUNCTION; any trailing remainder is stashed back in the
 * partial buffer for the next call. ctx->total_length is advanced by len
 * up front, so total_length % MH_SHA1_BLOCK_SIZE always equals the number
 * of bytes currently queued in the partial buffer *before* this call.
 *
 * Returns MH_SHA1_CTX_ERROR_NONE on success,
 *         MH_SHA1_CTX_ERROR_NULL if ctx is NULL.
 */
int MH_SHA1_UPDATE_FUNCTION(struct mh_sha1_ctx *ctx, const void *buffer, uint32_t len)
{

	uint8_t *partial_block_buffer;
	uint64_t partial_block_len;
	uint64_t num_blocks;
	uint32_t(*mh_sha1_segs_digests)[HASH_SEGS];
	uint8_t *aligned_frame_buffer;
	const uint8_t *input_data = (const uint8_t *)buffer;

	if (ctx == NULL)
		return MH_SHA1_CTX_ERROR_NULL;

	if (len == 0)
		return MH_SHA1_CTX_ERROR_NONE;

	/* Bytes left over from previous calls (tail of total_length). */
	partial_block_len = ctx->total_length % MH_SHA1_BLOCK_SIZE;
	partial_block_buffer = ctx->partial_block_buffer;
	/* Scratch frame handed to the block kernels must be 64-byte aligned. */
	aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer);
	mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests;

	ctx->total_length += len;
	// No enough input data for mh_sha1 calculation
	if (len + partial_block_len < MH_SHA1_BLOCK_SIZE) {
		memcpy(partial_block_buffer + partial_block_len, input_data, len);
		return MH_SHA1_CTX_ERROR_NONE;
	}
	// mh_sha1 calculation for the previous partial block
	if (partial_block_len != 0) {
		/* Top up the partial block to a full one and consume it. */
		memcpy(partial_block_buffer + partial_block_len, input_data,
		       MH_SHA1_BLOCK_SIZE - partial_block_len);
		//do one_block process
		MH_SHA1_BLOCK_FUNCTION(partial_block_buffer, mh_sha1_segs_digests,
				       aligned_frame_buffer, 1);
		input_data += MH_SHA1_BLOCK_SIZE - partial_block_len;
		len -= MH_SHA1_BLOCK_SIZE - partial_block_len;
		memset(partial_block_buffer, 0, MH_SHA1_BLOCK_SIZE);
	}
	// Calculate mh_sha1 for the current blocks
	num_blocks = len / MH_SHA1_BLOCK_SIZE;
	if (num_blocks > 0) {
		//do num_blocks process
		MH_SHA1_BLOCK_FUNCTION(input_data, mh_sha1_segs_digests, aligned_frame_buffer,
				       num_blocks);
		len -= num_blocks * MH_SHA1_BLOCK_SIZE;
		input_data += num_blocks * MH_SHA1_BLOCK_SIZE;
	}
	// Store the partial block
	if (len != 0) {
		memcpy(partial_block_buffer, input_data, len);
	}

	return MH_SHA1_CTX_ERROR_NONE;

}

#ifdef MH_SHA1_UPDATE_SLVER
/* slver: ISA-L per-function version marker embedded in the object file. */
struct slver {
	uint16_t snum;
	uint8_t ver;
	uint8_t core;
};

 // Version info
struct slver mh_sha1_update_base_slver_0000027a;
struct slver mh_sha1_update_base_slver = { 0x027a, 0x00, 0x00 };
#endif
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c
new file mode 100644
index 000000000..942dfd09f
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c
@@ -0,0 +1,240 @@
/**********************************************************************
  Copyright(c) 2011-2016 Intel Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

/*
 * mh_sha1_update_test.c: multi-call update test for the selected
 * mh_sha1_update/mh_sha1_finalize variant: the same data hashed in two
 * chunks, in N equal chunks, and with the context placed at every byte
 * offset within an over-aligned allocation. Results are checked against
 * mh_sha1_ref().
 */

#include <stdio.h>		/* NOTE(review): <...> header names were lost in patch extraction; */
#include <stdlib.h>		/* stdio.h/stdlib.h inferred from printf/malloc/rand use -- confirm upstream */
#include "mh_sha1.h"

#define TEST_LEN  16*1024	/* size of the random test buffer */
#define TEST_SIZE 8*1024
#define TEST_MEM  TEST_LEN
#ifndef TEST_SEED
# define TEST_SEED 0x1234	/* deterministic srand() seed for reproducible runs */
#endif

#define str(s) #s
#define xstr(s) str(s)		/* expand-then-stringize, for printing the function name */

/* Token-paste the variant suffix (e.g. _avx512) onto the API names. */
#define _FUNC_TOKEN(func, type) func##type
#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type)

#ifndef MH_SHA1_FUNC_TYPE
#define MH_SHA1_FUNC_TYPE	/* empty suffix selects the multibinary dispatcher */
#endif

#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE)
#define TEST_FINAL_FUNCTION  FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE)

/* NOTE(review): failure here exits main() with 1 while the digest-mismatch
 * paths below return -1 -- inconsistent process exit codes; upstream style. */
#define CHECK_RETURN(state) do{ \
	if((state) != MH_SHA1_CTX_ERROR_NONE){ \
		printf("The mh_sha1 function is failed.\n"); \
		return 1; \
	} \
	}while(0)

extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest);

// Generates pseudo-random data
void rand_buffer(uint8_t * buf, long buffer_size)
{
	long i;
	for (i = 0; i < buffer_size; i++)
		buf[i] = rand();
}

/* Hex-dump len bytes, 20 per row (one SHA-1 digest per row). */
void dump(char *buf, int len)
{
	int i;
	for (i = 0; i < len;) {
		printf(" %2x", 0xff & buf[i++]);
		if (i % 20 == 0)
			printf("\n");
	}
	if (i % 20 != 0)
		printf("\n");
}

/* Word-wise digest comparison; returns the number of mismatched words
 * (0 == digests equal) and dumps both digests on mismatch. */
int compare_digests(uint32_t hash_ref[SHA1_DIGEST_WORDS],
		    uint32_t hash_test[SHA1_DIGEST_WORDS])
{
	int i;
	int mh_sha1_fail = 0;

	for (i = 0; i < SHA1_DIGEST_WORDS; i++) {
		if (hash_test[i] != hash_ref[i])
			mh_sha1_fail++;
	}

	if (mh_sha1_fail) {
		printf("mh_sha1 fail test\n");
		printf("ref: ");
		dump((char *)hash_ref, 20);
		printf("test: ");
		dump((char *)hash_test, 20);
	}

	return mh_sha1_fail;
}

/* Test driver: verifies that splitting the input across multiple update
 * calls, and misaligning the context structure itself, never changes the
 * digest. Returns 0 on full pass. buff/mem_addr are not freed at exit. */
int main(int argc, char *argv[])
{
	int fail = 0, i;
	uint32_t hash_test[SHA1_DIGEST_WORDS], hash_ref[SHA1_DIGEST_WORDS];
	uint8_t *buff = NULL;
	int update_count;
	int size1, size2, offset, addr_offset;
	struct mh_sha1_ctx *update_ctx = NULL;
	uint8_t *mem_addr = NULL;

	printf(xstr(TEST_UPDATE_FUNCTION) "_test:");

	srand(TEST_SEED);

	buff = malloc(TEST_LEN);
	update_ctx = malloc(sizeof(*update_ctx));

	if (buff == NULL || update_ctx == NULL) {
		printf("malloc failed test aborted\n");
		return -1;
	}
	// Rand test1
	rand_buffer(buff, TEST_LEN);

	mh_sha1_ref(buff, TEST_LEN, hash_ref);

	CHECK_RETURN(mh_sha1_init(update_ctx));
	CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
	CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));

	fail = compare_digests(hash_ref, hash_test);

	if (fail) {
		printf("fail rand1 test\n");
		return -1;
	} else
		putchar('.');

	// Test various size messages by update twice.
	printf("\n various size messages by update twice tests");
	for (size1 = TEST_LEN; size1 >= 0; size1--) {

		// Fill with rand data
		rand_buffer(buff, TEST_LEN);

		mh_sha1_ref(buff, TEST_LEN, hash_ref);

		// subsequent update
		size2 = TEST_LEN - size1;	// size2 is different with the former
		CHECK_RETURN(mh_sha1_init(update_ctx));
		CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1));
		CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2));
		CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));

		fail = compare_digests(hash_ref, hash_test);

		if (fail) {
			printf("Fail size1=%d\n", size1);
			return -1;
		}

		if ((size2 & 0xff) == 0) {	/* progress dot every 256 sizes */
			putchar('.');
			fflush(0);
		}
	}

	// Test various update count
	printf("\n various update count tests");
	for (update_count = 1; update_count <= TEST_LEN; update_count++) {

		// Fill with rand data
		rand_buffer(buff, TEST_LEN);

		mh_sha1_ref(buff, TEST_LEN, hash_ref);

		// subsequent update
		size1 = TEST_LEN / update_count;
		size2 = TEST_LEN - size1 * (update_count - 1);	// size2 is different with the former

		CHECK_RETURN(mh_sha1_init(update_ctx));
		for (i = 1, offset = 0; i < update_count; i++) {
			CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1));
			offset += size1;
		}
		CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2));
		CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));

		fail = compare_digests(hash_ref, hash_test);

		if (fail) {
			printf("Fail size1=%d\n", size1);
			return -1;
		}

		if ((size2 & 0xff) == 0) {
			putchar('.');
			fflush(0);
		}
	}

	// test various start address of ctx.
	printf("\n various start address of ctx test");
	free(update_ctx);
	/* NOTE(review): this malloc result is used below without a NULL
	 * check -- would dereference NULL on allocation failure. */
	mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10);
	for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) {

		// Fill with rand data
		rand_buffer(buff, TEST_LEN);

		mh_sha1_ref(buff, TEST_LEN, hash_ref);

		// a unaligned offset
		update_ctx = (struct mh_sha1_ctx *)(mem_addr + addr_offset);
		CHECK_RETURN(mh_sha1_init(update_ctx));
		CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN));
		CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test));

		fail = compare_digests(hash_ref, hash_test);

		if (fail) {
			printf("Fail addr_offset=%d\n", addr_offset);
			return -1;
		}

		if ((addr_offset & 0xf) == 0) {
			putchar('.');
			fflush(0);
		}
	}

	printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail");

	return fail;

}
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c
new file mode 100644
index 000000000..224977e6c
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c
@@ -0,0 +1,204 @@
/**********************************************************************
  Copyright(c) 2011-2016 Intel Corporation All rights reserved.

  Redistribution and use in source and binary forms, with or without
  modification, are permitted provided that the following conditions
  are met:
    * Redistributions of source code must retain the above copyright
      notice, this list of conditions and the following disclaimer.
    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in
      the documentation and/or other materials provided with the
      distribution.
    * Neither the name of Intel Corporation nor the names of its
      contributors may be used to endorse or promote products derived
      from this software without specific prior written permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
**********************************************************************/

#include "mh_sha1_internal.h"
#include <string.h>	/* NOTE(review): header name lost in patch extraction;
			 * memcpy below requires string.h -- confirm upstream */

////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// Reference SHA1 Functions for mh_sha1
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////

/* NOTE(review): presumably blocks GCC >= 11 inter-procedural optimization
 * of this reference routine (noipa) to work around a miscompile or
 * over-optimization -- confirm the upstream rationale. */
#if (__GNUC__ >= 11)
# define OPT_FIX __attribute__ ((noipa))
#else
# define OPT_FIX
#endif

/* 16-word circular message-schedule window: W(t) lives in w[t mod 16]. */
#define W(x) w[(x) & 15]

/* SHA-1 round macros (FIPS 180-4): one macro per round group, each with its
 * own f-function (F1..F4, from mh_sha1_internal.h) and round constant.
 * Rounds 0-15 load big-endian message words; 16+ extend the schedule with
 * rol1(W[t-3]^W[t-8]^W[t-14]^W[t-16]). Each macro updates e in place and
 * rotates b; the caller permutes the a..e argument order per round. */
#define step00_19(i,a,b,c,d,e) \
	if (i>15) W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
	else W(i) = to_be32(ww[i]); \
	e += rol32(a,5) + F1(b,c,d) + 0x5A827999 + W(i); \
	b = rol32(b,30)

#define step20_39(i,a,b,c,d,e) \
	W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
	e += rol32(a,5) + F2(b,c,d) + 0x6ED9EBA1 + W(i); \
	b = rol32(b,30)

#define step40_59(i,a,b,c,d,e) \
	W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
	e += rol32(a,5) + F3(b,c,d) + 0x8F1BBCDC + W(i); \
	b = rol32(b,30)

#define step60_79(i,a,b,c,d,e) \
	W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \
	e += rol32(a,5) + F4(b,c,d) + 0xCA62C1D6 + W(i); \
	b = rol32(b,30)

/* Compress one 64-byte block into digest[5] (fully unrolled 80 rounds).
 * NOTE(review): casting the byte pointer to uint32_t* violates strict
 * aliasing and assumes 4-byte alignment of `data`; upstream relies on the
 * compiler tolerating this (see OPT_FIX above) -- a memcpy-based load would
 * be the portable form. */
static void OPT_FIX sha1_single_for_mh_sha1(const uint8_t * data, uint32_t digest[])
{
	uint32_t a, b, c, d, e;
	uint32_t w[16] = { 0 };
	uint32_t *ww = (uint32_t *) data;

	a = digest[0];
	b = digest[1];
	c = digest[2];
	d = digest[3];
	e = digest[4];

	step00_19(0, a, b, c, d, e);
	step00_19(1, e, a, b, c, d);
	step00_19(2, d, e, a, b, c);
	step00_19(3, c, d, e, a, b);
	step00_19(4, b, c, d, e, a);
	step00_19(5, a, b, c, d, e);
	step00_19(6, e, a, b, c, d);
	step00_19(7, d, e, a, b, c);
	step00_19(8, c, d, e, a, b);
	step00_19(9, b, c, d, e, a);
	step00_19(10, a, b, c, d, e);
	step00_19(11, e, a, b, c, d);
	step00_19(12, d, e, a, b, c);
	step00_19(13, c, d, e, a, b);
	step00_19(14, b, c, d, e, a);
	step00_19(15, a, b, c, d, e);
	step00_19(16, e, a, b, c, d);
	step00_19(17, d, e, a, b, c);
	step00_19(18, c, d, e, a, b);
	step00_19(19, b, c, d, e, a);

	step20_39(20, a, b, c, d, e);
	step20_39(21, e, a, b, c, d);
	step20_39(22, d, e, a, b, c);
	step20_39(23, c, d, e, a, b);
	step20_39(24, b, c, d, e, a);
	step20_39(25, a, b, c, d, e);
	step20_39(26, e, a, b, c, d);
	step20_39(27, d, e, a, b, c);
	step20_39(28, c, d, e, a, b);
	step20_39(29, b, c, d, e, a);
	step20_39(30, a, b, c, d, e);
	step20_39(31, e, a, b, c, d);
	step20_39(32, d, e, a, b, c);
	step20_39(33, c, d, e, a, b);
	step20_39(34, b, c, d, e, a);
	step20_39(35, a, b, c, d, e);
	step20_39(36, e, a, b, c, d);
	step20_39(37, d, e, a, b, c);
	step20_39(38, c, d, e, a, b);
	step20_39(39, b, c, d, e, a);

	step40_59(40, a, b, c, d, e);
	step40_59(41, e, a, b, c, d);
	step40_59(42, d, e, a, b, c);
	step40_59(43, c, d, e, a, b);
	step40_59(44, b, c, d, e, a);
	step40_59(45, a, b, c, d, e);
	step40_59(46, e, a, b, c, d);
	step40_59(47, d, e, a, b, c);
	step40_59(48, c, d, e, a, b);
	step40_59(49, b, c, d, e, a);
	step40_59(50, a, b, c, d, e);
	step40_59(51, e, a, b, c, d);
	step40_59(52, d, e, a, b, c);
	step40_59(53, c, d, e, a, b);
	step40_59(54, b, c, d, e, a);
	step40_59(55, a, b, c, d, e);
	step40_59(56, e, a, b, c, d);
	step40_59(57, d, e, a, b, c);
	step40_59(58, c, d, e, a, b);
	step40_59(59, b, c, d, e, a);

	step60_79(60, a, b, c, d, e);
	step60_79(61, e, a, b, c, d);
	step60_79(62, d, e, a, b, c);
	step60_79(63, c, d, e, a, b);
	step60_79(64, b, c, d, e, a);
	step60_79(65, a, b, c, d, e);
	step60_79(66, e, a, b, c, d);
	step60_79(67, d, e, a, b, c);
	step60_79(68, c, d, e, a, b);
	step60_79(69, b, c, d, e, a);
	step60_79(70, a, b, c, d, e);
	step60_79(71, e, a, b, c, d);
	step60_79(72, d, e, a, b, c);
	step60_79(73, c, d, e, a, b);
	step60_79(74, b, c, d, e, a);
	step60_79(75, a, b, c, d, e);
	step60_79(76, e, a, b, c, d);
	step60_79(77, d, e, a, b, c);
	step60_79(78, c, d, e, a, b);
	step60_79(79, b, c, d, e, a);

	/* Davies-Meyer feed-forward: add this block's state into the digest. */
	digest[0] += a;
	digest[1] += b;
	digest[2] += c;
	digest[3] += d;
	digest[4] += e;
}

/* One-shot SHA-1 over len bytes: initializes the digest from the H0..H4
 * constants, compresses whole blocks, then applies FIPS 180-4 padding
 * (0x80, zero fill, 64-bit big-endian bit length) -- one or two final
 * blocks depending on whether the tail leaves room for the length field. */
void sha1_for_mh_sha1(const uint8_t * input_data, uint32_t * digest, const uint32_t len)
{
	uint32_t i, j;
	uint8_t buf[2 * SHA1_BLOCK_SIZE];

	digest[0] = MH_SHA1_H0;
	digest[1] = MH_SHA1_H1;
	digest[2] = MH_SHA1_H2;
	digest[3] = MH_SHA1_H3;
	digest[4] = MH_SHA1_H4;

	i = len;
	while (i >= SHA1_BLOCK_SIZE) {
		sha1_single_for_mh_sha1(input_data, digest);
		input_data += SHA1_BLOCK_SIZE;
		i -= SHA1_BLOCK_SIZE;
	}

	/* i < SHA1_BLOCK_SIZE bytes of tail: copy, append the 0x80 marker,
	 * zero the rest of the padding area. */
	memcpy(buf, input_data, i);
	buf[i++] = 0x80;
	for (j = i; j < ((2 * SHA1_BLOCK_SIZE) - 8); j++)
		buf[j] = 0;

	/* If the tail + 0x80 overflows the room for the 8-byte length, pad
	 * out to two blocks; otherwise one block suffices. */
	if (i > SHA1_BLOCK_SIZE - 8)
		i = 2 * SHA1_BLOCK_SIZE;
	else
		i = SHA1_BLOCK_SIZE;

	/* NOTE(review): unaligned/aliasing store of the big-endian bit count;
	 * same caveat as the uint32_t* cast above. */
	*(uint64_t *) (buf + i - 8) = to_be64((uint64_t) len * 8);

	sha1_single_for_mh_sha1(buf, digest);
	if (i == (2 * SHA1_BLOCK_SIZE))
		sha1_single_for_mh_sha1(buf + SHA1_BLOCK_SIZE, digest);
}
-- 
cgit v1.2.3