diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/crypto/isa-l/isa-l_crypto/mh_sha1 | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/mh_sha1')
17 files changed, 4495 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am b/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am new file mode 100644 index 00000000..1ec5f35c --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/Makefile.am @@ -0,0 +1,67 @@ +######################################################################## +# Copyright(c) 2011-2016 Intel Corporation All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +######################################################################## + +lsrc_sha1 = mh_sha1/sha1_for_mh_sha1.c + +lsrc_mh_sha1 = mh_sha1/mh_sha1.c \ + mh_sha1/mh_sha1_block_sse.asm \ + mh_sha1/mh_sha1_block_avx.asm \ + mh_sha1/mh_sha1_block_avx2.asm \ + mh_sha1/mh_sha1_multibinary.asm \ + mh_sha1/mh_sha1_finalize_base.c \ + mh_sha1/mh_sha1_update_base.c \ + mh_sha1/mh_sha1_block_base.c + +lsrc_mh_sha1 += mh_sha1/mh_sha1_block_avx512.asm \ + mh_sha1/mh_sha1_avx512.c + +lsrc += $(lsrc_sha1) \ + $(lsrc_mh_sha1) + +other_src += mh_sha1/mh_sha1_ref.c \ + include/reg_sizes.asm \ + include/multibinary.asm \ + include/test.h \ + mh_sha1/mh_sha1_internal.h + +extern_hdrs += include/mh_sha1.h + +check_tests += mh_sha1/mh_sha1_test +unit_tests += mh_sha1/mh_sha1_update_test + +perf_tests += mh_sha1/mh_sha1_perf + + +mh_sha1_test: mh_sha1_ref.o +mh_sha1_mh_sha1_test_LDADD = mh_sha1/mh_sha1_ref.lo libisal_crypto.la + +mh_sha1_update_test: mh_sha1_ref.o +mh_sha1_mh_sha1_update_test_LDADD = mh_sha1/mh_sha1_ref.lo libisal_crypto.la + +mh_sha1_mh_sha1_perf_LDADD = libisal_crypto.la diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c new file mode 100644 index 00000000..6cb458fc --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1.c @@ -0,0 +1,137 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <string.h> +#include "mh_sha1_internal.h" + +int mh_sha1_init(struct mh_sha1_ctx *ctx) +{ + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint32_t i; + + if (ctx == NULL) + return MH_SHA1_CTX_ERROR_NULL; + + memset(ctx, 0, sizeof(*ctx)); + + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + for (i = 0; i < HASH_SEGS; i++) { + mh_sha1_segs_digests[0][i] = MH_SHA1_H0; + mh_sha1_segs_digests[1][i] = MH_SHA1_H1; + mh_sha1_segs_digests[2][i] = MH_SHA1_H2; + mh_sha1_segs_digests[3][i] = MH_SHA1_H3; + mh_sha1_segs_digests[4][i] = MH_SHA1_H4; + } + + return MH_SHA1_CTX_ERROR_NONE; +} + +/***************mh_sha1_update***********/ +// mh_sha1_update_sse.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_sse +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_sse +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +// mh_sha1_update_avx.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +// mh_sha1_update_avx2.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx2 +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx2 +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************mh_sha1_finalize AND mh_sha1_tail***********/ +// mh_sha1_tail is used to calculate the last incomplete src data block +// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail + +// mh_sha1_finalize_sse.c and mh_sha1_tail_sse.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_sse +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_sse +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_sse +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +// mh_sha1_finalize_avx.c and mh_sha1_tail_avx.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +// mh_sha1_finalize_avx2.c and mh_sha1_tail_avx2.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx2 +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx2 +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx2 +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************version info***********/ + +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; +// Version info +struct slver mh_sha1_init_slver_00000271; +struct slver mh_sha1_init_slver = { 0x0271, 0x00, 0x00 }; + +// mh_sha1_update version info +struct slver mh_sha1_update_sse_slver_00000274; +struct slver mh_sha1_update_sse_slver = { 0x0274, 0x00, 0x00 }; + +struct slver mh_sha1_update_avx_slver_02000276; +struct slver mh_sha1_update_avx_slver = { 0x0276, 0x00, 0x02 }; + +struct slver mh_sha1_update_avx2_slver_04000278; +struct slver mh_sha1_update_avx2_slver = { 0x0278, 0x00, 0x04 }; + +// mh_sha1_finalize version info +struct slver mh_sha1_finalize_sse_slver_00000275; +struct slver mh_sha1_finalize_sse_slver = { 0x0275, 0x00, 0x00 }; + +struct slver mh_sha1_finalize_avx_slver_02000277; +struct slver mh_sha1_finalize_avx_slver = { 0x0277, 0x00, 0x02 }; + +struct slver mh_sha1_finalize_avx2_slver_04000279; +struct slver mh_sha1_finalize_avx2_slver = { 0x0279, 0x00, 0x04 }; diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c new file mode 100644 index 00000000..15f0ae1e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_avx512.c @@ -0,0 +1,71 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <string.h> +#include "mh_sha1_internal.h" + +#ifdef HAVE_AS_KNOWS_AVX512 + +/***************mh_sha1_update***********/ +// mh_sha1_update_avx512.c +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_avx512 +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx512 +#include "mh_sha1_update_base.c" +#undef MH_SHA1_UPDATE_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************mh_sha1_finalize AND mh_sha1_tail***********/ +// mh_sha1_tail is used to calculate the last incomplete src data block +// mh_sha1_finalize is a mh_sha1_ctx wrapper of mh_sha1_tail +// mh_sha1_finalize_avx512.c and mh_sha1_tail_avx512.c +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_avx512 +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_avx512 +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_avx512 +#include "mh_sha1_finalize_base.c" +#undef MH_SHA1_FINALIZE_FUNCTION +#undef MH_SHA1_TAIL_FUNCTION +#undef MH_SHA1_BLOCK_FUNCTION + +/***************version info***********/ +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// mh_sha1_update version info +struct slver mh_sha1_update_avx512_slver_0600027c; +struct slver mh_sha1_update_avx512_slver = { 0x027c, 0x00, 0x06 }; + +// mh_sha1_finalize version info +struct slver mh_sha1_finalize_avx512_slver_0600027d; +struct slver mh_sha1_finalize_avx512_slver = { 0x027d, 0x00, 0x06 }; + +#endif // HAVE_AS_KNOWS_AVX512 + diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm new file mode 100644 index 00000000..8b603199 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx.asm @@ -0,0 +1,502 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX +;; + +%include "reg_sizes.asm" +default rel + +;; Magic functions defined in FIPS 180-1 +;; +; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF, %%regC,%%regD + vpand %%regF, %%regF,%%regB + vpxor %%regF, %%regF,%%regD +%endmacro + +; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF,%%regD,%%regC + vpxor %%regF,%%regF,%%regB +%endmacro + +; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpor %%regF,%%regB,%%regC + vpand %%regT,%%regB,%%regC + vpand %%regF,%%regF,%%regD + vpor %%regF,%%regF,%%regT +%endmacro + +; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpsrld %%tmp, %%reg, (32-(%%imm)) + vpslld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; non-destructive +; PROLD_nd reg, imm, tmp, src +%macro PROLD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpsrld %%tmp, %%src, (32-(%%imm)) + vpslld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + vpaddd %%regE, %%regE,[%%data + (%%memW * 16)] + PROLD_nd %%regT,5, %%regF,%%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE, %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 16] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 16] + + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define A xmm0 +%define B xmm1 +%define C xmm2 +%define D xmm3 +%define E xmm4 +%define F xmm5 ; tmp +%define G xmm6 ; tmp + +%define TMP G +%define FUN F +%define K xmm7 + +%define AA xmm8 +%define BB xmm9 +%define CC xmm10 +%define DD xmm11 +%define EE xmm12 + +%define T0 xmm6 +%define T1 xmm7 +%define T2 xmm8 +%define T3 xmm9 +%define T4 xmm10 +%define T5 xmm11 + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%define W14 xmm13 +%define W15 xmm14 +%define W16 xmm15 + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep | + +align 32 + +;void mh_sha1_block_avx(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. +; arg 3 number of 1KB blocks +; +global mh_sha1_block_avx:function internal +func(mh_sha1_block_avx) + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 16 Bytes needed by avx + and rsp, ~0x0F + + %assign I 0 ; copy segs_digests into stack + %rep 5 + VMOVPS A, [mh_digests_p + I*64 + 16*0] + VMOVPS B, [mh_digests_p + I*64 + 16*1] + VMOVPS C, [mh_digests_p + I*64 + 16*2] + VMOVPS D, [mh_digests_p + I*64 + 16*3] + + vmovdqa [rsp + I*64 + 16*0], A + vmovdqa [rsp + I*64 + 16*1], B + vmovdqa [rsp + I*64 + 16*2], C + vmovdqa [rsp + I*64 + 16*3], D + %assign I (I+1) + %endrep + + +.block_loop: + ;transform to big-endian data and store on aligned_frame + vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4 + %assign I 0 + %rep 16 + VMOVPS T0,[mh_in_p + I*64+0*16] + VMOVPS T1,[mh_in_p + I*64+1*16] + VMOVPS T2,[mh_in_p + I*64+2*16] + VMOVPS T3,[mh_in_p + I*64+3*16] + + vpshufb T0, F + vmovdqa [mh_data_p +(I)*16 +0*256],T0 + vpshufb T1, F + vmovdqa [mh_data_p +(I)*16 +1*256],T1 + vpshufb T2, F + vmovdqa [mh_data_p +(I)*16 +2*256],T2 + vpshufb T3, F + vmovdqa [mh_data_p +(I)*16 +3*256],T3 + %assign I (I+1) + %endrep + + mov mh_segs, 0 ;start from the first 4 segments + mov pref, 1024 ;avoid prefetch repeadtedly + .segs_loop: + ;; Initialize digests + vmovdqa A, [rsp + 0*64 + mh_segs] + vmovdqa B, [rsp + 1*64 + mh_segs] + vmovdqa C, [rsp + 2*64 + mh_segs] + vmovdqa D, [rsp + 3*64 + mh_segs] + vmovdqa E, [rsp + 4*64 + mh_segs] + + vmovdqa AA, A + vmovdqa BB, B + vmovdqa CC, C + vmovdqa DD, D + vmovdqa EE, E +;; +;; perform 0-79 steps +;; + vmovdqa K, [K00_19] +;; do rounds 0...15 + %assign I 0 + %rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 16...19 + vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 16] + vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 16] + %rep 4 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] +;; do rounds 20...39 + vmovdqa K, [K20_39] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 40...59 + vmovdqa K, [K40_59] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*1] +;; do rounds 60...79 + vmovdqa K, [K60_79] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + + vpaddd A, AA + vpaddd B, BB + vpaddd C, CC + vpaddd D, DD + vpaddd E, EE + + ; write out digests + vmovdqa [rsp + 0*64 + mh_segs], A + vmovdqa [rsp + 1*64 + mh_segs], B + vmovdqa [rsp + 2*64 + mh_segs], C + vmovdqa [rsp + 3*64 + mh_segs], D + vmovdqa [rsp + 4*64 + mh_segs], E + + add pref, 256 + add mh_data_p, 256 + add mh_segs, 16 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 5 + vmovdqa A, [rsp + I*64 + 16*0] + vmovdqa B, [rsp + I*64 + 16*1] + vmovdqa C, [rsp + I*64 + 16*2] + vmovdqa D, [rsp + I*64 + 16*3] + + VMOVPS [mh_digests_p + I*64 + 16*0], A + VMOVPS [mh_digests_p + I*64 + 16*1], B + VMOVPS [mh_digests_p + I*64 + 16*2], C + VMOVPS [mh_digests_p + I*64 + 16*3], D + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=16 + +align 16 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm new file mode 100644 index 00000000..77aeda0d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx2.asm @@ -0,0 +1,509 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX-2 +;; + +%include "reg_sizes.asm" +default rel + +;; Magic functions defined in FIPS 180-1 +;; +;MAGIC_F0 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF, %%regC,%%regD + vpand %%regF, %%regF,%%regB + vpxor %%regF, %%regF,%%regD +%endmacro + +;MAGIC_F1 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpxor %%regF,%%regD,%%regC + vpxor %%regF,%%regF,%%regB +%endmacro + + + +;MAGIC_F2 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ ;; ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + vpor %%regF,%%regB,%%regC + vpand %%regT,%%regB,%%regC + vpand %%regF,%%regF,%%regD + vpor %%regF,%%regF,%%regT +%endmacro + +;MAGIC_F3 MACRO regF:REQ,regB:REQ,regC:REQ,regD:REQ,regT:REQ +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + vpsrld %%tmp, %%reg, (32-%%imm) + vpslld %%reg, %%reg, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD_nd 4 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 +%define %%src %4 + vpsrld %%tmp, %%src, (32-%%imm) + vpslld %%reg, %%src, %%imm + vpor %%reg, %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + vpaddd %%regE, %%regE,[%%data + (%%memW * 32)] + PROLD_nd %%regT,5, %%regF,%%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE, %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + vpaddd %%regE, %%regE,%%immCNT + + vmovdqa W14, [%%data + ((%%memW - 14) & 15) * 32] + vpxor W16, W16, W14 + vpxor W16, W16, [%%data + ((%%memW - 8) & 15) * 32] + vpxor W16, W16, [%%data + ((%%memW - 3) & 15) * 32] + + vpsrld %%regF, W16, (32-1) + vpslld W16, W16, 1 + vpor %%regF, %%regF, W16 + ROTATE_W + + vmovdqa [%%data + ((%%memW - 0) & 15) * 32],%%regF + vpaddd %%regE, %%regE,%%regF + + PROLD_nd %%regT,5, %%regF, %%regA + vpaddd %%regE, %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + vpaddd %%regE,%%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovups + +%define A ymm0 +%define B ymm1 +%define C ymm2 +%define D ymm3 +%define E ymm4 + +%define F ymm5 +%define T0 ymm6 +%define T1 ymm7 +%define T2 ymm8 +%define T3 ymm9 +%define T4 ymm10 +%define T5 ymm11 +%define T6 ymm12 +%define T7 ymm13 +%define T8 ymm14 +%define T9 ymm15 + +%define AA ymm5 +%define BB ymm6 +%define CC ymm7 +%define DD ymm8 +%define EE ymm9 +%define TMP ymm10 +%define FUN ymm11 +%define K ymm12 +%define W14 ymm13 +%define W15 ymm14 +%define W16 ymm15 + + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep | + +align 32 + +;void mh_sha1_block_avx2(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. +; arg 3 number of 1KB blocks +; +global mh_sha1_block_avx2:function internal +func(mh_sha1_block_avx2) + FUNC_SAVE + + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 32 Bytes needed by avx2 + and rsp, ~0x1F + + %assign I 0 ; copy segs_digests into stack + %rep 2 + VMOVPS A, [mh_digests_p + I*32*5 + 32*0] + VMOVPS B, [mh_digests_p + I*32*5 + 32*1] + VMOVPS C, [mh_digests_p + I*32*5 + 32*2] + VMOVPS D, [mh_digests_p + I*32*5 + 32*3] + VMOVPS E, [mh_digests_p + I*32*5 + 32*4] + + vmovdqa [rsp + I*32*5 + 32*0], A + vmovdqa [rsp + I*32*5 + 32*1], B + vmovdqa [rsp + I*32*5 + 32*2], C + vmovdqa [rsp + I*32*5 + 32*3], D + vmovdqa [rsp + I*32*5 + 32*4], E + %assign I (I+1) + %endrep + +.block_loop: + ;transform to big-endian data and store on aligned_frame + vmovdqa F, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*5 to DWORD*8_SEGS*5*2 +%assign I 0 +%rep 16 + VMOVPS T0,[mh_in_p + I*64+0*32] + VMOVPS T1,[mh_in_p + I*64+1*32] + + vpshufb T0, T0, F + vmovdqa [mh_data_p +I*32+0*512],T0 + vpshufb T1, T1, F + vmovdqa [mh_data_p +I*32+1*512],T1 +%assign I (I+1) +%endrep + + mov mh_segs, 0 ;start from the first 8 segments + mov pref, 1024 ;avoid prefetch repeadtedly + .segs_loop: + ;; Initialize digests + vmovdqa A, [rsp + 0*64 + mh_segs] + vmovdqa B, [rsp + 1*64 + mh_segs] + vmovdqa C, [rsp + 2*64 + mh_segs] + vmovdqa D, [rsp + 3*64 + mh_segs] + vmovdqa E, [rsp + 4*64 + mh_segs] + + vmovdqa AA, A + vmovdqa BB, B + vmovdqa CC, C + vmovdqa DD, D + vmovdqa EE, E +;; +;; perform 0-79 steps +;; + vmovdqa K, [K00_19] +;; do rounds 0...15 + %assign I 0 + %rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS +%assign I (I+1) +%endrep + +;; do rounds 16...19 + vmovdqa W16, [mh_data_p + ((16 - 16) & 15) * 32] + vmovdqa W15, [mh_data_p + ((16 - 15) & 15) * 32] + %rep 4 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] + PREFETCH_X [mh_in_p + pref+128*1] +;; do rounds 20...39 + vmovdqa K, [K20_39] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep +;; do rounds 40...59 + vmovdqa K, [K40_59] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*2] + PREFETCH_X [mh_in_p + pref+128*3] +;; do rounds 60...79 + vmovdqa K, [K60_79] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + + vpaddd A,A, AA + vpaddd B,B, BB + vpaddd C,C, CC + vpaddd D,D, DD + vpaddd E,E, EE + + ; write out digests + vmovdqa [rsp + 0*64 + mh_segs], A + vmovdqa [rsp + 1*64 + mh_segs], B + vmovdqa [rsp + 2*64 + mh_segs], C + vmovdqa [rsp + 3*64 + mh_segs], D + vmovdqa [rsp + 4*64 + mh_segs], E + + add pref, 512 + + add mh_data_p, 512 + add mh_segs, 32 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 2 + vmovdqa A, [rsp + I*32*5 + 32*0] + vmovdqa B, [rsp + I*32*5 + 32*1] + vmovdqa C, [rsp + I*32*5 + 32*2] + vmovdqa D, [rsp + I*32*5 + 32*3] + vmovdqa E, [rsp + I*32*5 + 32*4] + + VMOVPS [mh_digests_p + I*32*5 + 32*0], A + VMOVPS [mh_digests_p + I*32*5 + 32*1], B + VMOVPS [mh_digests_p + I*32*5 + 32*2], C + VMOVPS [mh_digests_p + I*32*5 + 32*3], D + VMOVPS [mh_digests_p + I*32*5 + 32*4], E + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=32 + +align 32 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + dq 0x0405060700010203, 0x0c0d0e0f08090a0b +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 + dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm new file mode 100644 index 00000000..3738c6d4 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_avx512.asm @@ -0,0 +1,403 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using AVX-512 +;; + +%include "reg_sizes.asm" + +%ifdef HAVE_AS_KNOWS_AVX512 +default rel + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define VMOVPS vmovdqu64 +;SIMD variables definition +%define A zmm0 +%define B zmm1 +%define C zmm2 +%define D zmm3 +%define E zmm4 +%define HH0 zmm5 +%define HH1 zmm6 +%define HH2 zmm7 +%define HH3 zmm8 +%define HH4 zmm9 +%define KT zmm10 +%define XTMP0 zmm11 +%define XTMP1 zmm12 +%define SHUF_MASK zmm13 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;using extra 16 ZMM registers to place the inverse input data +%define W0 zmm16 +%define W1 zmm17 +%define W2 zmm18 +%define W3 zmm19 +%define W4 zmm20 +%define W5 zmm21 +%define W6 zmm22 +%define W7 zmm23 +%define W8 zmm24 +%define W9 zmm25 +%define W10 zmm26 +%define W11 zmm27 +%define W12 zmm28 +%define W13 zmm29 +%define W14 zmm30 +%define W15 zmm31 +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;macros definition +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%macro PROCESS_LOOP 2 +%define %%WT %1 +%define %%F_IMMED %2 + + ; T = ROTL_5(A) + Ft(B,C,D) + E + Kt + Wt + ; E=D, D=C, C=ROTL_30(B), B=A, A=T + + ; Ft + ; 0-19 Ch(B,C,D) = (B&C) ^ (~B&D) + ; 20-39, 60-79 Parity(B,C,D) = B ^ C ^ D + ; 40-59 Maj(B,C,D) = (B&C) ^ (B&D) ^ (C&D) + + vmovdqa32 XTMP1, B ; Copy B + vpaddd E, E, %%WT ; E = E + Wt + vpternlogd XTMP1, C, D, %%F_IMMED ; TMP1 = Ft(B,C,D) + vpaddd E, E, KT ; E = E + Wt + Kt + vprold XTMP0, A, 5 ; TMP0 = ROTL_5(A) + vpaddd E, E, XTMP1 ; E = Ft(B,C,D) + E + Kt + Wt + vprold B, B, 30 ; B = ROTL_30(B) + vpaddd E, E, XTMP0 ; E = T + + ROTATE_ARGS +%endmacro + +%macro MSG_SCHED_ROUND_16_79 4 +%define %%WT %1 +%define %%WTp2 %2 +%define %%WTp8 %3 +%define %%WTp13 %4 + ; Wt = ROTL_1(Wt-3 ^ Wt-8 ^ Wt-14 ^ Wt-16) + ; Wt+16 = ROTL_1(Wt+13 ^ Wt+8 ^ Wt+2 ^ Wt) + vpternlogd %%WT, %%WTp2, %%WTp8, 0x96 + vpxord %%WT, %%WT, %%WTp13 + vprold %%WT, %%WT, 1 +%endmacro + +%define APPEND(a,b) a %+ b +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + ; remove unwind info macros + %define func(x) x: + %macro FUNC_SAVE 0 + sub rsp, stack_size + movdqa [rsp + 0*16], xmm6 + movdqa [rsp + 1*16], xmm7 + movdqa [rsp + 2*16], xmm8 + movdqa [rsp + 3*16], xmm9 + movdqa [rsp + 4*16], xmm10 + movdqa [rsp + 5*16], xmm11 + movdqa [rsp + 6*16], xmm12 + movdqa [rsp + 7*16], xmm13 + movdqa [rsp + 8*16], xmm14 + movdqa [rsp + 9*16], xmm15 + mov [rsp + 10*16 + 0*8], r12 + mov [rsp + 10*16 + 1*8], r13 + mov [rsp + 10*16 + 2*8], r14 + mov [rsp + 10*16 + 3*8], r15 + mov [rsp + 10*16 + 4*8], rdi + mov [rsp + 10*16 + 5*8], rsi + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep | + +[bits 64] +section .text +align 32 + +;void mh_sha1_block_avx512(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. +; arg 3 number of 1KB blocks +; +global mh_sha1_block_avx512 +func(mh_sha1_block_avx512) + FUNC_SAVE + + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; align rsp to 64 Bytes needed by avx512 + and rsp, ~0x3f + + ; copy segs_digests into registers. + VMOVPS HH0, [mh_digests_p + 64*0] + VMOVPS HH1, [mh_digests_p + 64*1] + VMOVPS HH2, [mh_digests_p + 64*2] + VMOVPS HH3, [mh_digests_p + 64*3] + VMOVPS HH4, [mh_digests_p + 64*4] + ;a mask used to transform to big-endian data + vmovdqa64 SHUF_MASK, [PSHUFFLE_BYTE_FLIP_MASK] + +.block_loop: + ;transform to big-endian data and store on aligned_frame + ;using extra 16 ZMM registers instead of stack +%assign I 0 +%rep 8 +%assign J (I+1) + VMOVPS APPEND(W,I),[mh_in_p + I*64+0*64] + VMOVPS APPEND(W,J),[mh_in_p + I*64+1*64] + + vpshufb APPEND(W,I), APPEND(W,I), SHUF_MASK + vpshufb APPEND(W,J), APPEND(W,J), SHUF_MASK +%assign I (I+2) +%endrep + + vmovdqa64 A, HH0 + vmovdqa64 B, HH1 + vmovdqa64 C, HH2 + vmovdqa64 D, HH3 + vmovdqa64 E, HH4 + + vmovdqa32 KT, [K00_19] +%assign I 0xCA +%assign J 0 +%assign K 2 +%assign L 8 +%assign M 13 +%assign N 0 +%rep 80 + PROCESS_LOOP APPEND(W,J), I + %if N < 64 + MSG_SCHED_ROUND_16_79 APPEND(W,J), APPEND(W,K), APPEND(W,L), APPEND(W,M) + %endif + %if N = 19 + vmovdqa32 KT, [K20_39] + %assign I 0x96 + %elif N = 39 + vmovdqa32 KT, [K40_59] + %assign I 0xE8 + %elif N = 59 + vmovdqa32 KT, [K60_79] + %assign I 0x96 + %endif + %if N % 20 = 19 + PREFETCH_X [mh_in_p + 1024+128*(N / 20)] + PREFETCH_X [mh_in_p + 1024+128*(N / 20 +1)] + %endif +%assign J ((J+1)% 16) +%assign K ((K+1)% 16) +%assign L ((L+1)% 16) +%assign M ((M+1)% 16) +%assign N (N+1) +%endrep + + ; Add old digest + vpaddd HH0,A, HH0 + vpaddd HH1,B, HH1 + vpaddd HH2,C, HH2 + vpaddd HH3,D, HH3 + vpaddd HH4,E, HH4 + + add mh_in_p, 1024 + sub loops, 1 + jne .block_loop + + ; copy segs_digests to mh_digests_p + VMOVPS [mh_digests_p + 64*0], HH0 + VMOVPS [mh_digests_p + 64*1], HH1 + VMOVPS [mh_digests_p + 64*2], HH2 + VMOVPS [mh_digests_p + 64*3], HH3 + VMOVPS [mh_digests_p + 64*4], HH4 + + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + + +section .data align=64 + +align 64 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + dq 0x0405060700010203 + dq 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + dq 0x5A8279995A827999 + +K20_39: dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + dq 0x6ED9EBA16ED9EBA1 + +K40_59: dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + dq 0x8F1BBCDC8F1BBCDC + +K60_79: dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + dq 0xCA62C1D6CA62C1D6 + +%else +%ifidn __OUTPUT_FORMAT__, win64 +global no_mh_sha1_block_avx512 +no_mh_sha1_block_avx512: +%endif +%endif ; HAVE_AS_KNOWS_AVX512 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c new file mode 100644 index 00000000..cdee69a4 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_base.c @@ -0,0 +1,387 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "mh_sha1_internal.h" +#include <string.h> + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +// Base multi-hash SHA1 Functions +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +#define store_w(s, i, w, ww) (w[i][s] = bswap(ww[i*HASH_SEGS+s])) // only used for step 0 ~ 15 +#define update_w(s, i, w) (w[i&15][s] = rol32(w[(i-3)&15][s]^w[(i-8)&15][s]^w[(i-14)&15][s]^w[(i-16)&15][s], 1)) // used for step > 15 +#define update_e_1(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F1(b[s],c[s],d[s]) + K_00_19 + w[i&15][s]) +#define update_e_2(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F2(b[s],c[s],d[s]) + K_20_39 + w[i&15][s]) +#define update_e_3(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F3(b[s],c[s],d[s]) + K_40_59 + w[i&15][s]) +#define update_e_4(s, a, b, c, d, e, i, w) (e[s] += rol32(a[s],5) + F4(b[s],c[s],d[s]) + K_60_79 + w[i&15][s]) +#define update_b(s, b) (b[s] = rol32(b[s],30)) + +#define STORE_W(i, w, ww) \ + store_w(0, i, w, ww); \ + store_w(1, i, w, ww); \ + store_w(2, i, w, ww); \ + store_w(3, i, w, ww); \ + store_w(4, i, w, ww); \ + store_w(5, i, w, ww); \ + store_w(6, i, w, ww); \ + store_w(7, i, w, ww); \ + store_w(8, i, w, ww); \ + store_w(9, i, w, ww); \ + store_w(10, i, w, ww); \ + store_w(11, i, w, ww); \ + store_w(12, i, w, ww); \ + store_w(13, i, w, ww); \ + store_w(14, i, w, ww); \ + store_w(15, i, w, ww) + +#define UPDATE_W(i, w) \ + update_w(0, i, w); \ + update_w(1, i, w); \ + update_w(2, i, w); \ + update_w(3, i, w); \ + update_w(4, i, w); \ + update_w(5, i, w); \ + update_w(6, i, w); \ + update_w(7, i, w); \ + update_w(8, i, w); \ + update_w(9, i, w); \ + update_w(10, i, w); \ + update_w(11, i, w); \ + update_w(12, i, w); \ + update_w(13, i, w); \ + update_w(14, i, w); \ + update_w(15, i, w) + +#define UPDATE_E1(a, b, c, d, e, i, w) \ + update_e_1(0, a, b, c, d, e, i, w); \ + update_e_1(1, a, b, c, d, e, i, w); \ + update_e_1(2, a, b, c, d, e, i, w); \ + update_e_1(3, a, b, c, d, e, i, w); \ + update_e_1(4, a, b, c, d, e, i, w); \ + update_e_1(5, a, b, c, d, e, i, w); \ + update_e_1(6, a, b, c, d, e, i, w); \ + update_e_1(7, a, b, c, d, e, i, w); \ + update_e_1(8, a, b, c, d, e, i, w); \ + update_e_1(9, a, b, c, d, e, i, w); \ + update_e_1(10, a, b, c, d, e, i, w); \ + update_e_1(11, a, b, c, d, e, i, w); \ + update_e_1(12, a, b, c, d, e, i, w); \ + update_e_1(13, a, b, c, d, e, i, w); \ + update_e_1(14, a, b, c, d, e, i, w); \ + update_e_1(15, a, b, c, d, e, i, w) + +#define UPDATE_E2(a, b, c, d, e, i, w) \ + update_e_2(0, a, b, c, d, e, i, w); \ + update_e_2(1, a, b, c, d, e, i, w); \ + update_e_2(2, a, b, c, d, e, i, w); \ + update_e_2(3, a, b, c, d, e, i, w); \ + update_e_2(4, a, b, c, d, e, i, w); \ + update_e_2(5, a, b, c, d, e, i, w); \ + update_e_2(6, a, b, c, d, e, i, w); \ + update_e_2(7, a, b, c, d, e, i, w); \ + update_e_2(8, a, b, c, d, e, i, w); \ + update_e_2(9, a, b, c, d, e, i, w); \ + update_e_2(10, a, b, c, d, e, i, w); \ + update_e_2(11, a, b, c, d, e, i, w); \ + update_e_2(12, a, b, c, d, e, i, w); \ + update_e_2(13, a, b, c, d, e, i, w); \ + update_e_2(14, a, b, c, d, e, i, w); \ + update_e_2(15, a, b, c, d, e, i, w) + +#define UPDATE_E3(a, b, c, d, e, i, w) \ + update_e_3(0, a, b, c, d, e, i, w); \ + update_e_3(1, a, b, c, d, e, i, w); \ + update_e_3(2, a, b, c, d, e, i, w); \ + update_e_3(3, a, b, c, d, e, i, w); \ + update_e_3(4, a, b, c, d, e, i, w); \ + update_e_3(5, a, b, c, d, e, i, w); \ + update_e_3(6, a, b, c, d, e, i, w); \ + update_e_3(7, a, b, c, d, e, i, w); \ + update_e_3(8, a, b, c, d, e, i, w); \ + update_e_3(9, a, b, c, d, e, i, w); \ + update_e_3(10, a, b, c, d, e, i, w); \ + update_e_3(11, a, b, c, d, e, i, w); \ + update_e_3(12, a, b, c, d, e, i, w); \ + update_e_3(13, a, b, c, d, e, i, w); \ + update_e_3(14, a, b, c, d, e, i, w); \ + update_e_3(15, a, b, c, d, e, i, w) + +#define UPDATE_E4(a, b, c, d, e, i, w) \ + update_e_4(0, a, b, c, d, e, i, w); \ + update_e_4(1, a, b, c, d, e, i, w); \ + update_e_4(2, a, b, c, d, e, i, w); \ + update_e_4(3, a, b, c, d, e, i, w); \ + update_e_4(4, a, b, c, d, e, i, w); \ + update_e_4(5, a, b, c, d, e, i, w); \ + update_e_4(6, a, b, c, d, e, i, w); \ + update_e_4(7, a, b, c, d, e, i, w); \ + update_e_4(8, a, b, c, d, e, i, w); \ + update_e_4(9, a, b, c, d, e, i, w); \ + update_e_4(10, a, b, c, d, e, i, w); \ + update_e_4(11, a, b, c, d, e, i, w); \ + update_e_4(12, a, b, c, d, e, i, w); \ + update_e_4(13, a, b, c, d, e, i, w); \ + update_e_4(14, a, b, c, d, e, i, w); \ + update_e_4(15, a, b, c, d, e, i, w) + +#define UPDATE_B(b) \ + update_b(0, b); \ + update_b(1, b); \ + update_b(2, b); \ + update_b(3, b); \ + update_b(4, b); \ + update_b(5, b); \ + update_b(6, b); \ + update_b(7, b); \ + update_b(8, b); \ + update_b(9, b); \ + update_b(10, b); \ + update_b(11, b); \ + update_b(12, b); \ + update_b(13, b); \ + update_b(14, b); \ + update_b(15, b) + +static inline void step00_15(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS], + uint32_t * ww) +{ + STORE_W(i, w, ww); + UPDATE_E1(a, b, c, d, e, i, w); + UPDATE_B(b); +} + +static inline void step16_19(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS]) +{ + UPDATE_W(i, w); + UPDATE_E1(a, b, c, d, e, i, w); + UPDATE_B(b); + +} + +static inline void step20_39(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS]) +{ + UPDATE_W(i, w); + UPDATE_E2(a, b, c, d, e, i, w); + UPDATE_B(b); +} + +static inline void step40_59(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS]) +{ + UPDATE_W(i, w); + UPDATE_E3(a, b, c, d, e, i, w); + UPDATE_B(b); +} + +static inline void step60_79(int i, uint32_t * a, uint32_t * b, uint32_t * c, + uint32_t * d, uint32_t * e, uint32_t(*w)[HASH_SEGS]) +{ + UPDATE_W(i, w); + UPDATE_E4(a, b, c, d, e, i, w); + UPDATE_B(b); +} + +static inline void init_abcde(uint32_t * xx, uint32_t n, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS]) +{ + xx[0] = digests[n][0]; + xx[1] = digests[n][1]; + xx[2] = digests[n][2]; + xx[3] = digests[n][3]; + xx[4] = digests[n][4]; + xx[5] = digests[n][5]; + xx[6] = digests[n][6]; + xx[7] = digests[n][7]; + xx[8] = digests[n][8]; + xx[9] = digests[n][9]; + xx[10] = digests[n][10]; + xx[11] = digests[n][11]; + xx[12] = digests[n][12]; + xx[13] = digests[n][13]; + xx[14] = digests[n][14]; + xx[15] = digests[n][15]; +} + +static inline void add_abcde(uint32_t * xx, uint32_t n, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS]) +{ + digests[n][0] += xx[0]; + digests[n][1] += xx[1]; + digests[n][2] += xx[2]; + digests[n][3] += xx[3]; + digests[n][4] += xx[4]; + digests[n][5] += xx[5]; + digests[n][6] += xx[6]; + digests[n][7] += xx[7]; + digests[n][8] += xx[8]; + digests[n][9] += xx[9]; + digests[n][10] += xx[10]; + digests[n][11] += xx[11]; + digests[n][12] += xx[12]; + digests[n][13] += xx[13]; + digests[n][14] += xx[14]; + digests[n][15] += xx[15]; +} + +/* + * API to perform 0-79 steps of the multi-hash algorithm for + * a single block of data. The caller is responsible for ensuring + * a full block of data input. + * + * Argument: + * input - the pointer to the data + * digest - the space to hold the digests for all segments. + * + * Return: + * N/A + */ +void mh_sha1_single(const uint8_t * input, uint32_t(*digests)[HASH_SEGS], + uint8_t * frame_buffer) +{ + uint32_t aa[HASH_SEGS], bb[HASH_SEGS], cc[HASH_SEGS], dd[HASH_SEGS], ee[HASH_SEGS]; + uint32_t *ww = (uint32_t *) input; + uint32_t(*w)[HASH_SEGS]; + + w = (uint32_t(*)[HASH_SEGS]) frame_buffer; + + init_abcde(aa, 0, digests); + init_abcde(bb, 1, digests); + init_abcde(cc, 2, digests); + init_abcde(dd, 3, digests); + init_abcde(ee, 4, digests); + + step00_15(0, aa, bb, cc, dd, ee, w, ww); + step00_15(1, ee, aa, bb, cc, dd, w, ww); + step00_15(2, dd, ee, aa, bb, cc, w, ww); + step00_15(3, cc, dd, ee, aa, bb, w, ww); + step00_15(4, bb, cc, dd, ee, aa, w, ww); + step00_15(5, aa, bb, cc, dd, ee, w, ww); + step00_15(6, ee, aa, bb, cc, dd, w, ww); + step00_15(7, dd, ee, aa, bb, cc, w, ww); + step00_15(8, cc, dd, ee, aa, bb, w, ww); + step00_15(9, bb, cc, dd, ee, aa, w, ww); + step00_15(10, aa, bb, cc, dd, ee, w, ww); + step00_15(11, ee, aa, bb, cc, dd, w, ww); + step00_15(12, dd, ee, aa, bb, cc, w, ww); + step00_15(13, cc, dd, ee, aa, bb, w, ww); + step00_15(14, bb, cc, dd, ee, aa, w, ww); + step00_15(15, aa, bb, cc, dd, ee, w, ww); + + step16_19(16, ee, aa, bb, cc, dd, w); + step16_19(17, dd, ee, aa, bb, cc, w); + step16_19(18, cc, dd, ee, aa, bb, w); + step16_19(19, bb, cc, dd, ee, aa, w); + + step20_39(20, aa, bb, cc, dd, ee, w); + step20_39(21, ee, aa, bb, cc, dd, w); + step20_39(22, dd, ee, aa, bb, cc, w); + step20_39(23, cc, dd, ee, aa, bb, w); + step20_39(24, bb, cc, dd, ee, aa, w); + step20_39(25, aa, bb, cc, dd, ee, w); + step20_39(26, ee, aa, bb, cc, dd, w); + step20_39(27, dd, ee, aa, bb, cc, w); + step20_39(28, cc, dd, ee, aa, bb, w); + step20_39(29, bb, cc, dd, ee, aa, w); + step20_39(30, aa, bb, cc, dd, ee, w); + step20_39(31, ee, aa, bb, cc, dd, w); + step20_39(32, dd, ee, aa, bb, cc, w); + step20_39(33, cc, dd, ee, aa, bb, w); + step20_39(34, bb, cc, dd, ee, aa, w); + step20_39(35, aa, bb, cc, dd, ee, w); + step20_39(36, ee, aa, bb, cc, dd, w); + step20_39(37, dd, ee, aa, bb, cc, w); + step20_39(38, cc, dd, ee, aa, bb, w); + step20_39(39, bb, cc, dd, ee, aa, w); + + step40_59(40, aa, bb, cc, dd, ee, w); + step40_59(41, ee, aa, bb, cc, dd, w); + step40_59(42, dd, ee, aa, bb, cc, w); + step40_59(43, cc, dd, ee, aa, bb, w); + step40_59(44, bb, cc, dd, ee, aa, w); + step40_59(45, aa, bb, cc, dd, ee, w); + step40_59(46, ee, aa, bb, cc, dd, w); + step40_59(47, dd, ee, aa, bb, cc, w); + step40_59(48, cc, dd, ee, aa, bb, w); + step40_59(49, bb, cc, dd, ee, aa, w); + step40_59(50, aa, bb, cc, dd, ee, w); + step40_59(51, ee, aa, bb, cc, dd, w); + step40_59(52, dd, ee, aa, bb, cc, w); + step40_59(53, cc, dd, ee, aa, bb, w); + step40_59(54, bb, cc, dd, ee, aa, w); + step40_59(55, aa, bb, cc, dd, ee, w); + step40_59(56, ee, aa, bb, cc, dd, w); + step40_59(57, dd, ee, aa, bb, cc, w); + step40_59(58, cc, dd, ee, aa, bb, w); + step40_59(59, bb, cc, dd, ee, aa, w); + + step60_79(60, aa, bb, cc, dd, ee, w); + step60_79(61, ee, aa, bb, cc, dd, w); + step60_79(62, dd, ee, aa, bb, cc, w); + step60_79(63, cc, dd, ee, aa, bb, w); + step60_79(64, bb, cc, dd, ee, aa, w); + step60_79(65, aa, bb, cc, dd, ee, w); + step60_79(66, ee, aa, bb, cc, dd, w); + step60_79(67, dd, ee, aa, bb, cc, w); + step60_79(68, cc, dd, ee, aa, bb, w); + step60_79(69, bb, cc, dd, ee, aa, w); + step60_79(70, aa, bb, cc, dd, ee, w); + step60_79(71, ee, aa, bb, cc, dd, w); + step60_79(72, dd, ee, aa, bb, cc, w); + step60_79(73, cc, dd, ee, aa, bb, w); + step60_79(74, bb, cc, dd, ee, aa, w); + step60_79(75, aa, bb, cc, dd, ee, w); + step60_79(76, ee, aa, bb, cc, dd, w); + step60_79(77, dd, ee, aa, bb, cc, w); + step60_79(78, cc, dd, ee, aa, bb, w); + step60_79(79, bb, cc, dd, ee, aa, w); + + add_abcde(aa, 0, digests); + add_abcde(bb, 1, digests); + add_abcde(cc, 2, digests); + add_abcde(dd, 3, digests); + add_abcde(ee, 4, digests); +} + +void mh_sha1_block_base(const uint8_t * input_data, + uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks) +{ + uint32_t i; + + for (i = 0; i < num_blocks; i++) { + mh_sha1_single(input_data, digests, frame_buffer); + input_data += MH_SHA1_BLOCK_SIZE; + } + + return; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm new file mode 100644 index 00000000..1e53cfec --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_block_sse.asm @@ -0,0 +1,494 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; code to compute 16 SHA1 using SSE +;; + +%include "reg_sizes.asm" +default rel + +;; Magic functions defined in FIPS 180-1 +;; +; macro MAGIC_F0 F,B,C,D,T ;; F = (D ^ (B & (C ^ D))) +%macro MAGIC_F0 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regC + pxor %%regF,%%regD + pand %%regF,%%regB + pxor %%regF,%%regD +%endmacro + +; macro MAGIC_F1 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F1 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regD + pxor %%regF,%%regC + pxor %%regF,%%regB +%endmacro + +; macro MAGIC_F2 F,B,C,D,T ;; F = ((B & C) | (B & D) | (C & D)) +%macro MAGIC_F2 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + movdqa %%regF,%%regB + movdqa %%regT,%%regB + por %%regF,%%regC + pand %%regT,%%regC + pand %%regF,%%regD + por %%regF,%%regT +%endmacro + +; macro MAGIC_F3 F,B,C,D,T ;; F = (B ^ C ^ D) +%macro MAGIC_F3 5 +%define %%regF %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regT %5 + MAGIC_F1 %%regF,%%regB,%%regC,%%regD,%%regT +%endmacro + +; PROLD reg, imm, tmp +%macro PROLD 3 +%define %%reg %1 +%define %%imm %2 +%define %%tmp %3 + movdqa %%tmp, %%reg + pslld %%reg, %%imm + psrld %%tmp, (32-%%imm) + por %%reg, %%tmp +%endmacro + +%macro SHA1_STEP_00_15 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + paddd %%regE,[%%data + (%%memW * 16)] + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + paddd %%regE,%%regF +%endmacro + +%macro SHA1_STEP_16_79 11 +%define %%regA %1 +%define %%regB %2 +%define %%regC %3 +%define %%regD %4 +%define %%regE %5 +%define %%regT %6 +%define %%regF %7 +%define %%memW %8 +%define %%immCNT %9 +%define %%MAGIC %10 +%define %%data %11 + paddd %%regE,%%immCNT + movdqa W14, [%%data + ((%%memW - 14) & 15) * 16] + pxor W16, W14 + pxor W16, [%%data + ((%%memW - 8) & 15) * 16] + pxor W16, [%%data + ((%%memW - 3) & 15) * 16] + movdqa %%regF, W16 + pslld W16, 1 + psrld %%regF, (32-1) + por %%regF, W16 + ROTATE_W + + movdqa [%%data + ((%%memW - 0) & 15) * 16],%%regF + paddd %%regE,%%regF + movdqa %%regT,%%regA + PROLD %%regT,5, %%regF + paddd %%regE,%%regT + %%MAGIC %%regF,%%regB,%%regC,%%regD,%%regT ;; FUN = MAGIC_Fi(B,C,D) + PROLD %%regB,30, %%regT + paddd %%regE,%%regF +%endmacro + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifidn __OUTPUT_FORMAT__, elf64 + ; Linux + %define arg0 rdi + %define arg1 rsi + %define arg2 rdx + %define arg3 rcx + + %define arg4 r8 + %define arg5 r9 + + %define tmp1 r10 + %define tmp2 r11 + %define tmp3 r12 ; must be saved and restored + %define tmp4 r13 ; must be saved and restored + %define tmp5 r14 ; must be saved and restored + %define tmp6 r15 ; must be saved and restored + %define return rax + + %define func(x) x: + %macro FUNC_SAVE 0 + push r12 + push r13 + push r14 + push r15 + %endmacro + %macro FUNC_RESTORE 0 + pop r15 + pop r14 + pop r13 + pop r12 + %endmacro +%else + ; Windows + %define arg0 rcx + %define arg1 rdx + %define arg2 r8 + %define arg3 r9 + + %define arg4 r10 + %define arg5 r11 + %define tmp1 r12 ; must be saved and restored + %define tmp2 r13 ; must be saved and restored + %define tmp3 r14 ; must be saved and restored + %define tmp4 r15 ; must be saved and restored + %define tmp5 rdi ; must be saved and restored + %define tmp6 rsi ; must be saved and restored + %define return rax + + %define stack_size 10*16 + 7*8 ; must be an odd multiple of 8 + %define func(x) proc_frame x + %macro FUNC_SAVE 0 + alloc_stack stack_size + save_xmm128 xmm6, 0*16 + save_xmm128 xmm7, 1*16 + save_xmm128 xmm8, 2*16 + save_xmm128 xmm9, 3*16 + save_xmm128 xmm10, 4*16 + save_xmm128 xmm11, 5*16 + save_xmm128 xmm12, 6*16 + save_xmm128 xmm13, 7*16 + save_xmm128 xmm14, 8*16 + save_xmm128 xmm15, 9*16 + save_reg r12, 10*16 + 0*8 + save_reg r13, 10*16 + 1*8 + save_reg r14, 10*16 + 2*8 + save_reg r15, 10*16 + 3*8 + save_reg rdi, 10*16 + 4*8 + save_reg rsi, 10*16 + 5*8 + end_prolog + %endmacro + + %macro FUNC_RESTORE 0 + movdqa xmm6, [rsp + 0*16] + movdqa xmm7, [rsp + 1*16] + movdqa xmm8, [rsp + 2*16] + movdqa xmm9, [rsp + 3*16] + movdqa xmm10, [rsp + 4*16] + movdqa xmm11, [rsp + 5*16] + movdqa xmm12, [rsp + 6*16] + movdqa xmm13, [rsp + 7*16] + movdqa xmm14, [rsp + 8*16] + movdqa xmm15, [rsp + 9*16] + mov r12, [rsp + 10*16 + 0*8] + mov r13, [rsp + 10*16 + 1*8] + mov r14, [rsp + 10*16 + 2*8] + mov r15, [rsp + 10*16 + 3*8] + mov rdi, [rsp + 10*16 + 4*8] + mov rsi, [rsp + 10*16 + 5*8] + add rsp, stack_size + %endmacro +%endif +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define loops arg3 +;variables of mh_sha1 +%define mh_in_p arg0 +%define mh_digests_p arg1 +%define mh_data_p arg2 +%define mh_segs tmp1 +;variables used by storing segs_digests on stack +%define RSP_SAVE tmp2 +%define FRAMESZ 4*5*16 ;BYTES*DWORDS*SEGS + +%define pref tmp3 +%macro PREFETCH_X 1 +%define %%mem %1 + prefetchnta %%mem +%endmacro +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%define MOVPS movups + +%define A xmm0 +%define B xmm1 +%define C xmm2 +%define D xmm3 +%define E xmm4 +%define F xmm5 ; tmp +%define G xmm6 ; tmp + +%define TMP G +%define FUN F +%define K xmm7 + +%define AA xmm8 +%define BB xmm9 +%define CC xmm10 +%define DD xmm11 +%define EE xmm12 + +%define T0 xmm6 +%define T1 xmm7 +%define T2 xmm8 +%define T3 xmm9 +%define T4 xmm10 +%define T5 xmm11 + +%macro ROTATE_ARGS 0 +%xdefine TMP_ E +%xdefine E D +%xdefine D C +%xdefine C B +%xdefine B A +%xdefine A TMP_ +%endm + +%define W14 xmm13 +%define W15 xmm14 +%define W16 xmm15 + +%macro ROTATE_W 0 +%xdefine TMP_ W16 +%xdefine W16 W15 +%xdefine W15 W14 +%xdefine W14 TMP_ +%endm + + +;init hash digests +; segs_digests:low addr-> high_addr +; a | b | c | ...| p | (16) +; h0 | h0 | h0 | ...| h0 | | Aa| Ab | Ac |...| Ap | +; h1 | h1 | h1 | ...| h1 | | Ba| Bb | Bc |...| Bp | +; .... +; h5 | h5 | h5 | ...| h5 | | Ea| Eb | Ec |...| Ep | + +align 32 + +;void mh_sha1_block_sse(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], +; uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); +; arg 0 pointer to input data +; arg 1 pointer to digests, include segments digests(uint32_t digests[16][5]) +; arg 2 pointer to aligned_frame_buffer which is used to save the big_endian data. +; arg 3 number of 1KB blocks +; +global mh_sha1_block_sse:function internal +func(mh_sha1_block_sse) + FUNC_SAVE + ; save rsp + mov RSP_SAVE, rsp + + cmp loops, 0 + jle .return + + ; leave enough space to store segs_digests + sub rsp, FRAMESZ + ; align rsp to 16 Bytes needed by sse + and rsp, ~0x0F + + %assign I 0 ; copy segs_digests into stack + %rep 5 + MOVPS A, [mh_digests_p + I*64 + 16*0] + MOVPS B, [mh_digests_p + I*64 + 16*1] + MOVPS C, [mh_digests_p + I*64 + 16*2] + MOVPS D, [mh_digests_p + I*64 + 16*3] + + movdqa [rsp + I*64 + 16*0], A + movdqa [rsp + I*64 + 16*1], B + movdqa [rsp + I*64 + 16*2], C + movdqa [rsp + I*64 + 16*3], D + %assign I (I+1) + %endrep + +.block_loop: + ;transform to big-endian data and store on aligned_frame + movdqa F, [PSHUFFLE_BYTE_FLIP_MASK] + ;transform input data from DWORD*16_SEGS*5 to DWORD*4_SEGS*5*4 + %assign I 0 + %rep 16 + MOVPS T0,[mh_in_p + I*64+0*16] + MOVPS T1,[mh_in_p + I*64+1*16] + MOVPS T2,[mh_in_p + I*64+2*16] + MOVPS T3,[mh_in_p + I*64+3*16] + + pshufb T0, F + movdqa [mh_data_p +(I)*16 +0*256],T0 + pshufb T1, F + movdqa [mh_data_p +(I)*16 +1*256],T1 + pshufb T2, F + movdqa [mh_data_p +(I)*16 +2*256],T2 + pshufb T3, F + movdqa [mh_data_p +(I)*16 +3*256],T3 + %assign I (I+1) + %endrep + + mov mh_segs, 0 ;start from the first 4 segments + mov pref, 1024 ;avoid prefetch repeadtedly + .segs_loop: + ;; Initialize digests + movdqa A, [rsp + 0*64 + mh_segs] + movdqa B, [rsp + 1*64 + mh_segs] + movdqa C, [rsp + 2*64 + mh_segs] + movdqa D, [rsp + 3*64 + mh_segs] + movdqa E, [rsp + 4*64 + mh_segs] + + movdqa AA, A + movdqa BB, B + movdqa CC, C + movdqa DD, D + movdqa EE, E +;; +;; perform 0-79 steps +;; + movdqa K, [K00_19] +;; do rounds 0...15 + %assign I 0 + %rep 16 + SHA1_STEP_00_15 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 16...19 + movdqa W16, [mh_data_p + ((16 - 16) & 15) * 16] + movdqa W15, [mh_data_p + ((16 - 15) & 15) * 16] + %rep 4 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F0, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*0] +;; do rounds 20...39 + movdqa K, [K20_39] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F1, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + +;; do rounds 40...59 + movdqa K, [K40_59] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F2, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + PREFETCH_X [mh_in_p + pref+128*1] +;; do rounds 60...79 + movdqa K, [K60_79] + %rep 20 + SHA1_STEP_16_79 A,B,C,D,E, TMP,FUN, I, K, MAGIC_F3, mh_data_p + ROTATE_ARGS + %assign I (I+1) + %endrep + + paddd A, AA + paddd B, BB + paddd C, CC + paddd D, DD + paddd E, EE + + ; write out digests + movdqa [rsp + 0*64 + mh_segs], A + movdqa [rsp + 1*64 + mh_segs], B + movdqa [rsp + 2*64 + mh_segs], C + movdqa [rsp + 3*64 + mh_segs], D + movdqa [rsp + 4*64 + mh_segs], E + + add pref, 256 + add mh_data_p, 256 + add mh_segs, 16 + cmp mh_segs, 64 + jc .segs_loop + + sub mh_data_p, (1024) + add mh_in_p, (1024) + sub loops, 1 + jne .block_loop + + + %assign I 0 ; copy segs_digests back to mh_digests_p + %rep 5 + movdqa A, [rsp + I*64 + 16*0] + movdqa B, [rsp + I*64 + 16*1] + movdqa C, [rsp + I*64 + 16*2] + movdqa D, [rsp + I*64 + 16*3] + + MOVPS [mh_digests_p + I*64 + 16*0], A + MOVPS [mh_digests_p + I*64 + 16*1], B + MOVPS [mh_digests_p + I*64 + 16*2], C + MOVPS [mh_digests_p + I*64 + 16*3], D + %assign I (I+1) + %endrep + mov rsp, RSP_SAVE ; restore rsp + +.return: + FUNC_RESTORE + ret + +endproc_frame + +section .data align=16 + +align 16 +PSHUFFLE_BYTE_FLIP_MASK: dq 0x0405060700010203, 0x0c0d0e0f08090a0b + +K00_19: dq 0x5A8279995A827999, 0x5A8279995A827999 +K20_39: dq 0x6ED9EBA16ED9EBA1, 0x6ED9EBA16ED9EBA1 +K40_59: dq 0x8F1BBCDC8F1BBCDC, 0x8F1BBCDC8F1BBCDC +K60_79: dq 0xCA62C1D6CA62C1D6, 0xCA62C1D6CA62C1D6 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c new file mode 100644 index 00000000..9eab755a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_finalize_base.c @@ -0,0 +1,122 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/* + * mh_sha1_finalize_base.c contains the prototypes of mh_sha1_finalize_XXX + * and mh_sha1_tail_XXX. Default definitions are base type which generates + * mh_sha1_finalize_base and mh_sha1_tail_base. Other types are generated + * through different predefined macros by mh_sha1.c. + * mh_sha1_tail is used to calculate the last incomplete block of input + * data. mh_sha1_finalize is the mh_sha1_ctx wrapper of mh_sha1_tail. + */ +#ifndef MH_SHA1_FINALIZE_FUNCTION +#include <string.h> +#include "mh_sha1_internal.h" + +#define MH_SHA1_FINALIZE_FUNCTION mh_sha1_finalize_base +#define MH_SHA1_TAIL_FUNCTION mh_sha1_tail_base +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_base +#define MH_SHA1_FINALIZE_SLVER +#endif + +void MH_SHA1_TAIL_FUNCTION(uint8_t * partial_buffer, uint32_t total_len, + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], uint8_t * frame_buffer, + uint32_t digests[SHA1_DIGEST_WORDS]) +{ + uint64_t partial_buffer_len, len_in_bit; + + partial_buffer_len = total_len % MH_SHA1_BLOCK_SIZE; + + // Padding the first block + partial_buffer[partial_buffer_len] = 0x80; + partial_buffer_len++; + memset(partial_buffer + partial_buffer_len, 0, + MH_SHA1_BLOCK_SIZE - partial_buffer_len); + + // Calculate the first block without total_length if padding needs 2 block + if (partial_buffer_len > (MH_SHA1_BLOCK_SIZE - 8)) { + MH_SHA1_BLOCK_FUNCTION(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1); + //Padding the second block + memset(partial_buffer, 0, MH_SHA1_BLOCK_SIZE); + } + //Padding the block + len_in_bit = bswap64((uint64_t) total_len * 8); + *(uint64_t *) (partial_buffer + MH_SHA1_BLOCK_SIZE - 8) = len_in_bit; + MH_SHA1_BLOCK_FUNCTION(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1); + + //Calculate multi-hash SHA1 digests (segment digests as input message) + sha1_for_mh_sha1((uint8_t *) mh_sha1_segs_digests, digests, + 4 * SHA1_DIGEST_WORDS * HASH_SEGS); + + return; +} + +int MH_SHA1_FINALIZE_FUNCTION(struct mh_sha1_ctx *ctx, void *mh_sha1_digest) +{ + uint8_t *partial_block_buffer; + uint64_t total_len; + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint8_t *aligned_frame_buffer; + + if (ctx == NULL) + return MH_SHA1_CTX_ERROR_NULL; + + total_len = ctx->total_length; + partial_block_buffer = ctx->partial_block_buffer; + + /* mh_sha1 tail */ + aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer); + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + + MH_SHA1_TAIL_FUNCTION(partial_block_buffer, total_len, mh_sha1_segs_digests, + aligned_frame_buffer, ctx->mh_sha1_digest); + + /* Output the digests of mh_sha1 */ + if (mh_sha1_digest != NULL) { + ((uint32_t *) mh_sha1_digest)[0] = ctx->mh_sha1_digest[0]; + ((uint32_t *) mh_sha1_digest)[1] = ctx->mh_sha1_digest[1]; + ((uint32_t *) mh_sha1_digest)[2] = ctx->mh_sha1_digest[2]; + ((uint32_t *) mh_sha1_digest)[3] = ctx->mh_sha1_digest[3]; + ((uint32_t *) mh_sha1_digest)[4] = ctx->mh_sha1_digest[4]; + } + + return MH_SHA1_CTX_ERROR_NONE; +} + +#ifdef MH_SHA1_FINALIZE_SLVER +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + +// Version info +struct slver mh_sha1_finalize_base_slver_0000027b; +struct slver mh_sha1_finalize_base_slver = { 0x027b, 0x00, 0x00 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h new file mode 100644 index 00000000..e8f226cb --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_internal.h @@ -0,0 +1,323 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MH_SHA1_INTERNAL_H_ +#define _MH_SHA1_INTERNAL_H_ + +/** + * @file mh_sha1_internal.h + * @brief mh_sha1 internal function prototypes and macros + * + * Interface for mh_sha1 internal functions + * + */ +#include <stdint.h> +#include "mh_sha1.h" + +#ifdef __cplusplus + extern "C" { +#endif + +#ifdef _MSC_VER +# define inline __inline +#endif + + // 64byte pointer align +#define ALIGN_64(pointer) ( ((uint64_t)(pointer) + 0x3F)&(~0x3F) ) + + /******************************************************************* + *mh_sha1 constants and macros + ******************************************************************/ + /* mh_sha1 constants */ +#define MH_SHA1_H0 0x67452301UL +#define MH_SHA1_H1 0xefcdab89UL +#define MH_SHA1_H2 0x98badcfeUL +#define MH_SHA1_H3 0x10325476UL +#define MH_SHA1_H4 0xc3d2e1f0UL + +#define K_00_19 0x5a827999UL +#define K_20_39 0x6ed9eba1UL +#define K_40_59 0x8f1bbcdcUL +#define K_60_79 0xca62c1d6UL + + /* mh_sha1 macros */ +#define F1(b,c,d) (d ^ (b & (c ^ d))) +#define F2(b,c,d) (b ^ c ^ d) +#define F3(b,c,d) ((b & c) | (d & (b | c))) +#define F4(b,c,d) (b ^ c ^ d) + +#define rol32(x, r) (((x)<<(r)) ^ ((x)>>(32-(r)))) + +#define bswap(x) (((x)<<24) | (((x)&0xff00)<<8) | (((x)&0xff0000)>>8) | ((x)>>24)) +#define bswap64(x) (((x)<<56) | (((x)&0xff00)<<40) | (((x)&0xff0000)<<24) | \ + (((x)&0xff000000)<<8) | (((x)&0xff00000000ull)>>8) | \ + (((x)&0xff0000000000ull)<<24) | \ + (((x)&0xff000000000000ull)<<40) | \ + (((x)&0xff00000000000000ull)<<56)) + + /******************************************************************* + * SHA1 API internal function prototypes + ******************************************************************/ + + /** + * @brief Performs complete SHA1 algorithm. + * + * @param input Pointer to buffer containing the input message. + * @param digest Pointer to digest to update. + * @param len Length of buffer. + * @returns None + */ + void sha1_for_mh_sha1(const uint8_t * input_data, uint32_t * digest, const uint32_t len); + + /** + * @brief Calculate sha1 digest of blocks which size is SHA1_BLOCK_SIZE + * + * @param data Pointer to data buffer containing the input message. + * @param digest Pointer to sha1 digest. + * @returns None + */ + void sha1_single_for_mh_sha1(const uint8_t * data, uint32_t digest[]); + + /******************************************************************* + * mh_sha1 API internal function prototypes + * Multiple versions of Update and Finalize functions are supplied which use + * multiple versions of block and tail process subfunctions. + ******************************************************************/ + + /** + * @brief Tail process for multi-hash sha1. + * + * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE. + * It will output the final SHA1 digest based on mh_sha1_segs_digests. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha1_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @returns none + * + */ + void mh_sha1_tail(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha1. + * + * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE. + * It will output the final SHA1 digest based on mh_sha1_segs_digests. + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha1_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha1_digest mh_sha1 digest + * @returns none + * + */ + void mh_sha1_tail_base(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha1. + * + * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE. + * It will output the final SHA1 digest based on mh_sha1_segs_digests. + * + * @requires SSE + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha1_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha1_digest mh_sha1 digest + * @returns none + * + */ + void mh_sha1_tail_sse(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha1. + * + * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE. + * It will output the final SHA1 digest based on mh_sha1_segs_digests. + * + * @requires AVX + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha1_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha1_digest mh_sha1 digest + * @returns none + * + */ + void mh_sha1_tail_avx(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha1. + * + * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE. + * It will output the final SHA1 digest based on mh_sha1_segs_digests. + * + * @requires AVX2 + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha1_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha1_digest mh_sha1 digest + * @returns none + * + */ + void mh_sha1_tail_avx2(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + + /** + * @brief Tail process for multi-hash sha1. + * + * Calculate the remainder of input data which is less than MH_SHA1_BLOCK_SIZE. + * It will output the final SHA1 digest based on mh_sha1_segs_digests. + * + * @requires AVX512 + * + * @param partial_buffer Pointer to the start addr of remainder + * @param total_len The total length of all sections of input data. + * @param mh_sha1_segs_digests The digests of all 16 segments . + * @param frame_buffer Pointer to buffer which is a temp working area + * @param mh_sha1_digest mh_sha1 digest + * @returns none + * + */ + void mh_sha1_tail_avx512(uint8_t *partial_buffer, uint32_t total_len, + uint32_t (*mh_sha1_segs_digests)[HASH_SEGS], + uint8_t *frame_buffer, uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]); + + /** + * @brief Calculate mh_sha1 digest of blocks which size is MH_SHA1_BLOCK_SIZE*N. + * + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha1_block(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha1 digest of blocks which size is MH_SHA1_BLOCK_SIZE*N. + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha1_block_base(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha1 digest of blocks which size is MH_SHA1_BLOCK_SIZE*N. + * + * @requires SSE + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha1_block_sse(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha1 digest of blocks which size is MH_SHA1_BLOCK_SIZE*N. + * + * @requires AVX + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha1_block_avx(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha1 digest of blocks which size is MH_SHA1_BLOCK_SIZE*N. + * + * @requires AVX2 + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha1_block_avx2(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); + + /** + * @brief Calculate mh_sha1 digest of blocks which size is MH_SHA1_BLOCK_SIZE*N. + * + * @requires AVX512 + * + * @param input_data Pointer to input data to be processed + * @param digests 16 segments digests + * @param frame_buffer Pointer to buffer which is a temp working area + * @param num_blocks The number of blocks. + * @returns none + * + */ + void mh_sha1_block_avx512(const uint8_t * input_data, uint32_t digests[SHA1_DIGEST_WORDS][HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm new file mode 100644 index 00000000..83c39a31 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_multibinary.asm @@ -0,0 +1,83 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +%ifidn __OUTPUT_FORMAT__, elf64 +%define WRT_OPT wrt ..plt +%else +%define WRT_OPT +%endif + +%include "reg_sizes.asm" +%include "multibinary.asm" + +%ifidn __OUTPUT_FORMAT__, elf32 + [bits 32] +%else + default rel + [bits 64] + + extern mh_sha1_update_sse + extern mh_sha1_update_avx + extern mh_sha1_update_avx2 + extern mh_sha1_finalize_sse + extern mh_sha1_finalize_avx + extern mh_sha1_finalize_avx2 + + %ifdef HAVE_AS_KNOWS_AVX512 + extern mh_sha1_update_avx512 + extern mh_sha1_finalize_avx512 + %endif + +%endif + +extern mh_sha1_update_base +extern mh_sha1_finalize_base + +mbin_interface mh_sha1_update +mbin_interface mh_sha1_finalize + +%ifidn __OUTPUT_FORMAT__, elf64 + + %ifdef HAVE_AS_KNOWS_AVX512 + mbin_dispatch_init6 mh_sha1_update, mh_sha1_update_base, mh_sha1_update_sse, mh_sha1_update_avx, mh_sha1_update_avx2, mh_sha1_update_avx512 + mbin_dispatch_init6 mh_sha1_finalize, mh_sha1_finalize_base, mh_sha1_finalize_sse, mh_sha1_finalize_avx, mh_sha1_finalize_avx2, mh_sha1_finalize_avx512 + %else + mbin_dispatch_init5 mh_sha1_update, mh_sha1_update_base, mh_sha1_update_sse, mh_sha1_update_avx, mh_sha1_update_avx2 + mbin_dispatch_init5 mh_sha1_finalize, mh_sha1_finalize_base, mh_sha1_finalize_sse, mh_sha1_finalize_avx, mh_sha1_finalize_avx2 + %endif + +%else + mbin_dispatch_init2 mh_sha1_update, mh_sha1_update_base + mbin_dispatch_init2 mh_sha1_finalize, mh_sha1_finalize_base +%endif + +;;; func core, ver, snum +slversion mh_sha1_update, 00, 02, 0272 +slversion mh_sha1_finalize, 00, 02, 0273 diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c new file mode 100644 index 00000000..72b2a523 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_perf.c @@ -0,0 +1,180 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1.h" +#include "test.h" + +//#define CACHED_TEST +#ifdef CACHED_TEST +// Loop many times over same +# define TEST_LEN 16*1024 +# define TEST_LOOPS 20000 +# define TEST_TYPE_STR "_warm" +#else +// Uncached test. Pull from large mem base. +# define TEST_LEN 32*1024*1024 +# define TEST_LOOPS 100 +# define TEST_TYPE_STR "_cold" +#endif + +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif +#define TEST_MEM TEST_LEN + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_CTX_ERROR_NONE){ \ + printf("The mh_sha1 function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + + return mh_sha1_fail; +} + +int main(int argc, char *argv[]) +{ + int i, fail = 0; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_base[SHA1_DIGEST_WORDS]; + uint8_t *buff = NULL; + struct mh_sha1_ctx *update_ctx_test = NULL, *update_ctx_base = NULL; + struct perf start, stop; + + printf(xstr(TEST_UPDATE_FUNCTION) "_perf:\n"); + + buff = malloc(TEST_LEN); + update_ctx_test = malloc(sizeof(*update_ctx_test)); + update_ctx_base = malloc(sizeof(*update_ctx_base)); + + if (buff == NULL || update_ctx_base == NULL || update_ctx_test == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + // mh_sha1 base version + mh_sha1_init(update_ctx_base); + mh_sha1_update_base(update_ctx_base, buff, TEST_LEN); + mh_sha1_finalize_base(update_ctx_base, hash_base); + + perf_start(&start); + for (i = 0; i < TEST_LOOPS / 10; i++) { + mh_sha1_init(update_ctx_base); + mh_sha1_update_base(update_ctx_base, buff, TEST_LEN); + mh_sha1_finalize_base(update_ctx_base, hash_base); + } + perf_stop(&stop); + printf("mh_sha1_update_base" TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_MEM * i); + + //Update feature test + CHECK_RETURN(mh_sha1_init(update_ctx_test)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test)); + + perf_start(&start); + for (i = 0; i < TEST_LOOPS; i++) { + CHECK_RETURN(mh_sha1_init(update_ctx_test)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx_test, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx_test, hash_test)); + } + perf_stop(&stop); + printf(xstr(TEST_UPDATE_FUNCTION) TEST_TYPE_STR ": "); + perf_print(stop, start, (long long)TEST_MEM * i); + + // Check results + fail = compare_digests(hash_base, hash_test); + + if (fail) { + printf("Fail size=%d\n", TEST_LEN); + return -1; + } + + if (fail) + printf("Test failed function test%d\n", fail); + else + printf("Pass func check\n"); + + return fail; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c new file mode 100644 index 00000000..fee928a9 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_ref.c @@ -0,0 +1,438 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <string.h> +#include "mh_sha1_internal.h" + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + // Macros and sub-functions which already exist in source code file + // (sha1_for_mh_sha1.c) is part of ISA-L library as internal functions. + // The reason why writing them twice is the linking issue caused by + // mh_sha1_ref(). mh_sha1_ref() needs these macros and sub-functions + // without linking ISA-L library. So mh_sha1_ref() includes them in + // order to contain essential sub-functions in its own object file. +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +#define W(x) w[(x) & 15] + +#define step00_19(i,a,b,c,d,e) \ + if (i>15) W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + else W(i) = bswap(ww[i]); \ + e += rol32(a,5) + F1(b,c,d) + 0x5A827999 + W(i); \ + b = rol32(b,30) + +#define step20_39(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F2(b,c,d) + 0x6ED9EBA1 + W(i); \ + b = rol32(b,30) + +#define step40_59(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F3(b,c,d) + 0x8F1BBCDC + W(i); \ + b = rol32(b,30) + +#define step60_79(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F4(b,c,d) + 0xCA62C1D6 + W(i); \ + b = rol32(b,30) + +void sha1_single_for_mh_sha1_ref(const uint8_t * data, uint32_t digest[]) +{ + uint32_t a, b, c, d, e; + uint32_t w[16] = { 0 }; + uint32_t *ww = (uint32_t *) data; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + + step00_19(0, a, b, c, d, e); + step00_19(1, e, a, b, c, d); + step00_19(2, d, e, a, b, c); + step00_19(3, c, d, e, a, b); + step00_19(4, b, c, d, e, a); + step00_19(5, a, b, c, d, e); + step00_19(6, e, a, b, c, d); + step00_19(7, d, e, a, b, c); + step00_19(8, c, d, e, a, b); + step00_19(9, b, c, d, e, a); + step00_19(10, a, b, c, d, e); + step00_19(11, e, a, b, c, d); + step00_19(12, d, e, a, b, c); + step00_19(13, c, d, e, a, b); + step00_19(14, b, c, d, e, a); + step00_19(15, a, b, c, d, e); + step00_19(16, e, a, b, c, d); + step00_19(17, d, e, a, b, c); + step00_19(18, c, d, e, a, b); + step00_19(19, b, c, d, e, a); + + step20_39(20, a, b, c, d, e); + step20_39(21, e, a, b, c, d); + step20_39(22, d, e, a, b, c); + step20_39(23, c, d, e, a, b); + step20_39(24, b, c, d, e, a); + step20_39(25, a, b, c, d, e); + step20_39(26, e, a, b, c, d); + step20_39(27, d, e, a, b, c); + step20_39(28, c, d, e, a, b); + step20_39(29, b, c, d, e, a); + step20_39(30, a, b, c, d, e); + step20_39(31, e, a, b, c, d); + step20_39(32, d, e, a, b, c); + step20_39(33, c, d, e, a, b); + step20_39(34, b, c, d, e, a); + step20_39(35, a, b, c, d, e); + step20_39(36, e, a, b, c, d); + step20_39(37, d, e, a, b, c); + step20_39(38, c, d, e, a, b); + step20_39(39, b, c, d, e, a); + + step40_59(40, a, b, c, d, e); + step40_59(41, e, a, b, c, d); + step40_59(42, d, e, a, b, c); + step40_59(43, c, d, e, a, b); + step40_59(44, b, c, d, e, a); + step40_59(45, a, b, c, d, e); + step40_59(46, e, a, b, c, d); + step40_59(47, d, e, a, b, c); + step40_59(48, c, d, e, a, b); + step40_59(49, b, c, d, e, a); + step40_59(50, a, b, c, d, e); + step40_59(51, e, a, b, c, d); + step40_59(52, d, e, a, b, c); + step40_59(53, c, d, e, a, b); + step40_59(54, b, c, d, e, a); + step40_59(55, a, b, c, d, e); + step40_59(56, e, a, b, c, d); + step40_59(57, d, e, a, b, c); + step40_59(58, c, d, e, a, b); + step40_59(59, b, c, d, e, a); + + step60_79(60, a, b, c, d, e); + step60_79(61, e, a, b, c, d); + step60_79(62, d, e, a, b, c); + step60_79(63, c, d, e, a, b); + step60_79(64, b, c, d, e, a); + step60_79(65, a, b, c, d, e); + step60_79(66, e, a, b, c, d); + step60_79(67, d, e, a, b, c); + step60_79(68, c, d, e, a, b); + step60_79(69, b, c, d, e, a); + step60_79(70, a, b, c, d, e); + step60_79(71, e, a, b, c, d); + step60_79(72, d, e, a, b, c); + step60_79(73, c, d, e, a, b); + step60_79(74, b, c, d, e, a); + step60_79(75, a, b, c, d, e); + step60_79(76, e, a, b, c, d); + step60_79(77, d, e, a, b, c); + step60_79(78, c, d, e, a, b); + step60_79(79, b, c, d, e, a); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; +} + +void sha1_for_mh_sha1_ref(const uint8_t * input_data, uint32_t * digest, const uint32_t len) +{ + uint32_t i, j; + uint8_t buf[2 * SHA1_BLOCK_SIZE]; + union { + uint64_t uint; + uint8_t uchar[8]; + } convert; + uint8_t *p; + + digest[0] = MH_SHA1_H0; + digest[1] = MH_SHA1_H1; + digest[2] = MH_SHA1_H2; + digest[3] = MH_SHA1_H3; + digest[4] = MH_SHA1_H4; + + i = len; + while (i >= SHA1_BLOCK_SIZE) { + sha1_single_for_mh_sha1_ref(input_data, digest); + input_data += SHA1_BLOCK_SIZE; + i -= SHA1_BLOCK_SIZE; + } + + memcpy(buf, input_data, i); + buf[i++] = 0x80; + for (j = i; j < ((2 * SHA1_BLOCK_SIZE) - 8); j++) + buf[j] = 0; + + if (i > SHA1_BLOCK_SIZE - 8) + i = 2 * SHA1_BLOCK_SIZE; + else + i = SHA1_BLOCK_SIZE; + + convert.uint = 8 * len; + p = buf + i - 8; + p[0] = convert.uchar[7]; + p[1] = convert.uchar[6]; + p[2] = convert.uchar[5]; + p[3] = convert.uchar[4]; + p[4] = convert.uchar[3]; + p[5] = convert.uchar[2]; + p[6] = convert.uchar[1]; + p[7] = convert.uchar[0]; + + sha1_single_for_mh_sha1_ref(buf, digest); + if (i == (2 * SHA1_BLOCK_SIZE)) + sha1_single_for_mh_sha1_ref(buf + SHA1_BLOCK_SIZE, digest); +} + +/* + * buffer to rearrange one segment data from one block. + * + * Layout of new_data: + * segment + * ------------------------- + * w0 | w1 | ... | w15 + * + */ +static inline void transform_input_single(uint32_t * new_data, uint32_t * input, + uint32_t segment) +{ + new_data[16 * segment + 0] = input[16 * 0 + segment]; + new_data[16 * segment + 1] = input[16 * 1 + segment]; + new_data[16 * segment + 2] = input[16 * 2 + segment]; + new_data[16 * segment + 3] = input[16 * 3 + segment]; + new_data[16 * segment + 4] = input[16 * 4 + segment]; + new_data[16 * segment + 5] = input[16 * 5 + segment]; + new_data[16 * segment + 6] = input[16 * 6 + segment]; + new_data[16 * segment + 7] = input[16 * 7 + segment]; + new_data[16 * segment + 8] = input[16 * 8 + segment]; + new_data[16 * segment + 9] = input[16 * 9 + segment]; + new_data[16 * segment + 10] = input[16 * 10 + segment]; + new_data[16 * segment + 11] = input[16 * 11 + segment]; + new_data[16 * segment + 12] = input[16 * 12 + segment]; + new_data[16 * segment + 13] = input[16 * 13 + segment]; + new_data[16 * segment + 14] = input[16 * 14 + segment]; + new_data[16 * segment + 15] = input[16 * 15 + segment]; +} + +// Adapt parameters to sha1_single_for_mh_sha1_ref +#define sha1_update_one_seg(data, digest) \ + sha1_single_for_mh_sha1_ref((const uint8_t *)(data), (uint32_t *)(digest)) + +/* + * buffer to Rearrange all segments data from one block. + * + * Layout of new_data: + * segment + * ------------------------- + * seg0: | w0 | w1 | ... | w15 + * seg1: | w0 | w1 | ... | w15 + * seg2: | w0 | w1 | ... | w15 + * .... + * seg15: | w0 | w1 | ... | w15 + * + */ +static inline void transform_input(uint32_t * new_data, uint32_t * input, uint32_t block) +{ + uint32_t *current_input = input + block * MH_SHA1_BLOCK_SIZE / 4; + + transform_input_single(new_data, current_input, 0); + transform_input_single(new_data, current_input, 1); + transform_input_single(new_data, current_input, 2); + transform_input_single(new_data, current_input, 3); + transform_input_single(new_data, current_input, 4); + transform_input_single(new_data, current_input, 5); + transform_input_single(new_data, current_input, 6); + transform_input_single(new_data, current_input, 7); + transform_input_single(new_data, current_input, 8); + transform_input_single(new_data, current_input, 9); + transform_input_single(new_data, current_input, 10); + transform_input_single(new_data, current_input, 11); + transform_input_single(new_data, current_input, 12); + transform_input_single(new_data, current_input, 13); + transform_input_single(new_data, current_input, 14); + transform_input_single(new_data, current_input, 15); + +} + +/* + * buffer to Calculate all segments' digests from one block. + * + * Layout of seg_digest: + * segment + * ------------------------- + * seg0: | H0 | H1 | ... | H4 + * seg1: | H0 | H1 | ... | H4 + * seg2: | H0 | H1 | ... | H4 + * .... + * seg15: | H0 | H1 | ... | H4 + * + */ +static inline void sha1_update_all_segs(uint32_t * new_data, + uint32_t(*mh_sha1_seg_digests)[SHA1_DIGEST_WORDS]) +{ + sha1_update_one_seg(&(new_data)[16 * 0], mh_sha1_seg_digests[0]); + sha1_update_one_seg(&(new_data)[16 * 1], mh_sha1_seg_digests[1]); + sha1_update_one_seg(&(new_data)[16 * 2], mh_sha1_seg_digests[2]); + sha1_update_one_seg(&(new_data)[16 * 3], mh_sha1_seg_digests[3]); + sha1_update_one_seg(&(new_data)[16 * 4], mh_sha1_seg_digests[4]); + sha1_update_one_seg(&(new_data)[16 * 5], mh_sha1_seg_digests[5]); + sha1_update_one_seg(&(new_data)[16 * 6], mh_sha1_seg_digests[6]); + sha1_update_one_seg(&(new_data)[16 * 7], mh_sha1_seg_digests[7]); + sha1_update_one_seg(&(new_data)[16 * 8], mh_sha1_seg_digests[8]); + sha1_update_one_seg(&(new_data)[16 * 9], mh_sha1_seg_digests[9]); + sha1_update_one_seg(&(new_data)[16 * 10], mh_sha1_seg_digests[10]); + sha1_update_one_seg(&(new_data)[16 * 11], mh_sha1_seg_digests[11]); + sha1_update_one_seg(&(new_data)[16 * 12], mh_sha1_seg_digests[12]); + sha1_update_one_seg(&(new_data)[16 * 13], mh_sha1_seg_digests[13]); + sha1_update_one_seg(&(new_data)[16 * 14], mh_sha1_seg_digests[14]); + sha1_update_one_seg(&(new_data)[16 * 15], mh_sha1_seg_digests[15]); +} + +void mh_sha1_block_ref(const uint8_t * input_data, uint32_t(*digests)[HASH_SEGS], + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE], uint32_t num_blocks) +{ + uint32_t i, j; + uint32_t *temp_buffer = (uint32_t *) frame_buffer; + uint32_t(*trans_digests)[SHA1_DIGEST_WORDS]; + + trans_digests = (uint32_t(*)[SHA1_DIGEST_WORDS]) digests; + + // Re-structure seg_digests from 5*16 to 16*5 + for (j = 0; j < HASH_SEGS; j++) { + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + temp_buffer[j * SHA1_DIGEST_WORDS + i] = digests[i][j]; + } + } + memcpy(trans_digests, temp_buffer, 4 * SHA1_DIGEST_WORDS * HASH_SEGS); + + // Calculate digests for all segments, leveraging sha1 API + for (i = 0; i < num_blocks; i++) { + transform_input(temp_buffer, (uint32_t *) input_data, i); + sha1_update_all_segs(temp_buffer, trans_digests); + } + + // Re-structure seg_digests from 16*5 to 5*16 + for (j = 0; j < HASH_SEGS; j++) { + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + temp_buffer[i * HASH_SEGS + j] = trans_digests[j][i]; + } + } + memcpy(digests, temp_buffer, 4 * SHA1_DIGEST_WORDS * HASH_SEGS); + + return; +} + +void mh_sha1_tail_ref(uint8_t * partial_buffer, uint32_t total_len, + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS], uint8_t * frame_buffer, + uint32_t digests[SHA1_DIGEST_WORDS]) +{ + uint64_t partial_buffer_len, len_in_bit; + + partial_buffer_len = total_len % MH_SHA1_BLOCK_SIZE; + + // Padding the first block + partial_buffer[partial_buffer_len] = 0x80; + partial_buffer_len++; + memset(partial_buffer + partial_buffer_len, 0, + MH_SHA1_BLOCK_SIZE - partial_buffer_len); + + // Calculate the first block without total_length if padding needs 2 block + if (partial_buffer_len > (MH_SHA1_BLOCK_SIZE - 8)) { + mh_sha1_block_ref(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1); + //Padding the second block + memset(partial_buffer, 0, MH_SHA1_BLOCK_SIZE); + } + //Padding the block + len_in_bit = bswap64((uint64_t) total_len * 8); + *(uint64_t *) (partial_buffer + MH_SHA1_BLOCK_SIZE - 8) = len_in_bit; + mh_sha1_block_ref(partial_buffer, mh_sha1_segs_digests, frame_buffer, 1); + + //Calculate multi-hash SHA1 digests (segment digests as input message) + sha1_for_mh_sha1_ref((uint8_t *) mh_sha1_segs_digests, digests, + 4 * SHA1_DIGEST_WORDS * HASH_SEGS); + + return; +} + +void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest) +{ + uint64_t total_len; + uint64_t num_blocks; + uint32_t mh_sha1_segs_digests[SHA1_DIGEST_WORDS][HASH_SEGS]; + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE]; + uint8_t partial_block_buffer[MH_SHA1_BLOCK_SIZE * 2]; + uint32_t mh_sha1_hash_dword[SHA1_DIGEST_WORDS]; + uint32_t i; + const uint8_t *input_data = (const uint8_t *)buffer; + + /* Initialize digests of all segments */ + for (i = 0; i < HASH_SEGS; i++) { + mh_sha1_segs_digests[0][i] = MH_SHA1_H0; + mh_sha1_segs_digests[1][i] = MH_SHA1_H1; + mh_sha1_segs_digests[2][i] = MH_SHA1_H2; + mh_sha1_segs_digests[3][i] = MH_SHA1_H3; + mh_sha1_segs_digests[4][i] = MH_SHA1_H4; + } + + total_len = len; + + // Calculate blocks + num_blocks = len / MH_SHA1_BLOCK_SIZE; + if (num_blocks > 0) { + //do num_blocks process + mh_sha1_block_ref(input_data, mh_sha1_segs_digests, frame_buffer, num_blocks); + len -= num_blocks * MH_SHA1_BLOCK_SIZE; + input_data += num_blocks * MH_SHA1_BLOCK_SIZE; + } + // Store the partial block + if (len != 0) { + memcpy(partial_block_buffer, input_data, len); + } + + /* Finalize */ + mh_sha1_tail_ref(partial_block_buffer, total_len, mh_sha1_segs_digests, + frame_buffer, mh_sha1_hash_dword); + + // Output the digests of mh_sha1 + if (mh_sha1_digest != NULL) { + mh_sha1_digest[0] = mh_sha1_hash_dword[0]; + mh_sha1_digest[1] = mh_sha1_hash_dword[1]; + mh_sha1_digest[2] = mh_sha1_hash_dword[2]; + mh_sha1_digest[3] = mh_sha1_hash_dword[3]; + mh_sha1_digest[4] = mh_sha1_hash_dword[4]; + } + + return; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c new file mode 100644 index 00000000..0279cd70 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_test.c @@ -0,0 +1,217 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1.h" + +#define TEST_LEN 16*1024 +#define TEST_SIZE 8*1024 +#define TEST_MEM TEST_LEN +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_CTX_ERROR_NONE){ \ + printf("The mh_sha1 function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest); +#define MH_SHA1_REF mh_sha1_ref + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + + return mh_sha1_fail; +} + +int main(int argc, char *argv[]) +{ + int fail = 0; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_ref[SHA1_DIGEST_WORDS]; + uint8_t *buff = NULL; + int size, offset; + struct mh_sha1_ctx *update_ctx = NULL; + + printf(xstr(TEST_UPDATE_FUNCTION) "_test:\n"); + + srand(TEST_SEED); + + buff = malloc(TEST_LEN); + update_ctx = malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + MH_SHA1_REF(buff, TEST_LEN, hash_ref); + CHECK_RETURN(mh_sha1_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("fail rand1 test\n"); + return -1; + } else + putchar('.'); + + // Test various size messages + for (size = TEST_LEN; size >= 0; size--) { + + // Fill with rand data + rand_buffer(buff, size); + + MH_SHA1_REF(buff, size, hash_ref); + CHECK_RETURN(mh_sha1_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size=%d\n", size); + return -1; + } + + if ((size & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // Test various buffer offsets and sizes + printf("offset tests"); + for (size = TEST_LEN - 256; size > 256; size -= 11) { + for (offset = 0; offset < 256; offset++) { + MH_SHA1_REF(buff + offset, size, hash_ref); + + CHECK_RETURN(mh_sha1_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size=%d\n", size); + return -1; + } + + } + if ((size & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + // Run efence tests + printf("efence tests"); + for (size = TEST_SIZE; size > 0; size--) { + offset = TEST_LEN - size; + + MH_SHA1_REF(buff + offset, size, hash_ref); + + CHECK_RETURN(mh_sha1_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size=%d\n", size); + return -1; + } + + if ((size & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + printf(xstr(TEST_UPDATE_FUNCTION) "_test:"); + printf(" %s\n", fail == 0 ? "Pass" : "Fail"); + + return fail; +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c new file mode 100644 index 00000000..8879b287 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_base.c @@ -0,0 +1,110 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/* + * mh_sha1_update_base.c contains the prototype of mh_sha1_update_XXX. + * Default definitions are base type which generates mh_sha1_update_base. + * Other types are generated through different predefined macros by mh_sha1.c. + */ +#ifndef MH_SHA1_UPDATE_FUNCTION +#include "mh_sha1_internal.h" +#include <string.h> + +#define MH_SHA1_UPDATE_FUNCTION mh_sha1_update_base +#define MH_SHA1_BLOCK_FUNCTION mh_sha1_block_base +#define MH_SHA1_UPDATE_SLVER +#endif + +int MH_SHA1_UPDATE_FUNCTION(struct mh_sha1_ctx *ctx, const void *buffer, uint32_t len) +{ + + uint8_t *partial_block_buffer; + uint64_t partial_block_len; + uint64_t num_blocks; + uint32_t(*mh_sha1_segs_digests)[HASH_SEGS]; + uint8_t *aligned_frame_buffer; + const uint8_t *input_data = (const uint8_t *)buffer; + + if (ctx == NULL) + return MH_SHA1_CTX_ERROR_NULL; + + if (len == 0) + return MH_SHA1_CTX_ERROR_NONE; + + partial_block_len = ctx->total_length % MH_SHA1_BLOCK_SIZE; + partial_block_buffer = ctx->partial_block_buffer; + aligned_frame_buffer = (uint8_t *) ALIGN_64(ctx->frame_buffer); + mh_sha1_segs_digests = (uint32_t(*)[HASH_SEGS]) ctx->mh_sha1_interim_digests; + + ctx->total_length += len; + // No enough input data for mh_sha1 calculation + if (len + partial_block_len < MH_SHA1_BLOCK_SIZE) { + memcpy(partial_block_buffer + partial_block_len, input_data, len); + return MH_SHA1_CTX_ERROR_NONE; + } + // mh_sha1 calculation for the previous partial block + if (partial_block_len != 0) { + memcpy(partial_block_buffer + partial_block_len, input_data, + MH_SHA1_BLOCK_SIZE - partial_block_len); + //do one_block process + MH_SHA1_BLOCK_FUNCTION(partial_block_buffer, mh_sha1_segs_digests, + aligned_frame_buffer, 1); + input_data += MH_SHA1_BLOCK_SIZE - partial_block_len; + len -= MH_SHA1_BLOCK_SIZE - partial_block_len; + memset(partial_block_buffer, 0, MH_SHA1_BLOCK_SIZE); + } + // Calculate mh_sha1 for the current blocks + num_blocks = len / MH_SHA1_BLOCK_SIZE; + if (num_blocks > 0) { + //do num_blocks process + MH_SHA1_BLOCK_FUNCTION(input_data, mh_sha1_segs_digests, aligned_frame_buffer, + num_blocks); + len -= num_blocks * MH_SHA1_BLOCK_SIZE; + input_data += num_blocks * MH_SHA1_BLOCK_SIZE; + } + // Store the partial block + if (len != 0) { + memcpy(partial_block_buffer, input_data, len); + } + + return MH_SHA1_CTX_ERROR_NONE; + +} + +#ifdef MH_SHA1_UPDATE_SLVER +struct slver { + uint16_t snum; + uint8_t ver; + uint8_t core; +}; + + // Version info +struct slver mh_sha1_update_base_slver_0000027a; +struct slver mh_sha1_update_base_slver = { 0x027a, 0x00, 0x00 }; +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c new file mode 100644 index 00000000..2b1b7917 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/mh_sha1_update_test.c @@ -0,0 +1,240 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include "mh_sha1.h" + +#define TEST_LEN 16*1024 +#define TEST_SIZE 8*1024 +#define TEST_MEM TEST_LEN +#ifndef TEST_SEED +# define TEST_SEED 0x1234 +#endif + +#define str(s) #s +#define xstr(s) str(s) + +#define _FUNC_TOKEN(func, type) func##type +#define FUNC_TOKEN(func, type) _FUNC_TOKEN(func, type) + +#ifndef MH_SHA1_FUNC_TYPE +#define MH_SHA1_FUNC_TYPE +#endif + +#define TEST_UPDATE_FUNCTION FUNC_TOKEN(mh_sha1_update, MH_SHA1_FUNC_TYPE) +#define TEST_FINAL_FUNCTION FUNC_TOKEN(mh_sha1_finalize, MH_SHA1_FUNC_TYPE) + +#define CHECK_RETURN(state) do{ \ + if((state) != MH_SHA1_CTX_ERROR_NONE){ \ + printf("The mh_sha1 function is failed.\n"); \ + return 1; \ + } \ + }while(0) + +extern void mh_sha1_ref(const void *buffer, uint32_t len, uint32_t * mh_sha1_digest); + +// Generates pseudo-random data +void rand_buffer(uint8_t * buf, long buffer_size) +{ + long i; + for (i = 0; i < buffer_size; i++) + buf[i] = rand(); +} + +void dump(char *buf, int len) +{ + int i; + for (i = 0; i < len;) { + printf(" %2x", 0xff & buf[i++]); + if (i % 20 == 0) + printf("\n"); + } + if (i % 20 != 0) + printf("\n"); +} + +int compare_digests(uint32_t hash_base[SHA1_DIGEST_WORDS], + uint32_t hash_test[SHA1_DIGEST_WORDS]) +{ + int i; + int mh_sha1_fail = 0; + + for (i = 0; i < SHA1_DIGEST_WORDS; i++) { + if (hash_test[i] != hash_base[i]) + mh_sha1_fail++; + } + + if (mh_sha1_fail) { + printf("mh_sha1 fail test\n"); + printf("base: "); + dump((char *)hash_base, 20); + printf("ref: "); + dump((char *)hash_test, 20); + } + + return mh_sha1_fail; +} + +int main(int argc, char *argv[]) +{ + int fail = 0, i; + uint32_t hash_test[SHA1_DIGEST_WORDS], hash_ref[SHA1_DIGEST_WORDS]; + uint8_t *buff = NULL; + int update_count; + int size1, size2, offset, addr_offset; + struct mh_sha1_ctx *update_ctx = NULL; + uint8_t *mem_addr = NULL; + + printf(xstr(TEST_UPDATE_FUNCTION) "_test:"); + + srand(TEST_SEED); + + buff = malloc(TEST_LEN); + update_ctx = malloc(sizeof(*update_ctx)); + + if (buff == NULL || update_ctx == NULL) { + printf("malloc failed test aborted\n"); + return -1; + } + // Rand test1 + rand_buffer(buff, TEST_LEN); + + mh_sha1_ref(buff, TEST_LEN, hash_ref); + + CHECK_RETURN(mh_sha1_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("fail rand1 test\n"); + return -1; + } else + putchar('.'); + + // Test various size messages by update twice. + printf("\n various size messages by update twice tests"); + for (size1 = TEST_LEN; size1 >= 0; size1--) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_ref(buff, TEST_LEN, hash_ref); + + // subsequent update + size2 = TEST_LEN - size1; // size2 is different with the former + CHECK_RETURN(mh_sha1_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, size1)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + size1, size2)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size1=%d\n", size1); + return -1; + } + + if ((size2 & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // Test various update count + printf("\n various update count tests"); + for (update_count = 1; update_count <= TEST_LEN; update_count++) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_ref(buff, TEST_LEN, hash_ref); + + // subsequent update + size1 = TEST_LEN / update_count; + size2 = TEST_LEN - size1 * (update_count - 1); // size2 is different with the former + + CHECK_RETURN(mh_sha1_init(update_ctx)); + for (i = 1, offset = 0; i < update_count; i++) { + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size1)); + offset += size1; + } + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff + offset, size2)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail size1=%d\n", size1); + return -1; + } + + if ((size2 & 0xff) == 0) { + putchar('.'); + fflush(0); + } + } + + // test various start address of ctx. + printf("\n various start address of ctx test"); + free(update_ctx); + mem_addr = (uint8_t *) malloc(sizeof(*update_ctx) + AVX512_ALIGNED * 10); + for (addr_offset = AVX512_ALIGNED * 10; addr_offset >= 0; addr_offset--) { + + // Fill with rand data + rand_buffer(buff, TEST_LEN); + + mh_sha1_ref(buff, TEST_LEN, hash_ref); + + // a unaligned offset + update_ctx = (struct mh_sha1_ctx *)(mem_addr + addr_offset); + CHECK_RETURN(mh_sha1_init(update_ctx)); + CHECK_RETURN(TEST_UPDATE_FUNCTION(update_ctx, buff, TEST_LEN)); + CHECK_RETURN(TEST_FINAL_FUNCTION(update_ctx, hash_test)); + + fail = compare_digests(hash_ref, hash_test); + + if (fail) { + printf("Fail addr_offset=%d\n", addr_offset); + return -1; + } + + if ((addr_offset & 0xf) == 0) { + putchar('.'); + fflush(0); + } + } + + printf("\n" xstr(TEST_UPDATE_FUNCTION) "_test: %s\n", fail == 0 ? "Pass" : "Fail"); + + return fail; + +} diff --git a/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c b/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c new file mode 100644 index 00000000..697155b5 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/mh_sha1/sha1_for_mh_sha1.c @@ -0,0 +1,212 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#include "mh_sha1_internal.h" +#include <string.h> + +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// +// Reference SHA1 Functions for mh_sha1 +//////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////// + +#define W(x) w[(x) & 15] + +#define step00_19(i,a,b,c,d,e) \ + if (i>15) W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + else W(i) = bswap(ww[i]); \ + e += rol32(a,5) + F1(b,c,d) + 0x5A827999 + W(i); \ + b = rol32(b,30) + +#define step20_39(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F2(b,c,d) + 0x6ED9EBA1 + W(i); \ + b = rol32(b,30) + +#define step40_59(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F3(b,c,d) + 0x8F1BBCDC + W(i); \ + b = rol32(b,30) + +#define step60_79(i,a,b,c,d,e) \ + W(i) = rol32(W(i-3)^W(i-8)^W(i-14)^W(i-16), 1); \ + e += rol32(a,5) + F4(b,c,d) + 0xCA62C1D6 + W(i); \ + b = rol32(b,30) + +void sha1_single_for_mh_sha1(const uint8_t * data, uint32_t digest[]) +{ + uint32_t a, b, c, d, e; + uint32_t w[16] = { 0 }; + uint32_t *ww = (uint32_t *) data; + + a = digest[0]; + b = digest[1]; + c = digest[2]; + d = digest[3]; + e = digest[4]; + + step00_19(0, a, b, c, d, e); + step00_19(1, e, a, b, c, d); + step00_19(2, d, e, a, b, c); + step00_19(3, c, d, e, a, b); + step00_19(4, b, c, d, e, a); + step00_19(5, a, b, c, d, e); + step00_19(6, e, a, b, c, d); + step00_19(7, d, e, a, b, c); + step00_19(8, c, d, e, a, b); + step00_19(9, b, c, d, e, a); + step00_19(10, a, b, c, d, e); + step00_19(11, e, a, b, c, d); + step00_19(12, d, e, a, b, c); + step00_19(13, c, d, e, a, b); + step00_19(14, b, c, d, e, a); + step00_19(15, a, b, c, d, e); + step00_19(16, e, a, b, c, d); + step00_19(17, d, e, a, b, c); + step00_19(18, c, d, e, a, b); + step00_19(19, b, c, d, e, a); + + step20_39(20, a, b, c, d, e); + step20_39(21, e, a, b, c, d); + step20_39(22, d, e, a, b, c); + step20_39(23, c, d, e, a, b); + step20_39(24, b, c, d, e, a); + step20_39(25, a, b, c, d, e); + step20_39(26, e, a, b, c, d); + step20_39(27, d, e, a, b, c); + step20_39(28, c, d, e, a, b); + step20_39(29, b, c, d, e, a); + step20_39(30, a, b, c, d, e); + step20_39(31, e, a, b, c, d); + step20_39(32, d, e, a, b, c); + step20_39(33, c, d, e, a, b); + step20_39(34, b, c, d, e, a); + step20_39(35, a, b, c, d, e); + step20_39(36, e, a, b, c, d); + step20_39(37, d, e, a, b, c); + step20_39(38, c, d, e, a, b); + step20_39(39, b, c, d, e, a); + + step40_59(40, a, b, c, d, e); + step40_59(41, e, a, b, c, d); + step40_59(42, d, e, a, b, c); + step40_59(43, c, d, e, a, b); + step40_59(44, b, c, d, e, a); + step40_59(45, a, b, c, d, e); + step40_59(46, e, a, b, c, d); + step40_59(47, d, e, a, b, c); + step40_59(48, c, d, e, a, b); + step40_59(49, b, c, d, e, a); + step40_59(50, a, b, c, d, e); + step40_59(51, e, a, b, c, d); + step40_59(52, d, e, a, b, c); + step40_59(53, c, d, e, a, b); + step40_59(54, b, c, d, e, a); + step40_59(55, a, b, c, d, e); + step40_59(56, e, a, b, c, d); + step40_59(57, d, e, a, b, c); + step40_59(58, c, d, e, a, b); + step40_59(59, b, c, d, e, a); + + step60_79(60, a, b, c, d, e); + step60_79(61, e, a, b, c, d); + step60_79(62, d, e, a, b, c); + step60_79(63, c, d, e, a, b); + step60_79(64, b, c, d, e, a); + step60_79(65, a, b, c, d, e); + step60_79(66, e, a, b, c, d); + step60_79(67, d, e, a, b, c); + step60_79(68, c, d, e, a, b); + step60_79(69, b, c, d, e, a); + step60_79(70, a, b, c, d, e); + step60_79(71, e, a, b, c, d); + step60_79(72, d, e, a, b, c); + step60_79(73, c, d, e, a, b); + step60_79(74, b, c, d, e, a); + step60_79(75, a, b, c, d, e); + step60_79(76, e, a, b, c, d); + step60_79(77, d, e, a, b, c); + step60_79(78, c, d, e, a, b); + step60_79(79, b, c, d, e, a); + + digest[0] += a; + digest[1] += b; + digest[2] += c; + digest[3] += d; + digest[4] += e; +} + +void sha1_for_mh_sha1(const uint8_t * input_data, uint32_t * digest, const uint32_t len) +{ + uint32_t i, j; + uint8_t buf[2 * SHA1_BLOCK_SIZE]; + union { + uint64_t uint; + uint8_t uchar[8]; + } convert; + uint8_t *p; + + digest[0] = MH_SHA1_H0; + digest[1] = MH_SHA1_H1; + digest[2] = MH_SHA1_H2; + digest[3] = MH_SHA1_H3; + digest[4] = MH_SHA1_H4; + + i = len; + while (i >= SHA1_BLOCK_SIZE) { + sha1_single_for_mh_sha1(input_data, digest); + input_data += SHA1_BLOCK_SIZE; + i -= SHA1_BLOCK_SIZE; + } + + memcpy(buf, input_data, i); + buf[i++] = 0x80; + for (j = i; j < ((2 * SHA1_BLOCK_SIZE) - 8); j++) + buf[j] = 0; + + if (i > SHA1_BLOCK_SIZE - 8) + i = 2 * SHA1_BLOCK_SIZE; + else + i = SHA1_BLOCK_SIZE; + + convert.uint = 8 * len; + p = buf + i - 8; + p[0] = convert.uchar[7]; + p[1] = convert.uchar[6]; + p[2] = convert.uchar[5]; + p[3] = convert.uchar[4]; + p[4] = convert.uchar[3]; + p[5] = convert.uchar[2]; + p[6] = convert.uchar[1]; + p[7] = convert.uchar[0]; + + sha1_single_for_mh_sha1(buf, digest); + if (i == (2 * SHA1_BLOCK_SIZE)) + sha1_single_for_mh_sha1(buf + SHA1_BLOCK_SIZE, digest); +} |