From e6918187568dbd01842d8d1d2c808ce16a894239 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 21 Apr 2024 13:54:28 +0200 Subject: Adding upstream version 18.2.2. Signed-off-by: Daniel Baumann --- .../isa-l_crypto/include/aarch64_multibinary.h | 301 ++++++++++ src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h | 165 ++++++ src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h | 613 ++++++++++++++++++++ src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h | 76 +++ src/crypto/isa-l/isa-l_crypto/include/aes_xts.h | 214 +++++++ .../isa-l/isa-l_crypto/include/datastruct.asm | 79 +++ .../isa-l/isa-l_crypto/include/endian_helper.h | 83 +++ src/crypto/isa-l/isa-l_crypto/include/intrinreg.h | 65 +++ src/crypto/isa-l/isa-l_crypto/include/md5_mb.h | 372 +++++++++++++ src/crypto/isa-l/isa-l_crypto/include/memcpy.asm | 615 +++++++++++++++++++++ .../isa-l/isa-l_crypto/include/memcpy_inline.h | 375 +++++++++++++ src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h | 315 +++++++++++ .../isa-l_crypto/include/mh_sha1_murmur3_x64_128.h | 327 +++++++++++ src/crypto/isa-l/isa-l_crypto/include/mh_sha256.h | 315 +++++++++++ .../isa-l/isa-l_crypto/include/multi_buffer.h | 112 ++++ .../isa-l/isa-l_crypto/include/multibinary.asm | 517 +++++++++++++++++ .../isa-l/isa-l_crypto/include/reg_sizes.asm | 442 +++++++++++++++ .../isa-l/isa-l_crypto/include/rolling_hashx.h | 114 ++++ src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h | 450 +++++++++++++++ src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h | 451 +++++++++++++++ src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h | 422 ++++++++++++++ src/crypto/isa-l/isa-l_crypto/include/sm3_mb.h | 155 ++++++ src/crypto/isa-l/isa-l_crypto/include/test.h | 111 ++++ src/crypto/isa-l/isa-l_crypto/include/types.h | 100 ++++ 24 files changed, 6789 insertions(+) create mode 100644 src/crypto/isa-l/isa-l_crypto/include/aarch64_multibinary.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/aes_xts.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/datastruct.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/include/endian_helper.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/intrinreg.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/md5_mb.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/memcpy.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/mh_sha256.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/multibinary.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm create mode 100644 src/crypto/isa-l/isa-l_crypto/include/rolling_hashx.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/sm3_mb.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/test.h create mode 100644 src/crypto/isa-l/isa-l_crypto/include/types.h (limited to 'src/crypto/isa-l/isa-l_crypto/include') diff --git a/src/crypto/isa-l/isa-l_crypto/include/aarch64_multibinary.h b/src/crypto/isa-l/isa-l_crypto/include/aarch64_multibinary.h new file mode 100644 index 000000000..a8f81b232 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aarch64_multibinary.h @@ -0,0 +1,301 @@ +/********************************************************************** + Copyright(c) 2019 Arm Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Arm Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ +#ifndef __AARCH64_MULTIBINARY_H__ +#define __AARCH64_MULTIBINARY_H__ +#ifndef __aarch64__ +#error "This file is for aarch64 only" +#endif +#include +#ifdef __ASSEMBLY__ +/** + * # mbin_interface : the wrapper layer for isal-l api + * + * ## references: + * * https://sourceware.org/git/gitweb.cgi?p=glibc.git;a=blob;f=sysdeps/aarch64/dl-trampoline.S + * * http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf + * * https://static.docs.arm.com/ihi0057/b/IHI0057B_aadwarf64.pdf?_ga=2.80574487.1870739014.1564969896-1634778941.1548729310 + * + * ## Usage: + * 1. Define dispather function + * 2. name must be \name\()_dispatcher + * 3. Prototype should be *"void * \name\()_dispatcher"* + * 4. The dispather should return the right function pointer , revision and a string information . + **/ +.macro mbin_interface name:req + .extern \name\()_dispatcher + .section .data + .balign 8 + .global \name\()_dispatcher_info + .type \name\()_dispatcher_info,%object + + \name\()_dispatcher_info: + .quad \name\()_mbinit //func_entry + + .size \name\()_dispatcher_info,. - \name\()_dispatcher_info + + .balign 8 + .text + \name\()_mbinit: + //save lp fp, sub sp + .cfi_startproc + stp x29, x30, [sp, -224]! + + //add cfi directive to avoid GDB bt cmds error + //set cfi(Call Frame Information) + .cfi_def_cfa_offset 224 + .cfi_offset 29, -224 + .cfi_offset 30, -216 + + //save parameter/result/indirect result registers + stp x8, x9, [sp, 16] + .cfi_offset 8, -208 + .cfi_offset 9, -200 + stp x0, x1, [sp, 32] + .cfi_offset 0, -192 + .cfi_offset 1, -184 + stp x2, x3, [sp, 48] + .cfi_offset 2, -176 + .cfi_offset 3, -168 + stp x4, x5, [sp, 64] + .cfi_offset 4, -160 + .cfi_offset 5, -152 + stp x6, x7, [sp, 80] + .cfi_offset 6, -144 + .cfi_offset 7, -136 + stp q0, q1, [sp, 96] + .cfi_offset 64, -128 + .cfi_offset 65, -112 + stp q2, q3, [sp, 128] + .cfi_offset 66, -96 + .cfi_offset 67, -80 + stp q4, q5, [sp, 160] + .cfi_offset 68, -64 + .cfi_offset 69, -48 + stp q6, q7, [sp, 192] + .cfi_offset 70, -32 + .cfi_offset 71, -16 + + /** + * The dispatcher functions have the following prototype: + * void * function_dispatcher(void) + * As the dispatcher is returning a struct, by the AAPCS, + */ + + + bl \name\()_dispatcher + //restore temp/indirect result registers + ldp x8, x9, [sp, 16] + .cfi_restore 8 + .cfi_restore 9 + + // save function entry + str x0, [x9] + + //restore parameter/result registers + ldp x0, x1, [sp, 32] + .cfi_restore 0 + .cfi_restore 1 + ldp x2, x3, [sp, 48] + .cfi_restore 2 + .cfi_restore 3 + ldp x4, x5, [sp, 64] + .cfi_restore 4 + .cfi_restore 5 + ldp x6, x7, [sp, 80] + .cfi_restore 6 + .cfi_restore 7 + ldp q0, q1, [sp, 96] + .cfi_restore 64 + .cfi_restore 65 + ldp q2, q3, [sp, 128] + .cfi_restore 66 + .cfi_restore 67 + ldp q4, q5, [sp, 160] + .cfi_restore 68 + .cfi_restore 69 + ldp q6, q7, [sp, 192] + .cfi_restore 70 + .cfi_restore 71 + //save lp fp and sp + ldp x29, x30, [sp], 224 + //restore cfi setting + .cfi_restore 30 + .cfi_restore 29 + .cfi_def_cfa_offset 0 + .cfi_endproc + + .global \name + .type \name,%function + .align 2 + \name\(): + adrp x9, :got:\name\()_dispatcher_info + ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info] + ldr x10,[x9] + br x10 + .size \name,. - \name + +.endm + +/** + * mbin_interface_base is used for the interfaces which have only + * noarch implementation + */ +.macro mbin_interface_base name:req, base:req + .extern \base + .section .data + .balign 8 + .global \name\()_dispatcher_info + .type \name\()_dispatcher_info,%object + + \name\()_dispatcher_info: + .quad \base //func_entry + .size \name\()_dispatcher_info,. - \name\()_dispatcher_info + + .balign 8 + .text + .global \name + .type \name,%function + .align 2 + \name\(): + adrp x9, :got:\name\()_dispatcher_info + ldr x9, [x9, #:got_lo12:\name\()_dispatcher_info] + ldr x10,[x9] + br x10 + .size \name,. - \name + +.endm + +#else /* __ASSEMBLY__ */ +#include + + + +#define DEFINE_INTERFACE_DISPATCHER(name) \ + void * name##_dispatcher(void) + +#define PROVIDER_BASIC(name) \ + PROVIDER_INFO(name##_base) + +#define DO_DIGNOSTIC(x) _Pragma GCC diagnostic ignored "-W"#x +#define DO_PRAGMA(x) _Pragma (#x) +#define DIGNOSTIC_IGNORE(x) DO_PRAGMA(GCC diagnostic ignored #x) +#define DIGNOSTIC_PUSH() DO_PRAGMA(GCC diagnostic push) +#define DIGNOSTIC_POP() DO_PRAGMA(GCC diagnostic pop) + + +#define PROVIDER_INFO(_func_entry) \ + ({ DIGNOSTIC_PUSH() \ + DIGNOSTIC_IGNORE(-Wnested-externs) \ + extern void _func_entry(void); \ + DIGNOSTIC_POP() \ + _func_entry; \ + }) + +/** + * Micro-Architector definitions + * Reference: https://developer.arm.com/docs/ddi0595/f/aarch64-system-registers/midr_el1 + */ + +#define CPU_IMPLEMENTER_RESERVE 0x00 +#define CPU_IMPLEMENTER_ARM 0x41 + + +#define CPU_PART_CORTEX_A57 0xD07 +#define CPU_PART_CORTEX_A72 0xD08 +#define CPU_PART_NEOVERSE_N1 0xD0C + +#define MICRO_ARCH_ID(imp,part) \ + (((CPU_IMPLEMENTER_##imp&0xff)<<24)|((CPU_PART_##part&0xfff)<<4)) + +#ifndef HWCAP_CPUID +#define HWCAP_CPUID (1<<11) +#endif + +/** + * @brief get_micro_arch_id + * read micro-architector register instruction if possible.This function + * provides microarchitecture information and make microarchitecture optimization + * possible. It will trap into kernel due to mrs instruction. So it should + * be called only in dispatcher, that will be called only once in program + * lifecycle. And HWCAP must be match,That will make sure there are no + * illegal instruction errors. + * + * NOTICE: + * - HWCAP_CPUID should be available. Otherwise it returns zero + * - It MUST be called inside dispather. + * - It MUST meet the HWCAP requirements + * + * Example: + * DEFINE_INTERFACE_DISPATCHER(crc32_iscsi) + * { + * unsigned long auxval = getauxval(AT_HWCAP); + * // MUST do the judgement is MUST. + * if ((HWCAP_CRC32 | HWCAP_PMULL) == (auxval & (HWCAP_CRC32 | HWCAP_PMULL))) { + * switch (get_micro_arch_id()) { + * case MICRO_ARCH_ID(ARM, CORTEX_A57): + * return PROVIDER_INFO(crc32_pmull_crc_for_a57); + * case MICRO_ARCH_ID(ARM, CORTEX_A72): + * return PROVIDER_INFO(crc32_pmull_crc_for_a72); + * case MICRO_ARCH_ID(ARM, NEOVERSE_N1): + * return PROVIDER_INFO(crc32_pmull_crc_for_n1); + * case default: + * return PROVIDER_INFO(crc32_pmull_crc_for_others); + * } + * } + * return PROVIDER_BASIC(crc32_iscsi); + * } + * KNOWN ISSUE: + * On a heterogeneous system (big.LITTLE), it will work but the performance + * might not be the best one as expected. + * + * If this function is called on the big core, it will return the function + * optimized for the big core. + * + * If execution is then scheduled to the little core. It will still work (1), + * but the function won't be optimized for the little core, thus the performance + * won't be as expected. + * + * References: + * - [CPU Feature detection](https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/tree/Documentation/arm64/cpu-feature-registers.rst?h=v5.5) + * + */ +static inline uint32_t get_micro_arch_id(void) +{ + uint32_t id=CPU_IMPLEMENTER_RESERVE; + if ((getauxval(AT_HWCAP) & HWCAP_CPUID)) { + + asm("mrs %0, MIDR_EL1 " : "=r" (id)); + } + return id&0xff00fff0; +} + + + +#endif /* __ASSEMBLY__ */ +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h b/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h new file mode 100644 index 000000000..aaf87ada1 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h @@ -0,0 +1,165 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/** + * @file aes_cbc.h + * @brief AES CBC encryption/decryption function prototypes. + * + */ +#ifndef _AES_CBC_h +#define _AES_CBC_h + +#include + +#ifdef __cplusplus +extern "C" { + +#endif + +typedef enum cbc_key_size { CBC_128_BITS = 16, CBC_192_BITS = 24, CBC_256_BITS = 32} cbc_key_size; +#define CBC_ROUND_KEY_LEN (16) +#define CBC_128_KEY_ROUNDS (10+1) /*expanded key holds 10 key rounds plus original key*/ +#define CBC_192_KEY_ROUNDS (12+1) /*expanded key holds 12 key rounds plus original key*/ +#define CBC_256_KEY_ROUNDS (14+1) /*expanded key holds 14 key rounds plus original key*/ +#define CBC_MAX_KEYS_SIZE (CBC_ROUND_KEY_LEN * CBC_256_KEY_ROUNDS) + +#define CBC_IV_DATA_LEN (16) + +/** @brief holds intermediate key data used in encryption/decryption + * + */ +struct cbc_key_data { // must be 16 byte aligned + uint8_t enc_keys[CBC_MAX_KEYS_SIZE]; + uint8_t dec_keys[CBC_MAX_KEYS_SIZE]; +}; + +/** @brief CBC-AES key pre-computation done once for a key + * + * @requires SSE4.1 and AESNI + * + * arg 1: in: pointer to key + * arg 2: OUT: pointer to a key expanded data + */ +int aes_cbc_precomp( + uint8_t *key, + int key_size, + struct cbc_key_data *keys_blk +); + +/** @brief CBC-AES 128 bit key Decryption + * + * @requires SSE4.1 and AESNI + * + * arg 1: in: pointer to input (cipher text) + * arg 2: IV: pointer to IV, Must be 16 bytes aligned to a 16 byte boundary + * arg 3: keys: pointer to keys, Must be on a 16 byte boundary and length of key size * key rounds + * arg 4: OUT: pointer to output (plain text ... in-place allowed) + * arg 5: len_bytes: length in bytes (multiple of 16) + */ +void aes_cbc_dec_128( + void *in, //!< Input cipher text + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data + void *out, //!< Output plain text + uint64_t len_bytes //!< Must be a multiple of 16 bytes + ); + +/** @brief CBC-AES 192 bit key Decryption + * +* @requires SSE4.1 and AESNI +* +*/ +void aes_cbc_dec_192( + void *in, //!< Input cipher text + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data + void *out, //!< Output plain text + uint64_t len_bytes //!< Must be a multiple of 16 bytes + ); + +/** @brief CBC-AES 256 bit key Decryption + * +* @requires SSE4.1 and AESNI +* +*/ +void aes_cbc_dec_256( + void *in, //!< Input cipher text + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data + void *out, //!< Output plain text + uint64_t len_bytes //!< Must be a multiple of 16 bytes + ); + +/** @brief CBC-AES 128 bit key Encryption + * + * @requires SSE4.1 and AESNI + * + * arg 1: in: pointer to input (plain text) + * arg 2: IV: pointer to IV, Must be 16 bytes aligned to a 16 byte boundary + * arg 3: keys: pointer to keys, Must be on a 16 byte boundary and length of key size * key rounds + * arg 4: OUT: pointer to output (cipher text ... in-place allowed) + * arg 5: len_bytes: length in bytes (multiple of 16) + */ +int aes_cbc_enc_128( + void *in, //!< Input plain text + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data + void *out, //!< Output cipher text + uint64_t len_bytes //!< Must be a multiple of 16 bytes + ); +/** @brief CBC-AES 192 bit key Encryption + * +* @requires SSE4.1 and AESNI +* +*/ +int aes_cbc_enc_192( + void *in, //!< Input plain text + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data + void *out, //!< Output cipher text + uint64_t len_bytes //!< Must be a multiple of 16 bytes + ); + +/** @brief CBC-AES 256 bit key Encryption + * +* @requires SSE4.1 and AESNI +* +*/ +int aes_cbc_enc_256( + void *in, //!< Input plain text + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data + void *out, //!< Output cipher text + uint64_t len_bytes //!< Must be a multiple of 16 bytes + ); + +#ifdef __cplusplus +} +#endif //__cplusplus +#endif //ifndef _AES_CBC_h diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h b/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h new file mode 100644 index 000000000..b407b7f6b --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h @@ -0,0 +1,613 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/** + * @file aes_gcm.h + * @brief AES GCM encryption/decryption function prototypes. + * + * At build time there is an option to use non-temporal loads and stores + * selected by defining the compile time option NT_LDST. The use of this option + * places the following restriction on the gcm encryption functions: + * + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * + * - When using the streaming API, all partial input buffers must be a multiple + * of 64 bytes long except for the last input buffer. + * + * - In-place encryption/decryption is not recommended. + * + */ + +/* +; References: +; This code was derived and highly optimized from the code described in paper: +; Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation on Intel Architecture Processors. August, 2010 +; +; For the shift-based reductions used in this code, we used the method described in paper: +; Shay Gueron, Michael E. Kounavis. Intel Carry-Less Multiplication Instruction and its Usage for Computing the GCM Mode. January, 2010. +; +; +; +; Assumptions: Support for SSE4.1 or greater, AVX or AVX2 +; +; +; iv: +; 0 1 2 3 +; 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Salt (From the SA) | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Initialization Vector | +; | (This is the sequence number from IPSec header) | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | 0x1 | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +; TLen: +; from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +; + */ +#ifndef _AES_GCM_h +#define _AES_GCM_h + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8. */ +#define MAX_TAG_LEN (16) +// +// IV data is limited to 16 bytes. The last DWORD (4 bytes) must be 0x1 +// +#define GCM_IV_LEN (16) +#define GCM_IV_DATA_LEN (12) +#define GCM_IV_END_MARK {0x00, 0x00, 0x00, 0x01}; +#define GCM_IV_END_START (12) + +#define LONGEST_TESTED_AAD_LENGTH (2* 1024) + +// Key lengths of 128 and 256 supported +#define GCM_128_KEY_LEN (16) +#define GCM_256_KEY_LEN (32) + +#define GCM_BLOCK_LEN 16 +#define GCM_ENC_KEY_LEN 16 +#define GCM_KEY_SETS (15) /*exp key + 14 exp round keys*/ + +/** + * @brief holds intermediate key data needed to improve performance + * + * gcm_data hold internal key information used by gcm128 and gcm256. + */ +struct gcm_data { + uint8_t expanded_keys[GCM_ENC_KEY_LEN * GCM_KEY_SETS]; + uint8_t shifted_hkey_1[GCM_ENC_KEY_LEN]; // store HashKey <<1 mod poly here + uint8_t shifted_hkey_2[GCM_ENC_KEY_LEN]; // store HashKey^2 <<1 mod poly here + uint8_t shifted_hkey_3[GCM_ENC_KEY_LEN]; // store HashKey^3 <<1 mod poly here + uint8_t shifted_hkey_4[GCM_ENC_KEY_LEN]; // store HashKey^4 <<1 mod poly here + uint8_t shifted_hkey_5[GCM_ENC_KEY_LEN]; // store HashKey^5 <<1 mod poly here + uint8_t shifted_hkey_6[GCM_ENC_KEY_LEN]; // store HashKey^6 <<1 mod poly here + uint8_t shifted_hkey_7[GCM_ENC_KEY_LEN]; // store HashKey^7 <<1 mod poly here + uint8_t shifted_hkey_8[GCM_ENC_KEY_LEN]; // store HashKey^8 <<1 mod poly here + uint8_t shifted_hkey_1_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_2_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_3_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_4_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_5_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_6_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_7_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_8_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes) + // init, update and finalize context data + uint8_t aad_hash[GCM_BLOCK_LEN]; + uint64_t aad_length; + uint64_t in_length; + uint8_t partial_block_enc_key[GCM_BLOCK_LEN]; + uint8_t orig_IV[GCM_BLOCK_LEN]; + uint8_t current_counter[GCM_BLOCK_LEN]; + uint64_t partial_block_length; +}; + +/** + * @brief holds intermediate key data needed to improve performance + * + * gcm_key_data hold internal key information used by gcm128, gcm192 and gcm256. + */ +#ifdef __WIN32 +__declspec(align(16)) +#endif /* WIN32 */ +struct gcm_key_data { + uint8_t expanded_keys[GCM_ENC_KEY_LEN * GCM_KEY_SETS]; + uint8_t shifted_hkey_1[GCM_ENC_KEY_LEN]; // store HashKey <<1 mod poly here + uint8_t shifted_hkey_2[GCM_ENC_KEY_LEN]; // store HashKey^2 <<1 mod poly here + uint8_t shifted_hkey_3[GCM_ENC_KEY_LEN]; // store HashKey^3 <<1 mod poly here + uint8_t shifted_hkey_4[GCM_ENC_KEY_LEN]; // store HashKey^4 <<1 mod poly here + uint8_t shifted_hkey_5[GCM_ENC_KEY_LEN]; // store HashKey^5 <<1 mod poly here + uint8_t shifted_hkey_6[GCM_ENC_KEY_LEN]; // store HashKey^6 <<1 mod poly here + uint8_t shifted_hkey_7[GCM_ENC_KEY_LEN]; // store HashKey^7 <<1 mod poly here + uint8_t shifted_hkey_8[GCM_ENC_KEY_LEN]; // store HashKey^8 <<1 mod poly here + uint8_t shifted_hkey_1_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits + uint8_t shifted_hkey_2_k[GCM_ENC_KEY_LEN]; // and Low 64b of HashKey^n <<1 mod poly + uint8_t shifted_hkey_3_k[GCM_ENC_KEY_LEN]; // here (for Karatsuba purposes) + uint8_t shifted_hkey_4_k[GCM_ENC_KEY_LEN]; + uint8_t shifted_hkey_5_k[GCM_ENC_KEY_LEN]; + uint8_t shifted_hkey_6_k[GCM_ENC_KEY_LEN]; + uint8_t shifted_hkey_7_k[GCM_ENC_KEY_LEN]; + uint8_t shifted_hkey_8_k[GCM_ENC_KEY_LEN]; +#ifdef GCM_BIG_DATA + uint8_t shifted_hkey_n_k[GCM_ENC_KEY_LEN * (128 - 16)]; // Big data version needs 128 +#else + uint8_t shifted_hkey_n_k[GCM_ENC_KEY_LEN * (48 - 16)]; // Others vaes version needs 48 +#endif +} +#if defined (__unix__) || (__APPLE__) || (__MINGW32__) + __attribute__ ((aligned (16))); +#else + ; +#endif + +/** + * @brief holds GCM operation context + */ +struct gcm_context_data { + // init, update and finalize context data + uint8_t aad_hash[GCM_BLOCK_LEN]; + uint64_t aad_length; + uint64_t in_length; + uint8_t partial_block_enc_key[GCM_BLOCK_LEN]; + uint8_t orig_IV[GCM_BLOCK_LEN]; + uint8_t current_counter[GCM_BLOCK_LEN]; + uint64_t partial_block_length; +}; + +/* ------------------ New interface for separate expanded keys ------------ */ + +/** + * @brief GCM-AES Encryption using 128 bit keys + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_128( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed + uint8_t const *in, //!< Plaintext input + uint64_t len, //!< Length of data in Bytes for encryption + uint8_t *iv, //!< iv pointer to 12 byte IV structure. + //!< Internally, library concates 0x00000001 value to it. + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len, //!< Length of AAD + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + +/** + * @brief GCM-AES Encryption using 256 bit keys + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_256( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed + uint8_t const *in, //!< Plaintext input + uint64_t len, //!< Length of data in Bytes for encryption + uint8_t *iv, //!< iv pointer to 12 byte IV structure. + //!< Internally, library concates 0x00000001 value to it. + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len, //!< Length of AAD + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + + +/** + * @brief GCM-AES Decryption using 128 bit keys + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_128( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Plaintext output. Decrypt in-place is allowed + uint8_t const *in, //!< Ciphertext input + uint64_t len, //!< Length of data in Bytes for decryption + uint8_t *iv, //!< iv pointer to 12 byte IV structure. + //!< Internally, library concates 0x00000001 value to it. + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len, //!< Length of AAD + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + +/** + * @brief GCM-AES Decryption using 128 bit keys + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_256( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Plaintext output. Decrypt in-place is allowed + uint8_t const *in, //!< Ciphertext input + uint64_t len, //!< Length of data in Bytes for decryption + uint8_t *iv, //!< iv pointer to 12 byte IV structure. + //!< Internally, library concates 0x00000001 value to it. + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len, //!< Length of AAD + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + + +/** + * @brief Start a AES-GCM Encryption message 128 bit key + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_init_128( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *iv, //!< Pointer to 12 byte IV structure + //!< Internally, library concates 0x00000001 value to it + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len //!< Length of AAD + ); + +/** + * @brief Start a AES-GCM Encryption message 256 bit key + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_init_256( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *iv, //!< Pointer to 12 byte IV structure + //!< Internally, library concates 0x00000001 value to it + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len //!< Length of AAD + ); + +/** + * @brief Encrypt a block of a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_128_update( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed. + const uint8_t *in, //!< Plaintext input + uint64_t len //!< Length of data in Bytes for encryption + ); + +/** + * @brief Encrypt a block of a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_256_update( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed. + const uint8_t *in, //!< Plaintext input + uint64_t len //!< Length of data in Bytes for encryption + ); + +/** + * @brief Decrypt a block of a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_128_update( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Plaintext output. Decrypt in-place is allowed. + const uint8_t *in, //!< Ciphertext input + uint64_t len //!< Length of data in Bytes for decryption + ); + +/** + * @brief Decrypt a block of a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_256_update( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Plaintext output. Decrypt in-place is allowed. + const uint8_t *in, //!< Ciphertext input + uint64_t len //!< Length of data in Bytes for decryption + ); + +/** + * @brief End encryption of a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_128_finalize( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + +/** + * @brief End encryption of a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_256_finalize( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + +/** + * @brief End decryption of a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_128_finalize( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + +/** + * @brief End decryption of a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_256_finalize( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + +/** + * @brief Pre-processes GCM key data 128 bit + * + * Prefills the gcm key data with key values for each round and + * the initial sub hash key for tag encoding + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_pre_128( + const void *key, //!< Pointer to key data + struct gcm_key_data *key_data //!< GCM expanded key data + ); + +/** + * @brief Pre-processes GCM key data 128 bit + * + * Prefills the gcm key data with key values for each round and + * the initial sub hash key for tag encoding + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_pre_256( + const void *key, //!< Pointer to key data + struct gcm_key_data *key_data //!< GCM expanded key data + ); + + + +/* ---- NT versions ---- */ +/** + * @brief GCM-AES Encryption using 128 bit keys, Non-temporal data + * + * Non-temporal version of encrypt has additional restrictions: + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * - In-place encryption/decryption is not recommended. Performance can be slow. + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_128_nt( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed + uint8_t const *in, //!< Plaintext input + uint64_t len, //!< Length of data in Bytes for encryption + uint8_t *iv, //!< iv pointer to 12 byte IV structure. + //!< Internally, library concates 0x00000001 value to it. + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len, //!< Length of AAD + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + +/** + * @brief GCM-AES Encryption using 256 bit keys, Non-temporal data + * + * Non-temporal version of encrypt has additional restrictions: + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * - In-place encryption/decryption is not recommended. Performance can be slow. + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_256_nt( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed + uint8_t const *in, //!< Plaintext input + uint64_t len, //!< Length of data in Bytes for encryption + uint8_t *iv, //!< iv pointer to 12 byte IV structure. + //!< Internally, library concates 0x00000001 value to it. + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len, //!< Length of AAD + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + + +/** + * @brief GCM-AES Decryption using 128 bit keys, Non-temporal data + * + * Non-temporal version of decrypt has additional restrictions: + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * - In-place encryption/decryption is not recommended. Performance can be slow. + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_128_nt( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Plaintext output. Decrypt in-place is allowed + uint8_t const *in, //!< Ciphertext input + uint64_t len, //!< Length of data in Bytes for decryption + uint8_t *iv, //!< iv pointer to 12 byte IV structure. + //!< Internally, library concates 0x00000001 value to it. + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len, //!< Length of AAD + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + +/** + * @brief GCM-AES Decryption using 128 bit keys, Non-temporal data + * + * Non-temporal version of decrypt has additional restrictions: + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * - In-place encryption/decryption is not recommended. Performance can be slow. + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_256_nt( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Plaintext output. Decrypt in-place is allowed + uint8_t const *in, //!< Ciphertext input + uint64_t len, //!< Length of data in Bytes for decryption + uint8_t *iv, //!< iv pointer to 12 byte IV structure. + //!< Internally, library concates 0x00000001 value to it. + uint8_t const *aad, //!< Additional Authentication Data (AAD) + uint64_t aad_len, //!< Length of AAD + uint8_t *auth_tag, //!< Authenticated Tag output + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). + //!< Valid values are 16 (most likely), 12 or 8 + ); + + +/** + * @brief Encrypt a block of a AES-128-GCM Encryption message, Non-temporal data + * + * Non-temporal version of encrypt update has additional restrictions: + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * - All partial input buffers must be a multiple of 64 bytes long except for + * the last input buffer. + * - In-place encryption/decryption is not recommended. Performance can be slow. + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_128_update_nt( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed. + const uint8_t *in, //!< Plaintext input + uint64_t len //!< Length of data in Bytes for encryption + ); + +/** + * @brief Encrypt a block of a AES-256-GCM Encryption message, Non-temporal data + * + * Non-temporal version of encrypt update has additional restrictions: + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * - All partial input buffers must be a multiple of 64 bytes long except for + * the last input buffer. + * - In-place encryption/decryption is not recommended. Performance can be slow. + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_enc_256_update_nt( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed. + const uint8_t *in, //!< Plaintext input + uint64_t len //!< Length of data in Bytes for encryption + ); + +/** + * @brief Decrypt a block of a AES-128-GCM Encryption message, Non-temporal data + * + * Non-temporal version of decrypt update has additional restrictions: + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * - All partial input buffers must be a multiple of 64 bytes long except for + * the last input buffer. + * - In-place encryption/decryption is not recommended. Performance can be slow. + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_128_update_nt( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Plaintext output. Decrypt in-place is allowed. + const uint8_t *in, //!< Ciphertext input + uint64_t len //!< Length of data in Bytes for decryption + ); + +/** + * @brief Decrypt a block of a AES-256-GCM Encryption message, Non-temporal data + * + * Non-temporal version of decrypt update has additional restrictions: + * - The plaintext and cyphertext buffers must be aligned on a 64 byte boundary. + * - All partial input buffers must be a multiple of 64 bytes long except for + * the last input buffer. + * - In-place encryption/decryption is not recommended. Performance can be slow. + * + * @requires SSE4.1 and AESNI + */ +void aes_gcm_dec_256_update_nt( + const struct gcm_key_data *key_data, //!< GCM expanded key data + struct gcm_context_data *context_data, //!< GCM operation context data + uint8_t *out, //!< Plaintext output. Decrypt in-place is allowed. + const uint8_t *in, //!< Ciphertext input + uint64_t len //!< Length of data in Bytes for decryption + ); + + +#ifdef __cplusplus +} +#endif //__cplusplus +#endif //ifndef _AES_GCM_h diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h b/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h new file mode 100644 index 000000000..6ecded301 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h @@ -0,0 +1,76 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _KEYEXP_128_H +#define _KEYEXP_128_H + +/** + * @file aes_keyexp.h + * @brief AES key expansion functions + * + * This defines the interface to key expansion functions. + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** @brief AES key expansion 128 bit +* @requires SSE4.1 +*/ +void aes_keyexp_128( + const uint8_t *key, //!< input key for AES-128, 16 bytes + uint8_t *exp_key_enc, //!< expanded encryption keys, 16*11 bytes + uint8_t *exp_key_dec //!< expanded decryption keys, 16*11 bytes + ); + +/** @brief AES key expansion 192 bit +* @requires SSE4.1 +*/ +void aes_keyexp_192( + const uint8_t *key, //!< input key for AES-192, 16*1.5 bytes + uint8_t *exp_key_enc, //!< expanded encryption keys, 16*13 bytes + uint8_t *exp_key_dec //!< expanded decryption keys, 16*13 bytes + ); + +/** @brief AES key expansion 256 bit +* @requires SSE4.1 +*/ +void aes_keyexp_256( + const uint8_t *key, //!< input key for AES-256, 16*2 bytes + uint8_t *exp_key_enc, //!< expanded encryption keys, 16*15 bytes + uint8_t *exp_key_dec //!< expanded decryption keys, 16*15 bytes + ); + +#ifdef __cplusplus +} +#endif //__cplusplus +#endif //ifndef _KEYEXP_128_H diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h b/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h new file mode 100644 index 000000000..2021284f5 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h @@ -0,0 +1,214 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +#ifndef _AES_XTS_H +#define _AES_XTS_H + +/** + * @file aes_xts.h + * @brief AES XTS encryption function prototypes. + * + * This defines the interface to optimized AES XTS functions + * + * Pre-expanded keys + * + * For key encryption, pre-expanded keys are stored in the order that they will be + * used. As an example, if Key[0] is the 128-bit initial key used for an AES-128 + * encryption, the rest of the keys are stored as follows: + * + *
    + *
  • Key[0] : Initial encryption key + *
  • Key[1] : Round 1 encryption key + *
  • Key[2] : Round 2 encryption key + *
  • ... + *
  • Key[10] : Round 10 encryption key + *
+ * + * For decryption, the order of keys is reversed. However, we apply the + * necessary aesimc instructions before storing the expanded keys. For the same key + * used above, the pre-expanded keys will be stored as follows: + * + *
    + *
  • Key[0] : Round 10 encryption key + *
  • Key[1] : aesimc(Round 9 encryption key) + *
  • Key[2] : aesimc(Round 8 encryption key) + *
  • ... + *
  • Key[9] : aesimc(Round 1 encryption key) + *
  • Key[10] : Initial encryption key + *
+ * + * Note: The expanded key decryption requires a decryption key only for the block + * decryption step. The tweak step in the expanded key decryption requires the same expanded + * encryption key that is used in the expanded key encryption. + * + * Input and Output Buffers + * + * The input and output buffers can be overlapping as long as the output buffer + * pointer is not less than the input buffer pointer. If the two pointers are the + * same, then encryption/decryption will occur in-place. + * + * Data Length + * + *
    + *
  • The functions support data length of any bytes greater than or equal to 16 bytes. + *
  • Data length is a 64-bit value, which makes the largest possible data length + * 2^64 - 1 bytes. + *
  • For data lengths from 0 to 15 bytes, the functions return without any error + * codes, without reading or writing any data. + *
  • The functions only support byte lengths, not bits. + *
+ * + * Initial Tweak + * + * The functions accept a 128-bit initial tweak value. The user is responsible for + * padding the initial tweak value to this length. + * + * Data Alignment + * + * The input and output buffers, keys, pre-expanded keys and initial tweak value + * are not required to be aligned to 16 bytes, any alignment works. + * + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** @brief XTS-AES-128 Encryption + * @requires AES-NI + */ + +void XTS_AES_128_enc( + uint8_t *k2, //!< key used for tweaking, 16 bytes + uint8_t *k1, //!< key used for encryption of tweaked plaintext, 16 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *pt, //!< plaintext sector input data + uint8_t *ct //!< ciphertext sector output data + ); + +/** @brief XTS-AES-128 Encryption with pre-expanded keys + * @requires AES-NI + */ + +void XTS_AES_128_enc_expanded_key( + uint8_t *k2, //!< expanded key used for tweaking, 16*11 bytes + uint8_t *k1, //!< expanded key used for encryption of tweaked plaintext, 16*11 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *pt, //!< plaintext sector input data + uint8_t *ct //!< ciphertext sector output data + ); + +/** @brief XTS-AES-128 Decryption + * @requires AES-NI + */ + +void XTS_AES_128_dec( + uint8_t *k2, //!< key used for tweaking, 16 bytes + uint8_t *k1, //!< key used for decryption of tweaked ciphertext, 16 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *ct, //!< ciphertext sector input data + uint8_t *pt //!< plaintext sector output data + ); + +/** @brief XTS-AES-128 Decryption with pre-expanded keys + * @requires AES-NI + */ + +void XTS_AES_128_dec_expanded_key( + uint8_t *k2, //!< expanded key used for tweaking, 16*11 bytes - encryption key is used + uint8_t *k1, //!< expanded decryption key used for decryption of tweaked ciphertext, 16*11 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *ct, //!< ciphertext sector input data + uint8_t *pt //!< plaintext sector output data + ); + +/** @brief XTS-AES-256 Encryption + * @requires AES-NI + */ + +void XTS_AES_256_enc( + uint8_t *k2, //!< key used for tweaking, 16*2 bytes + uint8_t *k1, //!< key used for encryption of tweaked plaintext, 16*2 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *pt, //!< plaintext sector input data + uint8_t *ct //!< ciphertext sector output data + ); + +/** @brief XTS-AES-256 Encryption with pre-expanded keys + * @requires AES-NI + */ + +void XTS_AES_256_enc_expanded_key( + uint8_t *k2, //!< expanded key used for tweaking, 16*15 bytes + uint8_t *k1, //!< expanded key used for encryption of tweaked plaintext, 16*15 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *pt, //!< plaintext sector input data + uint8_t *ct //!< ciphertext sector output data + ); + +/** @brief XTS-AES-256 Decryption + * @requires AES-NI + */ + +void XTS_AES_256_dec( + uint8_t *k2, //!< key used for tweaking, 16*2 bytes + uint8_t *k1, //!< key used for decryption of tweaked ciphertext, 16*2 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *ct, //!< ciphertext sector input data + uint8_t *pt //!< plaintext sector output data + ); + +/** @brief XTS-AES-256 Decryption with pre-expanded keys + * @requires AES-NI + */ + +void XTS_AES_256_dec_expanded_key( + uint8_t *k2, //!< expanded key used for tweaking, 16*15 bytes - encryption key is used + uint8_t *k1, //!< expanded decryption key used for decryption of tweaked ciphertext, 16*15 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *ct, //!< ciphertext sector input data + uint8_t *pt //!< plaintext sector output data + ); + +#ifdef __cplusplus +} +#endif + +#endif //_AES_XTS_H diff --git a/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm b/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm new file mode 100644 index 000000000..3298ce374 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm @@ -0,0 +1,79 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Macros for defining data structures + +; Usage example + +;START_FIELDS ; JOB_AES +;;; name size align +;FIELD _plaintext, 8, 8 ; pointer to plaintext +;FIELD _ciphertext, 8, 8 ; pointer to ciphertext +;FIELD _IV, 16, 8 ; IV +;FIELD _keys, 8, 8 ; pointer to keys +;FIELD _len, 4, 4 ; length in bytes +;FIELD _status, 4, 4 ; status enumeration +;FIELD _user_data, 8, 8 ; pointer to user data +;UNION _union, size1, align1, \ + size2, align2, \ + size3, align3, \ + ... +;END_FIELDS +;%assign _JOB_AES_size _FIELD_OFFSET +;%assign _JOB_AES_align _STRUCT_ALIGN + +%ifndef _DATASTRUCT_ASM_ +%define _DATASTRUCT_ASM_ + +;; START_FIELDS +%macro START_FIELDS 0 +%assign _FIELD_OFFSET 0 +%assign _STRUCT_ALIGN 0 +%endm + +;; FIELD name size align +%macro FIELD 3 +%define %%name %1 +%define %%size %2 +%define %%align %3 + +%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1)) +%%name equ _FIELD_OFFSET +%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size) +%if (%%align > _STRUCT_ALIGN) +%assign _STRUCT_ALIGN %%align +%endif +%endm + +;; END_FIELDS +%macro END_FIELDS 0 +%assign _FIELD_OFFSET (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) +%endm + +%endif ; end ifdef _DATASTRUCT_ASM_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/endian_helper.h b/src/crypto/isa-l/isa-l_crypto/include/endian_helper.h new file mode 100644 index 000000000..87d90460a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/endian_helper.h @@ -0,0 +1,83 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _ENDIAN_HELPER_H_ +#define _ENDIAN_HELPER_H_ + +/** + * @file endian_helper.h + * @brief Byte order helper routines + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined (__ICC) +# define byteswap32(x) _bswap(x) +# define byteswap64(x) _bswap64(x) +#elif defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3)) +# define byteswap32(x) __builtin_bswap32(x) +# define byteswap64(x) __builtin_bswap64(x) +#else +# define byteswap32(x) ( ((x) << 24) \ + | (((x) & 0xff00) << 8) \ + | (((x) & 0xff0000) >> 8) \ + | ((x)>>24)) +# define byteswap64(x) ( (((x) & (0xffull << 0)) << 56) \ + | (((x) & (0xffull << 8)) << 40) \ + | (((x) & (0xffull << 16)) << 24) \ + | (((x) & (0xffull << 24)) << 8) \ + | (((x) & (0xffull << 32)) >> 8) \ + | (((x) & (0xffull << 40)) >> 24) \ + | (((x) & (0xffull << 48)) >> 40) \ + | (((x) & (0xffull << 56)) >> 56)) +#endif + +// This check works when using GCC (or LLVM). Assume little-endian +// if any other compiler is being used. +#if defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__) \ + && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define to_le32(x) byteswap32(x) +#define to_le64(x) byteswap64(x) +#define to_be32(x) (x) +#define to_be64(x) (x) +#else +#define to_le32(x) (x) +#define to_le64(x) (x) +#define to_be32(x) byteswap32(x) +#define to_be64(x) byteswap64(x) +#endif + +#ifdef __cplusplus +} +#endif + +#endif // _ISA_HELPER_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h b/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h new file mode 100644 index 000000000..3c7ba2877 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h @@ -0,0 +1,65 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +/** + * @file intrinreg.h + * @brief Defines intrinsic types used by the new hashing API + * + */ + +#ifndef _IA64_REGS_H_ +#define _IA64_REGS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +# define inline __inline +#endif + +#include +#include + +// Define available register types uniformly. +/// @cond +typedef struct{ uint8_t dummy; } intrinreg1; +typedef struct{ uint16_t dummy; } intrinreg2; +typedef struct{ uint32_t dummy; } intrinreg4; +typedef struct{ uint64_t dummy; } intrinreg8; +typedef __m128 intrinreg16; +/// @endcond + + +#ifdef __cplusplus +} +#endif + +#endif // _IA64_REGS_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h b/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h new file mode 100644 index 000000000..fcbae5f62 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h @@ -0,0 +1,372 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MD5_MB_H_ +#define _MD5_MB_H_ + +/** + * @file md5_mb.h + * @brief Multi-buffer CTX API MD5 function prototypes and structures + * + * Interface for multi-buffer MD5 functions + * + * Multi-buffer MD5 Entire or First-Update..Update-Last + * + * The interface to this multi-buffer hashing code is carried out through the + * context-level (CTX) init, submit and flush functions and the MD5_HASH_CTX_MGR and + * MD5_HASH_CTX objects. Numerous MD5_HASH_CTX objects may be instantiated by the + * application for use with a single MD5_HASH_CTX_MGR. + * + * The CTX interface functions carry out the initialization and padding of the jobs + * entered by the user and add them to the multi-buffer manager. The lower level "scheduler" + * layer then processes the jobs in an out-of-order manner. The scheduler layer functions + * are internal and are not intended to be invoked directly. Jobs can be submitted + * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial + * jobs which can be started using the HASH_FIRST flag, and later resumed or finished + * using the HASH_UPDATE and HASH_LAST flags respectively. + * + * Note: The submit function does not require data buffers to be block sized. + * + * The MD5 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and + * AVX512. In addition, a multibinary interface is provided, which selects the appropriate + * architecture-specific function at runtime. + * + * Usage: The application creates a MD5_HASH_CTX_MGR object and initializes it + * with a call to md5_ctx_mgr_init*() function, where henceforth "*" stands for the + * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512 (or no suffix for the + * multibinary version). The MD5_HASH_CTX_MGR object will be used to schedule processor + * resources, with up to 8 MD5_HASH_CTX objects (or 16 in AVX2 case, 32 in AVX512 case) + * being processed at a time. + * + * Each MD5_HASH_CTX must be initialized before first use by the hash_ctx_init macro + * defined in multi_buffer.h. After initialization, the application may begin computing + * a hash by giving the MD5_HASH_CTX to a MD5_HASH_CTX_MGR using the submit functions + * md5_ctx_mgr_submit*() with the HASH_FIRST flag set. When the MD5_HASH_CTX is + * returned to the application (via this or a later call to md5_ctx_mgr_submit*() or + * md5_ctx_mgr_flush*()), the application can then re-submit it with another call to + * md5_ctx_mgr_submit*(), but without the HASH_FIRST flag set. + * + * Ideally, on the last buffer for that hash, md5_ctx_mgr_submit_sse is called with + * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero + * length if necessary. When a MD5_HASH_CTX is returned after having been submitted with + * HASH_LAST, it will contain a valid hash. The MD5_HASH_CTX can be reused immediately + * by submitting with HASH_FIRST. + * + * For example, you would submit hashes with the following flags for the following numbers + * of buffers: + *
    + *
  • one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE) + *
  • two buffers: HASH_FIRST, HASH_LAST + *
  • three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST + * etc. + *
+ * + * The order in which MD5_CTX objects are returned is in general different from the order + * in which they are submitted. + * + * A few possible error conditions exist: + *
    + *
  • Submitting flags other than the allowed entire/first/update/last values + *
  • Submitting a context that is currently being managed by a MD5_HASH_CTX_MGR. + *
  • Submitting a context after HASH_LAST is used but before HASH_FIRST is set. + *
+ * + * These error conditions are reported by returning the MD5_HASH_CTX immediately after + * a submit with its error member set to a non-zero error code (defined in + * multi_buffer.h). No changes are made to the MD5_HASH_CTX_MGR in the case of an + * error; no processing is done for other hashes. + * + */ + +#include +#include "multi_buffer.h" +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Hash Constants and Typedefs +#define MD5_DIGEST_NWORDS 4 +#define MD5_MAX_LANES 32 +#define MD5_MIN_LANES 8 +#define MD5_BLOCK_SIZE 64 +#define MD5_LOG2_BLOCK_SIZE 6 +#define MD5_PADLENGTHFIELD_SIZE 8 +#define MD5_INITIAL_DIGEST \ + 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 + +typedef uint32_t md5_digest_array[MD5_DIGEST_NWORDS][MD5_MAX_LANES]; +typedef uint32_t MD5_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single MD5 job for the multi-buffer manager */ + +typedef struct { + uint8_t* buffer; //!< pointer to data buffer for this job + uint32_t len; //!< length of buffer for this job in blocks. + DECLARE_ALIGNED(uint32_t result_digest[MD5_DIGEST_NWORDS],64); + JOB_STS status; //!< output job status + void* user_data; //!< pointer for user's job-related data +} MD5_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted MD5 job */ + +typedef struct { + md5_digest_array digest; + uint8_t* data_ptr[MD5_MAX_LANES]; +} MD5_MB_ARGS_X32; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + MD5_JOB *job_in_lane; +} MD5_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer MD5 jobs */ + +typedef struct { + MD5_MB_ARGS_X32 args; + uint32_t lens[MD5_MAX_LANES]; + uint64_t unused_lanes[4]; //!< each byte or nibble is index (0...31 or 15) of unused lanes. + MD5_LANE_DATA ldata[MD5_MAX_LANES]; + uint32_t num_lanes_inuse; +} MD5_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer MD5 jobs */ + +typedef struct { + MD5_MB_JOB_MGR mgr; +} MD5_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single MD5 job for the multi-buffer CTX manager */ + +typedef struct { + MD5_JOB job; // Must be at struct offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint64_t total_length; //!< Running counter of length processed for this CTX's job + const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. + uint8_t partial_block_buffer[MD5_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void* user_data; //!< pointer for user to keep any job-related data +} MD5_HASH_CTX; + +/******************************************************************* + * CTX level API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the context level MD5 multi-buffer manager structure. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init_sse (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the context level multi-buffer manager. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit_sse (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush_sse (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the MD5 multi-buffer manager structure. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init_avx (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the multi-buffer manager. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit_avx (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush_avx (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the MD5 multi-buffer manager structure. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init_avx2 (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the multi-buffer manager. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit_avx2 (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush_avx2 (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the MD5 multi-buffer manager structure. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init_avx512 (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the multi-buffer manager. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit_avx512 (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush_avx512 (MD5_HASH_CTX_MGR* mgr); + +/******************** multibinary function prototypes **********************/ + +/** + * @brief Initialize the MD5 multi-buffer manager structure. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the multi-buffer manager. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush (MD5_HASH_CTX_MGR* mgr); + + +/******************************************************************* + * Scheduler (internal) level out-of-order function prototypes + ******************************************************************/ + +void md5_mb_mgr_init_sse (MD5_MB_JOB_MGR *state); +MD5_JOB* md5_mb_mgr_submit_sse (MD5_MB_JOB_MGR *state, MD5_JOB* job); +MD5_JOB* md5_mb_mgr_flush_sse (MD5_MB_JOB_MGR *state); + +#define md5_mb_mgr_init_avx md5_mb_mgr_init_sse +MD5_JOB* md5_mb_mgr_submit_avx (MD5_MB_JOB_MGR *state, MD5_JOB* job); +MD5_JOB* md5_mb_mgr_flush_avx (MD5_MB_JOB_MGR *state); + +void md5_mb_mgr_init_avx2 (MD5_MB_JOB_MGR *state); +MD5_JOB* md5_mb_mgr_submit_avx2 (MD5_MB_JOB_MGR *state, MD5_JOB* job); +MD5_JOB* md5_mb_mgr_flush_avx2 (MD5_MB_JOB_MGR *state); + +void md5_mb_mgr_init_avx512 (MD5_MB_JOB_MGR *state); +MD5_JOB* md5_mb_mgr_submit_avx512 (MD5_MB_JOB_MGR *state, MD5_JOB* job); +MD5_JOB* md5_mb_mgr_flush_avx512 (MD5_MB_JOB_MGR *state); + +#ifdef __cplusplus +} +#endif + +#endif // _MD5_MB_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm b/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm new file mode 100644 index 000000000..7cb153540 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm @@ -0,0 +1,615 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifndef __MEMCPY_ASM__ +%define __MEMCPY_ASM__ + +%include "reg_sizes.asm" + + +; This file defines a series of macros to copy small to medium amounts +; of data from memory to memory, where the size is variable but limited. +; +; The macros are all called as: +; memcpy DST, SRC, SIZE, TMP0, TMP1, XTMP0, XTMP1, XTMP2, XTMP3 +; with the parameters defined as: +; DST : register: pointer to dst (not modified) +; SRC : register: pointer to src (not modified) +; SIZE : register: length in bytes (not modified) +; TMP0 : 64-bit temp GPR (clobbered) +; TMP1 : 64-bit temp GPR (clobbered) +; XTMP0 : temp XMM (clobbered) +; XTMP1 : temp XMM (clobbered) +; XTMP2 : temp XMM (clobbered) +; XTMP3 : temp XMM (clobbered) +; +; The name indicates the options. The name is of the form: +; memcpy__ +; where: +; is either "sse" or "avx" or "avx2" +; is either "64" or "128" and defines largest value of SIZE +; is blank or "_1". If "_1" then the min SIZE is 1 (otherwise 0) +; is blank or "_ret". If blank, the code falls through. If "ret" +; it does a "ret" at the end +; +; For the avx2 versions, the temp XMM registers need to be YMM registers +; If the SZ is 64, then only two YMM temps are needed, i.e. it is called as: +; memcpy_avx2_64 DST, SRC, SIZE, TMP0, TMP1, YTMP0, YTMP1 +; memcpy_avx2_128 DST, SRC, SIZE, TMP0, TMP1, YTMP0, YTMP1, YTMP2, YTMP3 +; +; For example: +; memcpy_sse_64 : SSE, 0 <= size < 64, falls through +; memcpy_avx_64_1 : AVX1, 1 <= size < 64, falls through +; memcpy_sse_128_ret : SSE, 0 <= size < 128, ends with ret +; mempcy_avx_128_1_ret : AVX1, 1 <= size < 128, ends with ret +; + +%macro memcpy_sse_64 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 0, 0 +%endm + +%macro memcpy_sse_64_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 0, 0 +%endm + +%macro memcpy_sse_128 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 0, 0 +%endm + +%macro memcpy_sse_128_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 0, 0 +%endm + +%macro memcpy_sse_64_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 1, 0 +%endm + +%macro memcpy_sse_64_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 1, 0 +%endm + +%macro memcpy_sse_128_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 1, 0 +%endm + +%macro memcpy_sse_128_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 1, 0 +%endm + + +%macro memcpy_sse_16 5 + __memcpy_int %1,%2,%3,%4,%5,,,,, 0, 16, 0, 0 +%endm + +%macro memcpy_sse_16_1 5 + __memcpy_int %1,%2,%3,%4,%5,,,,, 1, 16, 0, 0 +%endm + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%macro memcpy_avx_64 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 0, 1 +%endm + +%macro memcpy_avx_64_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 0, 1 +%endm + +%macro memcpy_avx_128 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 0, 1 +%endm + +%macro memcpy_avx_128_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 0, 1 +%endm + +%macro memcpy_avx_64_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 1, 1 +%endm + +%macro memcpy_avx_64_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 1, 1 +%endm + +%macro memcpy_avx_128_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 1, 1 +%endm + +%macro memcpy_avx_128_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 1, 1 +%endm + + +%macro memcpy_avx_16 5 + __memcpy_int %1,%2,%3,%4,%5,,,,, 0, 16, 0, 1 +%endm + +%macro memcpy_avx_16_1 5 + __memcpy_int %1,%2,%3,%4,%5,,,,, 1, 16, 0, 1 +%endm + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%macro memcpy_avx2_64 7 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 0, 64, 0, 2 +%endm + +%macro memcpy_avx2_64_1 7 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 1, 64, 0, 2 +%endm + +%macro memcpy_avx2_128 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7, %8, %9, 0, 128, 0, 2 +%endm + +%macro memcpy_avx2_128_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7, %8, %9, 1, 128, 0, 2 +%endm + +%macro memcpy_avx2_64_ret 7 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 0, 64, 1, 2 +%endm + +%macro memcpy_avx2_64_1_ret 7 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 1, 64, 1, 2 +%endm + +%macro memcpy_avx2_128_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 0, 128, 1, 2 +%endm + +%macro memcpy_avx2_128_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 1, 128, 1, 2 +%endm + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +%macro __memcpy_int 13 +%define %%DST %1 ; register: pointer to dst (not modified) +%define %%SRC %2 ; register: pointer to src (not modified) +%define %%SIZE %3 ; register: length in bytes (not modified) +%define %%TMP0 %4 ; 64-bit temp GPR (clobbered) +%define %%TMP1 %5 ; 64-bit temp GPR (clobbered) +%define %%XTMP0 %6 ; temp XMM (clobbered) +%define %%XTMP1 %7 ; temp XMM (clobbered) +%define %%XTMP2 %8 ; temp XMM (clobbered) +%define %%XTMP3 %9 ; temp XMM (clobbered) +%define %%NOT0 %10 ; if not 0, then assume size cannot be zero +%define %%MAXSIZE %11 ; 128, 64, etc +%define %%USERET %12 ; if not 0, use "ret" at end +%define %%USEAVX %13 ; 0 = SSE, 1 = AVX1, 2 = AVX2 + +%if (%%USERET != 0) + %define %%DONE ret +%else + %define %%DONE jmp %%end +%endif + +%if (%%USEAVX != 0) + %define %%MOVDQU vmovdqu +%else + %define %%MOVDQU movdqu +%endif + +%if (%%MAXSIZE >= 128) + test %%SIZE, 64 + jz %%lt64 + %if (%%USEAVX >= 2) + %%MOVDQU %%XTMP0, [%%SRC + 0*32] + %%MOVDQU %%XTMP1, [%%SRC + 1*32] + %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*32] + %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*32] + + %%MOVDQU [%%DST + 0*32], %%XTMP0 + %%MOVDQU [%%DST + 1*32], %%XTMP1 + %%MOVDQU [%%DST + %%SIZE - 2*32], %%XTMP2 + %%MOVDQU [%%DST + %%SIZE - 1*32], %%XTMP3 + %else + %%MOVDQU %%XTMP0, [%%SRC + 0*16] + %%MOVDQU %%XTMP1, [%%SRC + 1*16] + %%MOVDQU %%XTMP2, [%%SRC + 2*16] + %%MOVDQU %%XTMP3, [%%SRC + 3*16] + %%MOVDQU [%%DST + 0*16], %%XTMP0 + %%MOVDQU [%%DST + 1*16], %%XTMP1 + %%MOVDQU [%%DST + 2*16], %%XTMP2 + %%MOVDQU [%%DST + 3*16], %%XTMP3 + + %%MOVDQU %%XTMP0, [%%SRC + %%SIZE - 4*16] + %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 3*16] + %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*16] + %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*16] + %%MOVDQU [%%DST + %%SIZE - 4*16], %%XTMP0 + %%MOVDQU [%%DST + %%SIZE - 3*16], %%XTMP1 + %%MOVDQU [%%DST + %%SIZE - 2*16], %%XTMP2 + %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP3 + %endif + %%DONE +%endif + +%if (%%MAXSIZE >= 64) +%%lt64 + test %%SIZE, 32 + jz %%lt32 + %if (%%USEAVX >= 2) + %%MOVDQU %%XTMP0, [%%SRC + 0*32] + %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 1*32] + %%MOVDQU [%%DST + 0*32], %%XTMP0 + %%MOVDQU [%%DST + %%SIZE - 1*32], %%XTMP1 + %else + %%MOVDQU %%XTMP0, [%%SRC + 0*16] + %%MOVDQU %%XTMP1, [%%SRC + 1*16] + %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*16] + %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*16] + %%MOVDQU [%%DST + 0*16], %%XTMP0 + %%MOVDQU [%%DST + 1*16], %%XTMP1 + %%MOVDQU [%%DST + %%SIZE - 2*16], %%XTMP2 + %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP3 + %endif + %%DONE +%endif + +%if (%%MAXSIZE >= 32) +%%lt32: + test %%SIZE, 16 + jz %%lt16 + %if (%%USEAVX >= 2) + %%MOVDQU XWORD(%%XTMP0), [%%SRC + 0*16] + %%MOVDQU XWORD(%%XTMP1), [%%SRC + %%SIZE - 1*16] + %%MOVDQU [%%DST + 0*16], XWORD(%%XTMP0) + %%MOVDQU [%%DST + %%SIZE - 1*16], XWORD(%%XTMP1) + %else + %%MOVDQU %%XTMP0, [%%SRC + 0*16] + %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 1*16] + %%MOVDQU [%%DST + 0*16], %%XTMP0 + %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP1 + %endif + %%DONE +%endif + +%if (%%MAXSIZE >= 16) +%%lt16: + test %%SIZE, 8 + jz %%lt8 + mov %%TMP0, [%%SRC] + mov %%TMP1, [%%SRC + %%SIZE - 8] + mov [%%DST], %%TMP0 + mov [%%DST + %%SIZE - 8], %%TMP1 + %%DONE +%endif + +%if (%%MAXSIZE >= 8) +%%lt8: + test %%SIZE, 4 + jz %%lt4 + mov DWORD(%%TMP0), [%%SRC] + mov DWORD(%%TMP1), [%%SRC + %%SIZE - 4] + mov [%%DST], DWORD(%%TMP0) + mov [%%DST + %%SIZE - 4], DWORD(%%TMP1) + %%DONE +%endif + +%if (%%MAXSIZE >= 4) +%%lt4: + test %%SIZE, 2 + jz %%lt2 + movzx DWORD(%%TMP0), word [%%SRC] + movzx DWORD(%%TMP1), byte [%%SRC + %%SIZE - 1] + mov [%%DST], WORD(%%TMP0) + mov [%%DST + %%SIZE - 1], BYTE(%%TMP1) + %%DONE +%endif + +%%lt2: +%if (%%NOT0 == 0) + test %%SIZE, 1 + jz %%end +%endif + movzx DWORD(%%TMP0), byte [%%SRC] + mov [%%DST], BYTE(%%TMP0) +%%end: +%if (%%USERET != 0) + ret +%endif +%endm + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +;; Utility macro to assist with SIMD shifting +%macro _PSRLDQ 3 +%define %%VEC %1 +%define %%REG %2 +%define %%IMM %3 + +%ifidn %%VEC, SSE + psrldq %%REG, %%IMM +%else + vpsrldq %%REG, %%REG, %%IMM +%endif +%endm + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; This section defines a series of macros to store small to medium amounts +; of data from SIMD registers to memory, where the size is variable but limited. +; +; The macros are all called as: +; memcpy DST, SRC, SIZE, TMP, IDX +; with the parameters defined as: +; DST : register: pointer to dst (not modified) +; SRC : register: src data (clobbered) +; SIZE : register: length in bytes (not modified) +; TMP : 64-bit temp GPR (clobbered) +; IDX : 64-bit GPR to store dst index/offset (clobbered) +; +; The name indicates the options. The name is of the form: +; simd_store_ +; where is the SIMD instruction type e.g. "sse" or "avx" + + +%macro simd_store_sse 5 + __simd_store %1,%2,%3,%4,%5,SSE +%endm + +%macro simd_store_avx 5 + __simd_store %1,%2,%3,%4,%5,AVX +%endm + +%macro simd_store_sse_15 5 + __simd_store %1,%2,%3,%4,%5,SSE,15 +%endm + +%macro simd_store_avx_15 5 + __simd_store %1,%2,%3,%4,%5,AVX,15 +%endm + +%macro __simd_store 6-7 +%define %%DST %1 ; register: pointer to dst (not modified) +%define %%SRC %2 ; register: src data (clobbered) +%define %%SIZE %3 ; register: length in bytes (not modified) +%define %%TMP %4 ; 64-bit temp GPR (clobbered) +%define %%IDX %5 ; 64-bit temp GPR to store dst idx (clobbered) +%define %%SIMDTYPE %6 ; "SSE" or "AVX" +%define %%MAX_LEN %7 ; [optional] maximum length to be stored, default 16 + +%define %%PSRLDQ _PSRLDQ %%SIMDTYPE, + +%ifidn %%SIMDTYPE, SSE + %define %%MOVDQU movdqu + %define %%MOVQ movq +%else + %define %%MOVDQU vmovdqu + %define %%MOVQ vmovq +%endif + +;; determine max byte size for store operation +%if %0 > 6 +%assign max_length_to_store %%MAX_LEN +%else +%assign max_length_to_store 16 +%endif + +%if max_length_to_store > 16 +%error "__simd_store macro invoked with MAX_LEN bigger than 16!" +%endif + + xor %%IDX, %%IDX ; zero idx + +%if max_length_to_store == 16 + test %%SIZE, 16 + jz %%lt16 + %%MOVDQU [%%DST], %%SRC + jmp %%end +%%lt16: +%endif + +%if max_length_to_store >= 8 + test %%SIZE, 8 + jz %%lt8 + %%MOVQ [%%DST + %%IDX], %%SRC + %%PSRLDQ %%SRC, 8 + add %%IDX, 8 +%%lt8: +%endif + + %%MOVQ %%TMP, %%SRC ; use GPR from now on + +%if max_length_to_store >= 4 + test %%SIZE, 4 + jz %%lt4 + mov [%%DST + %%IDX], DWORD(%%TMP) + shr %%TMP, 32 + add %%IDX, 4 +%%lt4: +%endif + + test %%SIZE, 2 + jz %%lt2 + mov [%%DST + %%IDX], WORD(%%TMP) + shr %%TMP, 16 + add %%IDX, 2 +%%lt2: + test %%SIZE, 1 + jz %%end + mov [%%DST + %%IDX], BYTE(%%TMP) +%%end: +%endm + +; This section defines a series of macros to load small to medium amounts +; (from 0 to 16 bytes) of data from memory to SIMD registers, +; where the size is variable but limited. +; +; The macros are all called as: +; simd_load DST, SRC, SIZE +; with the parameters defined as: +; DST : register: destination XMM register +; SRC : register: pointer to src data (not modified) +; SIZE : register: length in bytes (not modified) +; +; The name indicates the options. The name is of the form: +; simd_load__ +; where: +; is either "sse" or "avx" +; is either "15" or "16" and defines largest value of SIZE +; is blank or "_1". If "_1" then the min SIZE is 1 (otherwise 0) +; +; For example: +; simd_load_sse_16 : SSE, 0 <= size <= 16 +; simd_load_avx_15_1 : AVX, 1 <= size <= 15 + +%macro simd_load_sse_15_1 3 + __simd_load %1,%2,%3,0,0,SSE +%endm +%macro simd_load_sse_15 3 + __simd_load %1,%2,%3,1,0,SSE +%endm +%macro simd_load_sse_16_1 3 + __simd_load %1,%2,%3,0,1,SSE +%endm +%macro simd_load_sse_16 3 + __simd_load %1,%2,%3,1,1,SSE +%endm + +%macro simd_load_avx_15_1 3 + __simd_load %1,%2,%3,0,0,AVX +%endm +%macro simd_load_avx_15 3 + __simd_load %1,%2,%3,1,0,AVX +%endm +%macro simd_load_avx_16_1 3 + __simd_load %1,%2,%3,0,1,AVX +%endm +%macro simd_load_avx_16 3 + __simd_load %1,%2,%3,1,1,AVX +%endm + +%macro __simd_load 6 +%define %%DST %1 ; [out] destination XMM register +%define %%SRC %2 ; [in] pointer to src data +%define %%SIZE %3 ; [in] length in bytes (0-16 bytes) +%define %%ACCEPT_0 %4 ; 0 = min length = 1, 1 = min length = 0 +%define %%ACCEPT_16 %5 ; 0 = max length = 15 , 1 = max length = 16 +%define %%SIMDTYPE %6 ; "SSE" or "AVX" + +%ifidn %%SIMDTYPE, SSE + %define %%MOVDQU movdqu + %define %%PINSRB pinsrb + %define %%PINSRQ pinsrq + %define %%PXOR pxor +%else + %define %%MOVDQU vmovdqu + %define %%PINSRB vpinsrb + %define %%PINSRQ vpinsrq + %define %%PXOR vpxor +%endif + +%if (%%ACCEPT_16 != 0) + test %%SIZE, 16 + jz %%_skip_16 + %%MOVDQU %%DST, [%%SRC] + jmp %%end_load + +%%_skip_16: +%endif + %%PXOR %%DST, %%DST ; clear XMM register +%if (%%ACCEPT_0 != 0) + or %%SIZE, %%SIZE + je %%end_load +%endif + cmp %%SIZE, 1 + je %%_size_1 + cmp %%SIZE, 2 + je %%_size_2 + cmp %%SIZE, 3 + je %%_size_3 + cmp %%SIZE, 4 + je %%_size_4 + cmp %%SIZE, 5 + je %%_size_5 + cmp %%SIZE, 6 + je %%_size_6 + cmp %%SIZE, 7 + je %%_size_7 + cmp %%SIZE, 8 + je %%_size_8 + cmp %%SIZE, 9 + je %%_size_9 + cmp %%SIZE, 10 + je %%_size_10 + cmp %%SIZE, 11 + je %%_size_11 + cmp %%SIZE, 12 + je %%_size_12 + cmp %%SIZE, 13 + je %%_size_13 + cmp %%SIZE, 14 + je %%_size_14 + +%%_size_15: + %%PINSRB %%DST, [%%SRC + 14], 14 +%%_size_14: + %%PINSRB %%DST, [%%SRC + 13], 13 +%%_size_13: + %%PINSRB %%DST, [%%SRC + 12], 12 +%%_size_12: + %%PINSRB %%DST, [%%SRC + 11], 11 +%%_size_11: + %%PINSRB %%DST, [%%SRC + 10], 10 +%%_size_10: + %%PINSRB %%DST, [%%SRC + 9], 9 +%%_size_9: + %%PINSRB %%DST, [%%SRC + 8], 8 +%%_size_8: + %%PINSRQ %%DST, [%%SRC], 0 + jmp %%end_load +%%_size_7: + %%PINSRB %%DST, [%%SRC + 6], 6 +%%_size_6: + %%PINSRB %%DST, [%%SRC + 5], 5 +%%_size_5: + %%PINSRB %%DST, [%%SRC + 4], 4 +%%_size_4: + %%PINSRB %%DST, [%%SRC + 3], 3 +%%_size_3: + %%PINSRB %%DST, [%%SRC + 2], 2 +%%_size_2: + %%PINSRB %%DST, [%%SRC + 1], 1 +%%_size_1: + %%PINSRB %%DST, [%%SRC + 0], 0 +%%end_load: +%endm + +%endif ; ifndef __MEMCPY_ASM__ diff --git a/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h b/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h new file mode 100644 index 000000000..e0cc314d1 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h @@ -0,0 +1,375 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +/** + * @file memcpy_inline.h + * @brief Defines intrinsic memcpy functions used by the new hashing API + * + */ + +#ifndef _MEMCPY_H_ +#define _MEMCPY_H_ + +#if defined(__i386__) || defined(__x86_64__) || defined( _M_X64) \ + || defined(_M_IX86) +#include "intrinreg.h" +#endif +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(__i386__) || defined(__x86_64__) || defined( _M_X64) \ + || defined(_M_IX86) + +#define memcpy_varlen memcpy_sse_varlen +#define memcpy_fixedlen memcpy_sse_fixedlen + +#define memclr_varlen memclr_sse_varlen +#define memclr_fixedlen memclr_sse_fixedlen + +static inline void memcpy_lte32_sse_fixedlen(void* dst, const void* src, size_t nbytes); +static inline void memcpy_gte16_sse_fixedlen(void* dst, const void* src, size_t nbytes); +static inline void memcpy_sse_fixedlen (void* dst, const void* src, size_t nbytes); + +static inline void memcpy_lte32_sse_varlen (void* dst, const void* src, size_t nbytes); +static inline void memcpy_gte16_sse_varlen (void* dst, const void* src, size_t nbytes); +static inline void memcpy_sse_varlen (void* dst, const void* src, size_t nbytes); + + +static inline void memclr_lte32_sse_fixedlen(void* dst, size_t nbytes); +static inline void memclr_gte16_sse_fixedlen(void* dst, size_t nbytes); +static inline void memclr_sse_fixedlen (void* dst, size_t nbytes); + +static inline void memclr_lte32_sse_varlen (void* dst, size_t nbytes); +static inline void memclr_gte16_sse_varlen (void* dst, size_t nbytes); +static inline void memclr_sse_varlen (void* dst, size_t nbytes); + +#define MEMCPY_BETWEEN_N_AND_2N_BYTES(N, fixedwidth, dst, src, nbytes) \ + do { \ + intrinreg##N head; \ + intrinreg##N tail; \ + assert(N <= nbytes && nbytes <= 2*N); \ + if(N == 1 || (fixedwidth && nbytes==N) ) { \ + head = load_intrinreg##N(src); \ + store_intrinreg##N(dst, head); \ + } \ + else { \ + head = load_intrinreg##N(src); \ + tail = load_intrinreg##N((const void*)((const char*)src + (nbytes - N))); \ + store_intrinreg##N(dst, head); \ + store_intrinreg##N((void*)((char*)dst + (nbytes - N)), tail); \ + } \ + } while(0) + +#define MEMCLR_BETWEEN_N_AND_2N_BYTES(N, fixedwidth, dst, nbytes) \ + do { \ + const intrinreg##N zero = {0}; \ + assert(N <= nbytes && nbytes <= 2*N); \ + if(N == 1 || (fixedwidth && nbytes==N) ) { \ + store_intrinreg##N(dst, zero); \ + } \ + else { \ + store_intrinreg##N(dst, zero); \ + store_intrinreg##N((void*)((char*)dst + (nbytes - N)), zero); \ + } \ + } while(0) + +// Define load/store functions uniformly. + +#define load_intrinreg16(src) _mm_loadu_ps((const float*) src) +#define store_intrinreg16(dst,val) _mm_storeu_ps((float*) dst, val) + +static inline intrinreg8 load_intrinreg8(const void *src) +{ + return *(intrinreg8 *) src; +} + +static inline void store_intrinreg8(void *dst, intrinreg8 val) +{ + *(intrinreg8 *) dst = val; +} + +static inline intrinreg4 load_intrinreg4(const void *src) +{ + return *(intrinreg4 *) src; +} + +static inline void store_intrinreg4(void *dst, intrinreg4 val) +{ + *(intrinreg4 *) dst = val; +} + +static inline intrinreg2 load_intrinreg2(const void *src) +{ + return *(intrinreg2 *) src; +} + +static inline void store_intrinreg2(void *dst, intrinreg2 val) +{ + *(intrinreg2 *) dst = val; +} + +static inline intrinreg1 load_intrinreg1(const void *src) +{ + return *(intrinreg1 *) src; +} + +static inline void store_intrinreg1(void *dst, intrinreg1 val) +{ + *(intrinreg1 *) dst = val; +} + +static inline void memcpy_gte16_sse_fixedlen(void *dst, const void *src, size_t nbytes) +{ + size_t i; + size_t j; + intrinreg16 pool[4]; + size_t remaining_moves; + size_t tail_offset; + int do_tail; + assert(nbytes >= 16); + + for (i = 0; i + 16 * 4 <= nbytes; i += 16 * 4) { + for (j = 0; j < 4; j++) + pool[j] = + load_intrinreg16((const void *)((const char *)src + i + 16 * j)); + for (j = 0; j < 4; j++) + store_intrinreg16((void *)((char *)dst + i + 16 * j), pool[j]); + } + + remaining_moves = (nbytes - i) / 16; + tail_offset = nbytes - 16; + do_tail = (tail_offset & (16 - 1)); + + for (j = 0; j < remaining_moves; j++) + pool[j] = load_intrinreg16((const void *)((const char *)src + i + 16 * j)); + + if (do_tail) + pool[j] = load_intrinreg16((const void *)((const char *)src + tail_offset)); + + for (j = 0; j < remaining_moves; j++) + store_intrinreg16((void *)((char *)dst + i + 16 * j), pool[j]); + + if (do_tail) + store_intrinreg16((void *)((char *)dst + tail_offset), pool[j]); +} + +static inline void memclr_gte16_sse_fixedlen(void *dst, size_t nbytes) +{ + size_t i; + size_t j; + const intrinreg16 zero = { 0 }; + size_t remaining_moves; + size_t tail_offset; + int do_tail; + assert(nbytes >= 16); + + for (i = 0; i + 16 * 4 <= nbytes; i += 16 * 4) + for (j = 0; j < 4; j++) + store_intrinreg16((void *)((char *)dst + i + 16 * j), zero); + + remaining_moves = (nbytes - i) / 16; + tail_offset = nbytes - 16; + do_tail = (tail_offset & (16 - 1)); + + for (j = 0; j < remaining_moves; j++) + store_intrinreg16((void *)((char *)dst + i + 16 * j), zero); + + if (do_tail) + store_intrinreg16((void *)((char *)dst + tail_offset), zero); +} + +static inline void memcpy_lte32_sse_fixedlen(void *dst, const void *src, size_t nbytes) +{ + assert(nbytes <= 32); + if (nbytes >= 16) + MEMCPY_BETWEEN_N_AND_2N_BYTES(16, 1, dst, src, nbytes); + else if (nbytes >= 8) + MEMCPY_BETWEEN_N_AND_2N_BYTES(8, 1, dst, src, nbytes); + else if (nbytes >= 4) + MEMCPY_BETWEEN_N_AND_2N_BYTES(4, 1, dst, src, nbytes); + else if (nbytes >= 2) + MEMCPY_BETWEEN_N_AND_2N_BYTES(2, 1, dst, src, nbytes); + else if (nbytes >= 1) + MEMCPY_BETWEEN_N_AND_2N_BYTES(1, 1, dst, src, nbytes); +} + +static inline void memclr_lte32_sse_fixedlen(void *dst, size_t nbytes) +{ + assert(nbytes <= 32); + if (nbytes >= 16) + MEMCLR_BETWEEN_N_AND_2N_BYTES(16, 1, dst, nbytes); + else if (nbytes >= 8) + MEMCLR_BETWEEN_N_AND_2N_BYTES(8, 1, dst, nbytes); + else if (nbytes >= 4) + MEMCLR_BETWEEN_N_AND_2N_BYTES(4, 1, dst, nbytes); + else if (nbytes >= 2) + MEMCLR_BETWEEN_N_AND_2N_BYTES(2, 1, dst, nbytes); + else if (nbytes >= 1) + MEMCLR_BETWEEN_N_AND_2N_BYTES(1, 1, dst, nbytes); +} + +static inline void memcpy_lte32_sse_varlen(void *dst, const void *src, size_t nbytes) +{ + assert(nbytes <= 32); + if (nbytes >= 16) + MEMCPY_BETWEEN_N_AND_2N_BYTES(16, 0, dst, src, nbytes); + else if (nbytes >= 8) + MEMCPY_BETWEEN_N_AND_2N_BYTES(8, 0, dst, src, nbytes); + else if (nbytes >= 4) + MEMCPY_BETWEEN_N_AND_2N_BYTES(4, 0, dst, src, nbytes); + else if (nbytes >= 2) + MEMCPY_BETWEEN_N_AND_2N_BYTES(2, 0, dst, src, nbytes); + else if (nbytes >= 1) + MEMCPY_BETWEEN_N_AND_2N_BYTES(1, 0, dst, src, nbytes); +} + +static inline void memclr_lte32_sse_varlen(void *dst, size_t nbytes) +{ + assert(nbytes <= 32); + if (nbytes >= 16) + MEMCLR_BETWEEN_N_AND_2N_BYTES(16, 0, dst, nbytes); + else if (nbytes >= 8) + MEMCLR_BETWEEN_N_AND_2N_BYTES(8, 0, dst, nbytes); + else if (nbytes >= 4) + MEMCLR_BETWEEN_N_AND_2N_BYTES(4, 0, dst, nbytes); + else if (nbytes >= 2) + MEMCLR_BETWEEN_N_AND_2N_BYTES(2, 0, dst, nbytes); + else if (nbytes >= 1) + MEMCLR_BETWEEN_N_AND_2N_BYTES(1, 0, dst, nbytes); +} + +static inline void memcpy_gte16_sse_varlen(void *dst, const void *src, size_t nbytes) +{ + size_t i = 0; + intrinreg16 tail; + + assert(nbytes >= 16); + + while (i + 128 <= nbytes) { + memcpy_gte16_sse_fixedlen((void *)((char *)dst + i), + (const void *)((const char *)src + i), 128); + i += 128; + } + if (i + 64 <= nbytes) { + memcpy_gte16_sse_fixedlen((void *)((char *)dst + i), + (const void *)((const char *)src + i), 64); + i += 64; + } + if (i + 32 <= nbytes) { + memcpy_gte16_sse_fixedlen((void *)((char *)dst + i), + (const void *)((const char *)src + i), 32); + i += 32; + } + if (i + 16 <= nbytes) { + memcpy_gte16_sse_fixedlen((void *)((char *)dst + i), + (const void *)((const char *)src + i), 16); + } + + i = nbytes - 16; + tail = load_intrinreg16((const void *)((const char *)src + i)); + store_intrinreg16((void *)((char *)dst + i), tail); +} + +static inline void memclr_gte16_sse_varlen(void *dst, size_t nbytes) +{ + size_t i = 0; + const intrinreg16 zero = { 0 }; + + assert(nbytes >= 16); + + while (i + 128 <= nbytes) { + memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 128); + i += 128; + } + if (i + 64 <= nbytes) { + memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 64); + i += 64; + } + if (i + 32 <= nbytes) { + memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 32); + i += 32; + } + if (i + 16 <= nbytes) { + memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 16); + } + + i = nbytes - 16; + store_intrinreg16((void *)((char *)dst + i), zero); +} + +static inline void memcpy_sse_fixedlen(void *dst, const void *src, size_t nbytes) +{ + if (nbytes >= 16) + memcpy_gte16_sse_fixedlen(dst, src, nbytes); + else + memcpy_lte32_sse_fixedlen(dst, src, nbytes); +} + +static inline void memclr_sse_fixedlen(void *dst, size_t nbytes) +{ + if (nbytes >= 16) + memclr_gte16_sse_fixedlen(dst, nbytes); + else + memclr_lte32_sse_fixedlen(dst, nbytes); +} + +static inline void memcpy_sse_varlen(void *dst, const void *src, size_t nbytes) +{ + if (nbytes >= 16) + memcpy_gte16_sse_varlen(dst, src, nbytes); + else + memcpy_lte32_sse_varlen(dst, src, nbytes); +} + +static inline void memclr_sse_varlen(void *dst, size_t nbytes) +{ + if (nbytes >= 16) + memclr_gte16_sse_varlen(dst, nbytes); + else + memclr_lte32_sse_varlen(dst, nbytes); +} +#else +#define memcpy_varlen memcpy +#define memcpy_fixedlen memcpy + +#define memclr_varlen(dst,n) memset(dst,0,n) +#define memclr_fixedlen(dst,n) memset(dst,0,n) + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // __MEMCPY_H diff --git a/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h new file mode 100644 index 000000000..eac3be031 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h @@ -0,0 +1,315 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MH_SHA1_H_ +#define _MH_SHA1_H_ + +/** + * @file mh_sha1.h + * @brief mh_sha1 function prototypes and structures + * + * Interface for mh_sha1 functions + * + * mh_sha1 Init-Update..Update-Finalize + * + * This file defines the interface to optimized functions used in mh_sha1. + * The definition of multi-hash SHA1(mh_sha1, for short) is: Pad the buffer + * in SHA1 style until the total length is a multiple of 4*16*16 + * (words-width * parallel-segments * block-size); Hash the buffer in + * parallel, generating digests of 4*16*5 (words-width*parallel-segments* + * digest-size); Treat the set of digests as another data buffer, and + * generate a final SHA1 digest for it. + * + * + * Example + * \code + * uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; + * struct mh_sha1_ctx *ctx; + * + * ctx = malloc(sizeof(struct mh_sha1_ctx)); + * mh_sha1_init(ctx); + * mh_sha1_update(ctx, buff, block_len); + * mh_sha1_finalize(ctx, mh_sha1_digest); + * \endcode + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +// External Interface Definition +#define HASH_SEGS 16 +#define SHA1_BLOCK_SIZE 64 +#define MH_SHA1_BLOCK_SIZE (HASH_SEGS * SHA1_BLOCK_SIZE) +#define SHA1_DIGEST_WORDS 5 +#define AVX512_ALIGNED 64 + +/** @brief Holds info describing a single mh_sha1 + * + * It is better to use heap to allocate this data structure to avoid stack overflow. + * +*/ +struct mh_sha1_ctx { + uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; //!< the digest of multi-hash SHA1 + + uint64_t total_length; + //!< Parameters for update feature, describe the lengths of input buffers in bytes + uint8_t partial_block_buffer [MH_SHA1_BLOCK_SIZE * 2]; + //!< Padding the tail of input data for SHA1 + uint8_t mh_sha1_interim_digests[sizeof(uint32_t) * SHA1_DIGEST_WORDS * HASH_SEGS]; + //!< Storing the SHA1 interim digests of all 16 segments. Each time, it will be copied to stack for 64-byte alignment purpose. + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE + AVX512_ALIGNED]; + //!< Re-structure sha1 block data from different segments to fit big endian. Use AVX512_ALIGNED for 64-byte alignment purpose. +}; + +/** + * @enum mh_sha1_ctx_error + * @brief CTX error flags + */ +enum mh_sha1_ctx_error{ + MH_SHA1_CTX_ERROR_NONE = 0, //!< MH_SHA1_CTX_ERROR_NONE + MH_SHA1_CTX_ERROR_NULL = -1, //!< MH_SHA1_CTX_ERROR_NULL +}; + + +/******************************************************************* + * mh_sha1 API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the mh_sha1_ctx structure. + * + * @param ctx Structure holding mh_sha1 info + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_init (struct mh_sha1_ctx* ctx); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_update (struct mh_sha1_ctx * ctx, const void* buffer, uint32_t len); + +/** + * @brief Finalize the message digests for multi-hash sha1. + * + * Place the message digest in mh_sha1_digest which must have enough space + * for the outputs. + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_finalize (struct mh_sha1_ctx* ctx, void* mh_sha1_digest); + +/******************************************************************* + * multi-types of mh_sha1 internal API + * + * XXXX The multi-binary version + * XXXX_base The C code version which used to display the algorithm + * XXXX_sse The version uses a ASM function optimized for SSE + * XXXX_avx The version uses a ASM function optimized for AVX + * XXXX_avx2 The version uses a ASM function optimized for AVX2 + * XXXX_avx512 The version uses a ASM function optimized for AVX512 + * + ******************************************************************/ + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * Base update() function that does not require SIMD support. + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_base (struct mh_sha1_ctx* ctx, const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires SSE + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_sse (struct mh_sha1_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_avx (struct mh_sha1_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX2 + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_avx2 (struct mh_sha1_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX512 + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_avx512 (struct mh_sha1_ctx * ctx, + const void* buffer, uint32_t len); + + +/** + * @brief Finalize the message digests for multi-hash sha1. + * + * Place the message digests in mh_sha1_digest, + * which must have enough space for the outputs. + * Base Finalize() function that does not require SIMD support. + * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_finalize_base (struct mh_sha1_ctx* ctx, + void* mh_sha1_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha1_digest which must have enough space + * for the outputs. + * + * @requires SSE + * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_finalize_sse (struct mh_sha1_ctx* ctx, + void* mh_sha1_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha1_digest which must have enough space + * for the outputs. + * + * @requires AVX + * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_finalize_avx (struct mh_sha1_ctx* ctx, + void* mh_sha1_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha1_digest which must have enough space + * for the outputs. + * + * @requires AVX2 + * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_finalize_avx2 (struct mh_sha1_ctx* ctx, + void* mh_sha1_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha1_digest which must have enough space + * for the outputs. + * + * @requires AVX512 + * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_finalize_avx512 (struct mh_sha1_ctx* ctx, + void* mh_sha1_digest); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h new file mode 100644 index 000000000..1c07306ec --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h @@ -0,0 +1,327 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MH_SHA1_MURMUR3_X64_128_H_ +#define _MH_SHA1_MURMUR3_X64_128_H_ + +/** + * @file mh_sha1_murmur3_x64_128.h + * @brief mh_sha1_murmur3_x64_128 function prototypes and structures + * + * Interface for mh_sha1_murmur3_x64_128 functions + * + * mh_sha1_murmur3_x64_128 Init-Update..Update-Finalize + * + * This file defines the interface to optimized functions used in mh_sha1 and + * mh_sha1_murmur3_x64_128. The definition of multi-hash SHA1(mh_sha1, + * for short) is: Pad the buffer in SHA1 style until the total length is a multiple + * of 4*16*16(words-width * parallel-segments * block-size); Hash the buffer + * in parallel, generating digests of 4*16*5 (words-width*parallel-segments* + * digest-size); Treat the set of digests as another data buffer, and generate + * a final SHA1 digest for it. mh_sha1_murmur3_x64_128 is a stitching function + * which will get a murmur3_x64_128 digest while generate mh_sha1 digest. + * + * + * Example + * \code + * uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; + * uint32_t murmur_digest[MURMUR3_x64_128_DIGEST_WORDS]; + * struct mh_sha1_murmur3_x64_128_ctx *ctx; + * + * ctx = malloc(sizeof(struct mh_sha1_murmur3_x64_128_ctx)); + * mh_sha1_murmur3_x64_128_init(ctx, 0); + * mh_sha1_murmur3_x64_128_update(ctx, buff, block_len); + * mh_sha1_murmur3_x64_128_finalize(ctx, mh_sha1_digest, + * murmur_digest); + * \endcode + */ + +#include +#include "mh_sha1.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +// External Interface Definition +// Add murmur3_x64_128 definition +#define MUR_BLOCK_SIZE (2 * sizeof(uint64_t)) +#define MURMUR3_x64_128_DIGEST_WORDS 4 + +/** @brief Holds info describing a single mh_sha1_murmur3_x64_128 + * + * It is better to use heap to allocate this data structure to avoid stack overflow. + * +*/ +struct mh_sha1_murmur3_x64_128_ctx { + uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; //!< the digest of multi-hash SHA1 + uint32_t murmur3_x64_128_digest[MURMUR3_x64_128_DIGEST_WORDS]; //!< the digest of murmur3_x64_128 + + uint64_t total_length; + //!< Parameters for update feature, describe the lengths of input buffers in bytes + uint8_t partial_block_buffer [MH_SHA1_BLOCK_SIZE * 2]; + //!< Padding the tail of input data for SHA1 + uint8_t mh_sha1_interim_digests[sizeof(uint32_t) * SHA1_DIGEST_WORDS * HASH_SEGS]; + //!< Storing the SHA1 interim digests of all 16 segments. Each time, it will be copied to stack for 64-byte alignment purpose. + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE + AVX512_ALIGNED]; + //!< Re-structure sha1 block data from different segments to fit big endian. Use AVX512_ALIGNED for 64-byte alignment purpose. +}; + +/** + * @enum mh_sha1_murmur3_ctx_error + * @brief CTX error flags + */ +enum mh_sha1_murmur3_ctx_error{ + MH_SHA1_MURMUR3_CTX_ERROR_NONE = 0, //!< MH_SHA1_MURMUR3_CTX_ERROR_NONE + MH_SHA1_MURMUR3_CTX_ERROR_NULL = -1, //! mh_sha256 Init-Update..Update-Finalize + * + * This file defines the interface to optimized functions used in mh_sha256. + * The definition of multi-hash SHA256(mh_sha256, for short) is: Pad the buffer + * in SHA256 style until the total length is a multiple of 4*16*16 + * (words-width * parallel-segments * block-size); Hash the buffer in + * parallel, generating digests of 4*16*8 (words-width*parallel-segments* + * digest-size); Treat the set of digests as another data buffer, and + * generate a final SHA256 digest for it. + * + * + * Example + * \code + * uint32_t mh_sha256_digest[SHA256_DIGEST_WORDS]; + * struct mh_sha256_ctx *ctx; + * + * ctx = malloc(sizeof(struct mh_sha256_ctx)); + * mh_sha256_init(ctx); + * mh_sha256_update(ctx, buff, block_len); + * mh_sha256_finalize(ctx, mh_sha256_digest); + * \endcode + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + + +// External Interface Definition +#define HASH_SEGS 16 +#define SHA256_BLOCK_SIZE 64 +#define MH_SHA256_BLOCK_SIZE (HASH_SEGS * SHA256_BLOCK_SIZE) +#define SHA256_DIGEST_WORDS 8 +#define AVX512_ALIGNED 64 + +/** @brief Holds info describing a single mh_sha256 + * + * It is better to use heap to allocate this data structure to avoid stack overflow. + * +*/ +struct mh_sha256_ctx { + uint32_t mh_sha256_digest[SHA256_DIGEST_WORDS]; //!< the digest of multi-hash SHA256 + + uint64_t total_length; + //!< Parameters for update feature, describe the lengths of input buffers in bytes + uint8_t partial_block_buffer [MH_SHA256_BLOCK_SIZE * 2]; + //!< Padding the tail of input data for SHA256 + uint8_t mh_sha256_interim_digests[sizeof(uint32_t) * SHA256_DIGEST_WORDS * HASH_SEGS]; + //!< Storing the SHA256 interim digests of all 16 segments. Each time, it will be copied to stack for 64-byte alignment purpose. + uint8_t frame_buffer[MH_SHA256_BLOCK_SIZE + AVX512_ALIGNED]; + //!< Re-structure sha256 block data from different segments to fit big endian. Use AVX512_ALIGNED for 64-byte alignment purpose. +}; + +/** + * @enum mh_sha256_ctx_error + * @brief CTX error flags + */ +enum mh_sha256_ctx_error{ + MH_SHA256_CTX_ERROR_NONE = 0, //!< MH_SHA256_CTX_ERROR_NONE + MH_SHA256_CTX_ERROR_NULL = -1, //!< MH_SHA256_CTX_ERROR_NULL +}; + + +/******************************************************************* + * mh_sha256 API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the mh_sha256_ctx structure. + * + * @param ctx Structure holding mh_sha256 info + * @returns int Return 0 if the function runs without errors + */ +int mh_sha256_init (struct mh_sha256_ctx* ctx); + +/** + * @brief Multi-hash sha256 update. + * + * Can be called repeatedly to update hashes with new input data. + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param ctx Structure holding mh_sha256 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + */ +int mh_sha256_update (struct mh_sha256_ctx * ctx, const void* buffer, uint32_t len); + +/** + * @brief Finalize the message digests for multi-hash sha256. + * + * Place the message digest in mh_sha256_digest which must have enough space + * for the outputs. + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param ctx Structure holding mh_sha256 info + * @param mh_sha256_digest The digest of mh_sha256 + * @returns int Return 0 if the function runs without errors + */ +int mh_sha256_finalize (struct mh_sha256_ctx* ctx, void* mh_sha256_digest); + +/******************************************************************* + * multi-types of mh_sha256 internal API + * + * XXXX The multi-binary version + * XXXX_base The C code version which used to display the algorithm + * XXXX_sse The version uses a ASM function optimized for SSE + * XXXX_avx The version uses a ASM function optimized for AVX + * XXXX_avx2 The version uses a ASM function optimized for AVX2 + * XXXX_avx512 The version uses a ASM function optimized for AVX512 + * + ******************************************************************/ + +/** + * @brief Multi-hash sha256 update. + * + * Can be called repeatedly to update hashes with new input data. + * Base update() function that does not require SIMD support. + * + * @param ctx Structure holding mh_sha256 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_update_base (struct mh_sha256_ctx* ctx, const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha256 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires SSE + * + * @param ctx Structure holding mh_sha256 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_update_sse (struct mh_sha256_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha256 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX + * + * @param ctx Structure holding mh_sha256 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_update_avx (struct mh_sha256_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha256 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX2 + * + * @param ctx Structure holding mh_sha256 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_update_avx2 (struct mh_sha256_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha256 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX512 + * + * @param ctx Structure holding mh_sha256 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_update_avx512 (struct mh_sha256_ctx * ctx, + const void* buffer, uint32_t len); + + +/** + * @brief Finalize the message digests for multi-hash sha256. + * + * Place the message digests in mh_sha256_digest, + * which must have enough space for the outputs. + * Base Finalize() function that does not require SIMD support. + * + * @param ctx Structure holding mh_sha256 info + * @param mh_sha256_digest The digest of mh_sha256 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_finalize_base (struct mh_sha256_ctx* ctx, + void* mh_sha256_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha256_digest which must have enough space + * for the outputs. + * + * @requires SSE + * + * @param ctx Structure holding mh_sha256 info + * @param mh_sha256_digest The digest of mh_sha256 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_finalize_sse (struct mh_sha256_ctx* ctx, + void* mh_sha256_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha256_digest which must have enough space + * for the outputs. + * + * @requires AVX + * + * @param ctx Structure holding mh_sha256 info + * @param mh_sha256_digest The digest of mh_sha256 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_finalize_avx (struct mh_sha256_ctx* ctx, + void* mh_sha256_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha256_digest which must have enough space + * for the outputs. + * + * @requires AVX2 + * + * @param ctx Structure holding mh_sha256 info + * @param mh_sha256_digest The digest of mh_sha256 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_finalize_avx2 (struct mh_sha256_ctx* ctx, + void* mh_sha256_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha256_digest which must have enough space + * for the outputs. + * + * @requires AVX512 + * + * @param ctx Structure holding mh_sha256 info + * @param mh_sha256_digest The digest of mh_sha256 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha256_finalize_avx512 (struct mh_sha256_ctx* ctx, + void* mh_sha256_digest); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h b/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h new file mode 100644 index 000000000..ac88f7b0a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h @@ -0,0 +1,112 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MULTI_BUFFER_H_ +#define _MULTI_BUFFER_H_ + +/** + * @file multi_buffer.h + * @brief Multi-buffer common fields + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @enum JOB_STS + * @brief Job return codes + */ + +typedef enum {STS_UNKNOWN = 0, //!< STS_UNKNOWN + STS_BEING_PROCESSED = 1,//!< STS_BEING_PROCESSED + STS_COMPLETED = 2, //!< STS_COMPLETED + STS_INTERNAL_ERROR, //!< STS_INTERNAL_ERROR + STS_ERROR //!< STS_ERROR +} JOB_STS; + +#define HASH_MB_NO_FLAGS 0 +#define HASH_MB_FIRST 1 +#define HASH_MB_LAST 2 + +/* Common flags for the new API only + * */ + +/** + * @enum HASH_CTX_FLAG + * @brief CTX job type + */ +typedef enum { + HASH_UPDATE = 0x00, //!< HASH_UPDATE + HASH_FIRST = 0x01, //!< HASH_FIRST + HASH_LAST = 0x02, //!< HASH_LAST + HASH_ENTIRE = 0x03, //!< HASH_ENTIRE +} HASH_CTX_FLAG; + +/** + * @enum HASH_CTX_STS + * @brief CTX status flags + */ +typedef enum { + HASH_CTX_STS_IDLE = 0x00, //!< HASH_CTX_STS_IDLE + HASH_CTX_STS_PROCESSING = 0x01, //!< HASH_CTX_STS_PROCESSING + HASH_CTX_STS_LAST = 0x02, //!< HASH_CTX_STS_LAST + HASH_CTX_STS_COMPLETE = 0x04, //!< HASH_CTX_STS_COMPLETE +} HASH_CTX_STS; + +/** + * @enum HASH_CTX_ERROR + * @brief CTX error flags + */ +typedef enum { + HASH_CTX_ERROR_NONE = 0, //!< HASH_CTX_ERROR_NONE + HASH_CTX_ERROR_INVALID_FLAGS = -1, //!< HASH_CTX_ERROR_INVALID_FLAGS + HASH_CTX_ERROR_ALREADY_PROCESSING = -2, //!< HASH_CTX_ERROR_ALREADY_PROCESSING + HASH_CTX_ERROR_ALREADY_COMPLETED = -3, //!< HASH_CTX_ERROR_ALREADY_COMPLETED +} HASH_CTX_ERROR; + + +#define hash_ctx_user_data(ctx) ((ctx)->user_data) +#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) +#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) +#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) +#define hash_ctx_status(ctx) ((ctx)->status) +#define hash_ctx_error(ctx) ((ctx)->error) +#define hash_ctx_init(ctx) \ + do { \ + (ctx)->error = HASH_CTX_ERROR_NONE; \ + (ctx)->status = HASH_CTX_STS_COMPLETE; \ + } while(0) + +#ifdef __cplusplus +} +#endif + +#endif // _MULTI_BUFFER_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm b/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm new file mode 100644 index 000000000..4dd019319 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm @@ -0,0 +1,517 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%ifndef _MULTIBINARY_ASM_ +%define _MULTIBINARY_ASM_ + +%ifidn __OUTPUT_FORMAT__, elf32 + %define mbin_def_ptr dd + %define mbin_ptr_sz dword + %define mbin_rdi edi + %define mbin_rsi esi + %define mbin_rax eax + %define mbin_rbx ebx + %define mbin_rcx ecx + %define mbin_rdx edx +%else + %define mbin_def_ptr dq + %define mbin_ptr_sz qword + %define mbin_rdi rdi + %define mbin_rsi rsi + %define mbin_rax rax + %define mbin_rbx rbx + %define mbin_rcx rcx + %define mbin_rdx rdx +%endif + +%ifndef AS_FEATURE_LEVEL +%define AS_FEATURE_LEVEL 4 +%endif + +;;;; +; multibinary macro: +; creates the visable entry point that uses HW optimized call pointer +; creates the init of the HW optimized call pointer +;;;; +%macro mbin_interface 1 + ;;;; + ; *_dispatched is defaulted to *_mbinit and replaced on first call. + ; Therefore, *_dispatch_init is only executed on first call. + ;;;; + section .data + %1_dispatched: + mbin_def_ptr %1_mbinit + + section .text + mk_global %1, function + %1_mbinit: + ;;; only called the first time to setup hardware match + call %1_dispatch_init + ;;; falls thru to execute the hw optimized code + %1: + jmp mbin_ptr_sz [%1_dispatched] +%endmacro + +;;;;; +; mbin_dispatch_init parameters +; Use this function when SSE/00/01 is a minimum requirement +; 1-> function name +; 2-> SSE/00/01 optimized function used as base +; 3-> AVX or AVX/02 opt func +; 4-> AVX2 or AVX/04 opt func +;;;;; +%macro mbin_dispatch_init 4 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + lea mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01 + + mov eax, 1 + cpuid + and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + lea mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func + jne _%1_init_done ; AVX is not available so end + mov mbin_rsi, mbin_rbx + + ;; Try for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen4) opt func + cmovne mbin_rsi, mbin_rbx + + ;; Does it have xmm and ymm support + xor ecx, ecx + xgetbv + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + je _%1_init_done + lea mbin_rsi, [%2 WRT_OPT] + + _%1_init_done: + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro + +;;;;; +; mbin_dispatch_init2 parameters +; Cases where only base functions are available +; 1-> function name +; 2-> base function +;;;;; +%macro mbin_dispatch_init2 2 + section .text + %1_dispatch_init: + push mbin_rsi + lea mbin_rsi, [%2 WRT_OPT] ; Default + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro + +;;;;; +; mbin_dispatch_init5 parameters +; 1-> function name +; 2-> base function +; 3-> SSE4_1 or 00/01 optimized function +; 4-> AVX/02 opt func +; 5-> AVX2/04 opt func +;;;;; +%macro mbin_dispatch_init5 5 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function + + mov eax, 1 + cpuid + ; Test for SSE4.1 + test ecx, FLAG_CPUID1_ECX_SSE4_1 + lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func + cmovne mbin_rsi, mbin_rbx + + and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen2) opt func + jne _%1_init_done ; AVX is not available so end + mov mbin_rsi, mbin_rbx + + ;; Try for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + lea mbin_rbx, [%5 WRT_OPT] ; AVX (gen4) opt func + cmovne mbin_rsi, mbin_rbx + + ;; Does it have xmm and ymm support + xor ecx, ecx + xgetbv + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + je _%1_init_done + lea mbin_rsi, [%3 WRT_OPT] + + _%1_init_done: + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro + +%if AS_FEATURE_LEVEL >= 6 +;;;;; +; mbin_dispatch_init6 parameters +; 1-> function name +; 2-> base function +; 3-> SSE4_1 or 00/01 optimized function +; 4-> AVX/02 opt func +; 5-> AVX2/04 opt func +; 6-> AVX512/06 opt func +;;;;; +%macro mbin_dispatch_init6 6 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + push mbin_rdi + lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function + + mov eax, 1 + cpuid + mov ebx, ecx ; save cpuid1.ecx + test ecx, FLAG_CPUID1_ECX_SSE4_1 + je _%1_init_done ; Use base function if no SSE4_1 + lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt + + ;; Test for XMM_YMM support/AVX + test ecx, FLAG_CPUID1_ECX_OSXSAVE + je _%1_init_done + xor ecx, ecx + xgetbv ; xcr -> edx:eax + mov edi, eax ; save xgetvb.eax + + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + jne _%1_init_done + test ebx, FLAG_CPUID1_ECX_AVX + je _%1_init_done + lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt + + ;; Test for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + je _%1_init_done ; No AVX2 possible + lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func + + ;; Test for AVX512 + and edi, FLAG_XGETBV_EAX_ZMM_OPM + cmp edi, FLAG_XGETBV_EAX_ZMM_OPM + jne _%1_init_done ; No AVX512 possible + and ebx, FLAGS_CPUID7_EBX_AVX512_G1 + cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 + lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt + cmove mbin_rsi, mbin_rbx + + _%1_init_done: + pop mbin_rdi + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro + +%else +%macro mbin_dispatch_init6 6 + mbin_dispatch_init5 %1, %2, %3, %4, %5 +%endmacro +%endif + +%if AS_FEATURE_LEVEL >= 10 +;;;;; +; mbin_dispatch_init7 parameters +; 1-> function name +; 2-> base function +; 3-> SSE4_2 or 00/01 optimized function +; 4-> AVX/02 opt func +; 5-> AVX2/04 opt func +; 6-> AVX512/06 opt func +; 7-> AVX512 Update/10 opt func +;;;;; +%macro mbin_dispatch_init7 7 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + push mbin_rdi + lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function + + mov eax, 1 + cpuid + mov ebx, ecx ; save cpuid1.ecx + test ecx, FLAG_CPUID1_ECX_SSE4_2 + je _%1_init_done ; Use base function if no SSE4_2 + lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt + + ;; Test for XMM_YMM support/AVX + test ecx, FLAG_CPUID1_ECX_OSXSAVE + je _%1_init_done + xor ecx, ecx + xgetbv ; xcr -> edx:eax + mov edi, eax ; save xgetvb.eax + + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + jne _%1_init_done + test ebx, FLAG_CPUID1_ECX_AVX + je _%1_init_done + lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt + + ;; Test for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + je _%1_init_done ; No AVX2 possible + lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func + + ;; Test for AVX512 + and edi, FLAG_XGETBV_EAX_ZMM_OPM + cmp edi, FLAG_XGETBV_EAX_ZMM_OPM + jne _%1_init_done ; No AVX512 possible + and ebx, FLAGS_CPUID7_EBX_AVX512_G1 + cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 + lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt + cmove mbin_rsi, mbin_rbx + + and ecx, FLAGS_CPUID7_ECX_AVX512_G2 + cmp ecx, FLAGS_CPUID7_ECX_AVX512_G2 + lea mbin_rbx, [%7 WRT_OPT] ; AVX512/06 opt + cmove mbin_rsi, mbin_rbx + + _%1_init_done: + pop mbin_rdi + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro +%else +%macro mbin_dispatch_init7 7 + mbin_dispatch_init6 %1, %2, %3, %4, %5, %6 +%endmacro +%endif + +;;;;; +; mbin_dispatch_sse_to_avx2_shani parameters +; derived from mbin_dispatch_init +; Use this function when SSE/00/01 is a minimum requirement +; 1-> function name +; 2-> SSE/00/01 optimized function used as base +; 3-> AVX or AVX/02 opt func +; 4-> AVX2 or AVX/04 opt func +; 5-> SHANI opt for GLM +;;;;; +%macro mbin_dispatch_sse_to_avx2_shani 5 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + lea mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01 + + mov eax, 1 + cpuid + and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + lea mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func + jne _%1_shani_check ; AVX is not available so check shani + mov mbin_rsi, mbin_rbx + + ;; Try for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen4) opt func + cmovne mbin_rsi, mbin_rbx + + ;; Does it have xmm and ymm support + xor ecx, ecx + xgetbv + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + je _%1_init_done + lea mbin_rsi, [%2 WRT_OPT] + + _%1_init_done: + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret + + _%1_shani_check: + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_SHA + lea mbin_rbx, [%5 WRT_OPT] ; SHANI opt func + cmovne mbin_rsi, mbin_rbx + jmp _%1_init_done ; end +%endmacro + +;;;;; +; mbin_dispatch_base_to_avx512_shani parameters +; derived from mbin_dispatch_init6 +; 1-> function name +; 2-> base function +; 3-> SSE4_2 or 00/01 optimized function +; 4-> AVX/02 opt func +; 5-> AVX2/04 opt func +; 6-> AVX512/06 opt func +; 7-> SHANI opt for GLM +; 8-> SHANI opt for CNL +;;;;; +%macro mbin_dispatch_base_to_avx512_shani 8 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + push mbin_rdi + lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function + + mov eax, 1 + cpuid + mov ebx, ecx ; save cpuid1.ecx + test ecx, FLAG_CPUID1_ECX_SSE4_2 + je _%1_init_done ; Use base function if no SSE4_2 + lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt + + ;; Test for XMM_YMM support/AVX + test ecx, FLAG_CPUID1_ECX_OSXSAVE + je _%1_shani_check + xor ecx, ecx + xgetbv ; xcr -> edx:eax + mov edi, eax ; save xgetvb.eax + + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + jne _%1_shani_check + test ebx, FLAG_CPUID1_ECX_AVX + je _%1_shani_check + lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt + + ;; Test for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + je _%1_init_done ; No AVX2 possible + lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func + + ;; Test for AVX512 + and edi, FLAG_XGETBV_EAX_ZMM_OPM + cmp edi, FLAG_XGETBV_EAX_ZMM_OPM + jne _%1_init_done ; No AVX512 possible + and ebx, FLAGS_CPUID7_EBX_AVX512_G1 + cmp ebx, FLAGS_CPUID7_EBX_AVX512_G1 + lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt + cmove mbin_rsi, mbin_rbx + + ;; Test for SHANI + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_SHA + lea mbin_rbx, [%8 WRT_OPT] ; SHANI opt sse func + cmovne mbin_rsi, mbin_rbx + + _%1_init_done: + pop mbin_rdi + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret + + _%1_shani_check: + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_SHA + lea mbin_rbx, [%7 WRT_OPT] ; SHANI opt sse func + cmovne mbin_rsi, mbin_rbx + jmp _%1_init_done ; end +%endmacro + + + +%endif ; ifndef _MULTIBINARY_ASM_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm b/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm new file mode 100644 index 000000000..717dd0503 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm @@ -0,0 +1,442 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2019 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%ifndef _REG_SIZES_ASM_ +%define _REG_SIZES_ASM_ + +%ifndef AS_FEATURE_LEVEL +%define AS_FEATURE_LEVEL 4 +%endif + +%define EFLAGS_HAS_CPUID (1<<21) +%define FLAG_CPUID1_ECX_CLMUL (1<<1) +%define FLAG_CPUID1_EDX_SSE2 (1<<26) +%define FLAG_CPUID1_ECX_SSE3 (1) +%define FLAG_CPUID1_ECX_SSE4_1 (1<<19) +%define FLAG_CPUID1_ECX_SSE4_2 (1<<20) +%define FLAG_CPUID1_ECX_POPCNT (1<<23) +%define FLAG_CPUID1_ECX_AESNI (1<<25) +%define FLAG_CPUID1_ECX_OSXSAVE (1<<27) +%define FLAG_CPUID1_ECX_AVX (1<<28) +%define FLAG_CPUID1_EBX_AVX2 (1<<5) + +%define FLAG_CPUID7_EBX_AVX2 (1<<5) +%define FLAG_CPUID7_EBX_AVX512F (1<<16) +%define FLAG_CPUID7_EBX_AVX512DQ (1<<17) +%define FLAG_CPUID7_EBX_AVX512IFMA (1<<21) +%define FLAG_CPUID7_EBX_AVX512PF (1<<26) +%define FLAG_CPUID7_EBX_AVX512ER (1<<27) +%define FLAG_CPUID7_EBX_AVX512CD (1<<28) +%define FLAG_CPUID7_EBX_SHA (1<<29) +%define FLAG_CPUID7_EBX_AVX512BW (1<<30) +%define FLAG_CPUID7_EBX_AVX512VL (1<<31) + +%define FLAG_CPUID7_ECX_AVX512VBMI (1<<1) +%define FLAG_CPUID7_ECX_AVX512VBMI2 (1 << 6) +%define FLAG_CPUID7_ECX_GFNI (1 << 8) +%define FLAG_CPUID7_ECX_VAES (1 << 9) +%define FLAG_CPUID7_ECX_VPCLMULQDQ (1 << 10) +%define FLAG_CPUID7_ECX_VNNI (1 << 11) +%define FLAG_CPUID7_ECX_BITALG (1 << 12) +%define FLAG_CPUID7_ECX_VPOPCNTDQ (1 << 14) + +%define FLAGS_CPUID7_EBX_AVX512_G1 (FLAG_CPUID7_EBX_AVX512F | FLAG_CPUID7_EBX_AVX512VL | FLAG_CPUID7_EBX_AVX512BW | FLAG_CPUID7_EBX_AVX512CD | FLAG_CPUID7_EBX_AVX512DQ) +%define FLAGS_CPUID7_ECX_AVX512_G2 (FLAG_CPUID7_ECX_AVX512VBMI2 | FLAG_CPUID7_ECX_GFNI | FLAG_CPUID7_ECX_VAES | FLAG_CPUID7_ECX_VPCLMULQDQ | FLAG_CPUID7_ECX_VNNI | FLAG_CPUID7_ECX_BITALG | FLAG_CPUID7_ECX_VPOPCNTDQ) + +%define FLAG_XGETBV_EAX_XMM (1<<1) +%define FLAG_XGETBV_EAX_YMM (1<<2) +%define FLAG_XGETBV_EAX_XMM_YMM 0x6 +%define FLAG_XGETBV_EAX_ZMM_OPM 0xe0 + +%define FLAG_CPUID1_EAX_AVOTON 0x000406d0 +%define FLAG_CPUID1_EAX_STEP_MASK 0xfffffff0 + +; define d and w variants for registers + +%define raxd eax +%define raxw ax +%define raxb al + +%define rbxd ebx +%define rbxw bx +%define rbxb bl + +%define rcxd ecx +%define rcxw cx +%define rcxb cl + +%define rdxd edx +%define rdxw dx +%define rdxb dl + +%define rsid esi +%define rsiw si +%define rsib sil + +%define rdid edi +%define rdiw di +%define rdib dil + +%define rbpd ebp +%define rbpw bp +%define rbpb bpl + +%define zmm0x xmm0 +%define zmm1x xmm1 +%define zmm2x xmm2 +%define zmm3x xmm3 +%define zmm4x xmm4 +%define zmm5x xmm5 +%define zmm6x xmm6 +%define zmm7x xmm7 +%define zmm8x xmm8 +%define zmm9x xmm9 +%define zmm10x xmm10 +%define zmm11x xmm11 +%define zmm12x xmm12 +%define zmm13x xmm13 +%define zmm14x xmm14 +%define zmm15x xmm15 +%define zmm16x xmm16 +%define zmm17x xmm17 +%define zmm18x xmm18 +%define zmm19x xmm19 +%define zmm20x xmm20 +%define zmm21x xmm21 +%define zmm22x xmm22 +%define zmm23x xmm23 +%define zmm24x xmm24 +%define zmm25x xmm25 +%define zmm26x xmm26 +%define zmm27x xmm27 +%define zmm28x xmm28 +%define zmm29x xmm29 +%define zmm30x xmm30 +%define zmm31x xmm31 + +%define ymm0x xmm0 +%define ymm1x xmm1 +%define ymm2x xmm2 +%define ymm3x xmm3 +%define ymm4x xmm4 +%define ymm5x xmm5 +%define ymm6x xmm6 +%define ymm7x xmm7 +%define ymm8x xmm8 +%define ymm9x xmm9 +%define ymm10x xmm10 +%define ymm11x xmm11 +%define ymm12x xmm12 +%define ymm13x xmm13 +%define ymm14x xmm14 +%define ymm15x xmm15 +%define ymm16x xmm16 +%define ymm17x xmm17 +%define ymm18x xmm18 +%define ymm19x xmm19 +%define ymm20x xmm20 +%define ymm21x xmm21 +%define ymm22x xmm22 +%define ymm23x xmm23 +%define ymm24x xmm24 +%define ymm25x xmm25 +%define ymm26x xmm26 +%define ymm27x xmm27 +%define ymm28x xmm28 +%define ymm29x xmm29 +%define ymm30x xmm30 +%define ymm31x xmm31 + +%define xmm0x xmm0 +%define xmm1x xmm1 +%define xmm2x xmm2 +%define xmm3x xmm3 +%define xmm4x xmm4 +%define xmm5x xmm5 +%define xmm6x xmm6 +%define xmm7x xmm7 +%define xmm8x xmm8 +%define xmm9x xmm9 +%define xmm10x xmm10 +%define xmm11x xmm11 +%define xmm12x xmm12 +%define xmm13x xmm13 +%define xmm14x xmm14 +%define xmm15x xmm15 +%define xmm16x xmm16 +%define xmm17x xmm17 +%define xmm18x xmm18 +%define xmm19x xmm19 +%define xmm20x xmm20 +%define xmm21x xmm21 +%define xmm22x xmm22 +%define xmm23x xmm23 +%define xmm24x xmm24 +%define xmm25x xmm25 +%define xmm26x xmm26 +%define xmm27x xmm27 +%define xmm28x xmm28 +%define xmm29x xmm29 +%define xmm30x xmm30 +%define xmm31x xmm31 + +%define zmm0y ymm0 +%define zmm1y ymm1 +%define zmm2y ymm2 +%define zmm3y ymm3 +%define zmm4y ymm4 +%define zmm5y ymm5 +%define zmm6y ymm6 +%define zmm7y ymm7 +%define zmm8y ymm8 +%define zmm9y ymm9 +%define zmm10y ymm10 +%define zmm11y ymm11 +%define zmm12y ymm12 +%define zmm13y ymm13 +%define zmm14y ymm14 +%define zmm15y ymm15 +%define zmm16y ymm16 +%define zmm17y ymm17 +%define zmm18y ymm18 +%define zmm19y ymm19 +%define zmm20y ymm20 +%define zmm21y ymm21 +%define zmm22y ymm22 +%define zmm23y ymm23 +%define zmm24y ymm24 +%define zmm25y ymm25 +%define zmm26y ymm26 +%define zmm27y ymm27 +%define zmm28y ymm28 +%define zmm29y ymm29 +%define zmm30y ymm30 +%define zmm31y ymm31 + +%define xmm0y ymm0 +%define xmm1y ymm1 +%define xmm2y ymm2 +%define xmm3y ymm3 +%define xmm4y ymm4 +%define xmm5y ymm5 +%define xmm6y ymm6 +%define xmm7y ymm7 +%define xmm8y ymm8 +%define xmm9y ymm9 +%define xmm10y ymm10 +%define xmm11y ymm11 +%define xmm12y ymm12 +%define xmm13y ymm13 +%define xmm14y ymm14 +%define xmm15y ymm15 +%define xmm16y ymm16 +%define xmm17y ymm17 +%define xmm18y ymm18 +%define xmm19y ymm19 +%define xmm20y ymm20 +%define xmm21y ymm21 +%define xmm22y ymm22 +%define xmm23y ymm23 +%define xmm24y ymm24 +%define xmm25y ymm25 +%define xmm26y ymm26 +%define xmm27y ymm27 +%define xmm28y ymm28 +%define xmm29y ymm29 +%define xmm30y ymm30 +%define xmm31y ymm31 + +%define xmm0z zmm0 +%define xmm1z zmm1 +%define xmm2z zmm2 +%define xmm3z zmm3 +%define xmm4z zmm4 +%define xmm5z zmm5 +%define xmm6z zmm6 +%define xmm7z zmm7 +%define xmm8z zmm8 +%define xmm9z zmm9 +%define xmm10z zmm10 +%define xmm11z zmm11 +%define xmm12z zmm12 +%define xmm13z zmm13 +%define xmm14z zmm14 +%define xmm15z zmm15 +%define xmm16z zmm16 +%define xmm17z zmm17 +%define xmm18z zmm18 +%define xmm19z zmm19 +%define xmm20z zmm20 +%define xmm21z zmm21 +%define xmm22z zmm22 +%define xmm23z zmm23 +%define xmm24z zmm24 +%define xmm25z zmm25 +%define xmm26z zmm26 +%define xmm27z zmm27 +%define xmm28z zmm28 +%define xmm29z zmm29 +%define xmm30z zmm30 +%define xmm31z zmm31 + +%define ymm0z zmm0 +%define ymm1z zmm1 +%define ymm2z zmm2 +%define ymm3z zmm3 +%define ymm4z zmm4 +%define ymm5z zmm5 +%define ymm6z zmm6 +%define ymm7z zmm7 +%define ymm8z zmm8 +%define ymm9z zmm9 +%define ymm10z zmm10 +%define ymm11z zmm11 +%define ymm12z zmm12 +%define ymm13z zmm13 +%define ymm14z zmm14 +%define ymm15z zmm15 +%define ymm16z zmm16 +%define ymm17z zmm17 +%define ymm18z zmm18 +%define ymm19z zmm19 +%define ymm20z zmm20 +%define ymm21z zmm21 +%define ymm22z zmm22 +%define ymm23z zmm23 +%define ymm24z zmm24 +%define ymm25z zmm25 +%define ymm26z zmm26 +%define ymm27z zmm27 +%define ymm28z zmm28 +%define ymm29z zmm29 +%define ymm30z zmm30 +%define ymm31z zmm31 + +%define DWORD(reg) reg %+ d +%define WORD(reg) reg %+ w +%define BYTE(reg) reg %+ b + +%define XWORD(reg) reg %+ x +%define YWORD(reg) reg %+ y +%define ZWORD(reg) reg %+ z + +%ifidn __OUTPUT_FORMAT__,elf32 +section .note.GNU-stack noalloc noexec nowrite progbits +section .text +%endif +%ifidn __OUTPUT_FORMAT__,elf64 + %define __x86_64__ +section .note.GNU-stack noalloc noexec nowrite progbits +section .text +%endif +%ifidn __OUTPUT_FORMAT__,win64 + %define __x86_64__ +%endif +%ifidn __OUTPUT_FORMAT__,macho64 + %define __x86_64__ +%endif + +%ifdef __x86_64__ + %define endbranch db 0xf3, 0x0f, 0x1e, 0xfa +%else + %define endbranch db 0xf3, 0x0f, 0x1e, 0xfb +%endif + +%ifdef REL_TEXT + %define WRT_OPT +%elifidn __OUTPUT_FORMAT__, elf64 + %define WRT_OPT wrt ..plt +%else + %define WRT_OPT +%endif + +%macro mk_global 1-3 + %ifdef __NASM_VER__ + %ifidn __OUTPUT_FORMAT__, macho64 + global %1 + %elifidn __OUTPUT_FORMAT__, win64 + global %1 + %else + global %1:%2 %3 + %endif + %else + global %1:%2 %3 + %endif +%endmacro + + +; Fixes for nasm lack of MS proc helpers +%ifdef __NASM_VER__ + %ifidn __OUTPUT_FORMAT__, win64 + %macro alloc_stack 1 + sub rsp, %1 + %endmacro + + %macro proc_frame 1 + %1: + %endmacro + + %macro save_xmm128 2 + movdqa [rsp + %2], %1 + %endmacro + + %macro save_reg 2 + mov [rsp + %2], %1 + %endmacro + + %macro rex_push_reg 1 + push %1 + %endmacro + + %macro push_reg 1 + push %1 + %endmacro + + %define end_prolog + %endif + + %define endproc_frame +%endif + +%ifidn __OUTPUT_FORMAT__, macho64 + %define elf64 macho64 + mac_equ equ 1 +%endif + +%macro slversion 4 + section .text + global %1_slver_%2%3%4 + global %1_slver + %1_slver: + %1_slver_%2%3%4: + dw 0x%4 + db 0x%3, 0x%2 +%endmacro + +%endif ; ifndef _REG_SIZES_ASM_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/rolling_hashx.h b/src/crypto/isa-l/isa-l_crypto/include/rolling_hashx.h new file mode 100644 index 000000000..035cf1701 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/rolling_hashx.h @@ -0,0 +1,114 @@ +/********************************************************************** + Copyright(c) 2011-2017 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/** + * @file rolling_hashx.h + * @brief Fingerprint functions based on rolling hash + * + * rolling_hash2 - checks hash in a sliding window based on random 64-bit hash. + */ + +#ifndef _ROLLING_HASHX_H_ +#define _ROLLING_HASHX_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + *@brief rolling hash return values + */ +enum { + FINGERPRINT_RET_HIT = 0, //!< Fingerprint trigger hit + FINGERPRINT_RET_MAX, //!< Fingerprint max length reached before hit + FINGERPRINT_RET_OTHER //!< Fingerprint function error returned +}; + +#define FINGERPRINT_MAX_WINDOW 48 + +/** + * @brief Context for rolling_hash2 functions + */ +struct rh_state2 { + uint8_t history[FINGERPRINT_MAX_WINDOW]; + uint64_t table1[256]; + uint64_t table2[256]; + uint64_t hash; + uint32_t w; +}; + +/** + * @brief Initialize state object for rolling hash2 + * + * @param state Structure holding state info on current rolling hash + * @param w Window width (1 <= w <= 32) + * @returns 0 - success, -1 - failure + */ +int rolling_hash2_init(struct rh_state2 *state, uint32_t w); + +/** + * @brief Reset the hash state history + * + * @param state Structure holding state info on current rolling hash + * @param init_bytes Optional window size buffer to pre-init hash + * @returns none + */ +void rolling_hash2_reset(struct rh_state2 *state, uint8_t * init_bytes); + +/** + * @brief Run rolling hash function until trigger met or max length reached + * + * Checks for trigger based on a random hash in a sliding window. + * @param state Structure holding state info on current rolling hash + * @param buffer Pointer to input buffer to run windowed hash on + * @param max_len Max length to run over input + * @param mask Mask bits ORed with hash before test with trigger + * @param trigger Match value to compare with windowed hash at each input byte + * @param offset Offset from buffer to match, set if match found + * @returns FINGERPRINT_RET_HIT - match found, FINGERPRINT_RET_MAX - exceeded max length + */ +int rolling_hash2_run(struct rh_state2 *state, uint8_t * buffer, uint32_t max_len, + uint32_t mask, uint32_t trigger, uint32_t * offset); + +/** + * @brief Generate an appropriate mask to target mean hit rate + * + * @param mean Target chunk size in bytes + * @param shift Bits to rotate result to get independent masks + * @returns 32-bit mask value + */ +uint32_t rolling_hashx_mask_gen(long mean, int shift); + +#ifdef __cplusplus +} +#endif + +#endif // _ROLLING_HASHX_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h new file mode 100644 index 000000000..3a41684b4 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h @@ -0,0 +1,450 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _SHA1_MB_H_ +#define _SHA1_MB_H_ + +/** + * @file sha1_mb.h + * @brief Multi-buffer CTX API SHA1 function prototypes and structures + * + * Interface for multi-buffer SHA1 functions + * + * Multi-buffer SHA1 Entire or First-Update..Update-Last + * + * The interface to this multi-buffer hashing code is carried out through the + * context-level (CTX) init, submit and flush functions and the SHA1_HASH_CTX_MGR and + * SHA1_HASH_CTX objects. Numerous SHA1_HASH_CTX objects may be instantiated by the + * application for use with a single SHA1_HASH_CTX_MGR. + * + * The CTX interface functions carry out the initialization and padding of the jobs + * entered by the user and add them to the multi-buffer manager. The lower level "scheduler" + * layer then processes the jobs in an out-of-order manner. The scheduler layer functions + * are internal and are not intended to be invoked directly. Jobs can be submitted + * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial + * jobs which can be started using the HASH_FIRST flag, and later resumed or finished + * using the HASH_UPDATE and HASH_LAST flags respectively. + * + * Note: The submit function does not require data buffers to be block sized. + * + * The SHA1 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and + * AVX512. In addition, a multibinary interface is provided, which selects the appropriate + * architecture-specific function at runtime. + * + * Usage: The application creates a SHA1_HASH_CTX_MGR object and initializes it + * with a call to sha1_ctx_mgr_init*() function, where henceforth "*" stands for the + * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512(or no suffix for the + * multibinary version). The SHA1_HASH_CTX_MGR object will be used to schedule processor + * resources, with up to 4 SHA1_HASH_CTX objects (or 8 in the AVX2 case, 16 in the AVX512) + * being processed at a time. + * + * Each SHA1_HASH_CTX must be initialized before first use by the hash_ctx_init macro + * defined in multi_buffer.h. After initialization, the application may begin computing + * a hash by giving the SHA1_HASH_CTX to a SHA1_HASH_CTX_MGR using the submit functions + * sha1_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA1_HASH_CTX is + * returned to the application (via this or a later call to sha1_ctx_mgr_submit*() or + * sha1_ctx_mgr_flush*()), the application can then re-submit it with another call to + * sha1_ctx_mgr_submit*(), but without the HASH_FIRST flag set. + * + * Ideally, on the last buffer for that hash, sha1_ctx_mgr_submit_sse is called with + * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero + * length if necessary. When a SHA1_HASH_CTX is returned after having been submitted with + * HASH_LAST, it will contain a valid hash. The SHA1_HASH_CTX can be reused immediately + * by submitting with HASH_FIRST. + * + * For example, you would submit hashes with the following flags for the following numbers + * of buffers: + *
    + *
  • one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE) + *
  • two buffers: HASH_FIRST, HASH_LAST + *
  • three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST + * etc. + *
+ * + * The order in which SHA1_CTX objects are returned is in general different from the order + * in which they are submitted. + * + * A few possible error conditions exist: + *
    + *
  • Submitting flags other than the allowed entire/first/update/last values + *
  • Submitting a context that is currently being managed by a SHA1_HASH_CTX_MGR. + *
  • Submitting a context after HASH_LAST is used but before HASH_FIRST is set. + *
+ * + * These error conditions are reported by returning the SHA1_HASH_CTX immediately after + * a submit with its error member set to a non-zero error code (defined in + * multi_buffer.h). No changes are made to the SHA1_HASH_CTX_MGR in the case of an + * error; no processing is done for other hashes. + * + */ + +#include +#include "multi_buffer.h" +#include "types.h" + +#ifndef _MSC_VER +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Hash Constants and Typedefs +#define SHA1_DIGEST_NWORDS 5 +#define SHA1_MAX_LANES 16 +#define SHA1_X8_LANES 8 +#define SHA1_MIN_LANES 4 +#define SHA1_BLOCK_SIZE 64 +#define SHA1_LOG2_BLOCK_SIZE 6 +#define SHA1_PADLENGTHFIELD_SIZE 8 +#define SHA1_INITIAL_DIGEST \ + 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 + +typedef uint32_t sha1_digest_array[SHA1_DIGEST_NWORDS][SHA1_MAX_LANES]; +typedef uint32_t SHA1_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single SHA1 job for the multi-buffer manager */ + +typedef struct { + uint8_t* buffer; //!< pointer to data buffer for this job + uint32_t len; //!< length of buffer for this job in blocks. + DECLARE_ALIGNED(uint32_t result_digest[SHA1_DIGEST_NWORDS],64); + JOB_STS status; //!< output job status + void* user_data; //!< pointer for user's job-related data +} SHA1_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted SHA1 job */ + +typedef struct { + sha1_digest_array digest; + uint8_t* data_ptr[SHA1_MAX_LANES]; +} SHA1_MB_ARGS_X16; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + SHA1_JOB *job_in_lane; +} SHA1_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer SHA1 jobs */ + +typedef struct { + SHA1_MB_ARGS_X16 args; + uint32_t lens[SHA1_MAX_LANES]; + uint64_t unused_lanes; //!< each nibble is index (0...3 or 0...7 or 0...15) of unused lanes, nibble 4 or 8 is set to F as a flag + SHA1_LANE_DATA ldata[SHA1_MAX_LANES]; + uint32_t num_lanes_inuse; +} SHA1_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer SHA1 jobs */ + +typedef struct { + SHA1_MB_JOB_MGR mgr; +} SHA1_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single SHA1 job for the multi-buffer CTX manager */ + +typedef struct { + SHA1_JOB job; // Must be at struct offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint64_t total_length; //!< Running counter of length processed for this CTX's job + const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. + uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void* user_data; //!< pointer for user to keep any job-related data +} SHA1_HASH_CTX; + +/******************** multibinary function prototypes **********************/ + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush (SHA1_HASH_CTX_MGR* mgr); + + +/******************************************************************* + * Context level API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the context level SHA1 multi-buffer manager structure. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_sse (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the context level multi-buffer manager. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_sse (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_sse (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the context level SHA1 multi-buffer manager structure. + * @requires SSE4.1 and SHANI + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_sse_ni (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the context level multi-buffer manager. + * @requires SSE4.1 and SHANI + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_sse_ni (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires SSE4.1 and SHANI + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_sse_ni (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_avx (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_avx2 (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx2 (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx2 (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_avx512 (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx512 (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx512 (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. + * @requires AVX512 and SHANI + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_avx512_ni (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. + * @requires AVX512 and SHANI + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx512_ni (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires AVX512 and SHANI + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx512_ni (SHA1_HASH_CTX_MGR* mgr); + + +/******************************************************************* + * Scheduler (internal) level out-of-order function prototypes + ******************************************************************/ + +void sha1_mb_mgr_init_sse (SHA1_MB_JOB_MGR *state); +SHA1_JOB* sha1_mb_mgr_submit_sse (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_sse (SHA1_MB_JOB_MGR *state); + +#define sha1_mb_mgr_init_avx sha1_mb_mgr_init_sse +SHA1_JOB* sha1_mb_mgr_submit_avx (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_avx (SHA1_MB_JOB_MGR *state); + +void sha1_mb_mgr_init_avx2 (SHA1_MB_JOB_MGR *state); +SHA1_JOB* sha1_mb_mgr_submit_avx2 (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_avx2 (SHA1_MB_JOB_MGR *state); + +void sha1_mb_mgr_init_avx512 (SHA1_MB_JOB_MGR *state); +SHA1_JOB* sha1_mb_mgr_submit_avx512 (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_avx512 (SHA1_MB_JOB_MGR *state); + +void sha1_mb_mgr_init_sse_ni (SHA1_MB_JOB_MGR *state); +SHA1_JOB* sha1_mb_mgr_submit_sse_ni (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_sse_ni (SHA1_MB_JOB_MGR *state); + +void sha1_mb_mgr_init_avx512_ni (SHA1_MB_JOB_MGR *state); +SHA1_JOB* sha1_mb_mgr_submit_avx512_ni (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_avx512_ni (SHA1_MB_JOB_MGR *state); + +#ifdef __cplusplus +} +#endif + +#endif // _SHA1_MB_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h new file mode 100644 index 000000000..8ef186b2d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h @@ -0,0 +1,451 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _SHA256_MB_H_ +#define _SHA256_MB_H_ + +/** + * @file sha256_mb.h + * @brief Multi-buffer CTX API SHA256 function prototypes and structures + * + * Interface for multi-buffer SHA256 functions + * + * Multi-buffer SHA256 Entire or First-Update..Update-Last + * + * The interface to this multi-buffer hashing code is carried out through the + * context-level (CTX) init, submit and flush functions and the SHA256_HASH_CTX_MGR and + * SHA256_HASH_CTX objects. Numerous SHA256_HASH_CTX objects may be instantiated by the + * application for use with a single SHA256_HASH_CTX_MGR. + * + * The CTX interface functions carry out the initialization and padding of the jobs + * entered by the user and add them to the multi-buffer manager. The lower level "scheduler" + * layer then processes the jobs in an out-of-order manner. The scheduler layer functions + * are internal and are not intended to be invoked directly. Jobs can be submitted + * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial + * jobs which can be started using the HASH_FIRST flag, and later resumed or finished + * using the HASH_UPDATE and HASH_LAST flags respectively. + * + * Note: The submit function does not require data buffers to be block sized. + * + * The SHA256 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and + * AVX512. In addition, a multibinary interface is provided, which selects the appropriate + * architecture-specific function at runtime. + * + * Usage: The application creates a SHA256_HASH_CTX_MGR object and initializes it + * with a call to sha256_ctx_mgr_init*() function, where henceforth "*" stands for the + * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512(or no suffix for the + * multibinary version). The SHA256_HASH_CTX_MGR object will be used to schedule processor + * resources, with up to 4 SHA256_HASH_CTX objects (or 8 in the AVX2 case, 16 in the AVX512) + * being processed at a time. + * + * Each SHA256_HASH_CTX must be initialized before first use by the hash_ctx_init macro + * defined in multi_buffer.h. After initialization, the application may begin computing + * a hash by giving the SHA256_HASH_CTX to a SHA256_HASH_CTX_MGR using the submit functions + * sha256_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA256_HASH_CTX is + * returned to the application (via this or a later call to sha256_ctx_mgr_submit*() or + * sha256_ctx_mgr_flush*()), the application can then re-submit it with another call to + * sha256_ctx_mgr_submit*(), but without the HASH_FIRST flag set. + * + * Ideally, on the last buffer for that hash, sha256_ctx_mgr_submit_sse is called with + * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero + * length if necessary. When a SHA256_HASH_CTX is returned after having been submitted with + * HASH_LAST, it will contain a valid hash. The SHA256_HASH_CTX can be reused immediately + * by submitting with HASH_FIRST. + * + * For example, you would submit hashes with the following flags for the following numbers + * of buffers: + *
    + *
  • one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE) + *
  • two buffers: HASH_FIRST, HASH_LAST + *
  • three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST + * etc. + *
+ * + * The order in which SHA256_CTX objects are returned is in general different from the order + * in which they are submitted. + * + * A few possible error conditions exist: + *
    + *
  • Submitting flags other than the allowed entire/first/update/last values + *
  • Submitting a context that is currently being managed by a SHA256_HASH_CTX_MGR. + *
  • Submitting a context after HASH_LAST is used but before HASH_FIRST is set. + *
+ * + * These error conditions are reported by returning the SHA256_HASH_CTX immediately after + * a submit with its error member set to a non-zero error code (defined in + * multi_buffer.h). No changes are made to the SHA256_HASH_CTX_MGR in the case of an + * error; no processing is done for other hashes. + * + */ + +#include +#include "multi_buffer.h" +#include "types.h" + +#ifndef _MSC_VER +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Hash Constants and Typedefs +#define SHA256_DIGEST_NWORDS 8 +#define SHA256_MAX_LANES 16 +#define SHA256_X8_LANES 8 +#define SHA256_MIN_LANES 4 +#define SHA256_BLOCK_SIZE 64 +#define SHA256_LOG2_BLOCK_SIZE 6 +#define SHA256_PADLENGTHFIELD_SIZE 8 +#define SHA256_INITIAL_DIGEST \ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, \ + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 + +typedef uint32_t sha256_digest_array[SHA256_DIGEST_NWORDS][SHA256_MAX_LANES]; +typedef uint32_t SHA256_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single SHA256 job for the multi-buffer manager */ + +typedef struct { + uint8_t* buffer; //!< pointer to data buffer for this job + uint64_t len; //!< length of buffer for this job in blocks. + DECLARE_ALIGNED(uint32_t result_digest[SHA256_DIGEST_NWORDS], 64); + JOB_STS status; //!< output job status + void* user_data; //!< pointer for user's job-related data +} SHA256_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted SHA256 job */ + +typedef struct { + sha256_digest_array digest; + uint8_t* data_ptr[SHA256_MAX_LANES]; +} SHA256_MB_ARGS_X16; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + SHA256_JOB *job_in_lane; +} SHA256_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer SHA256 jobs */ + +typedef struct { + SHA256_MB_ARGS_X16 args; + uint32_t lens[SHA256_MAX_LANES]; + uint64_t unused_lanes; //!< each nibble is index (0...3 or 0...7) of unused lanes, nibble 4 or 8 is set to F as a flag + SHA256_LANE_DATA ldata[SHA256_MAX_LANES]; + uint32_t num_lanes_inuse; +} SHA256_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer SHA256 jobs */ + +typedef struct { + SHA256_MB_JOB_MGR mgr; +} SHA256_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single SHA256 job for the multi-buffer CTX manager */ + +typedef struct { + SHA256_JOB job; // Must be at struct offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint64_t total_length; //!< Running counter of length processed for this CTX's job + const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. + uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void* user_data; //!< pointer for user to keep any job-related data +} SHA256_HASH_CTX; + +/******************** multibinary function prototypes **********************/ + +/** + * @brief Initialize the SHA256 multi-buffer manager structure. + * @requires SSE4.1 or AVX or AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the multi-buffer manager. + * @requires SSE4.1 or AVX or AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires SSE4.1 or AVX or AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush (SHA256_HASH_CTX_MGR* mgr); + + +/******************************************************************* + * CTX level API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the context level SHA256 multi-buffer manager structure. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_sse (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the context level multi-buffer manager. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_sse (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_sse (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the context level SHA256 multi-buffer manager structure. + * @requires SSE4.1 and SHANI + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_sse_ni (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the context level multi-buffer manager. + * @requires SSE4.1 and SHANI + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_sse_ni (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires SSE4.1 and SHANI + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_sse_ni (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA256 multi-buffer manager structure. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_avx (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the multi-buffer manager. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA256 multi-buffer manager structure. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_avx2 (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the multi-buffer manager. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx2 (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx2 (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA256 multi-buffer manager structure. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_avx512 (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the multi-buffer manager. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx512 (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx512 (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA256 multi-buffer manager structure. + * @requires AVX512 and SHANI + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_avx512_ni (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the multi-buffer manager. + * @requires AVX512 and SHANI + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx512_ni (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires AVX512 and SHANI + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx512_ni (SHA256_HASH_CTX_MGR* mgr); + + +/******************************************************************* + * Scheduler (internal) level out-of-order function prototypes + ******************************************************************/ + +void sha256_mb_mgr_init_sse (SHA256_MB_JOB_MGR *state); +SHA256_JOB* sha256_mb_mgr_submit_sse (SHA256_MB_JOB_MGR *state, SHA256_JOB* job); +SHA256_JOB* sha256_mb_mgr_flush_sse (SHA256_MB_JOB_MGR *state); + +#define sha256_mb_mgr_init_avx sha256_mb_mgr_init_sse +SHA256_JOB* sha256_mb_mgr_submit_avx (SHA256_MB_JOB_MGR *state, SHA256_JOB* job); +SHA256_JOB* sha256_mb_mgr_flush_avx (SHA256_MB_JOB_MGR *state); + +void sha256_mb_mgr_init_avx2 (SHA256_MB_JOB_MGR *state); +SHA256_JOB* sha256_mb_mgr_submit_avx2 (SHA256_MB_JOB_MGR *state, SHA256_JOB* job); +SHA256_JOB* sha256_mb_mgr_flush_avx2 (SHA256_MB_JOB_MGR *state); + +void sha256_mb_mgr_init_avx512 (SHA256_MB_JOB_MGR *state); +SHA256_JOB* sha256_mb_mgr_submit_avx512 (SHA256_MB_JOB_MGR *state, SHA256_JOB* job); +SHA256_JOB* sha256_mb_mgr_flush_avx512 (SHA256_MB_JOB_MGR *state); + +void sha256_mb_mgr_init_sse_ni (SHA256_MB_JOB_MGR *state); +SHA256_JOB* sha256_mb_mgr_submit_sse_ni (SHA256_MB_JOB_MGR *state, SHA256_JOB* job); +SHA256_JOB* sha256_mb_mgr_flush_sse_ni (SHA256_MB_JOB_MGR *state); + +void sha256_mb_mgr_init_avx512_ni (SHA256_MB_JOB_MGR *state); +SHA256_JOB* sha256_mb_mgr_submit_avx512_ni (SHA256_MB_JOB_MGR *state, SHA256_JOB* job); +SHA256_JOB* sha256_mb_mgr_flush_avx512_ni (SHA256_MB_JOB_MGR *state); + +#ifdef __cplusplus +} +#endif + +#endif // _SHA256_MB_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h new file mode 100644 index 000000000..ce3950ad1 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h @@ -0,0 +1,422 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _SHA512_MB_H_ +#define _SHA512_MB_H_ + +/** + * @file sha512_mb.h + * @brief Single/Multi-buffer CTX API SHA512 function prototypes and structures + * + * Interface for single and multi-buffer SHA512 functions + * + * Single/Multi-buffer SHA512 Entire or First-Update..Update-Last + * + * The interface to this single/multi-buffer hashing code is carried out through the + * context-level (CTX) init, submit and flush functions and the SHA512_HASH_CTX_MGR and + * SHA512_HASH_CTX objects. Numerous SHA512_HASH_CTX objects may be instantiated by the + * application for use with a single SHA512_HASH_CTX_MGR. + * + * The CTX interface functions carry out the initialization and padding of the jobs + * entered by the user and add them to the multi-buffer manager. The lower level "scheduler" + * layer then processes the jobs in an out-of-order manner. The scheduler layer functions + * are internal and are not intended to be invoked directly. Jobs can be submitted + * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial + * jobs which can be started using the HASH_FIRST flag, and later resumed or finished + * using the HASH_UPDATE and HASH_LAST flags respectively. + * + * Note: The submit function does not require data buffers to be block sized. + * + * The SHA512 CTX interface functions are available for 5 architectures: multi-buffer SSE, + * AVX, AVX2, AVX512 and single-buffer SSE4 (which is used in the same way as the + * multi-buffer code). In addition, a multibinary interface is provided, which selects the + * appropriate architecture-specific function at runtime. This multibinary interface + * selects the single buffer SSE4 functions when the platform is detected to be Silvermont. + * + * Usage: The application creates a SHA512_HASH_CTX_MGR object and initializes it + * with a call to sha512_ctx_mgr_init*() function, where henceforth "*" stands for the + * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512(or no suffix for the + * multibinary version). The SHA512_HASH_CTX_MGR object will be used to schedule processor + * resources, with up to 2 SHA512_HASH_CTX objects (or 4 in the AVX2 case, 8 in the AVX512 + * case) being processed at a time. + * + * Each SHA512_HASH_CTX must be initialized before first use by the hash_ctx_init macro + * defined in multi_buffer.h. After initialization, the application may begin computing + * a hash by giving the SHA512_HASH_CTX to a SHA512_HASH_CTX_MGR using the submit functions + * sha512_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA512_HASH_CTX is + * returned to the application (via this or a later call to sha512_ctx_mgr_submit*() or + * sha512_ctx_mgr_flush*()), the application can then re-submit it with another call to + * sha512_ctx_mgr_submit*(), but without the HASH_FIRST flag set. + * + * Ideally, on the last buffer for that hash, sha512_ctx_mgr_submit_sse is called with + * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero + * length if necessary. When a SHA512_HASH_CTX is returned after having been submitted with + * HASH_LAST, it will contain a valid hash. The SHA512_HASH_CTX can be reused immediately + * by submitting with HASH_FIRST. + * + * For example, you would submit hashes with the following flags for the following numbers + * of buffers: + *
    + *
  • one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE) + *
  • two buffers: HASH_FIRST, HASH_LAST + *
  • three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST + * etc. + *
+ * + * The order in which SHA512_CTX objects are returned is in general different from the order + * in which they are submitted. + * + * A few possible error conditions exist: + *
    + *
  • Submitting flags other than the allowed entire/first/update/last values + *
  • Submitting a context that is currently being managed by a SHA512_HASH_CTX_MGR. (Note: + * This error case is not applicable to the single buffer SSE4 version) + *
  • Submitting a context after HASH_LAST is used but before HASH_FIRST is set. + *
+ * + * These error conditions are reported by returning the SHA512_HASH_CTX immediately after + * a submit with its error member set to a non-zero error code (defined in + * multi_buffer.h). No changes are made to the SHA512_HASH_CTX_MGR in the case of an + * error; no processing is done for other hashes. + * + */ + +#include +#include "multi_buffer.h" +#include "types.h" + +#ifndef _MSC_VER +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Hash Constants and Typedefs +#define SHA512_DIGEST_NWORDS 8 +#define SHA512_MAX_LANES 8 +#define SHA512_X4_LANES 4 +#define SHA512_MIN_LANES 2 +#define SHA512_BLOCK_SIZE 128 +#define SHA512_LOG2_BLOCK_SIZE 7 +#define SHA512_PADLENGTHFIELD_SIZE 16 +#define SHA512_INITIAL_DIGEST \ + 0x6a09e667f3bcc908,0xbb67ae8584caa73b,0x3c6ef372fe94f82b,0xa54ff53a5f1d36f1, \ + 0x510e527fade682d1,0x9b05688c2b3e6c1f,0x1f83d9abfb41bd6b,0x5be0cd19137e2179 + + +typedef uint64_t sha512_digest_array[SHA512_DIGEST_NWORDS][SHA512_MAX_LANES]; +typedef uint64_t SHA512_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single SHA512 job for the multi-buffer manager */ + +typedef struct { + uint8_t* buffer; //!< pointer to data buffer for this job + uint64_t len; //!< length of buffer for this job in blocks. + DECLARE_ALIGNED(uint64_t result_digest[SHA512_DIGEST_NWORDS], 64); + JOB_STS status; //!< output job status + void* user_data; //!< pointer for user's job-related data +} SHA512_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted SHA512 job */ + +typedef struct { + sha512_digest_array digest; + uint8_t* data_ptr[SHA512_MAX_LANES]; +} SHA512_MB_ARGS_X8; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + SHA512_JOB *job_in_lane; +} SHA512_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer SHA512 jobs */ + +typedef struct { + SHA512_MB_ARGS_X8 args; + uint64_t lens[SHA512_MAX_LANES]; + uint64_t unused_lanes; //!< each byte is index (00, 01 or 00...03) of unused lanes, byte 2 or 4 is set to FF as a flag + SHA512_LANE_DATA ldata[SHA512_MAX_LANES]; + uint32_t num_lanes_inuse; +} SHA512_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer SHA512 jobs */ + +typedef struct { + SHA512_MB_JOB_MGR mgr; +} SHA512_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single SHA512 job for the multi-buffer CTX manager */ + +typedef struct { + SHA512_JOB job; // Must be at struct offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint64_t total_length; //!< Running counter of length processed for this CTX's job + const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. + uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void* user_data; //!< pointer for user to keep any job-related data +} SHA512_HASH_CTX; + +/******************************************************************* + * Context level API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the context level SHA512 multi-buffer manager structure. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_sse (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the context level multi-buffer manager. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_sse (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_sse (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_avx (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_avx2 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx2 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx2 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_avx512 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx512 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx512 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires SSE4 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_sb_sse4 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires SSE4 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_sb_sse4 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires SSE4 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_sb_sse4 (SHA512_HASH_CTX_MGR* mgr); + +/******************** multibinary function prototypes **********************/ + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush (SHA512_HASH_CTX_MGR* mgr); + +/******************************************************************* + * Scheduler (internal) level out-of-order function prototypes + ******************************************************************/ + +void sha512_mb_mgr_init_sse (SHA512_MB_JOB_MGR *state); +SHA512_JOB* sha512_mb_mgr_submit_sse (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_mb_mgr_flush_sse (SHA512_MB_JOB_MGR *state); + +#define sha512_mb_mgr_init_avx sha512_mb_mgr_init_sse +SHA512_JOB* sha512_mb_mgr_submit_avx (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_mb_mgr_flush_avx (SHA512_MB_JOB_MGR *state); + +void sha512_mb_mgr_init_avx2 (SHA512_MB_JOB_MGR *state); +SHA512_JOB* sha512_mb_mgr_submit_avx2 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_mb_mgr_flush_avx2 (SHA512_MB_JOB_MGR *state); + +void sha512_mb_mgr_init_avx512 (SHA512_MB_JOB_MGR *state); +SHA512_JOB* sha512_mb_mgr_submit_avx512 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_mb_mgr_flush_avx512 (SHA512_MB_JOB_MGR *state); + +// Single buffer SHA512 APIs, optimized for SLM. +void sha512_sse4 (const void* M, void* D, uint64_t L); +// Note that these APIs comply with multi-buffer APIs' high level usage +void sha512_sb_mgr_init_sse4 (SHA512_MB_JOB_MGR *state); +SHA512_JOB* sha512_sb_mgr_submit_sse4 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_sb_mgr_flush_sse4 (SHA512_MB_JOB_MGR *state); + +#ifdef __cplusplus +} +#endif + +#endif // _SHA512_MB_H_ + + diff --git a/src/crypto/isa-l/isa-l_crypto/include/sm3_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sm3_mb.h new file mode 100644 index 000000000..d9e7b4eed --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/sm3_mb.h @@ -0,0 +1,155 @@ +/********************************************************************** + Copyright(c) 2011-2020 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _SM3_MB_H_ +#define _SM3_MB_H_ + + +/** + * @file sm3_mb.h + * @brief Multi-buffer CTX API SM3 function prototypes and structures + * + * \warning Experimental interface with only base functions available at this + * time. + */ + +#include +#include "multi_buffer.h" +#include "types.h" + +#ifndef _MSC_VER +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#define SM3_DIGEST_NWORDS 8 /* Word in SM3 is 32-bit */ +#define SM3_MAX_LANES 16 +#define SM3_X8_LANES 8 +#define SM3_BLOCK_SIZE 64 +#define SM3_LOG2_BLOCK_SIZE 6 +#define SM3_PADLENGTHFIELD_SIZE 8 +#define SM3_INITIAL_DIGEST \ + 0x7380166f, 0x4914b2b9, 0x172442d7, 0xda8a0600, \ + 0xa96f30bc, 0x163138aa, 0xe38dee4d, 0xb0fb0e4e + +typedef uint32_t sm3_digest_array[SM3_DIGEST_NWORDS][SM3_MAX_LANES]; +typedef uint32_t SM3_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single SM3 job for the multi-buffer manager */ + +typedef struct { + uint8_t *buffer; //!< pointer to data buffer for this job + uint64_t len; //!< length of buffer for this job in blocks. + DECLARE_ALIGNED(uint32_t result_digest[SM3_DIGEST_NWORDS], 64); + JOB_STS status; //!< output job status + void *user_data; //!< pointer for user's job-related data +} SM3_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted SM3 job */ + +typedef struct { + sm3_digest_array digest; + uint8_t *data_ptr[SM3_MAX_LANES]; +} SM3_MB_ARGS_X16; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + SM3_JOB *job_in_lane; +} SM3_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer SM3 jobs */ + +typedef struct { + SM3_MB_ARGS_X16 args; + uint32_t lens[SM3_MAX_LANES]; + uint64_t unused_lanes; //!< each nibble is index (0...3 or 0...7) of unused lanes, nibble 4 or 8 is set to F as a flag + SM3_LANE_DATA ldata[SM3_MAX_LANES]; + uint32_t num_lanes_inuse; +} SM3_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer SM3 jobs */ + +typedef struct { + SM3_MB_JOB_MGR mgr; +} SM3_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single SM3 job for the multi-buffer CTX manager */ + +typedef struct { + SM3_JOB job; // Must be at struct offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint64_t total_length; //!< Running counter of length processed for this CTX's job + const void *incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. + uint8_t partial_block_buffer[SM3_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void *user_data; //!< pointer for user to keep any job-related data +} SM3_HASH_CTX; + +/******************** multibinary function prototypes **********************/ + +/** +* @brief Initialize the SM3 multi-buffer manager structure. +* +* @param mgr Structure holding context level state info +* @returns void +*/ +void sm3_ctx_mgr_init(SM3_HASH_CTX_MGR * mgr); + +/** +* @brief Submit a new SM3 job to the multi-buffer manager. +* +* @param mgr Structure holding context level state info +* @param ctx Structure holding ctx job info +* @param buffer Pointer to buffer to be processed +* @param len Length of buffer (in bytes) to be processed +* @param flags Input flag specifying job type (first, update, last or entire) +* @returns NULL if no jobs complete or pointer to jobs structure. +*/ +SM3_HASH_CTX *sm3_ctx_mgr_submit(SM3_HASH_CTX_MGR * mgr, SM3_HASH_CTX * ctx, + const void *buffer, uint32_t len, + HASH_CTX_FLAG flags); + +/** +* @brief Finish all submitted SM3 jobs and return when complete. +* +* @param mgr Structure holding context level state info +* @returns NULL if no jobs to complete or pointer to jobs structure. +*/ +SM3_HASH_CTX *sm3_ctx_mgr_flush(SM3_HASH_CTX_MGR * mgr); + +#ifdef __cplusplus +} +#endif +#endif diff --git a/src/crypto/isa-l/isa-l_crypto/include/test.h b/src/crypto/isa-l/isa-l_crypto/include/test.h new file mode 100644 index 000000000..7b99390b8 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/test.h @@ -0,0 +1,111 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _TEST_H +#define _TEST_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include "endian_helper.h" + +// Use sys/time.h functions for time +#if defined (__unix__) || (__APPLE__) || (__MINGW32__) +# include +#endif + +#ifdef _MSC_VER +# define inline __inline +# include +# include +#endif + +#include +#include + +struct perf{ + struct timeval tv; +}; + + +#if defined (__unix__) || (__APPLE__) || (__MINGW32__) +static inline int perf_start(struct perf *p) +{ + return gettimeofday(&(p->tv), 0); +} +static inline int perf_stop(struct perf *p) +{ + return gettimeofday(&(p->tv), 0); +} + +static inline void perf_print(struct perf stop, struct perf start, long long dsize) +{ + long long secs = stop.tv.tv_sec - start.tv.tv_sec; + long long usecs = secs * 1000000 + stop.tv.tv_usec - start.tv.tv_usec; + + printf("runtime = %10lld usecs", usecs); + if (dsize != 0) { +#if 1 // not bug in printf for 32-bit + printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s\n", dsize/(1024*1024), + ((double) usecs)/1000000, ((double) dsize) / (double)usecs); +#else + printf(", bandwidth %lld MB ", dsize/(1024*1024)); + printf("in %.4f sec ",(double)usecs/1000000); + printf("= %.2f MB/s\n", (double)dsize/usecs); +#endif + } + else + printf("\n"); +} +#endif + +static inline uint64_t get_filesize(FILE *fp) +{ + uint64_t file_size; + fpos_t pos, pos_curr; + + fgetpos(fp, &pos_curr); /* Save current position */ +#if defined(_WIN32) || defined(_WIN64) + _fseeki64(fp, 0, SEEK_END); +#else + fseeko(fp, 0, SEEK_END); +#endif + fgetpos(fp, &pos); + file_size = *(uint64_t *)&pos; + fsetpos(fp, &pos_curr); /* Restore position */ + + return file_size; +} + +#ifdef __cplusplus +} +#endif + +#endif // _TEST_H diff --git a/src/crypto/isa-l/isa-l_crypto/include/types.h b/src/crypto/isa-l/isa-l_crypto/include/types.h new file mode 100644 index 000000000..de452557a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/types.h @@ -0,0 +1,100 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +/** + * @file types.h + * @brief Defines common align and debug macros + * + */ + +#ifndef __TYPES_H +#define __TYPES_H + +#ifdef __cplusplus +extern "C" { +#endif + + +#if defined __unix__ || defined __APPLE__ +# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval))) +# define __forceinline static inline +# define aligned_free(x) free(x) +#else +# ifdef __MINGW32__ +# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval))) +# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn))) +# define aligned_free(x) _aligned_free(x) +# else +# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl +# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn))) +# define aligned_free(x) _aligned_free(x) +# endif +#endif + +#ifdef DEBUG +# define DEBUG_PRINT(x) printf x +#else +# define DEBUG_PRINT(x) do {} while (0) +#endif + + +#ifndef __has_feature +# define __has_feature(x) 0 +#endif +#ifndef __has_extension +# define __has_extension __has_feature +#endif +#define ISAL_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + +#if (defined(__ICC) || defined( __GNUC__ ) || defined(__clang__)) && !defined(ISAL_UNIT_TEST) +# if __has_extension(attribute_deprecated_with_message) \ + || (ISAL_GCC_VERSION >= 40500) \ + || (__INTEL_COMPILER >= 1100) +# define ISAL_DEPRECATED(message) __attribute__(( deprecated( message ))) +# else +# define ISAL_DEPRECATED(message) __attribute__(( deprecated )) +# endif +#elif (defined( __ICL ) || defined(_MSC_VER)) +# if (__INTEL_COMPILER >= 1100) || (_MSC_FULL_VER >= 140050727) +# define ISAL_DEPRECATED(message) __declspec( deprecated ( message )) +# else +# define ISAL_DEPRECATED(message) __declspec( deprecated ) +# endif +#else +# define ISAL_DEPRECATED(message) +#endif + +#define ISAL_EXPERIMENTAL(message) ISAL_DEPRECATED("Experimental: " message) + +#ifdef __cplusplus +} +#endif + +#endif //__TYPES_H -- cgit v1.2.3