diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/crypto/isa-l/isa-l_crypto/include | |
parent | Initial commit. (diff) | |
download | ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.tar.xz ceph-483eb2f56657e8e7f419ab1a4fab8dce9ade8609.zip |
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/include')
19 files changed, 4549 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h b/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h new file mode 100644 index 00000000..bff4a62d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h @@ -0,0 +1,161 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +/** + * @file aes_cbc.h + * @brief AES CBC encryption/decryption function prototypes. + * +; References: + */ +#ifndef _AES_CBC_h +#define _AES_CBC_h + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { + +#endif + +typedef enum cbc_key_size { CBC_128_BITS = 16, CBC_192_BITS = 24, CBC_256_BITS = 32} cbc_key_size; +#define CBC_ROUND_KEY_LEN (16) +#define CBC_128_KEY_ROUNDS (10+1) /*expanded key holds 10 key rounds plus original key*/ +#define CBC_192_KEY_ROUNDS (12+1) /*expanded key holds 12 key rounds plus original key*/ +#define CBC_256_KEY_ROUNDS (14+1) /*expanded key holds 14 key rounds plus original key*/ +#define CBC_MAX_KEYS_SIZE (CBC_ROUND_KEY_LEN * CBC_256_KEY_ROUNDS) + +#define CBC_IV_DATA_LEN (16) + +/** @brief holds intermediate key data used in encryption/decryption + * + */ +struct cbc_key_data { // must be 16 byte aligned + uint8_t enc_keys[CBC_MAX_KEYS_SIZE]; + uint8_t dec_keys[CBC_MAX_KEYS_SIZE]; +}; + +/** @brief CBC-AES key pre-computation done once for a key + * + * @requires SSE4.1 and AESNI + * + * arg 1: in: pointer to key + * arg 2: OUT: pointer to a key expanded data + */ +int aes_cbc_precomp( + uint8_t *key, + int key_size, + struct cbc_key_data *keys_blk +); + +/** @brief CBC-AES 128 bit key Decryption + * + * @requires SSE4.1 and AESNI + * + * arg 1: in: pointer to input (cipher text) + * arg 2: IV: pointer to IV, Must be 16 bytes aligned to a 16 byte boundary + * arg 3: keys: pointer to keys, Must be on a 16 byte boundary and length of key size * key rounds + * arg 4: OUT: pointer to output (plain text ... 
in-place allowed) + * arg 5: len_bytes: length in bytes (multiple of 16) + */ +void aes_cbc_dec_128( + void *in, + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data + void *out, + uint64_t len_bytes); //!< Must be a multiple of 16 bytes + +/** @brief CBC-AES 192 bit key Decryption + * +* @requires SSE4.1 and AESNI +* +*/ +void aes_cbc_dec_192( + void *in, + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data + void *out, + uint64_t len_bytes); //!< Must be a multiple of 16 bytes + +/** @brief CBC-AES 256 bit key Decryption + * +* @requires SSE4.1 and AESNI +* +*/ +void aes_cbc_dec_256( + void *in, + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data + void *out, + uint64_t len_bytes); //!< Must be a multiple of 16 bytes + +/** @brief CBC-AES 128 bit key Encryption + * + * @requires SSE4.1 and AESNI + * + * arg 1: in: pointer to input (plain text) + * arg 2: IV: pointer to IV, Must be 16 bytes aligned to a 16 byte boundary + * arg 3: keys: pointer to keys, Must be on a 16 byte boundary and length of key size * key rounds + * arg 4: OUT: pointer to output (cipher text ... 
in-place allowed) + * arg 5: len_bytes: length in bytes (multiple of 16) + */ +int aes_cbc_enc_128( + void *in, + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data + void *out, + uint64_t len_bytes); //!< Must be a multiple of 16 bytes + +/** @brief CBC-AES 192 bit key Encryption + * +* @requires SSE4.1 and AESNI +* +*/ +int aes_cbc_enc_192( + void *in, + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data + void *out, + uint64_t len_bytes); //!< Must be a multiple of 16 bytes + +/** @brief CBC-AES 256 bit key Encryption + * +* @requires SSE4.1 and AESNI +* +*/ +int aes_cbc_enc_256( + void *in, + uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary + uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data + void *out, + uint64_t len_bytes); //!< Must be a multiple of 16 bytes + +#ifdef __cplusplus +} +#endif //__cplusplus +#endif //ifndef _AES_CBC_h diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h b/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h new file mode 100644 index 00000000..1e7127e9 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h @@ -0,0 +1,340 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +/** + * @file aes_gcm.h + * @brief AES GCM encryption/decryption function prototypes. + * + * At build time there is an option to use non-temporal loads and stores + * selected by defining the compile time option NT_LDST. The use of this option + * places the following restriction on the gcm encryption functions: + * + * - The plaintext and cyphertext buffers must be aligned on a 16 byte boundary. + * + * - When using the streaming API, all partial input buffers must be a multiple + * of 16 bytes long except for the last input buffer. + * + * - In-place encryption/decryption is not recommended. + * + */ + +/* +; References: +; This code was derived and highly optimized from the code described in paper: +; Vinodh Gopal et. al. 
Optimized Galois-Counter-Mode Implementation on Intel Architecture Processors. August, 2010 +; +; For the shift-based reductions used in this code, we used the method described in paper: +; Shay Gueron, Michael E. Kounavis. Intel Carry-Less Multiplication Instruction and its Usage for Computing the GCM Mode. January, 2010. +; +; +; +; Assumptions: Support for SSE4.1 or greater, AVX or AVX2 +; +; +; iv: +; 0 1 2 3 +; 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Salt (From the SA) | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | Initialization Vector | +; | (This is the sequence number from IPSec header) | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; | 0x1 | +; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +; +; TLen: +; from the definition of the spec, TLen can only be 8, 12 or 16 bytes. +; + */ +#ifndef _AES_GCM_h +#define _AES_GCM_h + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/* Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8. */ +#define MAX_TAG_LEN (16) +// +// IV data is limited to 16 bytes. The last DWORD (4 bytes) must be 0x1 +// +#define GCM_IV_LEN (16) +#define GCM_IV_DATA_LEN (12) +#define GCM_IV_END_MARK {0x00, 0x00, 0x00, 0x01}; +#define GCM_IV_END_START (12) + +#define LONGEST_TESTED_AAD_LENGTH (2* 1024) + +// Key lengths of 128 and 256 supported +#define GCM_128_KEY_LEN (16) +#define GCM_256_KEY_LEN (32) + +#define GCM_BLOCK_LEN 16 +#define GCM_ENC_KEY_LEN 16 +#define GCM_KEY_SETS (15) /*exp key + 14 exp round keys*/ +/** @brief holds intermediate key data needed to improve performance + * + * gcm_data hold internal key information used by gcm128 and gcm256. 
+ */ +struct gcm_data { + uint8_t expanded_keys[GCM_ENC_KEY_LEN * GCM_KEY_SETS]; + uint8_t shifted_hkey_1[GCM_ENC_KEY_LEN]; // store HashKey <<1 mod poly here + uint8_t shifted_hkey_2[GCM_ENC_KEY_LEN]; // store HashKey^2 <<1 mod poly here + uint8_t shifted_hkey_3[GCM_ENC_KEY_LEN]; // store HashKey^3 <<1 mod poly here + uint8_t shifted_hkey_4[GCM_ENC_KEY_LEN]; // store HashKey^4 <<1 mod poly here + uint8_t shifted_hkey_5[GCM_ENC_KEY_LEN]; // store HashKey^5 <<1 mod poly here + uint8_t shifted_hkey_6[GCM_ENC_KEY_LEN]; // store HashKey^6 <<1 mod poly here + uint8_t shifted_hkey_7[GCM_ENC_KEY_LEN]; // store HashKey^7 <<1 mod poly here + uint8_t shifted_hkey_8[GCM_ENC_KEY_LEN]; // store HashKey^8 <<1 mod poly here + uint8_t shifted_hkey_1_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_2_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_3_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_4_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_5_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_6_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_7_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes) + uint8_t shifted_hkey_8_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes) + // init, update and finalize context data + uint8_t aad_hash[GCM_BLOCK_LEN]; + uint64_t aad_length; + uint64_t 
in_length; + uint8_t partial_block_enc_key[GCM_BLOCK_LEN]; + uint8_t orig_IV[GCM_BLOCK_LEN]; + uint8_t current_counter[GCM_BLOCK_LEN]; + uint64_t partial_block_length; +}; + +/** + * @brief GCM-AES Encryption using 128 bit keys + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm128_enc(struct gcm_data *my_ctx_data, + uint8_t * out, //!< Ciphertext output. Encrypt in-place is allowed. + uint8_t const *in, //!< Plaintext input + uint64_t plaintext_len, //!< Length of data in Bytes for encryption. + uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialization Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer. + uint8_t const *aad, //!< Additional Authentication Data (AAD). + uint64_t aad_len, //!< Length of AAD. + uint8_t * auth_tag, //!< Authenticated Tag output. + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). Valid values are 16 (most likely), 12 or 8. + ); + + +/** + * @brief GCM-AES Decryption using 128 bit keys + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm128_dec(struct gcm_data *my_ctx_data, + uint8_t * out, //!< Plaintext output. Decrypt in-place is allowed. + uint8_t const *in, //!< Ciphertext input + uint64_t plaintext_len, //!< Length of data in Bytes for encryption. + uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer. + uint8_t const *aad, //!< Additional Authentication Data (AAD). + uint64_t aad_len, //!< Length of AAD. + uint8_t * auth_tag, //!< Authenticated Tag output. + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). Valid values are 16 (most likely), 12 or 8. 
+ ); + +/** + * @brief start a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm128_init( struct gcm_data *my_ctx_data, + uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialization Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer. + uint8_t const *aad, //!< Additional Authentication Data (AAD). + uint64_t aad_len //!< Length of AAD. + ); + +/** + * @brief encrypt a block of a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm128_enc_update( struct gcm_data *my_ctx_data, + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed. + const uint8_t *in, //!< Plaintext input + uint64_t plaintext_len //!< Length of data in Bytes for encryption. + ); + +/** + * @brief decrypt a block of a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm128_dec_update( struct gcm_data *my_ctx_data, + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed. + const uint8_t *in, //!< Plaintext input + uint64_t plaintext_len //!< Length of data in Bytes for encryption. + ); + +/** + * @brief End encryption of a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm128_enc_finalize( struct gcm_data *my_ctx_data, + uint8_t *auth_tag, //!< Authenticated Tag output. + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8. + ); + +/** + * @brief End decryption of a AES-128-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm128_dec_finalize( struct gcm_data *my_ctx_data, + uint8_t *auth_tag, //!< Authenticated Tag output. + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8. 
+ ); + +/** + * @brief pre-processes key data + * + * Prefills the gcm data with key values for each round and the initial sub hash key for tag encoding + */ +void aesni_gcm128_pre(uint8_t * key, struct gcm_data *gdata + ); + +/** + * @brief GCM-AES Encryption using 256 bit keys + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm256_enc(struct gcm_data *my_ctx_data, + uint8_t * out, //!< Ciphertext output. Encrypt in-place is allowed. + uint8_t const *in, //!< Plaintext input + uint64_t plaintext_len, //!< Length of data in Bytes for encryption. + uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialization Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer. + uint8_t const *aad, //!< Additional Authentication Data (AAD). + uint64_t aad_len, //!< Length of AAD. + uint8_t * auth_tag, //!< Authenticated Tag output. + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). Valid values are 16 (most likely), 12 or 8. + ); + + +/** + * @brief GCM-AES Decryption using 256 bit keys + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm256_dec(struct gcm_data *my_ctx_data, + uint8_t * out, //!< Plaintext output. Decrypt in-place is allowed. + uint8_t const *in, //!< Ciphertext input + uint64_t plaintext_len, //!< Length of data in Bytes for encryption. + uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer. + uint8_t const *aad, //!< Additional Authentication Data (AAD). + uint64_t aad_len, //!< Length of AAD. + uint8_t * auth_tag, //!< Authenticated Tag output. + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). Valid values are 16 (most likely), 12 or 8. 
+ ); + +/** + * @brief start a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm256_init( struct gcm_data *my_ctx_data, + uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialization Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer. + uint8_t const *aad, //!< Additional Authentication Data (AAD). + uint64_t aad_len //!< Length of AAD. + ); + +/** + * @brief encrypt a block of a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm256_enc_update( struct gcm_data *my_ctx_data, + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed. + const uint8_t *in, //!< Plaintext input + uint64_t plaintext_len //!< Length of data in Bytes for encryption. + ); + +/** + * @brief decrypt a block of a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm256_dec_update( struct gcm_data *my_ctx_data, + uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed. + const uint8_t *in, //!< Plaintext input + uint64_t plaintext_len //!< Length of data in Bytes for encryption. + ); + +/** + * @brief End encryption of a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm256_enc_finalize( struct gcm_data *my_ctx_data, + uint8_t *auth_tag, //!< Authenticated Tag output. + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8. + ); + +/** + * @brief End decryption of a AES-256-GCM Encryption message + * + * @requires SSE4.1 and AESNI + * + */ +void aesni_gcm256_dec_finalize( struct gcm_data *my_ctx_data, + uint8_t *auth_tag, //!< Authenticated Tag output. + uint64_t auth_tag_len //!< Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8. 
+ ); + +/** + * @brief pre-processes key data + * + * Prefills the gcm data with key values for each round and the initial sub hash key for tag encoding + */ +void aesni_gcm256_pre(uint8_t * key, struct gcm_data *gdata); + +#ifdef __cplusplus +} +#endif //__cplusplus +#endif //ifndef _AES_GCM_h diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h b/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h new file mode 100644 index 00000000..dbbe25d1 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h @@ -0,0 +1,76 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _KEYEXP_128_H +#define _KEYEXP_128_H + +/** + * @file aes_keyexp.h + * @brief AES key expansion functions + * + * This defines the interface to key expansion functions. + */ + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** @brief AES key expansion 128 bit +* @requires SSE4.1 +*/ +void aes_keyexp_128( + uint8_t *key, //!< input key for AES-128, 16 bytes + uint8_t *exp_key_enc, //!< expanded encryption keys, 16*11 bytes + uint8_t *exp_key_dec //!< expanded decryption keys, 16*11 bytes + ); + +/** @brief AES key expansion 192 bit +* @requires SSE4.1 +*/ +void aes_keyexp_192( + uint8_t *key, //!< input key for AES-192, 16*1.5 bytes + uint8_t *exp_key_enc, //!< expanded encryption keys, 16*13 bytes + uint8_t *exp_key_dec //!< expanded decryption keys, 16*13 bytes + ); + +/** @brief AES key expansion 256 bit +* @requires SSE4.1 +*/ +void aes_keyexp_256( + uint8_t *key, //!< input key for AES-256, 16*2 bytes + uint8_t *exp_key_enc, //!< expanded encryption keys, 16*15 bytes + uint8_t *exp_key_dec //!< expanded decryption keys, 16*15 bytes + ); + +#ifdef __cplusplus +} +#endif //__cplusplus +#endif //ifndef _KEYEXP_128_H diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h b/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h new file mode 100644 index 00000000..b2d56985 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h @@ 
-0,0 +1,214 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +#ifndef _AES_XTS_H +#define _AES_XTS_H + +/** + * @file aes_xts.h + * @brief AES XTS encryption function prototypes. + * + * This defines the interface to optimized AES XTS functions + +<b>Pre-expanded keys</b> + +For key encryption, pre-expanded keys are stored in the order that they will be +used. 
As an example, if Key[0] is the 128-bit initial key used for an AES-128 +encryption, the rest of the keys are stored as follows: + +<ul> + <li> Key[0] : Initial encryption key + <li> Key[1] : Round 1 encryption key + <li> Key[2] : Round 2 encryption key + <li> ... + <li> Key[10] : Round 10 encryption key +</ul> + +For decryption, the order of keys is reversed. However, we apply the +necessary aesimc instructions before storing the expanded keys. For the same key +used above, the pre-expanded keys will be stored as follows: + +<ul> + <li> Key[0] : Round 10 encryption key + <li> Key[1] : aesimc(Round 9 encryption key) + <li> Key[2] : aesimc(Round 8 encryption key) + <li> ... + <li> Key[9] : aesimc(Round 1 encryption key) + <li> Key[10] : Initial encryption key +</ul> + +<b>Note:</b> The expanded key decryption requires a decryption key only for the block +decryption step. The tweak step in the expanded key decryption requires the same expanded +encryption key that is used in the expanded key encryption. + +<b>Input and Output Buffers </b> + +The input and output buffers can be overlapping as long as the output buffer +pointer is not less than the input buffer pointer. If the two pointers are the +same, then encryption/decryption will occur in-place. + +<b>Data Length</b> + +<ul> + <li> The functions support data length of any bytes greater than or equal to 16 bytes. + <li> Data length is a 64-bit value, which makes the largest possible data length + 2^64 - 1 bytes. + <li> For data lengths from 0 to 15 bytes, the functions return without any error + codes, without reading or writing any data. + <li> The functions only support byte lengths, not bits. +</ul> + +<b>Initial Tweak</b> + +The functions accept a 128-bit initial tweak value. The user is responsible for +padding the initial tweak value to this length. 
+ +<b>Data Alignment</b> + +The input and output buffers, keys, pre-expanded keys and initial tweak value +are not required to be aligned to 16 bytes, any alignment works. + + */ + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + +/** @brief XTS-AES-128 Encryption + * @requires AES-NI + */ + +void XTS_AES_128_enc( + uint8_t *k2, //!< key used for tweaking, 16 bytes + uint8_t *k1, //!< key used for encryption of tweaked plaintext, 16 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *pt, //!< plaintext sector input data + uint8_t *ct //!< ciphertext sector output data + ); + +/** @brief XTS-AES-128 Encryption with pre-expanded keys + * @requires AES-NI + */ + +void XTS_AES_128_enc_expanded_key( + uint8_t *k2, //!< expanded key used for tweaking, 16*11 bytes + uint8_t *k1, //!< expanded key used for encryption of tweaked plaintext, 16*11 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *pt, //!< plaintext sector input data + uint8_t *ct //!< ciphertext sector output data + ); + +/** @brief XTS-AES-128 Decryption + * @requires AES-NI + */ + +void XTS_AES_128_dec( + uint8_t *k2, //!< key used for tweaking, 16 bytes + uint8_t *k1, //!< key used for decryption of tweaked ciphertext, 16 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *ct, //!< ciphertext sector input data + uint8_t *pt //!< plaintext sector output data + ); + +/** @brief XTS-AES-128 Decryption with pre-expanded keys + * @requires AES-NI + */ + +void XTS_AES_128_dec_expanded_key( + uint8_t *k2, //!< expanded key used for tweaking, 16*11 bytes - encryption key is used + uint8_t *k1, //!< expanded decryption key used for decryption of tweaked ciphertext, 16*11 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t 
*ct, //!< ciphertext sector input data + uint8_t *pt //!< plaintext sector output data + ); + +/** @brief XTS-AES-256 Encryption + * @requires AES-NI + */ + +void XTS_AES_256_enc( + uint8_t *k2, //!< key used for tweaking, 16*2 bytes + uint8_t *k1, //!< key used for encryption of tweaked plaintext, 16*2 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *pt, //!< plaintext sector input data + uint8_t *ct //!< ciphertext sector output data + ); + +/** @brief XTS-AES-256 Encryption with pre-expanded keys + * @requires AES-NI + */ + +void XTS_AES_256_enc_expanded_key( + uint8_t *k2, //!< expanded key used for tweaking, 16*15 bytes + uint8_t *k1, //!< expanded key used for encryption of tweaked plaintext, 16*15 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *pt, //!< plaintext sector input data + uint8_t *ct //!< ciphertext sector output data + ); + +/** @brief XTS-AES-256 Decryption + * @requires AES-NI + */ + +void XTS_AES_256_dec( + uint8_t *k2, //!< key used for tweaking, 16*2 bytes + uint8_t *k1, //!< key used for decryption of tweaked ciphertext, 16*2 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *ct, //!< ciphertext sector input data + uint8_t *pt //!< plaintext sector output data + ); + +/** @brief XTS-AES-256 Decryption with pre-expanded keys + * @requires AES-NI + */ + +void XTS_AES_256_dec_expanded_key( + uint8_t *k2, //!< expanded key used for tweaking, 16*15 bytes - encryption key is used + uint8_t *k1, //!< expanded decryption key used for decryption of tweaked ciphertext, 16*15 bytes + uint8_t *TW_initial, //!< initial tweak value, 16 bytes + uint64_t N, //!< sector size, in bytes + const uint8_t *ct, //!< ciphertext sector input data + uint8_t *pt //!< plaintext sector output data + ); + +#ifdef __cplusplus +} +#endif + +#endif 
//_AES_XTS_H diff --git a/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm b/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm new file mode 100644 index 00000000..882e497f --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm @@ -0,0 +1,79 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +; Macros for defining data structures + +; Usage example + +;START_FIELDS ; JOB_AES +;;; name size align +;FIELD _plaintext, 8, 8 ; pointer to plaintext +;FIELD _ciphertext, 8, 8 ; pointer to ciphertext +;FIELD _IV, 16, 8 ; IV +;FIELD _keys, 8, 8 ; pointer to keys +;FIELD _len, 4, 4 ; length in bytes +;FIELD _status, 4, 4 ; status enumeration +;FIELD _user_data, 8, 8 ; pointer to user data +;UNION _union, size1, align1, \ + size2, align2, \ + size3, align3, \ + ... +;END_FIELDS +;%assign _JOB_AES_size _FIELD_OFFSET +;%assign _JOB_AES_align _STRUCT_ALIGN + +%ifndef _DATASTRUCT_ASM_ +%define _DATASTRUCT_ASM_ + +;; START_FIELDS +%macro START_FIELDS 0 +%assign _FIELD_OFFSET 0 +%assign _STRUCT_ALIGN 0 +%endm + +;; FIELD name size align +%macro FIELD 3 +%define %%name %1 +%define %%size %2 +%define %%align %3 + +%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1)) +%%name equ _FIELD_OFFSET +%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size) +%if (%%align > _STRUCT_ALIGN) +%assign _STRUCT_ALIGN %%align +%endif +%endm + +;; END_FIELDS +%macro END_FIELDS 0 +%assign _FIELD_OFFSET (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1)) +%endm + +%endif ; end ifdef _DATASTRUCT_ASM_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h b/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h new file mode 100644 index 00000000..8ddf3b1a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h @@ -0,0 +1,82 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + + +/** + * @file intrinreg.h + * @brief Defines intrinsic types used by the new hashing API + * + */ + +#ifndef _IA64_REGS_H_ +#define _IA64_REGS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef _MSC_VER +# define inline __inline +#endif + +#ifndef __SSE__ +# define __ERR_COMPILER_FLAGS +#endif + +#ifdef __ERR_COMPILER_FLAGS + +#ifdef __GNUC__ +# error "The compiler must be configured to optimize for speed and support at least SSE instructions." 
+# error "Please specify the following gcc flags: -O3 [-mavx]" +#endif + +#ifdef _MSC_VER +# error "Please compile with ''/O2 /D__SSE__ /arch:SSE'' -OR- ''/O2 /D__AVX__ /arch:AVX''" +#endif + +#endif + +#include <stdint.h> +#include <immintrin.h> + +// Define available register types uniformly. +/// @cond +typedef struct{ uint8_t dummy; } intrinreg1; +typedef struct{ uint16_t dummy; } intrinreg2; +typedef struct{ uint32_t dummy; } intrinreg4; +typedef struct{ uint64_t dummy; } intrinreg8; +typedef __m128 intrinreg16; +/// @endcond + + +#ifdef __cplusplus +} +#endif + +#endif // _IA64_REGS_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h b/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h new file mode 100644 index 00000000..17a1b36e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h @@ -0,0 +1,372 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MD5_MB_H_ +#define _MD5_MB_H_ + +/** + * @file md5_mb.h + * @brief Multi-buffer CTX API MD5 function prototypes and structures + * + * Interface for multi-buffer MD5 functions + * + * <b> Multi-buffer MD5 Entire or First-Update..Update-Last </b> + * + * The interface to this multi-buffer hashing code is carried out through the + * context-level (CTX) init, submit and flush functions and the MD5_HASH_CTX_MGR and + * MD5_HASH_CTX objects. Numerous MD5_HASH_CTX objects may be instantiated by the + * application for use with a single MD5_HASH_CTX_MGR. + * + * The CTX interface functions carry out the initialization and padding of the jobs + * entered by the user and add them to the multi-buffer manager. The lower level "scheduler" + * layer then processes the jobs in an out-of-order manner. The scheduler layer functions + * are internal and are not intended to be invoked directly. Jobs can be submitted + * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial + * jobs which can be started using the HASH_FIRST flag, and later resumed or finished + * using the HASH_UPDATE and HASH_LAST flags respectively. + * + * <b>Note:</b> The submit function does not require data buffers to be block sized. + * + * The MD5 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and + * AVX512. 
In addition, a multibinary interface is provided, which selects the appropriate + * architecture-specific function at runtime. + * + * <b>Usage:</b> The application creates a MD5_HASH_CTX_MGR object and initializes it + * with a call to md5_ctx_mgr_init*() function, where henceforth "*" stands for the + * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512 (or no suffix for the + * multibinary version). The MD5_HASH_CTX_MGR object will be used to schedule processor + * resources, with up to 8 MD5_HASH_CTX objects (or 16 in AVX2 case, 32 in AVX512 case) + * being processed at a time. + * + * Each MD5_HASH_CTX must be initialized before first use by the hash_ctx_init macro + * defined in multi_buffer.h. After initialization, the application may begin computing + * a hash by giving the MD5_HASH_CTX to a MD5_HASH_CTX_MGR using the submit functions + * md5_ctx_mgr_submit*() with the HASH_FIRST flag set. When the MD5_HASH_CTX is + * returned to the application (via this or a later call to md5_ctx_mgr_submit*() or + * md5_ctx_mgr_flush*()), the application can then re-submit it with another call to + * md5_ctx_mgr_submit*(), but without the HASH_FIRST flag set. + * + * Ideally, on the last buffer for that hash, md5_ctx_mgr_submit_sse is called with + * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero + * length if necessary. When a MD5_HASH_CTX is returned after having been submitted with + * HASH_LAST, it will contain a valid hash. The MD5_HASH_CTX can be reused immediately + * by submitting with HASH_FIRST. + * + * For example, you would submit hashes with the following flags for the following numbers + * of buffers: + * <ul> + * <li> one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE) + * <li> two buffers: HASH_FIRST, HASH_LAST + * <li> three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST + * etc. 
+ * </ul> + * + * The order in which MD5_HASH_CTX objects are returned is in general different from the order + * in which they are submitted. + * + * A few possible error conditions exist: + * <ul> + * <li> Submitting flags other than the allowed entire/first/update/last values + * <li> Submitting a context that is currently being managed by a MD5_HASH_CTX_MGR. + * <li> Submitting a context after HASH_LAST is used but before HASH_FIRST is set. + * </ul> + * + * These error conditions are reported by returning the MD5_HASH_CTX immediately after + * a submit with its error member set to a non-zero error code (defined in + * multi_buffer.h). No changes are made to the MD5_HASH_CTX_MGR in the case of an + * error; no processing is done for other hashes. + * + */ + +#include <stdint.h> +#include "multi_buffer.h" +#include "types.h" + +#ifdef __cplusplus +extern "C" { +#endif + +// Hash Constants and Typedefs +#define MD5_DIGEST_NWORDS 4 +#define MD5_MAX_LANES 32 +#define MD5_MIN_LANES 8 +#define MD5_BLOCK_SIZE 64 +#define MD5_LOG2_BLOCK_SIZE 6 +#define MD5_PADLENGTHFIELD_SIZE 8 +#define MD5_INITIAL_DIGEST \ + 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 + +typedef uint32_t md5_digest_array[MD5_DIGEST_NWORDS][MD5_MAX_LANES]; +typedef uint32_t MD5_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single MD5 job for the multi-buffer manager */ + +typedef struct { + uint8_t* buffer; //!< pointer to data buffer for this job + uint32_t len; //!< length of buffer for this job in blocks.
+ DECLARE_ALIGNED(uint32_t result_digest[MD5_DIGEST_NWORDS],64); + JOB_STS status; //!< output job status + void* user_data; //!< pointer for user's job-related data +} MD5_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted MD5 job */ + +typedef struct { + md5_digest_array digest; + uint8_t* data_ptr[MD5_MAX_LANES]; +} MD5_MB_ARGS_X32; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + MD5_JOB *job_in_lane; +} MD5_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer MD5 jobs */ + +typedef struct { + MD5_MB_ARGS_X32 args; + uint32_t lens[MD5_MAX_LANES]; + uint64_t unused_lanes[4]; //!< each byte or nibble is index (0...31 or 15) of unused lanes. + MD5_LANE_DATA ldata[MD5_MAX_LANES]; + uint32_t num_lanes_inuse; +} MD5_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer MD5 jobs */ + +typedef struct { + MD5_MB_JOB_MGR mgr; +} MD5_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single MD5 job for the multi-buffer CTX manager */ + +typedef struct { + MD5_JOB job; // Must be at struct offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint32_t total_length; //!< Running counter of length processed for this CTX's job + const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. + uint8_t partial_block_buffer[MD5_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void* user_data; //!< pointer for user to keep any job-related data +} MD5_HASH_CTX; + +/******************************************************************* + * CTX level API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the context level MD5 multi-buffer manager structure. 
+ * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init_sse (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the context level multi-buffer manager. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit_sse (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush_sse (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the MD5 multi-buffer manager structure. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init_avx (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the multi-buffer manager. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit_avx (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. 
+ * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush_avx (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the MD5 multi-buffer manager structure. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init_avx2 (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the multi-buffer manager. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit_avx2 (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush_avx2 (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the MD5 multi-buffer manager structure. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init_avx512 (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the multi-buffer manager. 
+ * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit_avx512 (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush_avx512 (MD5_HASH_CTX_MGR* mgr); + +/******************** multibinary function prototypes **********************/ + +/** + * @brief Initialize the MD5 multi-buffer manager structure. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void md5_ctx_mgr_init (MD5_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new MD5 job to the multi-buffer manager. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_submit (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted MD5 jobs and return when complete. 
+ * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +MD5_HASH_CTX* md5_ctx_mgr_flush (MD5_HASH_CTX_MGR* mgr); + + +/******************************************************************* + * Scheduler (internal) level out-of-order function prototypes + ******************************************************************/ + +void md5_mb_mgr_init_sse (MD5_MB_JOB_MGR *state); +MD5_JOB* md5_mb_mgr_submit_sse (MD5_MB_JOB_MGR *state, MD5_JOB* job); +MD5_JOB* md5_mb_mgr_flush_sse (MD5_MB_JOB_MGR *state); + +#define md5_mb_mgr_init_avx md5_mb_mgr_init_sse +MD5_JOB* md5_mb_mgr_submit_avx (MD5_MB_JOB_MGR *state, MD5_JOB* job); +MD5_JOB* md5_mb_mgr_flush_avx (MD5_MB_JOB_MGR *state); + +void md5_mb_mgr_init_avx2 (MD5_MB_JOB_MGR *state); +MD5_JOB* md5_mb_mgr_submit_avx2 (MD5_MB_JOB_MGR *state, MD5_JOB* job); +MD5_JOB* md5_mb_mgr_flush_avx2 (MD5_MB_JOB_MGR *state); + +void md5_mb_mgr_init_avx512 (MD5_MB_JOB_MGR *state); +MD5_JOB* md5_mb_mgr_submit_avx512 (MD5_MB_JOB_MGR *state, MD5_JOB* job); +MD5_JOB* md5_mb_mgr_flush_avx512 (MD5_MB_JOB_MGR *state); + +#ifdef __cplusplus +} +#endif + +#endif // _MD5_MB_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm b/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm new file mode 100644 index 00000000..1342dc1c --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm @@ -0,0 +1,346 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +%ifndef __MEMCPY_ASM__ +%define __MEMCPY_ASM__ + +%include "reg_sizes.asm" + + +; This file defines a series of macros to copy small to medium amounts +; of data from memory to memory, where the size is variable but limited. 
+; +; The macros are all called as: +; memcpy DST, SRC, SIZE, TMP0, TMP1, XTMP0, XTMP1, XTMP2, XTMP3 +; with the parameters defined as: +; DST : register: pointer to dst (not modified) +; SRC : register: pointer to src (not modified) +; SIZE : register: length in bytes (not modified) +; TMP0 : 64-bit temp GPR (clobbered) +; TMP1 : 64-bit temp GPR (clobbered) +; XTMP0 : temp XMM (clobbered) +; XTMP1 : temp XMM (clobbered) +; XTMP2 : temp XMM (clobbered) +; XTMP3 : temp XMM (clobbered) +; +; The name indicates the options. The name is of the form: +; memcpy_<VEC>_<SZ><ZERO><RET> +; where: +; <VEC> is either "sse" or "avx" or "avx2" +; <SZ> is either "64" or "128" and defines largest value of SIZE +; <ZERO> is blank or "_1". If "_1" then the min SIZE is 1 (otherwise 0) +; <RET> is blank or "_ret". If blank, the code falls through. If "ret" +; it does a "ret" at the end +; +; For the avx2 versions, the temp XMM registers need to be YMM registers +; If the SZ is 64, then only two YMM temps are needed, i.e. 
it is called as: +; memcpy_avx2_64 DST, SRC, SIZE, TMP0, TMP1, YTMP0, YTMP1 +; memcpy_avx2_128 DST, SRC, SIZE, TMP0, TMP1, YTMP0, YTMP1, YTMP2, YTMP3 +; +; For example: +; memcpy_sse_64 : SSE, 0 <= size < 64, falls through +; memcpy_avx_64_1 : AVX1, 1 <= size < 64, falls through +; memcpy_sse_128_ret : SSE, 0 <= size < 128, ends with ret +; memcpy_avx_128_1_ret : AVX1, 1 <= size < 128, ends with ret +; + +%macro memcpy_sse_64 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 0, 0 +%endm + +%macro memcpy_sse_64_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 0, 0 +%endm + +%macro memcpy_sse_128 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 0, 0 +%endm + +%macro memcpy_sse_128_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 0, 0 +%endm + +%macro memcpy_sse_64_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 1, 0 +%endm + +%macro memcpy_sse_64_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 1, 0 +%endm + +%macro memcpy_sse_128_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 1, 0 +%endm + +%macro memcpy_sse_128_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 1, 0 +%endm + + +%macro memcpy_sse_16 5 + __memcpy_int %1,%2,%3,%4,%5,,,,, 0, 16, 0, 0 +%endm + +%macro memcpy_sse_16_1 5 + __memcpy_int %1,%2,%3,%4,%5,,,,, 1, 16, 0, 0 +%endm + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%macro memcpy_avx_64 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 0, 1 +%endm + +%macro memcpy_avx_64_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 0, 1 +%endm + +%macro memcpy_avx_128 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 0, 1 +%endm + +%macro memcpy_avx_128_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 0, 1 +%endm + +%macro memcpy_avx_64_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 1, 1 +%endm + +%macro memcpy_avx_64_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 1, 1 +%endm + +%macro memcpy_avx_128_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 
128, 1, 1 +%endm + +%macro memcpy_avx_128_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 1, 1 +%endm + + +%macro memcpy_avx_16 5 + __memcpy_int %1,%2,%3,%4,%5,,,,, 0, 16, 0, 1 +%endm + +%macro memcpy_avx_16_1 5 + __memcpy_int %1,%2,%3,%4,%5,,,,, 1, 16, 0, 1 +%endm + + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%macro memcpy_avx2_64 7 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 0, 64, 0, 2 +%endm + +%macro memcpy_avx2_64_1 7 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 1, 64, 0, 2 +%endm + +%macro memcpy_avx2_128 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7, %8, %9, 0, 128, 0, 2 +%endm + +%macro memcpy_avx2_128_1 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7, %8, %9, 1, 128, 0, 2 +%endm + +%macro memcpy_avx2_64_ret 7 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 0, 64, 1, 2 +%endm + +%macro memcpy_avx2_64_1_ret 7 + __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 1, 64, 1, 2 +%endm + +; The 128-byte path of __memcpy_int uses all four temp registers, so the +; avx2 128 "ret" variants must forward %8/%9 (YTMP2/YTMP3) rather than the +; "--" placeholder that is only valid for the 64-byte variants. +%macro memcpy_avx2_128_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7, %8, %9, 0, 128, 1, 2 +%endm + +%macro memcpy_avx2_128_1_ret 9 + __memcpy_int %1,%2,%3,%4,%5,%6,%7, %8, %9, 1, 128, 1, 2 +%endm + + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + + +%macro __memcpy_int 13 +%define %%DST %1 ; register: pointer to dst (not modified) +%define %%SRC %2 ; register: pointer to src (not modified) +%define %%SIZE %3 ; register: length in bytes (not modified) +%define %%TMP0 %4 ; 64-bit temp GPR (clobbered) +%define %%TMP1 %5 ; 64-bit temp GPR (clobbered) +%define %%XTMP0 %6 ; temp XMM (clobbered) +%define %%XTMP1 %7 ; temp XMM (clobbered) +%define %%XTMP2 %8 ; temp XMM (clobbered) +%define %%XTMP3 %9 ; temp XMM (clobbered) +%define %%NOT0 %10 ; if not 0, then assume size cannot be zero +%define %%MAXSIZE %11 ; 128, 64, etc +%define %%USERET %12
; if not 0, use "ret" at end +%define %%USEAVX %13 ; 0 = SSE, 1 = AVX1, 2 = AVX2 + +%if (%%USERET != 0) + %define %%DONE ret +%else + %define %%DONE jmp %%end +%endif + +%if (%%USEAVX != 0) + %define %%MOVDQU vmovdqu +%else + %define %%MOVDQU movdqu +%endif + +%if (%%MAXSIZE >= 128) + test %%SIZE, 64 + jz %%lt64 + %if (%%USEAVX >= 2) + %%MOVDQU %%XTMP0, [%%SRC + 0*32] + %%MOVDQU %%XTMP1, [%%SRC + 1*32] + %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*32] + %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*32] + + %%MOVDQU [%%DST + 0*32], %%XTMP0 + %%MOVDQU [%%DST + 1*32], %%XTMP1 + %%MOVDQU [%%DST + %%SIZE - 2*32], %%XTMP2 + %%MOVDQU [%%DST + %%SIZE - 1*32], %%XTMP3 + %else + %%MOVDQU %%XTMP0, [%%SRC + 0*16] + %%MOVDQU %%XTMP1, [%%SRC + 1*16] + %%MOVDQU %%XTMP2, [%%SRC + 2*16] + %%MOVDQU %%XTMP3, [%%SRC + 3*16] + %%MOVDQU [%%DST + 0*16], %%XTMP0 + %%MOVDQU [%%DST + 1*16], %%XTMP1 + %%MOVDQU [%%DST + 2*16], %%XTMP2 + %%MOVDQU [%%DST + 3*16], %%XTMP3 + + %%MOVDQU %%XTMP0, [%%SRC + %%SIZE - 4*16] + %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 3*16] + %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*16] + %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*16] + %%MOVDQU [%%DST + %%SIZE - 4*16], %%XTMP0 + %%MOVDQU [%%DST + %%SIZE - 3*16], %%XTMP1 + %%MOVDQU [%%DST + %%SIZE - 2*16], %%XTMP2 + %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP3 + %endif + %%DONE +%endif + +%if (%%MAXSIZE >= 64) +%%lt64 + test %%SIZE, 32 + jz %%lt32 + %if (%%USEAVX >= 2) + %%MOVDQU %%XTMP0, [%%SRC + 0*32] + %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 1*32] + %%MOVDQU [%%DST + 0*32], %%XTMP0 + %%MOVDQU [%%DST + %%SIZE - 1*32], %%XTMP1 + %else + %%MOVDQU %%XTMP0, [%%SRC + 0*16] + %%MOVDQU %%XTMP1, [%%SRC + 1*16] + %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*16] + %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*16] + %%MOVDQU [%%DST + 0*16], %%XTMP0 + %%MOVDQU [%%DST + 1*16], %%XTMP1 + %%MOVDQU [%%DST + %%SIZE - 2*16], %%XTMP2 + %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP3 + %endif + %%DONE +%endif + +%if (%%MAXSIZE >= 32) +%%lt32: + test %%SIZE, 16 + jz %%lt16 + %if (%%USEAVX >= 
2) + %%MOVDQU XWORD(%%XTMP0), [%%SRC + 0*16] + %%MOVDQU XWORD(%%XTMP1), [%%SRC + %%SIZE - 1*16] + %%MOVDQU [%%DST + 0*16], XWORD(%%XTMP0) + %%MOVDQU [%%DST + %%SIZE - 1*16], XWORD(%%XTMP1) + %else + %%MOVDQU %%XTMP0, [%%SRC + 0*16] + %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 1*16] + %%MOVDQU [%%DST + 0*16], %%XTMP0 + %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP1 + %endif + %%DONE +%endif + +%if (%%MAXSIZE >= 16) +%%lt16: + test %%SIZE, 8 + jz %%lt8 + mov %%TMP0, [%%SRC] + mov %%TMP1, [%%SRC + %%SIZE - 8] + mov [%%DST], %%TMP0 + mov [%%DST + %%SIZE - 8], %%TMP1 + %%DONE +%endif + +%if (%%MAXSIZE >= 8) +%%lt8: + test %%SIZE, 4 + jz %%lt4 + mov DWORD(%%TMP0), [%%SRC] + mov DWORD(%%TMP1), [%%SRC + %%SIZE - 4] + mov [%%DST], DWORD(%%TMP0) + mov [%%DST + %%SIZE - 4], DWORD(%%TMP1) + %%DONE +%endif + +%if (%%MAXSIZE >= 4) +%%lt4: + test %%SIZE, 2 + jz %%lt2 + movzx DWORD(%%TMP0), word [%%SRC] + movzx DWORD(%%TMP1), byte [%%SRC + %%SIZE - 1] + mov [%%DST], WORD(%%TMP0) + mov [%%DST + %%SIZE - 1], BYTE(%%TMP1) + %%DONE +%endif + +%%lt2: +%if (%%NOT0 == 0) + test %%SIZE, 1 + jz %%end +%endif + movzx DWORD(%%TMP0), byte [%%SRC] + mov [%%DST], BYTE(%%TMP0) +%%end: +%if (%%USERET != 0) + ret +%endif +%endm + +%endif ; ifndef __MEMCPY_ASM__ diff --git a/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h b/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h new file mode 100644 index 00000000..b15da3cb --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h @@ -0,0 +1,363 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + +/** + * @file memcpy_inline.h + * @brief Defines intrinsic memcpy functions used by the new hashing API + * + */ + +#ifndef _MEMCPY_H_ +#define _MEMCPY_H_ + +#include "intrinreg.h" +#include <assert.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#define memcpy_varlen memcpy_sse_varlen +#define memcpy_fixedlen memcpy_sse_fixedlen + +#define memclr_varlen memclr_sse_varlen +#define memclr_fixedlen memclr_sse_fixedlen + +static inline void memcpy_lte32_sse_fixedlen(void* dst, const void* src, size_t nbytes); +static inline void memcpy_gte16_sse_fixedlen(void* dst, const void* src, size_t nbytes); +static inline void memcpy_sse_fixedlen (void* dst, const void* src, size_t nbytes); + +static inline void memcpy_lte32_sse_varlen (void* dst, const void* src, size_t nbytes); +static inline void memcpy_gte16_sse_varlen (void* dst, const void* src, size_t nbytes); +static inline void memcpy_sse_varlen (void* dst, const void* src, size_t nbytes); + + +static inline void memclr_lte32_sse_fixedlen(void* dst, size_t nbytes); +static inline void memclr_gte16_sse_fixedlen(void* dst, size_t nbytes); +static inline void memclr_sse_fixedlen (void* dst, size_t nbytes); + +static inline void memclr_lte32_sse_varlen (void* dst, size_t nbytes); +static inline void memclr_gte16_sse_varlen (void* dst, size_t nbytes); +static inline void memclr_sse_varlen (void* dst, size_t nbytes); + +#define MEMCPY_BETWEEN_N_AND_2N_BYTES(N, fixedwidth, dst, src, nbytes) \ + do { \ + intrinreg##N head; \ + intrinreg##N tail; \ + assert(N <= nbytes && nbytes <= 2*N); \ + if(N == 1 || (fixedwidth && nbytes==N) ) { \ + head = load_intrinreg##N(src); \ + store_intrinreg##N(dst, head); \ + } \ + else { \ + head = load_intrinreg##N(src); \ + tail = load_intrinreg##N((const void*)((const char*)src + (nbytes - N))); \ + store_intrinreg##N(dst, head); \ + store_intrinreg##N((void*)((char*)dst + (nbytes - N)), tail); \ + } \ + } while(0) 
+ +#define MEMCLR_BETWEEN_N_AND_2N_BYTES(N, fixedwidth, dst, nbytes) \ + do { \ + const intrinreg##N zero = {0}; \ + assert(N <= nbytes && nbytes <= 2*N); \ + if(N == 1 || (fixedwidth && nbytes==N) ) { \ + store_intrinreg##N(dst, zero); \ + } \ + else { \ + store_intrinreg##N(dst, zero); \ + store_intrinreg##N((void*)((char*)dst + (nbytes - N)), zero); \ + } \ + } while(0) + +// Define load/store functions uniformly. + +#define load_intrinreg16(src) _mm_loadu_ps((const float*) src) +#define store_intrinreg16(dst,val) _mm_storeu_ps((float*) dst, val) + +static inline intrinreg8 load_intrinreg8(const void *src) +{ + return *(intrinreg8 *) src; +} + +static inline void store_intrinreg8(void *dst, intrinreg8 val) +{ + *(intrinreg8 *) dst = val; +} + +static inline intrinreg4 load_intrinreg4(const void *src) +{ + return *(intrinreg4 *) src; +} + +static inline void store_intrinreg4(void *dst, intrinreg4 val) +{ + *(intrinreg4 *) dst = val; +} + +static inline intrinreg2 load_intrinreg2(const void *src) +{ + return *(intrinreg2 *) src; +} + +static inline void store_intrinreg2(void *dst, intrinreg2 val) +{ + *(intrinreg2 *) dst = val; +} + +static inline intrinreg1 load_intrinreg1(const void *src) +{ + return *(intrinreg1 *) src; +} + +static inline void store_intrinreg1(void *dst, intrinreg1 val) +{ + *(intrinreg1 *) dst = val; +} + +static inline void memcpy_gte16_sse_fixedlen(void *dst, const void *src, size_t nbytes) +{ + size_t i; + size_t j; + intrinreg16 pool[4]; + size_t remaining_moves; + size_t tail_offset; + int do_tail; + assert(nbytes >= 16); + + for (i = 0; i + 16 * 4 <= nbytes; i += 16 * 4) { + for (j = 0; j < 4; j++) + pool[j] = + load_intrinreg16((const void *)((const char *)src + i + 16 * j)); + for (j = 0; j < 4; j++) + store_intrinreg16((void *)((char *)dst + i + 16 * j), pool[j]); + } + + remaining_moves = (nbytes - i) / 16; + tail_offset = nbytes - 16; + do_tail = (tail_offset & (16 - 1)); + + for (j = 0; j < remaining_moves; j++) + pool[j] = 
load_intrinreg16((const void *)((const char *)src + i + 16 * j)); + + if (do_tail) + pool[j] = load_intrinreg16((const void *)((const char *)src + tail_offset)); + + for (j = 0; j < remaining_moves; j++) + store_intrinreg16((void *)((char *)dst + i + 16 * j), pool[j]); + + if (do_tail) + store_intrinreg16((void *)((char *)dst + tail_offset), pool[j]); +} + +static inline void memclr_gte16_sse_fixedlen(void *dst, size_t nbytes) +{ + size_t i; + size_t j; + const intrinreg16 zero = { 0 }; + size_t remaining_moves; + size_t tail_offset; + int do_tail; + assert(nbytes >= 16); + + for (i = 0; i + 16 * 4 <= nbytes; i += 16 * 4) + for (j = 0; j < 4; j++) + store_intrinreg16((void *)((char *)dst + i + 16 * j), zero); + + remaining_moves = (nbytes - i) / 16; + tail_offset = nbytes - 16; + do_tail = (tail_offset & (16 - 1)); + + for (j = 0; j < remaining_moves; j++) + store_intrinreg16((void *)((char *)dst + i + 16 * j), zero); + + if (do_tail) + store_intrinreg16((void *)((char *)dst + tail_offset), zero); +} + +static inline void memcpy_lte32_sse_fixedlen(void *dst, const void *src, size_t nbytes) +{ + assert(nbytes <= 32); + if (nbytes >= 16) + MEMCPY_BETWEEN_N_AND_2N_BYTES(16, 1, dst, src, nbytes); + else if (nbytes >= 8) + MEMCPY_BETWEEN_N_AND_2N_BYTES(8, 1, dst, src, nbytes); + else if (nbytes >= 4) + MEMCPY_BETWEEN_N_AND_2N_BYTES(4, 1, dst, src, nbytes); + else if (nbytes >= 2) + MEMCPY_BETWEEN_N_AND_2N_BYTES(2, 1, dst, src, nbytes); + else if (nbytes >= 1) + MEMCPY_BETWEEN_N_AND_2N_BYTES(1, 1, dst, src, nbytes); +} + +static inline void memclr_lte32_sse_fixedlen(void *dst, size_t nbytes) +{ + assert(nbytes <= 32); + if (nbytes >= 16) + MEMCLR_BETWEEN_N_AND_2N_BYTES(16, 1, dst, nbytes); + else if (nbytes >= 8) + MEMCLR_BETWEEN_N_AND_2N_BYTES(8, 1, dst, nbytes); + else if (nbytes >= 4) + MEMCLR_BETWEEN_N_AND_2N_BYTES(4, 1, dst, nbytes); + else if (nbytes >= 2) + MEMCLR_BETWEEN_N_AND_2N_BYTES(2, 1, dst, nbytes); + else if (nbytes >= 1) + MEMCLR_BETWEEN_N_AND_2N_BYTES(1, 
1, dst, nbytes); +} + +static inline void memcpy_lte32_sse_varlen(void *dst, const void *src, size_t nbytes) +{ + assert(nbytes <= 32); + if (nbytes >= 16) + MEMCPY_BETWEEN_N_AND_2N_BYTES(16, 0, dst, src, nbytes); + else if (nbytes >= 8) + MEMCPY_BETWEEN_N_AND_2N_BYTES(8, 0, dst, src, nbytes); + else if (nbytes >= 4) + MEMCPY_BETWEEN_N_AND_2N_BYTES(4, 0, dst, src, nbytes); + else if (nbytes >= 2) + MEMCPY_BETWEEN_N_AND_2N_BYTES(2, 0, dst, src, nbytes); + else if (nbytes >= 1) + MEMCPY_BETWEEN_N_AND_2N_BYTES(1, 0, dst, src, nbytes); +} + +static inline void memclr_lte32_sse_varlen(void *dst, size_t nbytes) +{ + assert(nbytes <= 32); + if (nbytes >= 16) + MEMCLR_BETWEEN_N_AND_2N_BYTES(16, 0, dst, nbytes); + else if (nbytes >= 8) + MEMCLR_BETWEEN_N_AND_2N_BYTES(8, 0, dst, nbytes); + else if (nbytes >= 4) + MEMCLR_BETWEEN_N_AND_2N_BYTES(4, 0, dst, nbytes); + else if (nbytes >= 2) + MEMCLR_BETWEEN_N_AND_2N_BYTES(2, 0, dst, nbytes); + else if (nbytes >= 1) + MEMCLR_BETWEEN_N_AND_2N_BYTES(1, 0, dst, nbytes); +} + +static inline void memcpy_gte16_sse_varlen(void *dst, const void *src, size_t nbytes) +{ + size_t i = 0; + intrinreg16 tail; + + assert(nbytes >= 16); + + while (i + 128 <= nbytes) { + memcpy_gte16_sse_fixedlen((void *)((char *)dst + i), + (const void *)((const char *)src + i), 128); + i += 128; + } + if (i + 64 <= nbytes) { + memcpy_gte16_sse_fixedlen((void *)((char *)dst + i), + (const void *)((const char *)src + i), 64); + i += 64; + } + if (i + 32 <= nbytes) { + memcpy_gte16_sse_fixedlen((void *)((char *)dst + i), + (const void *)((const char *)src + i), 32); + i += 32; + } + if (i + 16 <= nbytes) { + memcpy_gte16_sse_fixedlen((void *)((char *)dst + i), + (const void *)((const char *)src + i), 16); + i += 16; + } + + i = nbytes - 16; + tail = load_intrinreg16((const void *)((const char *)src + i)); + store_intrinreg16((void *)((char *)dst + i), tail); +} + +static inline void memclr_gte16_sse_varlen(void *dst, size_t nbytes) +{ + size_t i = 0; + const 
intrinreg16 zero = { 0 }; + + assert(nbytes >= 16); + + while (i + 128 <= nbytes) { + memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 128); + i += 128; + } + if (i + 64 <= nbytes) { + memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 64); + i += 64; + } + if (i + 32 <= nbytes) { + memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 32); + i += 32; + } + if (i + 16 <= nbytes) { + memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 16); + i += 16; + } + + i = nbytes - 16; + store_intrinreg16((void *)((char *)dst + i), zero); +} + +static inline void memcpy_sse_fixedlen(void *dst, const void *src, size_t nbytes) +{ + if (nbytes >= 16) + memcpy_gte16_sse_fixedlen(dst, src, nbytes); + else + memcpy_lte32_sse_fixedlen(dst, src, nbytes); +} + +static inline void memclr_sse_fixedlen(void *dst, size_t nbytes) +{ + if (nbytes >= 16) + memclr_gte16_sse_fixedlen(dst, nbytes); + else + memclr_lte32_sse_fixedlen(dst, nbytes); +} + +static inline void memcpy_sse_varlen(void *dst, const void *src, size_t nbytes) +{ + if (nbytes >= 16) + memcpy_gte16_sse_varlen(dst, src, nbytes); + else + memcpy_lte32_sse_varlen(dst, src, nbytes); +} + +static inline void memclr_sse_varlen(void *dst, size_t nbytes) +{ + if (nbytes >= 16) + memclr_gte16_sse_varlen(dst, nbytes); + else + memclr_lte32_sse_varlen(dst, nbytes); +} + + +#ifdef __cplusplus +} +#endif + +#endif // __MEMCPY_H diff --git a/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h new file mode 100644 index 00000000..ea9bb9ac --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h @@ -0,0 +1,315 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MH_SHA1_H_ +#define _MH_SHA1_H_ + +/** + * @file mh_sha1.h + * @brief mh_sha1 function prototypes and structures + * + * Interface for mh_sha1 functions + * + * <b> mh_sha1 Init-Update..Update-Finalize </b> + * + * This file defines the interface to optimized functions used in mh_sha1. 
+ * The definition of multi-hash SHA1(mh_sha1, for short) is: Pad the buffer + * in SHA1 style until the total length is a multiple of 4*16*16 + * (words-width * parallel-segments * block-size); Hash the buffer in + * parallel, generating digests of 4*16*5 (words-width*parallel-segments* + * digest-size); Treat the set of digests as another data buffer, and + * generate a final SHA1 digest for it. + * + * + * Example + * \code + * uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; + * struct mh_sha1_ctx *ctx; + * + * ctx = malloc(sizeof(struct mh_sha1_ctx)); + * mh_sha1_init(ctx); + * mh_sha1_update(ctx, buff, block_len); + * mh_sha1_finalize(ctx, mh_sha1_digest); + * \endcode + */ + +#include <stdint.h> + +#ifdef __cplusplus +extern "C" { +#endif + + +// External Interface Definition +#define HASH_SEGS 16 +#define SHA1_BLOCK_SIZE 64 +#define MH_SHA1_BLOCK_SIZE (HASH_SEGS * SHA1_BLOCK_SIZE) +#define SHA1_DIGEST_WORDS 5 +#define AVX512_ALIGNED 64 + +/** @brief Holds info describing a single mh_sha1 + * + * It is better to use heap to allocate this data structure to avoid stack overflow. + * +*/ +struct mh_sha1_ctx { + uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; //!< the digest of multi-hash SHA1 + + uint64_t total_length; + //!< Parameters for update feature, describe the lengths of input buffers in bytes + uint8_t partial_block_buffer [MH_SHA1_BLOCK_SIZE * 2]; + //!< Padding the tail of input data for SHA1 + uint8_t mh_sha1_interim_digests[sizeof(uint32_t) * SHA1_DIGEST_WORDS * HASH_SEGS]; + //!< Storing the SHA1 interim digests of all 16 segments. Each time, it will be copied to stack for 64-byte alignment purpose. + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE + AVX512_ALIGNED]; + //!< Re-structure sha1 block data from different segments to fit big endian. Use AVX512_ALIGNED for 64-byte alignment purpose. 
+}; + +/** + * @enum mh_sha1_ctx_error + * @brief CTX error flags + */ +enum mh_sha1_ctx_error{ + MH_SHA1_CTX_ERROR_NONE = 0, //!< MH_SHA1_MURMUR3_CTX_ERROR_NONE + MH_SHA1_CTX_ERROR_NULL = -1, //!< MH_SHA1_MURMUR3_CTX_ERROR_NULL +}; + + +/******************************************************************* + * mh_sha1 API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the mh_sha1_ctx structure. + * + * @param ctx Structure holding mh_sha1 info + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_init (struct mh_sha1_ctx* ctx); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_update (struct mh_sha1_ctx * ctx, const void* buffer, uint32_t len); + +/** + * @brief Finalize the message digests for multi-hash sha1. + * + * Place the message digest in mh_sha1_digest which must have enough space + * for the outputs. + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. 
+ * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_finalize (struct mh_sha1_ctx* ctx, void* mh_sha1_digest); + +/******************************************************************* + * multi-types of mh_sha1 internal API + * + * XXXX The multi-binary version + * XXXX_base The C code version which used to display the algorithm + * XXXX_sse The version uses a ASM function optimized for SSE + * XXXX_avx The version uses a ASM function optimized for AVX + * XXXX_avx2 The version uses a ASM function optimized for AVX2 + * XXXX_avx512 The version uses a ASM function optimized for AVX512 + * + ******************************************************************/ + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * Base update() function that does not require SIMD support. + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_base (struct mh_sha1_ctx* ctx, const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires SSE + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_sse (struct mh_sha1_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. 
+ * @requires AVX + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_avx (struct mh_sha1_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX2 + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_avx2 (struct mh_sha1_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Multi-hash sha1 update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX512 + * + * @param ctx Structure holding mh_sha1 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_update_avx512 (struct mh_sha1_ctx * ctx, + const void* buffer, uint32_t len); + + +/** + * @brief Finalize the message digests for multi-hash sha1. + * + * Place the message digests in mh_sha1_digest, + * which must have enough space for the outputs. + * Base Finalize() function that does not require SIMD support. + * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_finalize_base (struct mh_sha1_ctx* ctx, + void* mh_sha1_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digest in mh_sha1_digest which must have enough space + * for the outputs. 
+ *
+ * @requires SSE
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_finalize_sse (struct mh_sha1_ctx* ctx,
+		void* mh_sha1_digest);
+
+/**
+ * @brief Finalize the message digests for multi-hash sha1.
+ *
+ * Place the message digest in mh_sha1_digest which must have enough space
+ * for the outputs.
+ *
+ * @requires AVX
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_finalize_avx (struct mh_sha1_ctx* ctx,
+		void* mh_sha1_digest);
+
+/**
+ * @brief Finalize the message digests for multi-hash sha1.
+ *
+ * Place the message digest in mh_sha1_digest which must have enough space
+ * for the outputs.
+ *
+ * @requires AVX2
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_finalize_avx2 (struct mh_sha1_ctx* ctx,
+		void* mh_sha1_digest);
+
+/**
+ * @brief Finalize the message digests for multi-hash sha1.
+ *
+ * Place the message digest in mh_sha1_digest which must have enough space
+ * for the outputs.
+ * + * @requires AVX512 + * + * @param ctx Structure holding mh_sha1 info + * @param mh_sha1_digest The digest of mh_sha1 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_finalize_avx512 (struct mh_sha1_ctx* ctx, + void* mh_sha1_digest); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h new file mode 100644 index 00000000..0c84650e --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h @@ -0,0 +1,327 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MH_SHA1_MURMUR3_X64_128_H_ +#define _MH_SHA1_MURMUR3_X64_128_H_ + +/** + * @file mh_sha1_murmur3_x64_128.h + * @brief mh_sha1_murmur3_x64_128 function prototypes and structures + * + * Interface for mh_sha1_murmur3_x64_128 functions + * + * <b> mh_sha1_murmur3_x64_128 Init-Update..Update-Finalize </b> + * + * This file defines the interface to optimized functions used in mh_sha1 and + * mh_sha1_murmur3_x64_128. The definition of multi-hash SHA1(mh_sha1, + * for short) is: Pad the buffer in SHA1 style until the total length is a multiple + * of 4*16*16(words-width * parallel-segments * block-size); Hash the buffer + * in parallel, generating digests of 4*16*5 (words-width*parallel-segments* + * digest-size); Treat the set of digests as another data buffer, and generate + * a final SHA1 digest for it. mh_sha1_murmur3_x64_128 is a stitching function + * which will get a murmur3_x64_128 digest while generate mh_sha1 digest. 
+ * + * + * Example + * \code + * uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; + * uint32_t murmur_digest[MURMUR3_x64_128_DIGEST_WORDS]; + * struct mh_sha1_murmur3_x64_128_ctx *ctx; + * + * ctx = malloc(sizeof(struct mh_sha1_murmur3_x64_128_ctx)); + * mh_sha1_murmur3_x64_128_init(ctx, 0); + * mh_sha1_murmur3_x64_128_update(ctx, buff, block_len); + * mh_sha1_murmur3_x64_128_finalize(ctx, mh_sha1_digest, + * murmur_digest); + * \endcode + */ + +#include <stdint.h> +#include "mh_sha1.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +// External Interface Definition +// Add murmur3_x64_128 definition +#define MUR_BLOCK_SIZE (2 * sizeof(uint64_t)) +#define MURMUR3_x64_128_DIGEST_WORDS 4 + +/** @brief Holds info describing a single mh_sha1_murmur3_x64_128 + * + * It is better to use heap to allocate this data structure to avoid stack overflow. + * +*/ +struct mh_sha1_murmur3_x64_128_ctx { + uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; //!< the digest of multi-hash SHA1 + uint32_t murmur3_x64_128_digest[MURMUR3_x64_128_DIGEST_WORDS]; //!< the digest of murmur3_x64_128 + + uint64_t total_length; + //!< Parameters for update feature, describe the lengths of input buffers in bytes + uint8_t partial_block_buffer [MH_SHA1_BLOCK_SIZE * 2]; + //!< Padding the tail of input data for SHA1 + uint8_t mh_sha1_interim_digests[sizeof(uint32_t) * SHA1_DIGEST_WORDS * HASH_SEGS]; + //!< Storing the SHA1 interim digests of all 16 segments. Each time, it will be copied to stack for 64-byte alignment purpose. + uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE + AVX512_ALIGNED]; + //!< Re-structure sha1 block data from different segments to fit big endian. Use AVX512_ALIGNED for 64-byte alignment purpose. 
+}; + +/** + * @enum mh_sha1_murmur3_ctx_error + * @brief CTX error flags + */ +enum mh_sha1_murmur3_ctx_error{ + MH_SHA1_MURMUR3_CTX_ERROR_NONE = 0, //!< MH_SHA1_MURMUR3_CTX_ERROR_NONE + MH_SHA1_MURMUR3_CTX_ERROR_NULL = -1, //!<MH_SHA1_MURMUR3_CTX_ERROR_NULL +}; + + +/******************************************************************* + * mh_sha1_murmur3_x64_128 API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the mh_sha1_murmur3_x64_128_ctx structure. + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param murmur_seed Seed as an initial digest of murmur3 + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_murmur3_x64_128_init (struct mh_sha1_murmur3_x64_128_ctx* ctx, + uint64_t murmur_seed); + +/** + * @brief Combined multi-hash and murmur hash update. + * + * Can be called repeatedly to update hashes with new input data. + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_murmur3_x64_128_update (struct mh_sha1_murmur3_x64_128_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest, + * which must have enough space for the outputs. + * This function determines what instruction sets are enabled and selects the + * appropriate version at runtime. 
+ * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param mh_sha1_digest The digest of mh_sha1 + * @param murmur3_x64_128_digest The digest of murmur3_x64_128 + * @returns int Return 0 if the function runs without errors + */ +int mh_sha1_murmur3_x64_128_finalize (struct mh_sha1_murmur3_x64_128_ctx* ctx, + void* mh_sha1_digest, void* murmur3_x64_128_digest); + +/******************************************************************* + * multi-types of mh_sha1_murmur3_x64_128 internal API + * + * XXXX The multi-binary version + * XXXX_base The C code version which used to display the algorithm + * XXXX_sse The version uses a ASM function optimized for SSE + * XXXX_avx The version uses a ASM function optimized for AVX + * XXXX_avx2 The version uses a ASM function optimized for AVX2 + * + ******************************************************************/ + +/** + * @brief Combined multi-hash and murmur hash update. + * + * Can be called repeatedly to update hashes with new input data. + * Base update() function that does not require SIMD support. + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_update_base (struct mh_sha1_murmur3_x64_128_ctx* ctx, + const void* buffer, uint32_t len); + +/** + * @brief Combined multi-hash and murmur hash update. + * + * Can be called repeatedly to update hashes with new input data. 
+ * @requires SSE + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_update_sse (struct mh_sha1_murmur3_x64_128_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Combined multi-hash and murmur hash update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_update_avx (struct mh_sha1_murmur3_x64_128_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Combined multi-hash and murmur hash update. + * + * Can be called repeatedly to update hashes with new input data. + * @requires AVX2 + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_update_avx2 (struct mh_sha1_murmur3_x64_128_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Combined multi-hash and murmur hash update. + * + * Can be called repeatedly to update hashes with new input data. 
+ * @requires AVX512 + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_update_avx512 (struct mh_sha1_murmur3_x64_128_ctx * ctx, + const void* buffer, uint32_t len); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest, + * which must have enough space for the outputs. + * Base Finalize() function that does not require SIMD support. + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param mh_sha1_digest The digest of mh_sha1 + * @param murmur3_x64_128_digest The digest of murmur3_x64_128 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_finalize_base (struct mh_sha1_murmur3_x64_128_ctx* ctx, + void* mh_sha1_digest, void* murmur3_x64_128_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest, + * which must have enough space for the outputs. + * + * @requires SSE + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param mh_sha1_digest The digest of mh_sha1 + * @param murmur3_x64_128_digest The digest of murmur3_x64_128 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_finalize_sse (struct mh_sha1_murmur3_x64_128_ctx* ctx, + void* mh_sha1_digest, void* murmur3_x64_128_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest, + * which must have enough space for the outputs. 
+ * + * @requires AVX + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param mh_sha1_digest The digest of mh_sha1 + * @param murmur3_x64_128_digest The digest of murmur3_x64_128 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_finalize_avx (struct mh_sha1_murmur3_x64_128_ctx* ctx, + void* mh_sha1_digest, void* murmur3_x64_128_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest, + * which must have enough space for the outputs. + * + * @requires AVX2 + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param mh_sha1_digest The digest of mh_sha1 + * @param murmur3_x64_128_digest The digest of murmur3_x64_128 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_finalize_avx2 (struct mh_sha1_murmur3_x64_128_ctx* ctx, + void* mh_sha1_digest, void* murmur3_x64_128_digest); + +/** + * @brief Finalize the message digests for combined multi-hash and murmur. + * + * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest, + * which must have enough space for the outputs. 
+ * + * @requires AVX512 + * + * @param ctx Structure holding mh_sha1_murmur3_x64_128 info + * @param mh_sha1_digest The digest of mh_sha1 + * @param murmur3_x64_128_digest The digest of murmur3_x64_128 + * @returns int Return 0 if the function runs without errors + * + */ +int mh_sha1_murmur3_x64_128_finalize_avx512 (struct mh_sha1_murmur3_x64_128_ctx* ctx, + void* mh_sha1_digest, void* murmur3_x64_128_digest); + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h b/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h new file mode 100644 index 00000000..d9b71357 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h @@ -0,0 +1,127 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _MULTI_BUFFER_H_ +#define _MULTI_BUFFER_H_ + +/** + * @file multi_buffer.h + * @brief Multi-buffer common fields + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef __FreeBSD__ +#include <sys/types.h> +#include <sys/endian.h> +# define _byteswap_uint64(x) bswap64(x) +# define _byteswap_ulong(x) bswap32(x) +#elif defined (__APPLE__) +#include <libkern/OSByteOrder.h> +# define _byteswap_uint64(x) OSSwapInt64(x) +# define _byteswap_ulong(x) OSSwapInt32(x) +#elif defined (__GNUC__) && !defined (__MINGW32__) +# include <byteswap.h> +# define _byteswap_uint64(x) bswap_64(x) +# define _byteswap_ulong(x) bswap_32(x) +#endif + +/** + * @enum JOB_STS + * @brief Job return codes + */ + +typedef enum {STS_UNKNOWN = 0, //!< STS_UNKNOWN + STS_BEING_PROCESSED = 1,//!< STS_BEING_PROCESSED + STS_COMPLETED = 2, //!< STS_COMPLETED + STS_INTERNAL_ERROR, //!< STS_INTERNAL_ERROR + STS_ERROR //!< STS_ERROR +} JOB_STS; + +#define HASH_MB_NO_FLAGS 0 +#define HASH_MB_FIRST 1 +#define HASH_MB_LAST 2 + +/* Common flags for the new API only + * */ + +/** + * @enum HASH_CTX_FLAG + * @brief CTX job type + */ +typedef enum { + HASH_UPDATE = 0x00, //!< HASH_UPDATE + HASH_FIRST = 0x01, //!< HASH_FIRST + HASH_LAST = 0x02, //!< HASH_LAST + HASH_ENTIRE = 0x03, //!< HASH_ENTIRE +} HASH_CTX_FLAG; + +/** + * @enum HASH_CTX_STS + * @brief CTX status flags + */ +typedef enum { + 
HASH_CTX_STS_IDLE = 0x00, //!< HASH_CTX_STS_IDLE + HASH_CTX_STS_PROCESSING = 0x01, //!< HASH_CTX_STS_PROCESSING + HASH_CTX_STS_LAST = 0x02, //!< HASH_CTX_STS_LAST + HASH_CTX_STS_COMPLETE = 0x04, //!< HASH_CTX_STS_COMPLETE +} HASH_CTX_STS; + +/** + * @enum HASH_CTX_ERROR + * @brief CTX error flags + */ +typedef enum { + HASH_CTX_ERROR_NONE = 0, //!< HASH_CTX_ERROR_NONE + HASH_CTX_ERROR_INVALID_FLAGS = -1, //!< HASH_CTX_ERROR_INVALID_FLAGS + HASH_CTX_ERROR_ALREADY_PROCESSING = -2, //!< HASH_CTX_ERROR_ALREADY_PROCESSING + HASH_CTX_ERROR_ALREADY_COMPLETED = -3, //!< HASH_CTX_ERROR_ALREADY_COMPLETED +} HASH_CTX_ERROR; + + +#define hash_ctx_user_data(ctx) ((ctx)->user_data) +#define hash_ctx_digest(ctx) ((ctx)->job.result_digest) +#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING) +#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE) +#define hash_ctx_status(ctx) ((ctx)->status) +#define hash_ctx_error(ctx) ((ctx)->error) +#define hash_ctx_init(ctx) \ + do { \ + (ctx)->error = HASH_CTX_ERROR_NONE; \ + (ctx)->status = HASH_CTX_STS_COMPLETE; \ + } while(0) + +#ifdef __cplusplus +} +#endif + +#endif // _MULTI_BUFFER_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm b/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm new file mode 100644 index 00000000..45f87a9b --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm @@ -0,0 +1,271 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. 
+; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%ifndef _MULTIBINARY_ASM_ +%define _MULTIBINARY_ASM_ + +%ifidn __OUTPUT_FORMAT__, elf32 + %define mbin_def_ptr dd + %define mbin_ptr_sz dword + %define mbin_rdi edi + %define mbin_rsi esi + %define mbin_rax eax + %define mbin_rbx ebx + %define mbin_rcx ecx + %define mbin_rdx edx +%else + %define mbin_def_ptr dq + %define mbin_ptr_sz qword + %define mbin_rdi rdi + %define mbin_rsi rsi + %define mbin_rax rax + %define mbin_rbx rbx + %define mbin_rcx rcx + %define mbin_rdx rdx +%endif + +;;;; +; multibinary macro: +; creates the visible entry point that uses HW optimized call pointer +; creates the init of the HW optimized call pointer +;;;; +%macro mbin_interface 1 + ;;;; + ; *_dispatched is defaulted to *_mbinit and replaced on first call. + ; Therefore, *_dispatch_init is only executed on first call. + ;;;; + section .data + %1_dispatched: + mbin_def_ptr %1_mbinit + + section .text + global %1:function + %1_mbinit: + ;;; only called the first time to setup hardware match + call %1_dispatch_init + ;;; falls thru to execute the hw optimized code + %1: + jmp mbin_ptr_sz [%1_dispatched] +%endmacro + +;;;;; +; mbin_dispatch_init parameters +; Use this function when SSE/00/01 is a minimum requirement +; 1-> function name +; 2-> SSE/00/01 optimized function used as base +; 3-> AVX or AVX/02 opt func +; 4-> AVX2 or AVX/04 opt func +;;;;; +%macro mbin_dispatch_init 4 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + lea mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01 + + mov eax, 1 + cpuid + and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + lea mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func + jne _%1_init_done ; AVX is not available so end + mov mbin_rsi, mbin_rbx + + ;; Try for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + lea mbin_rbx, 
[%4 WRT_OPT] ; AVX (gen4) opt func + cmovne mbin_rsi, mbin_rbx + + ;; Does it have xmm and ymm support + xor ecx, ecx + xgetbv + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + je _%1_init_done + lea mbin_rsi, [%2 WRT_OPT] + + _%1_init_done: + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro + +;;;;; +; mbin_dispatch_init2 parameters +; Cases where only base functions are available +; 1-> function name +; 2-> base function +;;;;; +%macro mbin_dispatch_init2 2 + section .text + %1_dispatch_init: + push mbin_rsi + lea mbin_rsi, [%2 WRT_OPT] ; Default + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro + +;;;;; +; mbin_dispatch_init5 parameters +; 1-> function name +; 2-> base function +; 3-> SSE4_1 or 00/01 optimized function +; 4-> AVX/02 opt func +; 5-> AVX2/04 opt func +;;;;; +%macro mbin_dispatch_init5 5 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function + + mov eax, 1 + cpuid + ; Test for SSE4.1 + test ecx, FLAG_CPUID1_ECX_SSE4_1 + lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func + cmovne mbin_rsi, mbin_rbx + + and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE) + lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen2) opt func + jne _%1_init_done ; AVX is not available so end + mov mbin_rsi, mbin_rbx + + ;; Try for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + lea mbin_rbx, [%5 WRT_OPT] ; AVX (gen4) opt func + cmovne mbin_rsi, mbin_rbx + + ;; Does it have xmm and ymm support + xor ecx, ecx + xgetbv + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + je _%1_init_done + lea mbin_rsi, [%3 WRT_OPT] + + _%1_init_done: + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro + +;;;;; 
+; mbin_dispatch_init6 parameters +; 1-> function name +; 2-> base function +; 3-> SSE4_1 or 00/01 optimized function +; 4-> AVX/02 opt func +; 5-> AVX2/04 opt func +; 6-> AVX512/06 opt func +;;;;; +%macro mbin_dispatch_init6 6 + section .text + %1_dispatch_init: + push mbin_rsi + push mbin_rax + push mbin_rbx + push mbin_rcx + push mbin_rdx + push mbin_rdi + lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function + + mov eax, 1 + cpuid + mov ebx, ecx ; save cpuid1.ecx + test ecx, FLAG_CPUID1_ECX_SSE4_1 + je _%1_init_done ; Use base function if no SSE4_1 + lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt + + ;; Test for XMM_YMM support/AVX + test ecx, FLAG_CPUID1_ECX_OSXSAVE + je _%1_init_done + xor ecx, ecx + xgetbv ; xcr -> edx:eax + mov edi, eax ; save xgetbv.eax + + and eax, FLAG_XGETBV_EAX_XMM_YMM + cmp eax, FLAG_XGETBV_EAX_XMM_YMM + jne _%1_init_done + test ebx, FLAG_CPUID1_ECX_AVX + je _%1_init_done + lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt + + ;; Test for AVX2 + xor ecx, ecx + mov eax, 7 + cpuid + test ebx, FLAG_CPUID7_EBX_AVX2 + je _%1_init_done ; No AVX2 possible + lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func + + ;; Test for AVX512 + and edi, FLAG_XGETBV_EAX_ZMM_OPM + cmp edi, FLAG_XGETBV_EAX_ZMM_OPM + jne _%1_init_done ; No AVX512 possible + and ebx, FLAGS_CPUID7_ECX_AVX512_G1 + cmp ebx, FLAGS_CPUID7_ECX_AVX512_G1 + lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt + cmove mbin_rsi, mbin_rbx + + _%1_init_done: + pop mbin_rdi + pop mbin_rdx + pop mbin_rcx + pop mbin_rbx + pop mbin_rax + mov [%1_dispatched], mbin_rsi + pop mbin_rsi + ret +%endmacro + +%endif ; ifndef _MULTIBINARY_ASM_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm b/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm new file mode 100644 index 00000000..64064a1a --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm @@ -0,0 +1,149 @@ +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +; Copyright(c) 2011-2016 Intel Corporation 
All rights reserved. +; +; Redistribution and use in source and binary forms, with or without +; modification, are permitted provided that the following conditions +; are met: +; * Redistributions of source code must retain the above copyright +; notice, this list of conditions and the following disclaimer. +; * Redistributions in binary form must reproduce the above copyright +; notice, this list of conditions and the following disclaimer in +; the documentation and/or other materials provided with the +; distribution. +; * Neither the name of Intel Corporation nor the names of its +; contributors may be used to endorse or promote products derived +; from this software without specific prior written permission. +; +; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + +%ifndef _REG_SIZES_ASM_ +%define _REG_SIZES_ASM_ + +%ifdef __NASM_VER__ +%ifidn __OUTPUT_FORMAT__, win64 +%error nasm not supported in windows +%else +%define endproc_frame +%endif +%endif + +%define EFLAGS_HAS_CPUID (1<<21) +%define FLAG_CPUID1_ECX_CLMUL (1<<1) +%define FLAG_CPUID1_EDX_SSE2 (1<<26) +%define FLAG_CPUID1_ECX_SSE3 (1) +%define FLAG_CPUID1_ECX_SSE4_1 (1<<19) +%define FLAG_CPUID1_ECX_SSE4_2 (1<<20) +%define FLAG_CPUID1_ECX_POPCNT (1<<23) +%define FLAG_CPUID1_ECX_AESNI (1<<25) +%define FLAG_CPUID1_ECX_OSXSAVE (1<<27) +%define FLAG_CPUID1_ECX_AVX (1<<28) +%define FLAG_CPUID1_EBX_AVX2 (1<<5) + +%define FLAG_CPUID7_EBX_AVX2 (1<<5) +%define FLAG_CPUID7_EBX_AVX512F (1<<16) +%define FLAG_CPUID7_EBX_AVX512DQ (1<<17) +%define FLAG_CPUID7_EBX_AVX512IFMA (1<<21) +%define FLAG_CPUID7_EBX_AVX512PF (1<<26) +%define FLAG_CPUID7_EBX_AVX512ER (1<<27) +%define FLAG_CPUID7_EBX_AVX512CD (1<<28) +%define FLAG_CPUID7_EBX_AVX512BW (1<<30) +%define FLAG_CPUID7_EBX_AVX512VL (1<<31) +%define FLAG_CPUID7_ECX_AVX512VBMI (1<<1) + +%define FLAGS_CPUID7_ECX_AVX512_G1 (FLAG_CPUID7_EBX_AVX512F | FLAG_CPUID7_EBX_AVX512VL | FLAG_CPUID7_EBX_AVX512BW | FLAG_CPUID7_EBX_AVX512CD | FLAG_CPUID7_EBX_AVX512DQ) + +%define FLAG_XGETBV_EAX_XMM (1<<1) +%define FLAG_XGETBV_EAX_YMM (1<<2) +%define FLAG_XGETBV_EAX_XMM_YMM 0x6 +%define FLAG_XGETBV_EAX_ZMM_OPM 0xe0 + +%define FLAG_CPUID1_EAX_AVOTON 0x000406d0 +%define FLAG_CPUID1_EAX_STEP_MASK 0xfffffff0 + +; define d and w variants for registers + +%define raxd eax +%define raxw ax +%define raxb al + +%define rbxd ebx +%define rbxw bx +%define rbxb bl + +%define rcxd ecx +%define rcxw cx +%define rcxb cl + +%define rdxd edx +%define rdxw dx +%define rdxb dl + +%define rsid esi +%define rsiw si +%define rsib sil + +%define rdid edi +%define rdiw di +%define rdib dil + +%define rbpd ebp +%define rbpw bp +%define rbpb bpl + +%define ymm0x xmm0 +%define ymm1x xmm1 +%define ymm2x 
xmm2 +%define ymm3x xmm3 +%define ymm4x xmm4 +%define ymm5x xmm5 +%define ymm6x xmm6 +%define ymm7x xmm7 +%define ymm8x xmm8 +%define ymm9x xmm9 +%define ymm10x xmm10 +%define ymm11x xmm11 +%define ymm12x xmm12 +%define ymm13x xmm13 +%define ymm14x xmm14 +%define ymm15x xmm15 + +%define DWORD(reg) reg %+ d +%define WORD(reg) reg %+ w +%define BYTE(reg) reg %+ b + +%define XWORD(reg) reg %+ x + +%ifidn __OUTPUT_FORMAT__,elf32 +section .note.GNU-stack noalloc noexec nowrite progbits +section .text +%endif +%ifidn __OUTPUT_FORMAT__,elf64 +section .note.GNU-stack noalloc noexec nowrite progbits +section .text +%endif +%ifidn __OUTPUT_FORMAT__, macho64 +%define elf64 macho64 +%endif + +%macro slversion 4 + section .text + global %1_slver_%2%3%4 + global %1_slver + %1_slver: + %1_slver_%2%3%4: + dw 0x%4 + db 0x%3, 0x%2 +%endmacro + +%endif ; ifndef _REG_SIZES_ASM_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h new file mode 100644 index 00000000..7ddeb45f --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h @@ -0,0 +1,377 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +**********************************************************************/ + +#ifndef _SHA1_MB_H_ +#define _SHA1_MB_H_ + +/** + * @file sha1_mb.h + * @brief Multi-buffer CTX API SHA1 function prototypes and structures + * + * Interface for multi-buffer SHA1 functions + * + * <b> Multi-buffer SHA1 Entire or First-Update..Update-Last </b> + * + * The interface to this multi-buffer hashing code is carried out through the + * context-level (CTX) init, submit and flush functions and the SHA1_HASH_CTX_MGR and + * SHA1_HASH_CTX objects. Numerous SHA1_HASH_CTX objects may be instantiated by the + * application for use with a single SHA1_HASH_CTX_MGR. + * + * The CTX interface functions carry out the initialization and padding of the jobs + * entered by the user and add them to the multi-buffer manager. The lower level "scheduler" + * layer then processes the jobs in an out-of-order manner. The scheduler layer functions + * are internal and are not intended to be invoked directly. 
Jobs can be submitted + * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial + * jobs which can be started using the HASH_FIRST flag, and later resumed or finished + * using the HASH_UPDATE and HASH_LAST flags respectively. + * + * <b>Note:</b> The submit function does not require data buffers to be block sized. + * + * The SHA1 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and + * AVX512. In addition, a multibinary interface is provided, which selects the appropriate + * architecture-specific function at runtime. + * + * <b>Usage:</b> The application creates a SHA1_HASH_CTX_MGR object and initializes it + * with a call to sha1_ctx_mgr_init*() function, where henceforth "*" stands for the + * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512 (or no suffix for the + * multibinary version). The SHA1_HASH_CTX_MGR object will be used to schedule processor + * resources, with up to 4 SHA1_HASH_CTX objects (or 8 in the AVX2 case, 16 in the AVX512) + * being processed at a time. + * + * Each SHA1_HASH_CTX must be initialized before first use by the hash_ctx_init macro + * defined in multi_buffer.h. After initialization, the application may begin computing + * a hash by giving the SHA1_HASH_CTX to a SHA1_HASH_CTX_MGR using the submit functions + * sha1_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA1_HASH_CTX is + * returned to the application (via this or a later call to sha1_ctx_mgr_submit*() or + * sha1_ctx_mgr_flush*()), the application can then re-submit it with another call to + * sha1_ctx_mgr_submit*(), but without the HASH_FIRST flag set. + * + * Ideally, on the last buffer for that hash, sha1_ctx_mgr_submit*() is called with + * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero + * length if necessary. When a SHA1_HASH_CTX is returned after having been submitted with + * HASH_LAST, it will contain a valid hash. 
The SHA1_HASH_CTX can be reused immediately + * by submitting with HASH_FIRST. + * + * For example, you would submit hashes with the following flags for the following numbers + * of buffers: + * <ul> + * <li> one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE) + * <li> two buffers: HASH_FIRST, HASH_LAST + * <li> three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST + * etc. + * </ul> + * + * The order in which SHA1_CTX objects are returned is in general different from the order + * in which they are submitted. + * + * A few possible error conditions exist: + * <ul> + * <li> Submitting flags other than the allowed entire/first/update/last values + * <li> Submitting a context that is currently being managed by a SHA1_HASH_CTX_MGR. + * <li> Submitting a context after HASH_LAST is used but before HASH_FIRST is set. + * </ul> + * + * These error conditions are reported by returning the SHA1_HASH_CTX immediately after + * a submit with its error member set to a non-zero error code (defined in + * multi_buffer.h). No changes are made to the SHA1_HASH_CTX_MGR in the case of an + * error; no processing is done for other hashes. 
+ * + */ + +#include <stdint.h> +#include "multi_buffer.h" +#include "types.h" + +#ifndef _MSC_VER +#include <stdbool.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Hash Constants and Typedefs +#define SHA1_DIGEST_NWORDS 5 +#define SHA1_MAX_LANES 16 +#define SHA1_X8_LANES 8 +#define SHA1_MIN_LANES 4 +#define SHA1_BLOCK_SIZE 64 +#define SHA1_LOG2_BLOCK_SIZE 6 +#define SHA1_PADLENGTHFIELD_SIZE 8 +#define SHA1_INITIAL_DIGEST \ + 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0 + +typedef uint32_t sha1_digest_array[SHA1_DIGEST_NWORDS][SHA1_MAX_LANES]; +typedef uint32_t SHA1_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single SHA1 job for the multi-buffer manager */ + +typedef struct { + uint8_t* buffer; //!< pointer to data buffer for this job + uint32_t len; //!< length of buffer for this job in blocks. + DECLARE_ALIGNED(uint32_t result_digest[SHA1_DIGEST_NWORDS],64); + JOB_STS status; //!< output job status + void* user_data; //!< pointer for user's job-related data +} SHA1_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted SHA1 job */ + +typedef struct { + sha1_digest_array digest; + uint8_t* data_ptr[SHA1_MAX_LANES]; +} SHA1_MB_ARGS_X16; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + SHA1_JOB *job_in_lane; +} SHA1_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer SHA1 jobs */ + +typedef struct { + SHA1_MB_ARGS_X16 args; + uint32_t lens[SHA1_MAX_LANES]; + uint64_t unused_lanes; //!< each nibble is index (0...3 or 0...7 or 0...15) of unused lanes, nibble 4 or 8 is set to F as a flag + SHA1_LANE_DATA ldata[SHA1_MAX_LANES]; + uint32_t num_lanes_inuse; +} SHA1_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer SHA1 jobs */ + +typedef struct { + SHA1_MB_JOB_MGR mgr; +} SHA1_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single SHA1 job for the multi-buffer CTX manager */ + +typedef struct { + SHA1_JOB job; // Must be at struct 
offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint32_t total_length; //!< Running counter of length processed for this CTX's job + const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. + uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void* user_data; //!< pointer for user to keep any job-related data +} SHA1_HASH_CTX; + +/******************************************************************* + * Context level API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the context level SHA1 multi-buffer manager structure. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_sse (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the context level multi-buffer manager. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_sse (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_sse (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. 
+ * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_avx (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_avx2 (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx2 (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. 
+ * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx2 (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init_avx512 (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx512 (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx512 (SHA1_HASH_CTX_MGR* mgr); + +/******************** multibinary function prototypes **********************/ + +/** + * @brief Initialize the SHA1 multi-buffer manager structure. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha1_ctx_mgr_init (SHA1_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA1 job to the multi-buffer manager. 
+ * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_submit (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA1 jobs and return when complete. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA1_HASH_CTX* sha1_ctx_mgr_flush (SHA1_HASH_CTX_MGR* mgr); + + +/******************************************************************* + * Scheduler (internal) level out-of-order function prototypes + ******************************************************************/ + +void sha1_mb_mgr_init_sse (SHA1_MB_JOB_MGR *state); +SHA1_JOB* sha1_mb_mgr_submit_sse (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_sse (SHA1_MB_JOB_MGR *state); + +#define sha1_mb_mgr_init_avx sha1_mb_mgr_init_sse +SHA1_JOB* sha1_mb_mgr_submit_avx (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_avx (SHA1_MB_JOB_MGR *state); + +void sha1_mb_mgr_init_avx2 (SHA1_MB_JOB_MGR *state); +SHA1_JOB* sha1_mb_mgr_submit_avx2 (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_avx2 (SHA1_MB_JOB_MGR *state); + +void sha1_mb_mgr_init_avx512 (SHA1_MB_JOB_MGR *state); +SHA1_JOB* sha1_mb_mgr_submit_avx512 (SHA1_MB_JOB_MGR *state, SHA1_JOB* job); +SHA1_JOB* sha1_mb_mgr_flush_avx512 (SHA1_MB_JOB_MGR *state); + +#ifdef __cplusplus +} +#endif + +#endif // _SHA1_MB_H_ diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h 
b/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h new file mode 100644 index 00000000..cd48508d --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h @@ -0,0 +1,376 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#ifndef _SHA256_MB_H_ +#define _SHA256_MB_H_ + +/** + * @file sha256_mb.h + * @brief Multi-buffer CTX API SHA256 function prototypes and structures + * + * Interface for multi-buffer SHA256 functions + * + * <b> Multi-buffer SHA256 Entire or First-Update..Update-Last </b> + * + * The interface to this multi-buffer hashing code is carried out through the + * context-level (CTX) init, submit and flush functions and the SHA256_HASH_CTX_MGR and + * SHA256_HASH_CTX objects. Numerous SHA256_HASH_CTX objects may be instantiated by the + * application for use with a single SHA256_HASH_CTX_MGR. + * + * The CTX interface functions carry out the initialization and padding of the jobs + * entered by the user and add them to the multi-buffer manager. The lower level "scheduler" + * layer then processes the jobs in an out-of-order manner. The scheduler layer functions + * are internal and are not intended to be invoked directly. Jobs can be submitted + * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial + * jobs which can be started using the HASH_FIRST flag, and later resumed or finished + * using the HASH_UPDATE and HASH_LAST flags respectively. + * + * <b>Note:</b> The submit function does not require data buffers to be block sized. + * + * The SHA256 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and + * AVX512. In addition, a multibinary interface is provided, which selects the appropriate + * architecture-specific function at runtime. + * + * <b>Usage:</b> The application creates a SHA256_HASH_CTX_MGR object and initializes it + * with a call to sha256_ctx_mgr_init*() function, where henceforth "*" stands for the + * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512(or no suffix for the + * multibinary version). 
The SHA256_HASH_CTX_MGR object will be used to schedule processor
+ * resources, with up to 4 SHA256_HASH_CTX objects (or 8 in the AVX2 case, 16 in the AVX512 case)
+ * being processed at a time.
+ *
+ * Each SHA256_HASH_CTX must be initialized before first use by the hash_ctx_init macro
+ * defined in multi_buffer.h. After initialization, the application may begin computing
+ * a hash by giving the SHA256_HASH_CTX to a SHA256_HASH_CTX_MGR using the submit functions
+ * sha256_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA256_HASH_CTX is
+ * returned to the application (via this or a later call to sha256_ctx_mgr_submit*() or
+ * sha256_ctx_mgr_flush*()), the application can then re-submit it with another call to
+ * sha256_ctx_mgr_submit*(), but without the HASH_FIRST flag set.
+ *
+ * Ideally, on the last buffer for that hash, sha256_ctx_mgr_submit_sse is called with
+ * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero
+ * length if necessary. When a SHA256_HASH_CTX is returned after having been submitted with
+ * HASH_LAST, it will contain a valid hash. The SHA256_HASH_CTX can be reused immediately
+ * by submitting with HASH_FIRST.
+ *
+ * For example, you would submit hashes with the following flags for the following numbers
+ * of buffers:
+ * <ul>
+ * <li> one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE)
+ * <li> two buffers: HASH_FIRST, HASH_LAST
+ * <li> three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST
+ * etc.
+ * </ul>
+ *
+ * The order in which SHA256_CTX objects are returned is in general different from the order
+ * in which they are submitted.
+ *
+ * A few possible error conditions exist:
+ * <ul>
+ * <li> Submitting flags other than the allowed entire/first/update/last values
+ * <li> Submitting a context that is currently being managed by a SHA256_HASH_CTX_MGR.
+ * <li> Submitting a context after HASH_LAST is used but before HASH_FIRST is set.
+ * </ul> + * + * These error conditions are reported by returning the SHA256_HASH_CTX immediately after + * a submit with its error member set to a non-zero error code (defined in + * multi_buffer.h). No changes are made to the SHA256_HASH_CTX_MGR in the case of an + * error; no processing is done for other hashes. + * + */ + +#include <stdint.h> +#include "multi_buffer.h" +#include "types.h" + +#ifndef _MSC_VER +#include <stdbool.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Hash Constants and Typedefs +#define SHA256_DIGEST_NWORDS 8 +#define SHA256_MAX_LANES 16 +#define SHA256_X8_LANES 8 +#define SHA256_MIN_LANES 4 +#define SHA256_BLOCK_SIZE 64 +#define SHA256_LOG2_BLOCK_SIZE 6 +#define SHA256_PADLENGTHFIELD_SIZE 8 +#define SHA256_INITIAL_DIGEST \ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, \ + 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19 + +typedef uint32_t sha256_digest_array[SHA256_DIGEST_NWORDS][SHA256_MAX_LANES]; +typedef uint32_t SHA256_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single SHA256 job for the multi-buffer manager */ + +typedef struct { + uint8_t* buffer; //!< pointer to data buffer for this job + uint64_t len; //!< length of buffer for this job in blocks. 
+ DECLARE_ALIGNED(uint32_t result_digest[SHA256_DIGEST_NWORDS], 64); + JOB_STS status; //!< output job status + void* user_data; //!< pointer for user's job-related data +} SHA256_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted SHA256 job */ + +typedef struct { + sha256_digest_array digest; + uint8_t* data_ptr[SHA256_MAX_LANES]; +} SHA256_MB_ARGS_X16; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + SHA256_JOB *job_in_lane; +} SHA256_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer SHA256 jobs */ + +typedef struct { + SHA256_MB_ARGS_X16 args; + uint32_t lens[SHA256_MAX_LANES]; + uint64_t unused_lanes; //!< each nibble is index (0...3 or 0...7) of unused lanes, nibble 4 or 8 is set to F as a flag + SHA256_LANE_DATA ldata[SHA256_MAX_LANES]; + uint32_t num_lanes_inuse; +} SHA256_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer SHA256 jobs */ + +typedef struct { + SHA256_MB_JOB_MGR mgr; +} SHA256_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single SHA256 job for the multi-buffer CTX manager */ + +typedef struct { + SHA256_JOB job; // Must be at struct offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint32_t total_length; //!< Running counter of length processed for this CTX's job + const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. + uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void* user_data; //!< pointer for user to keep any job-related data +} SHA256_HASH_CTX; + +/******************************************************************* + * CTX level API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the context level SHA256 multi-buffer manager structure. 
+ * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_sse (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the context level multi-buffer manager. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_sse (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_sse (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA256 multi-buffer manager structure. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_avx (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the multi-buffer manager. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. 
+ */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA256 multi-buffer manager structure. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_avx2 (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the multi-buffer manager. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx2 (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA256 jobs and return when complete. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx2 (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA256 multi-buffer manager structure. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha256_ctx_mgr_init_avx512 (SHA256_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA256 job to the multi-buffer manager. 
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx512 (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA256 jobs and return when complete.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx512 (SHA256_HASH_CTX_MGR* mgr);
+/******************** multibinary function prototypes **********************/
+
+/**
+ * @brief Initialize the SHA256 multi-buffer manager structure.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha256_ctx_mgr_init (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA256 job to the multi-buffer manager.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_submit (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA256 jobs and return when complete.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_flush (SHA256_HASH_CTX_MGR* mgr);
+
+
+/*******************************************************************
+ * Scheduler (internal) level out-of-order function prototypes
+ ******************************************************************/
+
+void sha256_mb_mgr_init_sse(SHA256_MB_JOB_MGR *state);
+SHA256_JOB* sha256_mb_mgr_submit_sse(SHA256_MB_JOB_MGR *state, SHA256_JOB* job);
+SHA256_JOB* sha256_mb_mgr_flush_sse(SHA256_MB_JOB_MGR *state);
+
+#define sha256_mb_mgr_init_avx sha256_mb_mgr_init_sse
+SHA256_JOB* sha256_mb_mgr_submit_avx (SHA256_MB_JOB_MGR *state, SHA256_JOB* job);
+SHA256_JOB* sha256_mb_mgr_flush_avx (SHA256_MB_JOB_MGR *state);
+
+void sha256_mb_mgr_init_avx2 (SHA256_MB_JOB_MGR *state);
+SHA256_JOB* sha256_mb_mgr_submit_avx2 (SHA256_MB_JOB_MGR *state, SHA256_JOB* job);
+SHA256_JOB* sha256_mb_mgr_flush_avx2 (SHA256_MB_JOB_MGR *state);
+
+void sha256_mb_mgr_init_avx512 (SHA256_MB_JOB_MGR *state);
+SHA256_JOB* sha256_mb_mgr_submit_avx512 (SHA256_MB_JOB_MGR *state, SHA256_JOB* job);
+SHA256_JOB* sha256_mb_mgr_flush_avx512 (SHA256_MB_JOB_MGR *state);
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _SHA256_MB_H_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h
new file mode 100644
index 00000000..aee6156f
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h
@@ -0,0 +1,422 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + +#ifndef _SHA512_MB_H_ +#define _SHA512_MB_H_ + +/** + * @file sha512_mb.h + * @brief Single/Multi-buffer CTX API SHA512 function prototypes and structures + * + * Interface for single and multi-buffer SHA512 functions + * + * <b> Single/Multi-buffer SHA512 Entire or First-Update..Update-Last </b> + * + * The interface to this single/multi-buffer hashing code is carried out through the + * context-level (CTX) init, submit and flush functions and the SHA512_HASH_CTX_MGR and + * SHA512_HASH_CTX objects. Numerous SHA512_HASH_CTX objects may be instantiated by the + * application for use with a single SHA512_HASH_CTX_MGR. + * + * The CTX interface functions carry out the initialization and padding of the jobs + * entered by the user and add them to the multi-buffer manager. The lower level "scheduler" + * layer then processes the jobs in an out-of-order manner. The scheduler layer functions + * are internal and are not intended to be invoked directly. Jobs can be submitted + * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial + * jobs which can be started using the HASH_FIRST flag, and later resumed or finished + * using the HASH_UPDATE and HASH_LAST flags respectively. + * + * <b>Note:</b> The submit function does not require data buffers to be block sized. + * + * The SHA512 CTX interface functions are available for 5 architectures: multi-buffer SSE, + * AVX, AVX2, AVX512 and single-buffer SSE4 (which is used in the same way as the + * multi-buffer code). In addition, a multibinary interface is provided, which selects the + * appropriate architecture-specific function at runtime. This multibinary interface + * selects the single buffer SSE4 functions when the platform is detected to be Silvermont. 
+ * + * <b>Usage:</b> The application creates a SHA512_HASH_CTX_MGR object and initializes it + * with a call to sha512_ctx_mgr_init*() function, where henceforth "*" stands for the + * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512(or no suffix for the + * multibinary version). The SHA512_HASH_CTX_MGR object will be used to schedule processor + * resources, with up to 2 SHA512_HASH_CTX objects (or 4 in the AVX2 case, 8 in the AVX512 + * case) being processed at a time. + * + * Each SHA512_HASH_CTX must be initialized before first use by the hash_ctx_init macro + * defined in multi_buffer.h. After initialization, the application may begin computing + * a hash by giving the SHA512_HASH_CTX to a SHA512_HASH_CTX_MGR using the submit functions + * sha512_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA512_HASH_CTX is + * returned to the application (via this or a later call to sha512_ctx_mgr_submit*() or + * sha512_ctx_mgr_flush*()), the application can then re-submit it with another call to + * sha512_ctx_mgr_submit*(), but without the HASH_FIRST flag set. + * + * Ideally, on the last buffer for that hash, sha512_ctx_mgr_submit_sse is called with + * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero + * length if necessary. When a SHA512_HASH_CTX is returned after having been submitted with + * HASH_LAST, it will contain a valid hash. The SHA512_HASH_CTX can be reused immediately + * by submitting with HASH_FIRST. + * + * For example, you would submit hashes with the following flags for the following numbers + * of buffers: + * <ul> + * <li> one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE) + * <li> two buffers: HASH_FIRST, HASH_LAST + * <li> three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST + * etc. + * </ul> + * + * The order in which SHA512_CTX objects are returned is in general different from the order + * in which they are submitted. 
+ * + * A few possible error conditions exist: + * <ul> + * <li> Submitting flags other than the allowed entire/first/update/last values + * <li> Submitting a context that is currently being managed by a SHA512_HASH_CTX_MGR. (Note: + * This error case is not applicable to the single buffer SSE4 version) + * <li> Submitting a context after HASH_LAST is used but before HASH_FIRST is set. + * </ul> + * + * These error conditions are reported by returning the SHA512_HASH_CTX immediately after + * a submit with its error member set to a non-zero error code (defined in + * multi_buffer.h). No changes are made to the SHA512_HASH_CTX_MGR in the case of an + * error; no processing is done for other hashes. + * + */ + +#include <stdint.h> +#include "multi_buffer.h" +#include "types.h" + +#ifndef _MSC_VER +#include <stdbool.h> +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +// Hash Constants and Typedefs +#define SHA512_DIGEST_NWORDS 8 +#define SHA512_MAX_LANES 8 +#define SHA512_X4_LANES 4 +#define SHA512_MIN_LANES 2 +#define SHA512_BLOCK_SIZE 128 +#define SHA512_LOG2_BLOCK_SIZE 7 +#define SHA512_PADLENGTHFIELD_SIZE 16 +#define SHA512_INITIAL_DIGEST \ + 0x6a09e667f3bcc908,0xbb67ae8584caa73b,0x3c6ef372fe94f82b,0xa54ff53a5f1d36f1, \ + 0x510e527fade682d1,0x9b05688c2b3e6c1f,0x1f83d9abfb41bd6b,0x5be0cd19137e2179 + + +typedef uint64_t sha512_digest_array[SHA512_DIGEST_NWORDS][SHA512_MAX_LANES]; +typedef uint64_t SHA512_WORD_T; + +/** @brief Scheduler layer - Holds info describing a single SHA512 job for the multi-buffer manager */ + +typedef struct { + uint8_t* buffer; //!< pointer to data buffer for this job + uint64_t len; //!< length of buffer for this job in blocks. 
+ DECLARE_ALIGNED(uint64_t result_digest[SHA512_DIGEST_NWORDS], 64); + JOB_STS status; //!< output job status + void* user_data; //!< pointer for user's job-related data +} SHA512_JOB; + +/** @brief Scheduler layer - Holds arguments for submitted SHA512 job */ + +typedef struct { + sha512_digest_array digest; + uint8_t* data_ptr[SHA512_MAX_LANES]; +} SHA512_MB_ARGS_X8; + +/** @brief Scheduler layer - Lane data */ + +typedef struct { + SHA512_JOB *job_in_lane; +} SHA512_LANE_DATA; + +/** @brief Scheduler layer - Holds state for multi-buffer SHA512 jobs */ + +typedef struct { + SHA512_MB_ARGS_X8 args; + uint64_t lens[SHA512_MAX_LANES]; + uint64_t unused_lanes; //!< each byte is index (00, 01 or 00...03) of unused lanes, byte 2 or 4 is set to FF as a flag + SHA512_LANE_DATA ldata[SHA512_MAX_LANES]; + uint32_t num_lanes_inuse; +} SHA512_MB_JOB_MGR; + +/** @brief Context layer - Holds state for multi-buffer SHA512 jobs */ + +typedef struct { + SHA512_MB_JOB_MGR mgr; +} SHA512_HASH_CTX_MGR; + +/** @brief Context layer - Holds info describing a single SHA512 job for the multi-buffer CTX manager */ + +typedef struct { + SHA512_JOB job; // Must be at struct offset 0. + HASH_CTX_STS status; //!< Context status flag + HASH_CTX_ERROR error; //!< Context error flag + uint32_t total_length; //!< Running counter of length processed for this CTX's job + const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job + uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes. 
+ uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; //!< CTX partial blocks + uint32_t partial_block_buffer_length; + void* user_data; //!< pointer for user to keep any job-related data +} SHA512_HASH_CTX; + +/******************************************************************* + * Context level API function prototypes + ******************************************************************/ + +/** + * @brief Initialize the context level SHA512 multi-buffer manager structure. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_sse (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the context level multi-buffer manager. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_sse (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires SSE4.1 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_sse (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_avx (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. 
+ * @requires AVX + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires AVX + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_avx2 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx2 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires AVX2 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. 
+ */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx2 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_avx512 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx512 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx512 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires SSE4 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init_sb_sse4 (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires SSE4 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. 
+ */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit_sb_sse4 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires SSE4 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush_sb_sse4 (SHA512_HASH_CTX_MGR* mgr); + +/******************** multibinary function prototypes **********************/ + +/** + * @brief Initialize the SHA512 multi-buffer manager structure. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns void + */ +void sha512_ctx_mgr_init (SHA512_HASH_CTX_MGR* mgr); + +/** + * @brief Submit a new SHA512 job to the multi-buffer manager. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @param ctx Structure holding ctx job info + * @param buffer Pointer to buffer to be processed + * @param len Length of buffer (in bytes) to be processed + * @param flags Input flag specifying job type (first, update, last or entire) + * @returns NULL if no jobs complete or pointer to jobs structure. + */ +SHA512_HASH_CTX* sha512_ctx_mgr_submit (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx, + const void* buffer, uint32_t len, HASH_CTX_FLAG flags); + +/** + * @brief Finish all submitted SHA512 jobs and return when complete. + * @requires SSE4.1 or AVX or AVX2 or AVX512 + * + * @param mgr Structure holding context level state info + * @returns NULL if no jobs to complete or pointer to jobs structure. 
+ */ +SHA512_HASH_CTX* sha512_ctx_mgr_flush (SHA512_HASH_CTX_MGR* mgr); + +/******************************************************************* + * Scheduler (internal) level out-of-order function prototypes + ******************************************************************/ + +void sha512_mb_mgr_init_sse (SHA512_MB_JOB_MGR *state); +SHA512_JOB* sha512_mb_mgr_submit_sse (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_mb_mgr_flush_sse (SHA512_MB_JOB_MGR *state); + +#define sha512_mb_mgr_init_avx sha512_mb_mgr_init_sse +SHA512_JOB* sha512_mb_mgr_submit_avx (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_mb_mgr_flush_avx (SHA512_MB_JOB_MGR *state); + +void sha512_mb_mgr_init_avx2 (SHA512_MB_JOB_MGR *state); +SHA512_JOB* sha512_mb_mgr_submit_avx2 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_mb_mgr_flush_avx2 (SHA512_MB_JOB_MGR *state); + +void sha512_mb_mgr_init_avx512 (SHA512_MB_JOB_MGR *state); +SHA512_JOB* sha512_mb_mgr_submit_avx512 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_mb_mgr_flush_avx512 (SHA512_MB_JOB_MGR *state); + +// Single buffer SHA512 APIs, optimized for SLM. +void sha512_sse4 (const void* M, void* D, uint64_t L); +// Note that these APIs comply with multi-buffer APIs' high level usage +void sha512_sb_mgr_init_sse4 (SHA512_MB_JOB_MGR *state); +SHA512_JOB* sha512_sb_mgr_submit_sse4 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job); +SHA512_JOB* sha512_sb_mgr_flush_sse4 (SHA512_MB_JOB_MGR *state); + +#ifdef __cplusplus +} +#endif + +#endif // _SHA512_MB_H_ + + diff --git a/src/crypto/isa-l/isa-l_crypto/include/test.h b/src/crypto/isa-l/isa-l_crypto/include/test.h new file mode 100644 index 00000000..41a21626 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/test.h @@ -0,0 +1,81 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. 
+ + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+**********************************************************************/ + + +#ifndef _TEST_H +#define _TEST_H + +#ifdef __cplusplus +extern "C" { +#endif + +// Use sys/time.h functions for time + +#include <sys/time.h> + +struct perf{ + struct timeval tv; +}; + + +inline int perf_start(struct perf *p) +{ + return gettimeofday(&(p->tv), 0); +} +inline int perf_stop(struct perf *p) +{ + return gettimeofday(&(p->tv), 0); +} + +inline void perf_print(struct perf stop, struct perf start, long long dsize) +{ + long long secs = stop.tv.tv_sec - start.tv.tv_sec; + long long usecs = secs * 1000000 + stop.tv.tv_usec - start.tv.tv_usec; + + printf("runtime = %10lld usecs", usecs); + if (dsize != 0) { +#if 1 // not bug in printf for 32-bit + printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s\n", dsize/(1024*1024), + ((double) usecs)/1000000, ((double) dsize) / (double)usecs); +#else + printf(", bandwidth %lld MB ", dsize/(1024*1024)); + printf("in %.4f sec ",(double)usecs/1000000); + printf("= %.2f MB/s\n", (double)dsize/usecs); +#endif + } + else + printf("\n"); +} + + +#ifdef __cplusplus +} +#endif + +#endif // _TEST_H diff --git a/src/crypto/isa-l/isa-l_crypto/include/types.h b/src/crypto/isa-l/isa-l_crypto/include/types.h new file mode 100644 index 00000000..caf3bac5 --- /dev/null +++ b/src/crypto/isa-l/isa-l_crypto/include/types.h @@ -0,0 +1,71 @@ +/********************************************************************** + Copyright(c) 2011-2016 Intel Corporation All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. 
+ * Neither the name of Intel Corporation nor the names of its
+      contributors may be used to endorse or promote products derived
+      from this software without specific prior written permission.
+
+  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+/**
+ *  @file  types.h
+ *  @brief Defines common align and debug macros
+ *
+ */
+
+// NOTE(review): "__TYPES_H" uses a reserved identifier (double leading
+// underscore belongs to the implementation, C11 7.1.3); a rename such as
+// ISAL_TYPES_H would be safer but is upstream's choice — flagging only.
+#ifndef __TYPES_H
+#define __TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// Platform shims: GCC/Clang-style attribute alignment and plain free()
+// on Unix/macOS; _aligned_malloc/_aligned_free plus a posix_memalign
+// emulation macro on Windows toolchains (MinGW and MSVC).
+#if defined __unix__ || defined __APPLE__
+# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+# define __forceinline static inline
+# define aligned_free(x) free(x)
+#else
+# ifdef __MINGW32__
+#  define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+   // Emulates posix_memalign's contract: stores the block in *p and
+   // returns non-zero (true) only when _aligned_malloc returns NULL.
+#  define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
+#  define aligned_free(x) _aligned_free(x)
+# else
+#  define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
+#  define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
+#  define aligned_free(x) _aligned_free(x)
+# endif
+#endif
+
+// DEBUG_PRINT((...)) expands to printf(...) in DEBUG builds and to a
+// no-op statement otherwise; callers pass the printf argument list in
+// an extra set of parentheses.
+#ifdef DEBUG
+# define DEBUG_PRINT(x) printf x
+#else
+# define DEBUG_PRINT(x) do {} while (0)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //__TYPES_H |