summaryrefslogtreecommitdiffstats
path: root/src/crypto/isa-l/isa-l_crypto/include
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/crypto/isa-l/isa-l_crypto/include
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/crypto/isa-l/isa-l_crypto/include')
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h161
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h340
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h76
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/aes_xts.h214
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/datastruct.asm79
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/intrinreg.h82
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/md5_mb.h372
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/memcpy.asm346
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h363
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h315
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h327
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h127
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/multibinary.asm271
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm149
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h377
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h376
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h422
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/test.h81
-rw-r--r--src/crypto/isa-l/isa-l_crypto/include/types.h71
19 files changed, 4549 insertions, 0 deletions
diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h b/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h
new file mode 100644
index 000000000..bff4a62d8
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/aes_cbc.h
@@ -0,0 +1,161 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/**
+ * @file aes_cbc.h
+ * @brief AES CBC encryption/decryption function prototypes.
+ *
+; References:
+ */
+#ifndef _AES_CBC_h
+#define _AES_CBC_h
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+
+#endif
+
+typedef enum cbc_key_size { CBC_128_BITS = 16, CBC_192_BITS = 24, CBC_256_BITS = 32} cbc_key_size;
+#define CBC_ROUND_KEY_LEN (16)
+#define CBC_128_KEY_ROUNDS (10+1) /*expanded key holds 10 key rounds plus original key*/
+#define CBC_192_KEY_ROUNDS (12+1) /*expanded key holds 12 key rounds plus original key*/
+#define CBC_256_KEY_ROUNDS (14+1) /*expanded key holds 14 key rounds plus original key*/
+#define CBC_MAX_KEYS_SIZE (CBC_ROUND_KEY_LEN * CBC_256_KEY_ROUNDS)
+
+#define CBC_IV_DATA_LEN (16)
+
+/** @brief holds intermediate key data used in encryption/decryption
+ *
+ */
+struct cbc_key_data { // must be 16 byte aligned
+ uint8_t enc_keys[CBC_MAX_KEYS_SIZE];
+ uint8_t dec_keys[CBC_MAX_KEYS_SIZE];
+};
+
+/** @brief CBC-AES key pre-computation done once for a key
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ * arg 1: in: pointer to key
+ * arg 2: OUT: pointer to a key expanded data
+ */
+int aes_cbc_precomp(
+ uint8_t *key,
+ int key_size,
+ struct cbc_key_data *keys_blk
+);
+
+/** @brief CBC-AES 128 bit key Decryption
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ * arg 1: in: pointer to input (cipher text)
+ * arg 2: IV: pointer to IV, Must be 16 bytes aligned to a 16 byte boundary
+ * arg 3: keys: pointer to keys, Must be on a 16 byte boundary and length of key size * key rounds
+ * arg 4: OUT: pointer to output (plain text ... in-place allowed)
+ * arg 5: len_bytes: length in bytes (multiple of 16)
+ */
+void aes_cbc_dec_128(
+ void *in,
+ uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary
+ uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data
+ void *out,
+ uint64_t len_bytes); //!< Must be a multiple of 16 bytes
+
+/** @brief CBC-AES 192 bit key Decryption
+ *
+* @requires SSE4.1 and AESNI
+*
+*/
+void aes_cbc_dec_192(
+ void *in,
+ uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary
+ uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data
+ void *out,
+ uint64_t len_bytes); //!< Must be a multiple of 16 bytes
+
+/** @brief CBC-AES 256 bit key Decryption
+ *
+* @requires SSE4.1 and AESNI
+*
+*/
+void aes_cbc_dec_256(
+ void *in,
+ uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary
+ uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or dec_keys of cbc_key_data
+ void *out,
+ uint64_t len_bytes); //!< Must be a multiple of 16 bytes
+
+/** @brief CBC-AES 128 bit key Encryption
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ * arg 1: in: pointer to input (plain text)
+ * arg 2: IV: pointer to IV, Must be 16 bytes aligned to a 16 byte boundary
+ * arg 3: keys: pointer to keys, Must be on a 16 byte boundary and length of key size * key rounds
+ * arg 4: OUT: pointer to output (cipher text ... in-place allowed)
+ * arg 5: len_bytes: length in bytes (multiple of 16)
+ */
+int aes_cbc_enc_128(
+ void *in,
+ uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary
+ uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data
+ void *out,
+ uint64_t len_bytes); //!< Must be a multiple of 16 bytes
+
+/** @brief CBC-AES 192 bit key Encryption
+ *
+* @requires SSE4.1 and AESNI
+*
+*/
+int aes_cbc_enc_192(
+ void *in,
+ uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary
+ uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data
+ void *out,
+ uint64_t len_bytes); //!< Must be a multiple of 16 bytes
+
+/** @brief CBC-AES 256 bit key Encryption
+ *
+* @requires SSE4.1 and AESNI
+*
+*/
+int aes_cbc_enc_256(
+ void *in,
+ uint8_t *IV, //!< Must be 16 bytes aligned to a 16 byte boundary
+ uint8_t *keys, //!< Must be on a 16 byte boundary and length of key size * key rounds or enc_keys of cbc_key_data
+ void *out,
+ uint64_t len_bytes); //!< Must be a multiple of 16 bytes
+
+#ifdef __cplusplus
+}
+#endif //__cplusplus
+#endif //ifndef _AES_CBC_h
diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h b/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h
new file mode 100644
index 000000000..1e7127e95
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/aes_gcm.h
@@ -0,0 +1,340 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+/**
+ * @file aes_gcm.h
+ * @brief AES GCM encryption/decryption function prototypes.
+ *
+ * At build time there is an option to use non-temporal loads and stores
+ * selected by defining the compile time option NT_LDST. The use of this option
+ * places the following restriction on the gcm encryption functions:
+ *
+ * - The plaintext and cyphertext buffers must be aligned on a 16 byte boundary.
+ *
+ * - When using the streaming API, all partial input buffers must be a multiple
+ * of 16 bytes long except for the last input buffer.
+ *
+ * - In-place encryption/decryption is not recommended.
+ *
+ */
+
+/*
+; References:
+; This code was derived and highly optimized from the code described in paper:
+; Vinodh Gopal et. al. Optimized Galois-Counter-Mode Implementation on Intel Architecture Processors. August, 2010
+;
+; For the shift-based reductions used in this code, we used the method described in paper:
+; Shay Gueron, Michael E. Kounavis. Intel Carry-Less Multiplication Instruction and its Usage for Computing the GCM Mode. January, 2010.
+;
+;
+;
+; Assumptions: Support for SSE4.1 or greater, AVX or AVX2
+;
+;
+; iv:
+; 0 1 2 3
+; 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+; | Salt (From the SA) |
+; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+; | Initialization Vector |
+; | (This is the sequence number from IPSec header) |
+; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+; | 0x1 |
+; +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+;
+; TLen:
+; from the definition of the spec, TLen can only be 8, 12 or 16 bytes.
+;
+ */
+#ifndef _AES_GCM_h
+#define _AES_GCM_h
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8. */
+#define MAX_TAG_LEN (16)
+//
+// IV data is limited to 16 bytes. The last DWORD (4 bytes) must be 0x1
+//
+#define GCM_IV_LEN (16)
+#define GCM_IV_DATA_LEN (12)
+#define GCM_IV_END_MARK {0x00, 0x00, 0x00, 0x01};
+#define GCM_IV_END_START (12)
+
+#define LONGEST_TESTED_AAD_LENGTH (2* 1024)
+
+// Key lengths of 128 and 256 supported
+#define GCM_128_KEY_LEN (16)
+#define GCM_256_KEY_LEN (32)
+
+#define GCM_BLOCK_LEN 16
+#define GCM_ENC_KEY_LEN 16
+#define GCM_KEY_SETS (15) /*exp key + 14 exp round keys*/
+/** @brief holds intermediate key data needed to improve performance
+ *
+ * gcm_data hold internal key information used by gcm128 and gcm256.
+ */
+struct gcm_data {
+ uint8_t expanded_keys[GCM_ENC_KEY_LEN * GCM_KEY_SETS];
+ uint8_t shifted_hkey_1[GCM_ENC_KEY_LEN]; // store HashKey <<1 mod poly here
+ uint8_t shifted_hkey_2[GCM_ENC_KEY_LEN]; // store HashKey^2 <<1 mod poly here
+ uint8_t shifted_hkey_3[GCM_ENC_KEY_LEN]; // store HashKey^3 <<1 mod poly here
+ uint8_t shifted_hkey_4[GCM_ENC_KEY_LEN]; // store HashKey^4 <<1 mod poly here
+ uint8_t shifted_hkey_5[GCM_ENC_KEY_LEN]; // store HashKey^5 <<1 mod poly here
+ uint8_t shifted_hkey_6[GCM_ENC_KEY_LEN]; // store HashKey^6 <<1 mod poly here
+ uint8_t shifted_hkey_7[GCM_ENC_KEY_LEN]; // store HashKey^7 <<1 mod poly here
+ uint8_t shifted_hkey_8[GCM_ENC_KEY_LEN]; // store HashKey^8 <<1 mod poly here
+ uint8_t shifted_hkey_1_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey <<1 mod poly here (for Karatsuba purposes)
+ uint8_t shifted_hkey_2_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^2 <<1 mod poly here (for Karatsuba purposes)
+ uint8_t shifted_hkey_3_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^3 <<1 mod poly here (for Karatsuba purposes)
+ uint8_t shifted_hkey_4_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^4 <<1 mod poly here (for Karatsuba purposes)
+ uint8_t shifted_hkey_5_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^5 <<1 mod poly here (for Karatsuba purposes)
+ uint8_t shifted_hkey_6_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^6 <<1 mod poly here (for Karatsuba purposes)
+ uint8_t shifted_hkey_7_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^7 <<1 mod poly here (for Karatsuba purposes)
+ uint8_t shifted_hkey_8_k[GCM_ENC_KEY_LEN]; // store XOR of High 64 bits and Low 64 bits of HashKey^8 <<1 mod poly here (for Karatsuba purposes)
+ // init, update and finalize context data
+ uint8_t aad_hash[GCM_BLOCK_LEN];
+ uint64_t aad_length;
+ uint64_t in_length;
+ uint8_t partial_block_enc_key[GCM_BLOCK_LEN];
+ uint8_t orig_IV[GCM_BLOCK_LEN];
+ uint8_t current_counter[GCM_BLOCK_LEN];
+ uint64_t partial_block_length;
+};
+
+/**
+ * @brief GCM-AES Encryption using 128 bit keys
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm128_enc(struct gcm_data *my_ctx_data,
+ uint8_t * out, //!< Ciphertext output. Encrypt in-place is allowed.
+ uint8_t const *in, //!< Plaintext input
+ uint64_t plaintext_len, //!< Length of data in Bytes for encryption.
+ uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialization Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer.
+ uint8_t const *aad, //!< Additional Authentication Data (AAD).
+ uint64_t aad_len, //!< Length of AAD.
+ uint8_t * auth_tag, //!< Authenticated Tag output.
+ uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). Valid values are 16 (most likely), 12 or 8.
+ );
+
+
+/**
+ * @brief GCM-AES Decryption using 128 bit keys
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm128_dec(struct gcm_data *my_ctx_data,
+ uint8_t * out, //!< Plaintext output. Decrypt in-place is allowed.
+ uint8_t const *in, //!< Ciphertext input
+ uint64_t plaintext_len, //!< Length of data in Bytes for encryption.
+ uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer.
+ uint8_t const *aad, //!< Additional Authentication Data (AAD).
+ uint64_t aad_len, //!< Length of AAD.
+ uint8_t * auth_tag, //!< Authenticated Tag output.
+ uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). Valid values are 16 (most likely), 12 or 8.
+ );
+
+/**
+ * @brief start a AES-128-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm128_init( struct gcm_data *my_ctx_data,
+ uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialization Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer.
+ uint8_t const *aad, //!< Additional Authentication Data (AAD).
+ uint64_t aad_len //!< Length of AAD.
+ );
+
+/**
+ * @brief encrypt a block of a AES-128-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm128_enc_update( struct gcm_data *my_ctx_data,
+ uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed.
+ const uint8_t *in, //!< Plaintext input
+ uint64_t plaintext_len //!< Length of data in Bytes for encryption.
+ );
+
+/**
+ * @brief decrypt a block of a AES-128-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm128_dec_update( struct gcm_data *my_ctx_data,
+ uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed.
+ const uint8_t *in, //!< Plaintext input
+ uint64_t plaintext_len //!< Length of data in Bytes for encryption.
+ );
+
+/**
+ * @brief End encryption of a AES-128-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm128_enc_finalize( struct gcm_data *my_ctx_data,
+ uint8_t *auth_tag, //!< Authenticated Tag output.
+ uint64_t auth_tag_len //!< Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8.
+ );
+
+/**
+ * @brief End decryption of a AES-128-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm128_dec_finalize( struct gcm_data *my_ctx_data,
+ uint8_t *auth_tag, //!< Authenticated Tag output.
+ uint64_t auth_tag_len //!< Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8.
+ );
+
+/**
+ * @brief pre-processes key data
+ *
+ * Prefills the gcm data with key values for each round and the initial sub hash key for tag encoding
+ */
+void aesni_gcm128_pre(uint8_t * key, struct gcm_data *gdata
+ );
+
+/**
+ * @brief GCM-AES Encryption using 256 bit keys
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm256_enc(struct gcm_data *my_ctx_data,
+ uint8_t * out, //!< Ciphertext output. Encrypt in-place is allowed.
+ uint8_t const *in, //!< Plaintext input
+ uint64_t plaintext_len, //!< Length of data in Bytes for encryption.
+ uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialization Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer.
+ uint8_t const *aad, //!< Additional Authentication Data (AAD).
+ uint64_t aad_len, //!< Length of AAD.
+ uint8_t * auth_tag, //!< Authenticated Tag output.
+ uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). Valid values are 16 (most likely), 12 or 8.
+ );
+
+
+/**
+ * @brief GCM-AES Decryption using 256 bit keys
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm256_dec(struct gcm_data *my_ctx_data,
+ uint8_t * out, //!< Plaintext output. Decrypt in-place is allowed.
+ uint8_t const *in, //!< Ciphertext input
+ uint64_t plaintext_len, //!< Length of data in Bytes for encryption.
+ uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialisation Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer.
+ uint8_t const *aad, //!< Additional Authentication Data (AAD).
+ uint64_t aad_len, //!< Length of AAD.
+ uint8_t * auth_tag, //!< Authenticated Tag output.
+ uint64_t auth_tag_len //!< Authenticated Tag Length in bytes (must be a multiple of 4 bytes). Valid values are 16 (most likely), 12 or 8.
+ );
+
+/**
+ * @brief start a AES-256-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm256_init( struct gcm_data *my_ctx_data,
+ uint8_t * iv, //!< Pre-counter block j0: 4 byte salt (from Security Association) concatenated with 8 byte Initialization Vector (from IPSec ESP Payload) concatenated with 0x00000001. 16-byte pointer.
+ uint8_t const *aad, //!< Additional Authentication Data (AAD).
+ uint64_t aad_len //!< Length of AAD.
+ );
+
+/**
+ * @brief encrypt a block of a AES-256-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm256_enc_update( struct gcm_data *my_ctx_data,
+ uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed.
+ const uint8_t *in, //!< Plaintext input
+ uint64_t plaintext_len //!< Length of data in Bytes for encryption.
+ );
+
+/**
+ * @brief decrypt a block of a AES-256-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm256_dec_update( struct gcm_data *my_ctx_data,
+ uint8_t *out, //!< Ciphertext output. Encrypt in-place is allowed.
+ const uint8_t *in, //!< Plaintext input
+ uint64_t plaintext_len //!< Length of data in Bytes for encryption.
+ );
+
+/**
+ * @brief End encryption of a AES-256-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm256_enc_finalize( struct gcm_data *my_ctx_data,
+ uint8_t *auth_tag, //!< Authenticated Tag output.
+ uint64_t auth_tag_len //!< Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8.
+ );
+
+/**
+ * @brief End decryption of a AES-256-GCM Encryption message
+ *
+ * @requires SSE4.1 and AESNI
+ *
+ */
+void aesni_gcm256_dec_finalize( struct gcm_data *my_ctx_data,
+ uint8_t *auth_tag, //!< Authenticated Tag output.
+ uint64_t auth_tag_len //!< Authenticated Tag Length in bytes. Valid values are 16 (most likely), 12 or 8.
+ );
+
+/**
+ * @brief pre-processes key data
+ *
+ * Prefills the gcm data with key values for each round and the initial sub hash key for tag encoding
+ */
+void aesni_gcm256_pre(uint8_t * key, struct gcm_data *gdata);
+
+#ifdef __cplusplus
+}
+#endif //__cplusplus
+#endif //ifndef _AES_GCM_h
diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h b/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h
new file mode 100644
index 000000000..dbbe25d14
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/aes_keyexp.h
@@ -0,0 +1,76 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _KEYEXP_128_H
+#define _KEYEXP_128_H
+
+/**
+ * @file aes_keyexp.h
+ * @brief AES key expansion functions
+ *
+ * This defines the interface to key expansion functions.
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief AES key expansion 128 bit
+* @requires SSE4.1
+*/
+void aes_keyexp_128(
+ uint8_t *key, //!< input key for AES-128, 16 bytes
+ uint8_t *exp_key_enc, //!< expanded encryption keys, 16*11 bytes
+ uint8_t *exp_key_dec //!< expanded decryption keys, 16*11 bytes
+ );
+
+/** @brief AES key expansion 192 bit
+* @requires SSE4.1
+*/
+void aes_keyexp_192(
+ uint8_t *key, //!< input key for AES-192, 16*1.5 bytes
+ uint8_t *exp_key_enc, //!< expanded encryption keys, 16*13 bytes
+ uint8_t *exp_key_dec //!< expanded decryption keys, 16*13 bytes
+ );
+
+/** @brief AES key expansion 256 bit
+* @requires SSE4.1
+*/
+void aes_keyexp_256(
+ uint8_t *key, //!< input key for AES-256, 16*2 bytes
+ uint8_t *exp_key_enc, //!< expanded encryption keys, 16*15 bytes
+ uint8_t *exp_key_dec //!< expanded decryption keys, 16*15 bytes
+ );
+
+#ifdef __cplusplus
+}
+#endif //__cplusplus
+#endif //ifndef _KEYEXP_128_H
diff --git a/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h b/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h
new file mode 100644
index 000000000..b2d569851
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/aes_xts.h
@@ -0,0 +1,214 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#ifndef _AES_XTS_H
+#define _AES_XTS_H
+
+/**
+ * @file aes_xts.h
+ * @brief AES XTS encryption function prototypes.
+ *
+ * This defines the interface to optimized AES XTS functions
+
+<b>Pre-expanded keys</b>
+
+For key encryption, pre-expanded keys are stored in the order that they will be
+used. As an example, if Key[0] is the 128-bit initial key used for an AES-128
+encryption, the rest of the keys are stored as follows:
+
+<ul>
+ <li> Key[0] : Initial encryption key
+ <li> Key[1] : Round 1 encryption key
+ <li> Key[2] : Round 2 encryption key
+ <li> ...
+ <li> Key[10] : Round 10 encryption key
+</ul>
+
+For decryption, the order of keys is reversed. However, we apply the
+necessary aesimc instructions before storing the expanded keys. For the same key
+used above, the pre-expanded keys will be stored as follows:
+
+<ul>
+ <li> Key[0] : Round 10 encryption key
+ <li> Key[1] : aesimc(Round 9 encryption key)
+ <li> Key[2] : aesimc(Round 8 encryption key)
+ <li> ...
+ <li> Key[9] : aesimc(Round 1 encryption key)
+ <li> Key[10] : Initial encryption key
+</ul>
+
+<b>Note:</b> The expanded key decryption requires a decryption key only for the block
+decryption step. The tweak step in the expanded key decryption requires the same expanded
+encryption key that is used in the expanded key encryption.
+
+<b>Input and Output Buffers </b>
+
+The input and output buffers can be overlapping as long as the output buffer
+pointer is not less than the input buffer pointer. If the two pointers are the
+same, then encryption/decryption will occur in-place.
+
+<b>Data Length</b>
+
+<ul>
+ <li> The functions support data length of any bytes greater than or equal to 16 bytes.
+ <li> Data length is a 64-bit value, which makes the largest possible data length
+ 2^64 - 1 bytes.
+ <li> For data lengths from 0 to 15 bytes, the functions return without any error
+ codes, without reading or writing any data.
+ <li> The functions only support byte lengths, not bits.
+</ul>
+
+<b>Initial Tweak</b>
+
+The functions accept a 128-bit initial tweak value. The user is responsible for
+padding the initial tweak value to this length.
+
+<b>Data Alignment</b>
+
+The input and output buffers, keys, pre-expanded keys and initial tweak value
+are not required to be aligned to 16 bytes, any alignment works.
+
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/** @brief XTS-AES-128 Encryption
+ * @requires AES-NI
+ */
+
+void XTS_AES_128_enc(
+ uint8_t *k2, //!< key used for tweaking, 16 bytes
+ uint8_t *k1, //!< key used for encryption of tweaked plaintext, 16 bytes
+ uint8_t *TW_initial, //!< initial tweak value, 16 bytes
+ uint64_t N, //!< sector size, in bytes
+ const uint8_t *pt, //!< plaintext sector input data
+ uint8_t *ct //!< ciphertext sector output data
+ );
+
+/** @brief XTS-AES-128 Encryption with pre-expanded keys
+ * @requires AES-NI
+ */
+
+void XTS_AES_128_enc_expanded_key(
+ uint8_t *k2, //!< expanded key used for tweaking, 16*11 bytes
+ uint8_t *k1, //!< expanded key used for encryption of tweaked plaintext, 16*11 bytes
+ uint8_t *TW_initial, //!< initial tweak value, 16 bytes
+ uint64_t N, //!< sector size, in bytes
+ const uint8_t *pt, //!< plaintext sector input data
+ uint8_t *ct //!< ciphertext sector output data
+ );
+
+/** @brief XTS-AES-128 Decryption
+ * @requires AES-NI
+ */
+
+void XTS_AES_128_dec(
+ uint8_t *k2, //!< key used for tweaking, 16 bytes
+ uint8_t *k1, //!< key used for decryption of tweaked ciphertext, 16 bytes
+ uint8_t *TW_initial, //!< initial tweak value, 16 bytes
+ uint64_t N, //!< sector size, in bytes
+ const uint8_t *ct, //!< ciphertext sector input data
+ uint8_t *pt //!< plaintext sector output data
+ );
+
+/** @brief XTS-AES-128 Decryption with pre-expanded keys
+ * @requires AES-NI
+ */
+
+void XTS_AES_128_dec_expanded_key(
+ uint8_t *k2, //!< expanded key used for tweaking, 16*11 bytes - encryption key is used
+ uint8_t *k1, //!< expanded decryption key used for decryption of tweaked ciphertext, 16*11 bytes
+ uint8_t *TW_initial, //!< initial tweak value, 16 bytes
+ uint64_t N, //!< sector size, in bytes
+ const uint8_t *ct, //!< ciphertext sector input data
+ uint8_t *pt //!< plaintext sector output data
+ );
+
+/** @brief XTS-AES-256 Encryption
+ * @requires AES-NI
+ */
+
+void XTS_AES_256_enc(
+ uint8_t *k2, //!< key used for tweaking, 16*2 bytes
+ uint8_t *k1, //!< key used for encryption of tweaked plaintext, 16*2 bytes
+ uint8_t *TW_initial, //!< initial tweak value, 16 bytes
+ uint64_t N, //!< sector size, in bytes
+ const uint8_t *pt, //!< plaintext sector input data
+ uint8_t *ct //!< ciphertext sector output data
+ );
+
+/** @brief XTS-AES-256 Encryption with pre-expanded keys
+ * @requires AES-NI
+ */
+
+void XTS_AES_256_enc_expanded_key(
+ uint8_t *k2, //!< expanded key used for tweaking, 16*15 bytes
+ uint8_t *k1, //!< expanded key used for encryption of tweaked plaintext, 16*15 bytes
+ uint8_t *TW_initial, //!< initial tweak value, 16 bytes
+ uint64_t N, //!< sector size, in bytes
+ const uint8_t *pt, //!< plaintext sector input data
+ uint8_t *ct //!< ciphertext sector output data
+ );
+
+/** @brief XTS-AES-256 Decryption
+ * @requires AES-NI
+ */
+
+void XTS_AES_256_dec(
+ uint8_t *k2, //!< key used for tweaking, 16*2 bytes
+ uint8_t *k1, //!< key used for decryption of tweaked ciphertext, 16*2 bytes
+ uint8_t *TW_initial, //!< initial tweak value, 16 bytes
+ uint64_t N, //!< sector size, in bytes
+ const uint8_t *ct, //!< ciphertext sector input data
+ uint8_t *pt //!< plaintext sector output data
+ );
+
+/** @brief XTS-AES-256 Decryption with pre-expanded keys
+ * @requires AES-NI
+ */
+
+void XTS_AES_256_dec_expanded_key(
+ uint8_t *k2, //!< expanded key used for tweaking, 16*15 bytes - encryption key is used
+ uint8_t *k1, //!< expanded decryption key used for decryption of tweaked ciphertext, 16*15 bytes
+ uint8_t *TW_initial, //!< initial tweak value, 16 bytes
+ uint64_t N, //!< sector size, in bytes
+ const uint8_t *ct, //!< ciphertext sector input data
+ uint8_t *pt //!< plaintext sector output data
+ );
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //_AES_XTS_H
diff --git a/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm b/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm
new file mode 100644
index 000000000..882e497f8
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/datastruct.asm
@@ -0,0 +1,79 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Macros for defining data structures
+
+; Usage example
+
+;START_FIELDS ; JOB_AES
+;;; name size align
+;FIELD _plaintext, 8, 8 ; pointer to plaintext
+;FIELD _ciphertext, 8, 8 ; pointer to ciphertext
+;FIELD _IV, 16, 8 ; IV
+;FIELD _keys, 8, 8 ; pointer to keys
+;FIELD _len, 4, 4 ; length in bytes
+;FIELD _status, 4, 4 ; status enumeration
+;FIELD _user_data, 8, 8 ; pointer to user data
+;UNION _union, size1, align1, \
+ size2, align2, \
+ size3, align3, \
+ ...
+;END_FIELDS
+;%assign _JOB_AES_size _FIELD_OFFSET
+;%assign _JOB_AES_align _STRUCT_ALIGN
+
+%ifndef _DATASTRUCT_ASM_
+%define _DATASTRUCT_ASM_
+
+;; START_FIELDS
+%macro START_FIELDS 0
+%assign _FIELD_OFFSET 0
+%assign _STRUCT_ALIGN 0
+%endm
+
+;; FIELD name size align
+%macro FIELD 3
+%define %%name %1
+%define %%size %2
+%define %%align %3
+
+%assign _FIELD_OFFSET (_FIELD_OFFSET + (%%align) - 1) & (~ ((%%align)-1))
+%%name equ _FIELD_OFFSET
+%assign _FIELD_OFFSET _FIELD_OFFSET + (%%size)
+%if (%%align > _STRUCT_ALIGN)
+%assign _STRUCT_ALIGN %%align
+%endif
+%endm
+
+;; END_FIELDS
+%macro END_FIELDS 0
+%assign _FIELD_OFFSET (_FIELD_OFFSET + _STRUCT_ALIGN-1) & (~ (_STRUCT_ALIGN-1))
+%endm
+
+%endif ; end ifdef _DATASTRUCT_ASM_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h b/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h
new file mode 100644
index 000000000..8ddf3b1a5
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/intrinreg.h
@@ -0,0 +1,82 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+/**
+ * @file intrinreg.h
+ * @brief Defines intrinsic types used by the new hashing API
+ *
+ */
+
+#ifndef _IA64_REGS_H_
+#define _IA64_REGS_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _MSC_VER
+# define inline __inline
+#endif
+
+#ifndef __SSE__
+# define __ERR_COMPILER_FLAGS
+#endif
+
+#ifdef __ERR_COMPILER_FLAGS
+
+#ifdef __GNUC__
+# error "The compiler must be configured to optimize for speed and support at least SSE instructions."
+# error "Please specify the following gcc flags: -O3 [-mavx]"
+#endif
+
+#ifdef _MSC_VER
+# error "Please compile with ''/O2 /D__SSE__ /arch:SSE'' -OR- ''/O2 /D__AVX__ /arch:AVX''"
+#endif
+
+#endif
+
+#include <stdint.h>
+#include <immintrin.h>
+
+// Define available register types uniformly.
+/// @cond
+typedef struct{ uint8_t dummy; } intrinreg1;
+typedef struct{ uint16_t dummy; } intrinreg2;
+typedef struct{ uint32_t dummy; } intrinreg4;
+typedef struct{ uint64_t dummy; } intrinreg8;
+typedef __m128 intrinreg16;
+/// @endcond
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _IA64_REGS_H_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h b/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h
new file mode 100644
index 000000000..17a1b36ef
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/md5_mb.h
@@ -0,0 +1,372 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MD5_MB_H_
+#define _MD5_MB_H_
+
+/**
+ * @file md5_mb.h
+ * @brief Multi-buffer CTX API MD5 function prototypes and structures
+ *
+ * Interface for multi-buffer MD5 functions
+ *
+ * <b> Multi-buffer MD5 Entire or First-Update..Update-Last </b>
+ *
+ * The interface to this multi-buffer hashing code is carried out through the
+ * context-level (CTX) init, submit and flush functions and the MD5_HASH_CTX_MGR and
+ * MD5_HASH_CTX objects. Numerous MD5_HASH_CTX objects may be instantiated by the
+ * application for use with a single MD5_HASH_CTX_MGR.
+ *
+ * The CTX interface functions carry out the initialization and padding of the jobs
+ * entered by the user and add them to the multi-buffer manager. The lower level "scheduler"
+ * layer then processes the jobs in an out-of-order manner. The scheduler layer functions
+ * are internal and are not intended to be invoked directly. Jobs can be submitted
+ * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial
+ * jobs which can be started using the HASH_FIRST flag, and later resumed or finished
+ * using the HASH_UPDATE and HASH_LAST flags respectively.
+ *
+ * <b>Note:</b> The submit function does not require data buffers to be block sized.
+ *
+ * The MD5 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and
+ * AVX512. In addition, a multibinary interface is provided, which selects the appropriate
+ * architecture-specific function at runtime.
+ *
+ * <b>Usage:</b> The application creates a MD5_HASH_CTX_MGR object and initializes it
+ * with a call to md5_ctx_mgr_init*() function, where henceforth "*" stands for the
+ * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512 (or no suffix for the
+ * multibinary version). The MD5_HASH_CTX_MGR object will be used to schedule processor
+ * resources, with up to 8 MD5_HASH_CTX objects (or 16 in AVX2 case, 32 in AVX512 case)
+ * being processed at a time.
+ *
+ * Each MD5_HASH_CTX must be initialized before first use by the hash_ctx_init macro
+ * defined in multi_buffer.h. After initialization, the application may begin computing
+ * a hash by giving the MD5_HASH_CTX to a MD5_HASH_CTX_MGR using the submit functions
+ * md5_ctx_mgr_submit*() with the HASH_FIRST flag set. When the MD5_HASH_CTX is
+ * returned to the application (via this or a later call to md5_ctx_mgr_submit*() or
+ * md5_ctx_mgr_flush*()), the application can then re-submit it with another call to
+ * md5_ctx_mgr_submit*(), but without the HASH_FIRST flag set.
+ *
+ * Ideally, on the last buffer for that hash, md5_ctx_mgr_submit_sse is called with
+ * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero
+ * length if necessary. When a MD5_HASH_CTX is returned after having been submitted with
+ * HASH_LAST, it will contain a valid hash. The MD5_HASH_CTX can be reused immediately
+ * by submitting with HASH_FIRST.
+ *
+ * For example, you would submit hashes with the following flags for the following numbers
+ * of buffers:
+ * <ul>
+ * <li> one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE)
+ * <li> two buffers: HASH_FIRST, HASH_LAST
+ * <li> three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST
+ * etc.
+ * </ul>
+ *
+ * The order in which MD5_CTX objects are returned is in general different from the order
+ * in which they are submitted.
+ *
+ * A few possible error conditions exist:
+ * <ul>
+ * <li> Submitting flags other than the allowed entire/first/update/last values
+ * <li> Submitting a context that is currently being managed by a MD5_HASH_CTX_MGR.
+ * <li> Submitting a context after HASH_LAST is used but before HASH_FIRST is set.
+ * </ul>
+ *
+ * These error conditions are reported by returning the MD5_HASH_CTX immediately after
+ * a submit with its error member set to a non-zero error code (defined in
+ * multi_buffer.h). No changes are made to the MD5_HASH_CTX_MGR in the case of an
+ * error; no processing is done for other hashes.
+ *
+ */
+
+#include <stdint.h>
+#include "multi_buffer.h"
+#include "types.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Hash Constants and Typedefs
+#define MD5_DIGEST_NWORDS 4
+#define MD5_MAX_LANES 32
+#define MD5_MIN_LANES 8
+#define MD5_BLOCK_SIZE 64
+#define MD5_LOG2_BLOCK_SIZE 6
+#define MD5_PADLENGTHFIELD_SIZE 8
+#define MD5_INITIAL_DIGEST \
+ 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476
+
+typedef uint32_t md5_digest_array[MD5_DIGEST_NWORDS][MD5_MAX_LANES];
+typedef uint32_t MD5_WORD_T;
+
+/** @brief Scheduler layer - Holds info describing a single MD5 job for the multi-buffer manager */
+
+typedef struct {
+ uint8_t* buffer; //!< pointer to data buffer for this job
+ uint32_t len; //!< length of buffer for this job in blocks.
+ DECLARE_ALIGNED(uint32_t result_digest[MD5_DIGEST_NWORDS],64);
+ JOB_STS status; //!< output job status
+ void* user_data; //!< pointer for user's job-related data
+} MD5_JOB;
+
+/** @brief Scheduler layer - Holds arguments for submitted MD5 job */
+
+typedef struct {
+ md5_digest_array digest;
+ uint8_t* data_ptr[MD5_MAX_LANES];
+} MD5_MB_ARGS_X32;
+
+/** @brief Scheduler layer - Lane data */
+
+typedef struct {
+ MD5_JOB *job_in_lane;
+} MD5_LANE_DATA;
+
+/** @brief Scheduler layer - Holds state for multi-buffer MD5 jobs */
+
+typedef struct {
+ MD5_MB_ARGS_X32 args;
+ uint32_t lens[MD5_MAX_LANES];
+ uint64_t unused_lanes[4]; //!< each byte or nibble is index (0...31 or 15) of unused lanes.
+ MD5_LANE_DATA ldata[MD5_MAX_LANES];
+ uint32_t num_lanes_inuse;
+} MD5_MB_JOB_MGR;
+
+/** @brief Context layer - Holds state for multi-buffer MD5 jobs */
+
+typedef struct {
+ MD5_MB_JOB_MGR mgr;
+} MD5_HASH_CTX_MGR;
+
+/** @brief Context layer - Holds info describing a single MD5 job for the multi-buffer CTX manager */
+
+typedef struct {
+ MD5_JOB job; // Must be at struct offset 0.
+ HASH_CTX_STS status; //!< Context status flag
+ HASH_CTX_ERROR error; //!< Context error flag
+ uint32_t total_length; //!< Running counter of length processed for this CTX's job
+ const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job
+ uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes.
+ uint8_t partial_block_buffer[MD5_BLOCK_SIZE * 2]; //!< CTX partial blocks
+ uint32_t partial_block_buffer_length;
+ void* user_data; //!< pointer for user to keep any job-related data
+} MD5_HASH_CTX;
+
+/*******************************************************************
+ * CTX level API function prototypes
+ ******************************************************************/
+
+/**
+ * @brief Initialize the context level MD5 multi-buffer manager structure.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void md5_ctx_mgr_init_sse (MD5_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new MD5 job to the context level multi-buffer manager.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_submit_sse (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted MD5 jobs and return when complete.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_flush_sse (MD5_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the MD5 multi-buffer manager structure.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void md5_ctx_mgr_init_avx (MD5_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new MD5 job to the multi-buffer manager.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_submit_avx (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted MD5 jobs and return when complete.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_flush_avx (MD5_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the MD5 multi-buffer manager structure.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void md5_ctx_mgr_init_avx2 (MD5_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new MD5 job to the multi-buffer manager.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_submit_avx2 (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted MD5 jobs and return when complete.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_flush_avx2 (MD5_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the MD5 multi-buffer manager structure.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void md5_ctx_mgr_init_avx512 (MD5_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new MD5 job to the multi-buffer manager.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_submit_avx512 (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted MD5 jobs and return when complete.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_flush_avx512 (MD5_HASH_CTX_MGR* mgr);
+
+/******************** multibinary function prototypes **********************/
+
+/**
+ * @brief Initialize the MD5 multi-buffer manager structure.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void md5_ctx_mgr_init (MD5_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new MD5 job to the multi-buffer manager.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_submit (MD5_HASH_CTX_MGR* mgr, MD5_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted MD5 jobs and return when complete.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+MD5_HASH_CTX* md5_ctx_mgr_flush (MD5_HASH_CTX_MGR* mgr);
+
+
+/*******************************************************************
+ * Scheduler (internal) level out-of-order function prototypes
+ ******************************************************************/
+
+void md5_mb_mgr_init_sse (MD5_MB_JOB_MGR *state);
+MD5_JOB* md5_mb_mgr_submit_sse (MD5_MB_JOB_MGR *state, MD5_JOB* job);
+MD5_JOB* md5_mb_mgr_flush_sse (MD5_MB_JOB_MGR *state);
+
+#define md5_mb_mgr_init_avx md5_mb_mgr_init_sse
+MD5_JOB* md5_mb_mgr_submit_avx (MD5_MB_JOB_MGR *state, MD5_JOB* job);
+MD5_JOB* md5_mb_mgr_flush_avx (MD5_MB_JOB_MGR *state);
+
+void md5_mb_mgr_init_avx2 (MD5_MB_JOB_MGR *state);
+MD5_JOB* md5_mb_mgr_submit_avx2 (MD5_MB_JOB_MGR *state, MD5_JOB* job);
+MD5_JOB* md5_mb_mgr_flush_avx2 (MD5_MB_JOB_MGR *state);
+
+void md5_mb_mgr_init_avx512 (MD5_MB_JOB_MGR *state);
+MD5_JOB* md5_mb_mgr_submit_avx512 (MD5_MB_JOB_MGR *state, MD5_JOB* job);
+MD5_JOB* md5_mb_mgr_flush_avx512 (MD5_MB_JOB_MGR *state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _MD5_MB_H_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm b/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm
new file mode 100644
index 000000000..1342dc1c3
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/memcpy.asm
@@ -0,0 +1,346 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+%ifndef __MEMCPY_ASM__
+%define __MEMCPY_ASM__
+
+%include "reg_sizes.asm"
+
+
+; This file defines a series of macros to copy small to medium amounts
+; of data from memory to memory, where the size is variable but limited.
+;
+; The macros are all called as:
+; memcpy DST, SRC, SIZE, TMP0, TMP1, XTMP0, XTMP1, XTMP2, XTMP3
+; with the parameters defined as:
+; DST : register: pointer to dst (not modified)
+; SRC : register: pointer to src (not modified)
+; SIZE : register: length in bytes (not modified)
+; TMP0 : 64-bit temp GPR (clobbered)
+; TMP1 : 64-bit temp GPR (clobbered)
+; XTMP0 : temp XMM (clobbered)
+; XTMP1 : temp XMM (clobbered)
+; XTMP2 : temp XMM (clobbered)
+; XTMP3 : temp XMM (clobbered)
+;
+; The name indicates the options. The name is of the form:
+; memcpy_<VEC>_<SZ><ZERO><RET>
+; where:
+; <VEC> is either "sse" or "avx" or "avx2"
+; <SZ> is either "64" or "128" and defines largest value of SIZE
+; <ZERO> is blank or "_1". If "_1" then the min SIZE is 1 (otherwise 0)
+; <RET> is blank or "_ret". If blank, the code falls through. If "ret"
+; it does a "ret" at the end
+;
+; For the avx2 versions, the temp XMM registers need to be YMM registers
+; If the SZ is 64, then only two YMM temps are needed, i.e. it is called as:
+; memcpy_avx2_64 DST, SRC, SIZE, TMP0, TMP1, YTMP0, YTMP1
+; memcpy_avx2_128 DST, SRC, SIZE, TMP0, TMP1, YTMP0, YTMP1, YTMP2, YTMP3
+;
+; For example:
+; memcpy_sse_64 : SSE, 0 <= size < 64, falls through
+; memcpy_avx_64_1 : AVX1, 1 <= size < 64, falls through
+; memcpy_sse_128_ret : SSE, 0 <= size < 128, ends with ret
+; mempcy_avx_128_1_ret : AVX1, 1 <= size < 128, ends with ret
+;
+
+%macro memcpy_sse_64 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 0, 0
+%endm
+
+%macro memcpy_sse_64_1 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 0, 0
+%endm
+
+%macro memcpy_sse_128 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 0, 0
+%endm
+
+%macro memcpy_sse_128_1 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 0, 0
+%endm
+
+%macro memcpy_sse_64_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 1, 0
+%endm
+
+%macro memcpy_sse_64_1_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 1, 0
+%endm
+
+%macro memcpy_sse_128_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 1, 0
+%endm
+
+%macro memcpy_sse_128_1_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 1, 0
+%endm
+
+
+%macro memcpy_sse_16 5
+ __memcpy_int %1,%2,%3,%4,%5,,,,, 0, 16, 0, 0
+%endm
+
+%macro memcpy_sse_16_1 5
+ __memcpy_int %1,%2,%3,%4,%5,,,,, 1, 16, 0, 0
+%endm
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%macro memcpy_avx_64 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 0, 1
+%endm
+
+%macro memcpy_avx_64_1 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 0, 1
+%endm
+
+%macro memcpy_avx_128 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 0, 1
+%endm
+
+%macro memcpy_avx_128_1 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 0, 1
+%endm
+
+%macro memcpy_avx_64_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 64, 1, 1
+%endm
+
+%macro memcpy_avx_64_1_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 64, 1, 1
+%endm
+
+%macro memcpy_avx_128_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 0, 128, 1, 1
+%endm
+
+%macro memcpy_avx_128_1_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,%8,%9, 1, 128, 1, 1
+%endm
+
+
+%macro memcpy_avx_16 5
+ __memcpy_int %1,%2,%3,%4,%5,,,,, 0, 16, 0, 1
+%endm
+
+%macro memcpy_avx_16_1 5
+ __memcpy_int %1,%2,%3,%4,%5,,,,, 1, 16, 0, 1
+%endm
+
+ ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%macro memcpy_avx2_64 7
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 0, 64, 0, 2
+%endm
+
+%macro memcpy_avx2_64_1 7
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 1, 64, 0, 2
+%endm
+
+%macro memcpy_avx2_128 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7, %8, %9, 0, 128, 0, 2
+%endm
+
+%macro memcpy_avx2_128_1 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7, %8, %9, 1, 128, 0, 2
+%endm
+
+%macro memcpy_avx2_64_ret 7
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 0, 64, 1, 2
+%endm
+
+%macro memcpy_avx2_64_1_ret 7
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 1, 64, 1, 2
+%endm
+
+%macro memcpy_avx2_128_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 0, 128, 1, 2
+%endm
+
+%macro memcpy_avx2_128_1_ret 9
+ __memcpy_int %1,%2,%3,%4,%5,%6,%7,--,--, 1, 128, 1, 2
+%endm
+
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+
+%macro __memcpy_int 13
+%define %%DST %1 ; register: pointer to dst (not modified)
+%define %%SRC %2 ; register: pointer to src (not modified)
+%define %%SIZE %3 ; register: length in bytes (not modified)
+%define %%TMP0 %4 ; 64-bit temp GPR (clobbered)
+%define %%TMP1 %5 ; 64-bit temp GPR (clobbered)
+%define %%XTMP0 %6 ; temp XMM (clobbered)
+%define %%XTMP1 %7 ; temp XMM (clobbered)
+%define %%XTMP2 %8 ; temp XMM (clobbered)
+%define %%XTMP3 %9 ; temp XMM (clobbered)
+%define %%NOT0 %10 ; if not 0, then assume size cannot be zero
+%define %%MAXSIZE %11 ; 128, 64, etc
+%define %%USERET %12 ; if not 0, use "ret" at end
+%define %%USEAVX %13 ; 0 = SSE, 1 = AVX1, 2 = AVX2
+
+%if (%%USERET != 0)
+ %define %%DONE ret
+%else
+ %define %%DONE jmp %%end
+%endif
+
+%if (%%USEAVX != 0)
+ %define %%MOVDQU vmovdqu
+%else
+ %define %%MOVDQU movdqu
+%endif
+
+%if (%%MAXSIZE >= 128)
+ test %%SIZE, 64
+ jz %%lt64
+ %if (%%USEAVX >= 2)
+ %%MOVDQU %%XTMP0, [%%SRC + 0*32]
+ %%MOVDQU %%XTMP1, [%%SRC + 1*32]
+ %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*32]
+ %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*32]
+
+ %%MOVDQU [%%DST + 0*32], %%XTMP0
+ %%MOVDQU [%%DST + 1*32], %%XTMP1
+ %%MOVDQU [%%DST + %%SIZE - 2*32], %%XTMP2
+ %%MOVDQU [%%DST + %%SIZE - 1*32], %%XTMP3
+ %else
+ %%MOVDQU %%XTMP0, [%%SRC + 0*16]
+ %%MOVDQU %%XTMP1, [%%SRC + 1*16]
+ %%MOVDQU %%XTMP2, [%%SRC + 2*16]
+ %%MOVDQU %%XTMP3, [%%SRC + 3*16]
+ %%MOVDQU [%%DST + 0*16], %%XTMP0
+ %%MOVDQU [%%DST + 1*16], %%XTMP1
+ %%MOVDQU [%%DST + 2*16], %%XTMP2
+ %%MOVDQU [%%DST + 3*16], %%XTMP3
+
+ %%MOVDQU %%XTMP0, [%%SRC + %%SIZE - 4*16]
+ %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 3*16]
+ %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*16]
+ %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*16]
+ %%MOVDQU [%%DST + %%SIZE - 4*16], %%XTMP0
+ %%MOVDQU [%%DST + %%SIZE - 3*16], %%XTMP1
+ %%MOVDQU [%%DST + %%SIZE - 2*16], %%XTMP2
+ %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP3
+ %endif
+ %%DONE
+%endif
+
+%if (%%MAXSIZE >= 64)
+%%lt64
+ test %%SIZE, 32
+ jz %%lt32
+ %if (%%USEAVX >= 2)
+ %%MOVDQU %%XTMP0, [%%SRC + 0*32]
+ %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 1*32]
+ %%MOVDQU [%%DST + 0*32], %%XTMP0
+ %%MOVDQU [%%DST + %%SIZE - 1*32], %%XTMP1
+ %else
+ %%MOVDQU %%XTMP0, [%%SRC + 0*16]
+ %%MOVDQU %%XTMP1, [%%SRC + 1*16]
+ %%MOVDQU %%XTMP2, [%%SRC + %%SIZE - 2*16]
+ %%MOVDQU %%XTMP3, [%%SRC + %%SIZE - 1*16]
+ %%MOVDQU [%%DST + 0*16], %%XTMP0
+ %%MOVDQU [%%DST + 1*16], %%XTMP1
+ %%MOVDQU [%%DST + %%SIZE - 2*16], %%XTMP2
+ %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP3
+ %endif
+ %%DONE
+%endif
+
+%if (%%MAXSIZE >= 32)
+%%lt32:
+ test %%SIZE, 16
+ jz %%lt16
+ %if (%%USEAVX >= 2)
+ %%MOVDQU XWORD(%%XTMP0), [%%SRC + 0*16]
+ %%MOVDQU XWORD(%%XTMP1), [%%SRC + %%SIZE - 1*16]
+ %%MOVDQU [%%DST + 0*16], XWORD(%%XTMP0)
+ %%MOVDQU [%%DST + %%SIZE - 1*16], XWORD(%%XTMP1)
+ %else
+ %%MOVDQU %%XTMP0, [%%SRC + 0*16]
+ %%MOVDQU %%XTMP1, [%%SRC + %%SIZE - 1*16]
+ %%MOVDQU [%%DST + 0*16], %%XTMP0
+ %%MOVDQU [%%DST + %%SIZE - 1*16], %%XTMP1
+ %endif
+ %%DONE
+%endif
+
+%if (%%MAXSIZE >= 16)
+%%lt16:
+ test %%SIZE, 8
+ jz %%lt8
+ mov %%TMP0, [%%SRC]
+ mov %%TMP1, [%%SRC + %%SIZE - 8]
+ mov [%%DST], %%TMP0
+ mov [%%DST + %%SIZE - 8], %%TMP1
+ %%DONE
+%endif
+
+%if (%%MAXSIZE >= 8)
+%%lt8:
+ test %%SIZE, 4
+ jz %%lt4
+ mov DWORD(%%TMP0), [%%SRC]
+ mov DWORD(%%TMP1), [%%SRC + %%SIZE - 4]
+ mov [%%DST], DWORD(%%TMP0)
+ mov [%%DST + %%SIZE - 4], DWORD(%%TMP1)
+ %%DONE
+%endif
+
+%if (%%MAXSIZE >= 4)
+%%lt4:
+ test %%SIZE, 2
+ jz %%lt2
+ movzx DWORD(%%TMP0), word [%%SRC]
+ movzx DWORD(%%TMP1), byte [%%SRC + %%SIZE - 1]
+ mov [%%DST], WORD(%%TMP0)
+ mov [%%DST + %%SIZE - 1], BYTE(%%TMP1)
+ %%DONE
+%endif
+
+%%lt2:
+%if (%%NOT0 == 0)
+ test %%SIZE, 1
+ jz %%end
+%endif
+ movzx DWORD(%%TMP0), byte [%%SRC]
+ mov [%%DST], BYTE(%%TMP0)
+%%end:
+%if (%%USERET != 0)
+ ret
+%endif
+%endm
+
+%endif ; ifndef __MEMCPY_ASM__
diff --git a/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h b/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h
new file mode 100644
index 000000000..b15da3cb1
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/memcpy_inline.h
@@ -0,0 +1,363 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+/**
+ * @file memcpy_inline.h
+ * @brief Defines intrinsic memcpy functions used by the new hashing API
+ *
+ */
+
+#ifndef _MEMCPY_H_
+#define _MEMCPY_H_
+
+#include "intrinreg.h"
+#include <assert.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define memcpy_varlen memcpy_sse_varlen
+#define memcpy_fixedlen memcpy_sse_fixedlen
+
+#define memclr_varlen memclr_sse_varlen
+#define memclr_fixedlen memclr_sse_fixedlen
+
+static inline void memcpy_lte32_sse_fixedlen(void* dst, const void* src, size_t nbytes);
+static inline void memcpy_gte16_sse_fixedlen(void* dst, const void* src, size_t nbytes);
+static inline void memcpy_sse_fixedlen (void* dst, const void* src, size_t nbytes);
+
+static inline void memcpy_lte32_sse_varlen (void* dst, const void* src, size_t nbytes);
+static inline void memcpy_gte16_sse_varlen (void* dst, const void* src, size_t nbytes);
+static inline void memcpy_sse_varlen (void* dst, const void* src, size_t nbytes);
+
+
+static inline void memclr_lte32_sse_fixedlen(void* dst, size_t nbytes);
+static inline void memclr_gte16_sse_fixedlen(void* dst, size_t nbytes);
+static inline void memclr_sse_fixedlen (void* dst, size_t nbytes);
+
+static inline void memclr_lte32_sse_varlen (void* dst, size_t nbytes);
+static inline void memclr_gte16_sse_varlen (void* dst, size_t nbytes);
+static inline void memclr_sse_varlen (void* dst, size_t nbytes);
+
+#define MEMCPY_BETWEEN_N_AND_2N_BYTES(N, fixedwidth, dst, src, nbytes) \
+ do { \
+ intrinreg##N head; \
+ intrinreg##N tail; \
+ assert(N <= nbytes && nbytes <= 2*N); \
+ if(N == 1 || (fixedwidth && nbytes==N) ) { \
+ head = load_intrinreg##N(src); \
+ store_intrinreg##N(dst, head); \
+ } \
+ else { \
+ head = load_intrinreg##N(src); \
+ tail = load_intrinreg##N((const void*)((const char*)src + (nbytes - N))); \
+ store_intrinreg##N(dst, head); \
+ store_intrinreg##N((void*)((char*)dst + (nbytes - N)), tail); \
+ } \
+ } while(0)
+
+#define MEMCLR_BETWEEN_N_AND_2N_BYTES(N, fixedwidth, dst, nbytes) \
+ do { \
+ const intrinreg##N zero = {0}; \
+ assert(N <= nbytes && nbytes <= 2*N); \
+ if(N == 1 || (fixedwidth && nbytes==N) ) { \
+ store_intrinreg##N(dst, zero); \
+ } \
+ else { \
+ store_intrinreg##N(dst, zero); \
+ store_intrinreg##N((void*)((char*)dst + (nbytes - N)), zero); \
+ } \
+ } while(0)
+
+// Define load/store functions uniformly.
+
+#define load_intrinreg16(src) _mm_loadu_ps((const float*) src)
+#define store_intrinreg16(dst,val) _mm_storeu_ps((float*) dst, val)
+
+static inline intrinreg8 load_intrinreg8(const void *src)
+{
+ return *(intrinreg8 *) src;
+}
+
+static inline void store_intrinreg8(void *dst, intrinreg8 val)
+{
+ *(intrinreg8 *) dst = val;
+}
+
+static inline intrinreg4 load_intrinreg4(const void *src)
+{
+ return *(intrinreg4 *) src;
+}
+
+static inline void store_intrinreg4(void *dst, intrinreg4 val)
+{
+ *(intrinreg4 *) dst = val;
+}
+
+static inline intrinreg2 load_intrinreg2(const void *src)
+{
+ return *(intrinreg2 *) src;
+}
+
+static inline void store_intrinreg2(void *dst, intrinreg2 val)
+{
+ *(intrinreg2 *) dst = val;
+}
+
+static inline intrinreg1 load_intrinreg1(const void *src)
+{
+ return *(intrinreg1 *) src;
+}
+
+static inline void store_intrinreg1(void *dst, intrinreg1 val)
+{
+ *(intrinreg1 *) dst = val;
+}
+
+static inline void memcpy_gte16_sse_fixedlen(void *dst, const void *src, size_t nbytes)
+{
+ size_t i;
+ size_t j;
+ intrinreg16 pool[4];
+ size_t remaining_moves;
+ size_t tail_offset;
+ int do_tail;
+ assert(nbytes >= 16);
+
+ for (i = 0; i + 16 * 4 <= nbytes; i += 16 * 4) {
+ for (j = 0; j < 4; j++)
+ pool[j] =
+ load_intrinreg16((const void *)((const char *)src + i + 16 * j));
+ for (j = 0; j < 4; j++)
+ store_intrinreg16((void *)((char *)dst + i + 16 * j), pool[j]);
+ }
+
+ remaining_moves = (nbytes - i) / 16;
+ tail_offset = nbytes - 16;
+ do_tail = (tail_offset & (16 - 1));
+
+ for (j = 0; j < remaining_moves; j++)
+ pool[j] = load_intrinreg16((const void *)((const char *)src + i + 16 * j));
+
+ if (do_tail)
+ pool[j] = load_intrinreg16((const void *)((const char *)src + tail_offset));
+
+ for (j = 0; j < remaining_moves; j++)
+ store_intrinreg16((void *)((char *)dst + i + 16 * j), pool[j]);
+
+ if (do_tail)
+ store_intrinreg16((void *)((char *)dst + tail_offset), pool[j]);
+}
+
+static inline void memclr_gte16_sse_fixedlen(void *dst, size_t nbytes)
+{
+ size_t i;
+ size_t j;
+ const intrinreg16 zero = { 0 };
+ size_t remaining_moves;
+ size_t tail_offset;
+ int do_tail;
+ assert(nbytes >= 16);
+
+ for (i = 0; i + 16 * 4 <= nbytes; i += 16 * 4)
+ for (j = 0; j < 4; j++)
+ store_intrinreg16((void *)((char *)dst + i + 16 * j), zero);
+
+ remaining_moves = (nbytes - i) / 16;
+ tail_offset = nbytes - 16;
+ do_tail = (tail_offset & (16 - 1));
+
+ for (j = 0; j < remaining_moves; j++)
+ store_intrinreg16((void *)((char *)dst + i + 16 * j), zero);
+
+ if (do_tail)
+ store_intrinreg16((void *)((char *)dst + tail_offset), zero);
+}
+
+static inline void memcpy_lte32_sse_fixedlen(void *dst, const void *src, size_t nbytes)
+{
+ assert(nbytes <= 32);
+ if (nbytes >= 16)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(16, 1, dst, src, nbytes);
+ else if (nbytes >= 8)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(8, 1, dst, src, nbytes);
+ else if (nbytes >= 4)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(4, 1, dst, src, nbytes);
+ else if (nbytes >= 2)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(2, 1, dst, src, nbytes);
+ else if (nbytes >= 1)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(1, 1, dst, src, nbytes);
+}
+
+static inline void memclr_lte32_sse_fixedlen(void *dst, size_t nbytes)
+{
+ assert(nbytes <= 32);
+ if (nbytes >= 16)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(16, 1, dst, nbytes);
+ else if (nbytes >= 8)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(8, 1, dst, nbytes);
+ else if (nbytes >= 4)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(4, 1, dst, nbytes);
+ else if (nbytes >= 2)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(2, 1, dst, nbytes);
+ else if (nbytes >= 1)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(1, 1, dst, nbytes);
+}
+
+static inline void memcpy_lte32_sse_varlen(void *dst, const void *src, size_t nbytes)
+{
+ assert(nbytes <= 32);
+ if (nbytes >= 16)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(16, 0, dst, src, nbytes);
+ else if (nbytes >= 8)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(8, 0, dst, src, nbytes);
+ else if (nbytes >= 4)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(4, 0, dst, src, nbytes);
+ else if (nbytes >= 2)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(2, 0, dst, src, nbytes);
+ else if (nbytes >= 1)
+ MEMCPY_BETWEEN_N_AND_2N_BYTES(1, 0, dst, src, nbytes);
+}
+
+static inline void memclr_lte32_sse_varlen(void *dst, size_t nbytes)
+{
+ assert(nbytes <= 32);
+ if (nbytes >= 16)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(16, 0, dst, nbytes);
+ else if (nbytes >= 8)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(8, 0, dst, nbytes);
+ else if (nbytes >= 4)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(4, 0, dst, nbytes);
+ else if (nbytes >= 2)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(2, 0, dst, nbytes);
+ else if (nbytes >= 1)
+ MEMCLR_BETWEEN_N_AND_2N_BYTES(1, 0, dst, nbytes);
+}
+
+static inline void memcpy_gte16_sse_varlen(void *dst, const void *src, size_t nbytes)
+{
+ size_t i = 0;
+ intrinreg16 tail;
+
+ assert(nbytes >= 16);
+
+ while (i + 128 <= nbytes) {
+ memcpy_gte16_sse_fixedlen((void *)((char *)dst + i),
+ (const void *)((const char *)src + i), 128);
+ i += 128;
+ }
+ if (i + 64 <= nbytes) {
+ memcpy_gte16_sse_fixedlen((void *)((char *)dst + i),
+ (const void *)((const char *)src + i), 64);
+ i += 64;
+ }
+ if (i + 32 <= nbytes) {
+ memcpy_gte16_sse_fixedlen((void *)((char *)dst + i),
+ (const void *)((const char *)src + i), 32);
+ i += 32;
+ }
+ if (i + 16 <= nbytes) {
+ memcpy_gte16_sse_fixedlen((void *)((char *)dst + i),
+ (const void *)((const char *)src + i), 16);
+ i += 16;
+ }
+
+ i = nbytes - 16;
+ tail = load_intrinreg16((const void *)((const char *)src + i));
+ store_intrinreg16((void *)((char *)dst + i), tail);
+}
+
+static inline void memclr_gte16_sse_varlen(void *dst, size_t nbytes)
+{
+ size_t i = 0;
+ const intrinreg16 zero = { 0 };
+
+ assert(nbytes >= 16);
+
+ while (i + 128 <= nbytes) {
+ memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 128);
+ i += 128;
+ }
+ if (i + 64 <= nbytes) {
+ memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 64);
+ i += 64;
+ }
+ if (i + 32 <= nbytes) {
+ memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 32);
+ i += 32;
+ }
+ if (i + 16 <= nbytes) {
+ memclr_gte16_sse_fixedlen((void *)((char *)dst + i), 16);
+ i += 16;
+ }
+
+ i = nbytes - 16;
+ store_intrinreg16((void *)((char *)dst + i), zero);
+}
+
+static inline void memcpy_sse_fixedlen(void *dst, const void *src, size_t nbytes)
+{
+ if (nbytes >= 16)
+ memcpy_gte16_sse_fixedlen(dst, src, nbytes);
+ else
+ memcpy_lte32_sse_fixedlen(dst, src, nbytes);
+}
+
+static inline void memclr_sse_fixedlen(void *dst, size_t nbytes)
+{
+ if (nbytes >= 16)
+ memclr_gte16_sse_fixedlen(dst, nbytes);
+ else
+ memclr_lte32_sse_fixedlen(dst, nbytes);
+}
+
+static inline void memcpy_sse_varlen(void *dst, const void *src, size_t nbytes)
+{
+ if (nbytes >= 16)
+ memcpy_gte16_sse_varlen(dst, src, nbytes);
+ else
+ memcpy_lte32_sse_varlen(dst, src, nbytes);
+}
+
+static inline void memclr_sse_varlen(void *dst, size_t nbytes)
+{
+ if (nbytes >= 16)
+ memclr_gte16_sse_varlen(dst, nbytes);
+ else
+ memclr_lte32_sse_varlen(dst, nbytes);
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // __MEMCPY_H
diff --git a/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h
new file mode 100644
index 000000000..ea9bb9ac4
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1.h
@@ -0,0 +1,315 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MH_SHA1_H_
+#define _MH_SHA1_H_
+
+/**
+ * @file mh_sha1.h
+ * @brief mh_sha1 function prototypes and structures
+ *
+ * Interface for mh_sha1 functions
+ *
+ * <b> mh_sha1 Init-Update..Update-Finalize </b>
+ *
+ * This file defines the interface to optimized functions used in mh_sha1.
+ * The definition of multi-hash SHA1(mh_sha1, for short) is: Pad the buffer
+ * in SHA1 style until the total length is a multiple of 4*16*16
+ * (words-width * parallel-segments * block-size); Hash the buffer in
+ * parallel, generating digests of 4*16*5 (words-width*parallel-segments*
+ * digest-size); Treat the set of digests as another data buffer, and
+ * generate a final SHA1 digest for it.
+ *
+ *
+ * Example
+ * \code
+ * uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS];
+ * struct mh_sha1_ctx *ctx;
+ *
+ * ctx = malloc(sizeof(struct mh_sha1_ctx));
+ * mh_sha1_init(ctx);
+ * mh_sha1_update(ctx, buff, block_len);
+ * mh_sha1_finalize(ctx, mh_sha1_digest);
+ * \endcode
+ */
+
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// External Interface Definition
+#define HASH_SEGS 16
+#define SHA1_BLOCK_SIZE 64
+#define MH_SHA1_BLOCK_SIZE (HASH_SEGS * SHA1_BLOCK_SIZE)
+#define SHA1_DIGEST_WORDS 5
+#define AVX512_ALIGNED 64
+
+/** @brief Holds info describing a single mh_sha1
+ *
+ * It is better to use heap to allocate this data structure to avoid stack overflow.
+ *
+*/
+struct mh_sha1_ctx {
+ uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; //!< the digest of multi-hash SHA1
+
+ uint64_t total_length;
+ //!< Parameters for update feature, describe the lengths of input buffers in bytes
+ uint8_t partial_block_buffer [MH_SHA1_BLOCK_SIZE * 2];
+ //!< Padding the tail of input data for SHA1
+ uint8_t mh_sha1_interim_digests[sizeof(uint32_t) * SHA1_DIGEST_WORDS * HASH_SEGS];
+ //!< Storing the SHA1 interim digests of all 16 segments. Each time, it will be copied to stack for 64-byte alignment purpose.
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE + AVX512_ALIGNED];
+ //!< Re-structure sha1 block data from different segments to fit big endian. Use AVX512_ALIGNED for 64-byte alignment purpose.
+};
+
+/**
+ * @enum mh_sha1_ctx_error
+ * @brief CTX error flags
+ */
+enum mh_sha1_ctx_error{
+ MH_SHA1_CTX_ERROR_NONE = 0, //!< MH_SHA1_MURMUR3_CTX_ERROR_NONE
+ MH_SHA1_CTX_ERROR_NULL = -1, //!< MH_SHA1_MURMUR3_CTX_ERROR_NULL
+};
+
+
+/*******************************************************************
+ * mh_sha1 API function prototypes
+ ******************************************************************/
+
+/**
+ * @brief Initialize the mh_sha1_ctx structure.
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @returns int Return 0 if the function runs without errors
+ */
+int mh_sha1_init (struct mh_sha1_ctx* ctx);
+
+/**
+ * @brief Multi-hash sha1 update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ */
+int mh_sha1_update (struct mh_sha1_ctx * ctx, const void* buffer, uint32_t len);
+
+/**
+ * @brief Finalize the message digests for multi-hash sha1.
+ *
+ * Place the message digest in mh_sha1_digest which must have enough space
+ * for the outputs.
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ */
+int mh_sha1_finalize (struct mh_sha1_ctx* ctx, void* mh_sha1_digest);
+
+/*******************************************************************
+ * multi-types of mh_sha1 internal API
+ *
+ * XXXX The multi-binary version
+ * XXXX_base The C code version which used to display the algorithm
+ * XXXX_sse The version uses a ASM function optimized for SSE
+ * XXXX_avx The version uses a ASM function optimized for AVX
+ * XXXX_avx2 The version uses a ASM function optimized for AVX2
+ * XXXX_avx512 The version uses a ASM function optimized for AVX512
+ *
+ ******************************************************************/
+
+/**
+ * @brief Multi-hash sha1 update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * Base update() function that does not require SIMD support.
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_update_base (struct mh_sha1_ctx* ctx, const void* buffer, uint32_t len);
+
+/**
+ * @brief Multi-hash sha1 update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * @requires SSE
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_update_sse (struct mh_sha1_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Multi-hash sha1 update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * @requires AVX
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_update_avx (struct mh_sha1_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Multi-hash sha1 update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * @requires AVX2
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_update_avx2 (struct mh_sha1_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Multi-hash sha1 update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * @requires AVX512
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_update_avx512 (struct mh_sha1_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+
+/**
+ * @brief Finalize the message digests for multi-hash sha1.
+ *
+ * Place the message digests in mh_sha1_digest,
+ * which must have enough space for the outputs.
+ * Base Finalize() function that does not require SIMD support.
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_finalize_base (struct mh_sha1_ctx* ctx,
+ void* mh_sha1_digest);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digest in mh_sha1_digest which must have enough space
+ * for the outputs.
+ *
+ * @requires SSE
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_finalize_sse (struct mh_sha1_ctx* ctx,
+ void* mh_sha1_digest);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digest in mh_sha1_digest which must have enough space
+ * for the outputs.
+ *
+ * @requires AVX
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_finalize_avx (struct mh_sha1_ctx* ctx,
+ void* mh_sha1_digest);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digest in mh_sha1_digest which must have enough space
+ * for the outputs.
+ *
+ * @requires AVX2
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_finalize_avx2 (struct mh_sha1_ctx* ctx,
+ void* mh_sha1_digest);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digest in mh_sha1_digest which must have enough space
+ * for the outputs.
+ *
+ * @requires AVX512
+ *
+ * @param ctx Structure holding mh_sha1 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_finalize_avx512 (struct mh_sha1_ctx* ctx,
+ void* mh_sha1_digest);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h
new file mode 100644
index 000000000..0c84650e2
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/mh_sha1_murmur3_x64_128.h
@@ -0,0 +1,327 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MH_SHA1_MURMUR3_X64_128_H_
+#define _MH_SHA1_MURMUR3_X64_128_H_
+
+/**
+ * @file mh_sha1_murmur3_x64_128.h
+ * @brief mh_sha1_murmur3_x64_128 function prototypes and structures
+ *
+ * Interface for mh_sha1_murmur3_x64_128 functions
+ *
+ * <b> mh_sha1_murmur3_x64_128 Init-Update..Update-Finalize </b>
+ *
+ * This file defines the interface to optimized functions used in mh_sha1 and
+ * mh_sha1_murmur3_x64_128. The definition of multi-hash SHA1(mh_sha1,
+ * for short) is: Pad the buffer in SHA1 style until the total length is a multiple
+ * of 4*16*16(words-width * parallel-segments * block-size); Hash the buffer
+ * in parallel, generating digests of 4*16*5 (words-width*parallel-segments*
+ * digest-size); Treat the set of digests as another data buffer, and generate
+ * a final SHA1 digest for it. mh_sha1_murmur3_x64_128 is a stitching function
+ * which will get a murmur3_x64_128 digest while generate mh_sha1 digest.
+ *
+ *
+ * Example
+ * \code
+ * uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS];
+ * uint32_t murmur_digest[MURMUR3_x64_128_DIGEST_WORDS];
+ * struct mh_sha1_murmur3_x64_128_ctx *ctx;
+ *
+ * ctx = malloc(sizeof(struct mh_sha1_murmur3_x64_128_ctx));
+ * mh_sha1_murmur3_x64_128_init(ctx, 0);
+ * mh_sha1_murmur3_x64_128_update(ctx, buff, block_len);
+ * mh_sha1_murmur3_x64_128_finalize(ctx, mh_sha1_digest,
+ * murmur_digest);
+ * \endcode
+ */
+
+#include <stdint.h>
+#include "mh_sha1.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+// External Interface Definition
+// Add murmur3_x64_128 definition
+#define MUR_BLOCK_SIZE (2 * sizeof(uint64_t))
+#define MURMUR3_x64_128_DIGEST_WORDS 4
+
+/** @brief Holds info describing a single mh_sha1_murmur3_x64_128
+ *
+ * It is better to use heap to allocate this data structure to avoid stack overflow.
+ *
+*/
+struct mh_sha1_murmur3_x64_128_ctx {
+ uint32_t mh_sha1_digest[SHA1_DIGEST_WORDS]; //!< the digest of multi-hash SHA1
+ uint32_t murmur3_x64_128_digest[MURMUR3_x64_128_DIGEST_WORDS]; //!< the digest of murmur3_x64_128
+
+ uint64_t total_length;
+ //!< Parameters for update feature, describe the lengths of input buffers in bytes
+ uint8_t partial_block_buffer [MH_SHA1_BLOCK_SIZE * 2];
+ //!< Padding the tail of input data for SHA1
+ uint8_t mh_sha1_interim_digests[sizeof(uint32_t) * SHA1_DIGEST_WORDS * HASH_SEGS];
+ //!< Storing the SHA1 interim digests of all 16 segments. Each time, it will be copied to stack for 64-byte alignment purpose.
+ uint8_t frame_buffer[MH_SHA1_BLOCK_SIZE + AVX512_ALIGNED];
+ //!< Re-structure sha1 block data from different segments to fit big endian. Use AVX512_ALIGNED for 64-byte alignment purpose.
+};
+
+/**
+ * @enum mh_sha1_murmur3_ctx_error
+ * @brief CTX error flags
+ */
+enum mh_sha1_murmur3_ctx_error{
+ MH_SHA1_MURMUR3_CTX_ERROR_NONE = 0, //!< MH_SHA1_MURMUR3_CTX_ERROR_NONE
+ MH_SHA1_MURMUR3_CTX_ERROR_NULL = -1, //!<MH_SHA1_MURMUR3_CTX_ERROR_NULL
+};
+
+
+/*******************************************************************
+ * mh_sha1_murmur3_x64_128 API function prototypes
+ ******************************************************************/
+
+/**
+ * @brief Initialize the mh_sha1_murmur3_x64_128_ctx structure.
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param murmur_seed Seed as an initial digest of murmur3
+ * @returns int Return 0 if the function runs without errors
+ */
+int mh_sha1_murmur3_x64_128_init (struct mh_sha1_murmur3_x64_128_ctx* ctx,
+ uint64_t murmur_seed);
+
+/**
+ * @brief Combined multi-hash and murmur hash update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ */
+int mh_sha1_murmur3_x64_128_update (struct mh_sha1_murmur3_x64_128_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest,
+ * which must have enough space for the outputs.
+ * This function determines what instruction sets are enabled and selects the
+ * appropriate version at runtime.
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns int Return 0 if the function runs without errors
+ */
+int mh_sha1_murmur3_x64_128_finalize (struct mh_sha1_murmur3_x64_128_ctx* ctx,
+ void* mh_sha1_digest, void* murmur3_x64_128_digest);
+
+/*******************************************************************
+ * multi-types of mh_sha1_murmur3_x64_128 internal API
+ *
+ * XXXX The multi-binary version
+ * XXXX_base The C code version which used to display the algorithm
+ * XXXX_sse The version uses a ASM function optimized for SSE
+ * XXXX_avx The version uses a ASM function optimized for AVX
+ * XXXX_avx2 The version uses a ASM function optimized for AVX2
+ *
+ ******************************************************************/
+
+/**
+ * @brief Combined multi-hash and murmur hash update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * Base update() function that does not require SIMD support.
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_update_base (struct mh_sha1_murmur3_x64_128_ctx* ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Combined multi-hash and murmur hash update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * @requires SSE
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_update_sse (struct mh_sha1_murmur3_x64_128_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Combined multi-hash and murmur hash update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * @requires AVX
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_update_avx (struct mh_sha1_murmur3_x64_128_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Combined multi-hash and murmur hash update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * @requires AVX2
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_update_avx2 (struct mh_sha1_murmur3_x64_128_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Combined multi-hash and murmur hash update.
+ *
+ * Can be called repeatedly to update hashes with new input data.
+ * @requires AVX512
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_update_avx512 (struct mh_sha1_murmur3_x64_128_ctx * ctx,
+ const void* buffer, uint32_t len);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest,
+ * which must have enough space for the outputs.
+ * Base Finalize() function that does not require SIMD support.
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_finalize_base (struct mh_sha1_murmur3_x64_128_ctx* ctx,
+ void* mh_sha1_digest, void* murmur3_x64_128_digest);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest,
+ * which must have enough space for the outputs.
+ *
+ * @requires SSE
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_finalize_sse (struct mh_sha1_murmur3_x64_128_ctx* ctx,
+ void* mh_sha1_digest, void* murmur3_x64_128_digest);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest,
+ * which must have enough space for the outputs.
+ *
+ * @requires AVX
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_finalize_avx (struct mh_sha1_murmur3_x64_128_ctx* ctx,
+ void* mh_sha1_digest, void* murmur3_x64_128_digest);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest,
+ * which must have enough space for the outputs.
+ *
+ * @requires AVX2
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_finalize_avx2 (struct mh_sha1_murmur3_x64_128_ctx* ctx,
+ void* mh_sha1_digest, void* murmur3_x64_128_digest);
+
+/**
+ * @brief Finalize the message digests for combined multi-hash and murmur.
+ *
+ * Place the message digests in mh_sha1_digest and murmur3_x64_128_digest,
+ * which must have enough space for the outputs.
+ *
+ * @requires AVX512
+ *
+ * @param ctx Structure holding mh_sha1_murmur3_x64_128 info
+ * @param mh_sha1_digest The digest of mh_sha1
+ * @param murmur3_x64_128_digest The digest of murmur3_x64_128
+ * @returns int Return 0 if the function runs without errors
+ *
+ */
+int mh_sha1_murmur3_x64_128_finalize_avx512 (struct mh_sha1_murmur3_x64_128_ctx* ctx,
+ void* mh_sha1_digest, void* murmur3_x64_128_digest);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
+
diff --git a/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h b/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h
new file mode 100644
index 000000000..d9b713575
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/multi_buffer.h
@@ -0,0 +1,127 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _MULTI_BUFFER_H_
+#define _MULTI_BUFFER_H_
+
+/**
+ * @file multi_buffer.h
+ * @brief Multi-buffer common fields
+ *
+ */
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __FreeBSD__
+#include <sys/types.h>
+#include <sys/endian.h>
+# define _byteswap_uint64(x) bswap64(x)
+# define _byteswap_ulong(x) bswap32(x)
+#elif defined (__APPLE__)
+#include <libkern/OSByteOrder.h>
+# define _byteswap_uint64(x) OSSwapInt64(x)
+# define _byteswap_ulong(x) OSSwapInt32(x)
+#elif defined (__GNUC__) && !defined (__MINGW32__)
+# include <byteswap.h>
+# define _byteswap_uint64(x) bswap_64(x)
+# define _byteswap_ulong(x) bswap_32(x)
+#endif
+
+/**
+ * @enum JOB_STS
+ * @brief Job return codes
+ */
+
+typedef enum {STS_UNKNOWN = 0, //!< STS_UNKNOWN
+ STS_BEING_PROCESSED = 1,//!< STS_BEING_PROCESSED
+ STS_COMPLETED = 2, //!< STS_COMPLETED
+ STS_INTERNAL_ERROR, //!< STS_INTERNAL_ERROR
+ STS_ERROR //!< STS_ERROR
+} JOB_STS;
+
+#define HASH_MB_NO_FLAGS 0
+#define HASH_MB_FIRST 1
+#define HASH_MB_LAST 2
+
+/* Common flags for the new API only
+ * */
+
+/**
+ * @enum HASH_CTX_FLAG
+ * @brief CTX job type
+ */
+typedef enum {
+ HASH_UPDATE = 0x00, //!< HASH_UPDATE
+ HASH_FIRST = 0x01, //!< HASH_FIRST
+ HASH_LAST = 0x02, //!< HASH_LAST
+ HASH_ENTIRE = 0x03, //!< HASH_ENTIRE
+} HASH_CTX_FLAG;
+
+/**
+ * @enum HASH_CTX_STS
+ * @brief CTX status flags
+ */
+typedef enum {
+ HASH_CTX_STS_IDLE = 0x00, //!< HASH_CTX_STS_IDLE
+ HASH_CTX_STS_PROCESSING = 0x01, //!< HASH_CTX_STS_PROCESSING
+ HASH_CTX_STS_LAST = 0x02, //!< HASH_CTX_STS_LAST
+ HASH_CTX_STS_COMPLETE = 0x04, //!< HASH_CTX_STS_COMPLETE
+} HASH_CTX_STS;
+
+/**
+ * @enum HASH_CTX_ERROR
+ * @brief CTX error flags
+ */
+typedef enum {
+ HASH_CTX_ERROR_NONE = 0, //!< HASH_CTX_ERROR_NONE
+ HASH_CTX_ERROR_INVALID_FLAGS = -1, //!< HASH_CTX_ERROR_INVALID_FLAGS
+ HASH_CTX_ERROR_ALREADY_PROCESSING = -2, //!< HASH_CTX_ERROR_ALREADY_PROCESSING
+ HASH_CTX_ERROR_ALREADY_COMPLETED = -3, //!< HASH_CTX_ERROR_ALREADY_COMPLETED
+} HASH_CTX_ERROR;
+
+
+#define hash_ctx_user_data(ctx) ((ctx)->user_data)
+#define hash_ctx_digest(ctx) ((ctx)->job.result_digest)
+#define hash_ctx_processing(ctx) ((ctx)->status & HASH_CTX_STS_PROCESSING)
+#define hash_ctx_complete(ctx) ((ctx)->status == HASH_CTX_STS_COMPLETE)
+#define hash_ctx_status(ctx) ((ctx)->status)
+#define hash_ctx_error(ctx) ((ctx)->error)
+#define hash_ctx_init(ctx) \
+ do { \
+ (ctx)->error = HASH_CTX_ERROR_NONE; \
+ (ctx)->status = HASH_CTX_STS_COMPLETE; \
+ } while(0)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _MULTI_BUFFER_H_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm b/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm
new file mode 100644
index 000000000..45f87a9b1
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/multibinary.asm
@@ -0,0 +1,271 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifndef _MULTIBINARY_ASM_
+%define _MULTIBINARY_ASM_
+
+%ifidn __OUTPUT_FORMAT__, elf32
+ %define mbin_def_ptr dd
+ %define mbin_ptr_sz dword
+ %define mbin_rdi edi
+ %define mbin_rsi esi
+ %define mbin_rax eax
+ %define mbin_rbx ebx
+ %define mbin_rcx ecx
+ %define mbin_rdx edx
+%else
+ %define mbin_def_ptr dq
+ %define mbin_ptr_sz qword
+ %define mbin_rdi rdi
+ %define mbin_rsi rsi
+ %define mbin_rax rax
+ %define mbin_rbx rbx
+ %define mbin_rcx rcx
+ %define mbin_rdx rdx
+%endif
+
+;;;;
+; multibinary macro:
+; creates the visable entry point that uses HW optimized call pointer
+; creates the init of the HW optimized call pointer
+;;;;
+%macro mbin_interface 1
+ ;;;;
+ ; *_dispatched is defaulted to *_mbinit and replaced on first call.
+ ; Therefore, *_dispatch_init is only executed on first call.
+ ;;;;
+ section .data
+ %1_dispatched:
+ mbin_def_ptr %1_mbinit
+
+ section .text
+ global %1:function
+ %1_mbinit:
+ ;;; only called the first time to setup hardware match
+ call %1_dispatch_init
+ ;;; falls thru to execute the hw optimized code
+ %1:
+ jmp mbin_ptr_sz [%1_dispatched]
+%endmacro
+
+;;;;;
+; mbin_dispatch_init parameters
+; Use this function when SSE/00/01 is a minimum requirement
+; 1-> function name
+; 2-> SSE/00/01 optimized function used as base
+; 3-> AVX or AVX/02 opt func
+; 4-> AVX2 or AVX/04 opt func
+;;;;;
+%macro mbin_dispatch_init 4
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ lea mbin_rsi, [%2 WRT_OPT] ; Default to SSE 00/01
+
+ mov eax, 1
+ cpuid
+ and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
+ cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
+ lea mbin_rbx, [%3 WRT_OPT] ; AVX (gen2) opt func
+ jne _%1_init_done ; AVX is not available so end
+ mov mbin_rsi, mbin_rbx
+
+ ;; Try for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen4) opt func
+ cmovne mbin_rsi, mbin_rbx
+
+ ;; Does it have xmm and ymm support
+ xor ecx, ecx
+ xgetbv
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ je _%1_init_done
+ lea mbin_rsi, [%2 WRT_OPT]
+
+ _%1_init_done:
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+
+;;;;;
+; mbin_dispatch_init2 parameters
+; Cases where only base functions are available
+; 1-> function name
+; 2-> base function
+;;;;;
+%macro mbin_dispatch_init2 2
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ lea mbin_rsi, [%2 WRT_OPT] ; Default
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+
+;;;;;
+; mbin_dispatch_init5 parameters
+; 1-> function name
+; 2-> base function
+; 3-> SSE4_1 or 00/01 optimized function
+; 4-> AVX/02 opt func
+; 5-> AVX2/04 opt func
+;;;;;
+%macro mbin_dispatch_init5 5
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
+
+ mov eax, 1
+ cpuid
+ ; Test for SSE4.1
+ test ecx, FLAG_CPUID1_ECX_SSE4_1
+ lea mbin_rbx, [%3 WRT_OPT] ; SSE opt func
+ cmovne mbin_rsi, mbin_rbx
+
+ and ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
+ cmp ecx, (FLAG_CPUID1_ECX_AVX | FLAG_CPUID1_ECX_OSXSAVE)
+ lea mbin_rbx, [%4 WRT_OPT] ; AVX (gen2) opt func
+ jne _%1_init_done ; AVX is not available so end
+ mov mbin_rsi, mbin_rbx
+
+ ;; Try for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ lea mbin_rbx, [%5 WRT_OPT] ; AVX (gen4) opt func
+ cmovne mbin_rsi, mbin_rbx
+
+ ;; Does it have xmm and ymm support
+ xor ecx, ecx
+ xgetbv
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ je _%1_init_done
+ lea mbin_rsi, [%3 WRT_OPT]
+
+ _%1_init_done:
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+
+;;;;;
+; mbin_dispatch_init6 parameters
+; 1-> function name
+; 2-> base function
+; 3-> SSE4_1 or 00/01 optimized function
+; 4-> AVX/02 opt func
+; 5-> AVX2/04 opt func
+; 6-> AVX512/06 opt func
+;;;;;
+%macro mbin_dispatch_init6 6
+ section .text
+ %1_dispatch_init:
+ push mbin_rsi
+ push mbin_rax
+ push mbin_rbx
+ push mbin_rcx
+ push mbin_rdx
+ push mbin_rdi
+ lea mbin_rsi, [%2 WRT_OPT] ; Default - use base function
+
+ mov eax, 1
+ cpuid
+ mov ebx, ecx ; save cpuid1.ecx
+ test ecx, FLAG_CPUID1_ECX_SSE4_1
+ je _%1_init_done ; Use base function if no SSE4_1
+ lea mbin_rsi, [%3 WRT_OPT] ; SSE possible so use 00/01 opt
+
+ ;; Test for XMM_YMM support/AVX
+ test ecx, FLAG_CPUID1_ECX_OSXSAVE
+ je _%1_init_done
+ xor ecx, ecx
+ xgetbv ; xcr -> edx:eax
+ mov edi, eax ; save xgetvb.eax
+
+ and eax, FLAG_XGETBV_EAX_XMM_YMM
+ cmp eax, FLAG_XGETBV_EAX_XMM_YMM
+ jne _%1_init_done
+ test ebx, FLAG_CPUID1_ECX_AVX
+ je _%1_init_done
+ lea mbin_rsi, [%4 WRT_OPT] ; AVX/02 opt
+
+ ;; Test for AVX2
+ xor ecx, ecx
+ mov eax, 7
+ cpuid
+ test ebx, FLAG_CPUID7_EBX_AVX2
+ je _%1_init_done ; No AVX2 possible
+ lea mbin_rsi, [%5 WRT_OPT] ; AVX2/04 opt func
+
+ ;; Test for AVX512
+ and edi, FLAG_XGETBV_EAX_ZMM_OPM
+ cmp edi, FLAG_XGETBV_EAX_ZMM_OPM
+ jne _%1_init_done ; No AVX512 possible
+ and ebx, FLAGS_CPUID7_ECX_AVX512_G1
+ cmp ebx, FLAGS_CPUID7_ECX_AVX512_G1
+ lea mbin_rbx, [%6 WRT_OPT] ; AVX512/06 opt
+ cmove mbin_rsi, mbin_rbx
+
+ _%1_init_done:
+ pop mbin_rdi
+ pop mbin_rdx
+ pop mbin_rcx
+ pop mbin_rbx
+ pop mbin_rax
+ mov [%1_dispatched], mbin_rsi
+ pop mbin_rsi
+ ret
+%endmacro
+
+%endif ; ifndef _MULTIBINARY_ASM_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm b/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm
new file mode 100644
index 000000000..64064a1a2
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/reg_sizes.asm
@@ -0,0 +1,149 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+; Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+;
+; Redistribution and use in source and binary forms, with or without
+; modification, are permitted provided that the following conditions
+; are met:
+; * Redistributions of source code must retain the above copyright
+; notice, this list of conditions and the following disclaimer.
+; * Redistributions in binary form must reproduce the above copyright
+; notice, this list of conditions and the following disclaimer in
+; the documentation and/or other materials provided with the
+; distribution.
+; * Neither the name of Intel Corporation nor the names of its
+; contributors may be used to endorse or promote products derived
+; from this software without specific prior written permission.
+;
+; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+; "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+; LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+; A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+; OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+; SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+; LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+; DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+; THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+; (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+; OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+%ifndef _REG_SIZES_ASM_
+%define _REG_SIZES_ASM_
+
+%ifdef __NASM_VER__
+%ifidn __OUTPUT_FORMAT__, win64
+%error nasm not supported in windows
+%else
+%define endproc_frame
+%endif
+%endif
+
+%define EFLAGS_HAS_CPUID (1<<21)
+%define FLAG_CPUID1_ECX_CLMUL (1<<1)
+%define FLAG_CPUID1_EDX_SSE2 (1<<26)
+%define FLAG_CPUID1_ECX_SSE3 (1)
+%define FLAG_CPUID1_ECX_SSE4_1 (1<<19)
+%define FLAG_CPUID1_ECX_SSE4_2 (1<<20)
+%define FLAG_CPUID1_ECX_POPCNT (1<<23)
+%define FLAG_CPUID1_ECX_AESNI (1<<25)
+%define FLAG_CPUID1_ECX_OSXSAVE (1<<27)
+%define FLAG_CPUID1_ECX_AVX (1<<28)
+%define FLAG_CPUID1_EBX_AVX2 (1<<5)
+
+%define FLAG_CPUID7_EBX_AVX2 (1<<5)
+%define FLAG_CPUID7_EBX_AVX512F (1<<16)
+%define FLAG_CPUID7_EBX_AVX512DQ (1<<17)
+%define FLAG_CPUID7_EBX_AVX512IFMA (1<<21)
+%define FLAG_CPUID7_EBX_AVX512PF (1<<26)
+%define FLAG_CPUID7_EBX_AVX512ER (1<<27)
+%define FLAG_CPUID7_EBX_AVX512CD (1<<28)
+%define FLAG_CPUID7_EBX_AVX512BW (1<<30)
+%define FLAG_CPUID7_EBX_AVX512VL (1<<31)
+%define FLAG_CPUID7_ECX_AVX512VBMI (1<<1)
+
+%define FLAGS_CPUID7_ECX_AVX512_G1 (FLAG_CPUID7_EBX_AVX512F | FLAG_CPUID7_EBX_AVX512VL | FLAG_CPUID7_EBX_AVX512BW | FLAG_CPUID7_EBX_AVX512CD | FLAG_CPUID7_EBX_AVX512DQ)
+
+%define FLAG_XGETBV_EAX_XMM (1<<1)
+%define FLAG_XGETBV_EAX_YMM (1<<2)
+%define FLAG_XGETBV_EAX_XMM_YMM 0x6
+%define FLAG_XGETBV_EAX_ZMM_OPM 0xe0
+
+%define FLAG_CPUID1_EAX_AVOTON 0x000406d0
+%define FLAG_CPUID1_EAX_STEP_MASK 0xfffffff0
+
+; define d and w variants for registers
+
+%define raxd eax
+%define raxw ax
+%define raxb al
+
+%define rbxd ebx
+%define rbxw bx
+%define rbxb bl
+
+%define rcxd ecx
+%define rcxw cx
+%define rcxb cl
+
+%define rdxd edx
+%define rdxw dx
+%define rdxb dl
+
+%define rsid esi
+%define rsiw si
+%define rsib sil
+
+%define rdid edi
+%define rdiw di
+%define rdib dil
+
+%define rbpd ebp
+%define rbpw bp
+%define rbpb bpl
+
+%define ymm0x xmm0
+%define ymm1x xmm1
+%define ymm2x xmm2
+%define ymm3x xmm3
+%define ymm4x xmm4
+%define ymm5x xmm5
+%define ymm6x xmm6
+%define ymm7x xmm7
+%define ymm8x xmm8
+%define ymm9x xmm9
+%define ymm10x xmm10
+%define ymm11x xmm11
+%define ymm12x xmm12
+%define ymm13x xmm13
+%define ymm14x xmm14
+%define ymm15x xmm15
+
+%define DWORD(reg) reg %+ d
+%define WORD(reg) reg %+ w
+%define BYTE(reg) reg %+ b
+
+%define XWORD(reg) reg %+ x
+
+%ifidn __OUTPUT_FORMAT__,elf32
+section .note.GNU-stack noalloc noexec nowrite progbits
+section .text
+%endif
+%ifidn __OUTPUT_FORMAT__,elf64
+section .note.GNU-stack noalloc noexec nowrite progbits
+section .text
+%endif
+%ifidn __OUTPUT_FORMAT__, macho64
+%define elf64 macho64
+%endif
+
+%macro slversion 4
+ section .text
+ global %1_slver_%2%3%4
+ global %1_slver
+ %1_slver:
+ %1_slver_%2%3%4:
+ dw 0x%4
+ db 0x%3, 0x%2
+%endmacro
+
+%endif ; ifndef _REG_SIZES_ASM_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h
new file mode 100644
index 000000000..7ddeb45f3
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/sha1_mb.h
@@ -0,0 +1,377 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _SHA1_MB_H_
+#define _SHA1_MB_H_
+
+/**
+ * @file sha1_mb.h
+ * @brief Multi-buffer CTX API SHA1 function prototypes and structures
+ *
+ * Interface for multi-buffer SHA1 functions
+ *
+ * <b> Multi-buffer SHA1 Entire or First-Update..Update-Last </b>
+ *
+ * The interface to this multi-buffer hashing code is carried out through the
+ * context-level (CTX) init, submit and flush functions and the SHA1_HASH_CTX_MGR and
+ * SHA1_HASH_CTX objects. Numerous SHA1_HASH_CTX objects may be instantiated by the
+ * application for use with a single SHA1_HASH_CTX_MGR.
+ *
+ * The CTX interface functions carry out the initialization and padding of the jobs
+ * entered by the user and add them to the multi-buffer manager. The lower level "scheduler"
+ * layer then processes the jobs in an out-of-order manner. The scheduler layer functions
+ * are internal and are not intended to be invoked directly. Jobs can be submitted
+ * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial
+ * jobs which can be started using the HASH_FIRST flag, and later resumed or finished
+ * using the HASH_UPDATE and HASH_LAST flags respectively.
+ *
+ * <b>Note:</b> The submit function does not require data buffers to be block sized.
+ *
+ * The SHA1 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and
+ * AVX512. In addition, a multibinary interface is provided, which selects the appropriate
+ * architecture-specific function at runtime.
+ *
+ * <b>Usage:</b> The application creates a SHA1_HASH_CTX_MGR object and initializes it
+ * with a call to sha1_ctx_mgr_init*() function, where henceforth "*" stands for the
+ * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512(or no suffix for the
+ * multibinary version). The SHA1_HASH_CTX_MGR object will be used to schedule processor
+ * resources, with up to 4 SHA1_HASH_CTX objects (or 8 in the AVX2 case, 16 in the AVX512)
+ * being processed at a time.
+ *
+ * Each SHA1_HASH_CTX must be initialized before first use by the hash_ctx_init macro
+ * defined in multi_buffer.h. After initialization, the application may begin computing
+ * a hash by giving the SHA1_HASH_CTX to a SHA1_HASH_CTX_MGR using the submit functions
+ * sha1_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA1_HASH_CTX is
+ * returned to the application (via this or a later call to sha1_ctx_mgr_submit*() or
+ * sha1_ctx_mgr_flush*()), the application can then re-submit it with another call to
+ * sha1_ctx_mgr_submit*(), but without the HASH_FIRST flag set.
+ *
+ * Ideally, on the last buffer for that hash, sha1_ctx_mgr_submit_sse is called with
+ * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero
+ * length if necessary. When a SHA1_HASH_CTX is returned after having been submitted with
+ * HASH_LAST, it will contain a valid hash. The SHA1_HASH_CTX can be reused immediately
+ * by submitting with HASH_FIRST.
+ *
+ * For example, you would submit hashes with the following flags for the following numbers
+ * of buffers:
+ * <ul>
+ * <li> one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE)
+ * <li> two buffers: HASH_FIRST, HASH_LAST
+ * <li> three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST
+ * etc.
+ * </ul>
+ *
+ * The order in which SHA1_CTX objects are returned is in general different from the order
+ * in which they are submitted.
+ *
+ * A few possible error conditions exist:
+ * <ul>
+ * <li> Submitting flags other than the allowed entire/first/update/last values
+ * <li> Submitting a context that is currently being managed by a SHA1_HASH_CTX_MGR.
+ * <li> Submitting a context after HASH_LAST is used but before HASH_FIRST is set.
+ * </ul>
+ *
+ * These error conditions are reported by returning the SHA1_HASH_CTX immediately after
+ * a submit with its error member set to a non-zero error code (defined in
+ * multi_buffer.h). No changes are made to the SHA1_HASH_CTX_MGR in the case of an
+ * error; no processing is done for other hashes.
+ *
+ */
+
+#include <stdint.h>
+#include "multi_buffer.h"
+#include "types.h"
+
+#ifndef _MSC_VER
+#include <stdbool.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Hash Constants and Typedefs
+#define SHA1_DIGEST_NWORDS 5
+#define SHA1_MAX_LANES 16
+#define SHA1_X8_LANES 8
+#define SHA1_MIN_LANES 4
+#define SHA1_BLOCK_SIZE 64
+#define SHA1_LOG2_BLOCK_SIZE 6
+#define SHA1_PADLENGTHFIELD_SIZE 8
+#define SHA1_INITIAL_DIGEST \
+ 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476, 0xc3d2e1f0
+
+typedef uint32_t sha1_digest_array[SHA1_DIGEST_NWORDS][SHA1_MAX_LANES];
+typedef uint32_t SHA1_WORD_T;
+
+/** @brief Scheduler layer - Holds info describing a single SHA1 job for the multi-buffer manager */
+
+typedef struct {
+ uint8_t* buffer; //!< pointer to data buffer for this job
+ uint32_t len; //!< length of buffer for this job in blocks.
+ DECLARE_ALIGNED(uint32_t result_digest[SHA1_DIGEST_NWORDS],64);
+ JOB_STS status; //!< output job status
+ void* user_data; //!< pointer for user's job-related data
+} SHA1_JOB;
+
+/** @brief Scheduler layer - Holds arguments for submitted SHA1 job */
+
+typedef struct {
+ sha1_digest_array digest;
+ uint8_t* data_ptr[SHA1_MAX_LANES];
+} SHA1_MB_ARGS_X16;
+
+/** @brief Scheduler layer - Lane data */
+
+typedef struct {
+ SHA1_JOB *job_in_lane;
+} SHA1_LANE_DATA;
+
+/** @brief Scheduler layer - Holds state for multi-buffer SHA1 jobs */
+
+typedef struct {
+ SHA1_MB_ARGS_X16 args;
+ uint32_t lens[SHA1_MAX_LANES];
+ uint64_t unused_lanes; //!< each nibble is index (0...3 or 0...7 or 0...15) of unused lanes, nibble 4 or 8 is set to F as a flag
+ SHA1_LANE_DATA ldata[SHA1_MAX_LANES];
+ uint32_t num_lanes_inuse;
+} SHA1_MB_JOB_MGR;
+
+/** @brief Context layer - Holds state for multi-buffer SHA1 jobs */
+
+typedef struct {
+ SHA1_MB_JOB_MGR mgr;
+} SHA1_HASH_CTX_MGR;
+
+/** @brief Context layer - Holds info describing a single SHA1 job for the multi-buffer CTX manager */
+
+typedef struct {
+ SHA1_JOB job; // Must be at struct offset 0.
+ HASH_CTX_STS status; //!< Context status flag
+ HASH_CTX_ERROR error; //!< Context error flag
+ uint32_t total_length; //!< Running counter of length processed for this CTX's job
+ const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job
+ uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes.
+ uint8_t partial_block_buffer[SHA1_BLOCK_SIZE * 2]; //!< CTX partial blocks
+ uint32_t partial_block_buffer_length;
+ void* user_data; //!< pointer for user to keep any job-related data
+} SHA1_HASH_CTX;
+
+/*******************************************************************
+ * Context level API function prototypes
+ ******************************************************************/
+
+/**
+ * @brief Initialize the context level SHA1 multi-buffer manager structure.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha1_ctx_mgr_init_sse (SHA1_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA1 job to the context level multi-buffer manager.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_submit_sse (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA1 jobs and return when complete.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_flush_sse (SHA1_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA1 multi-buffer manager structure.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha1_ctx_mgr_init_avx (SHA1_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA1 job to the multi-buffer manager.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA1 jobs and return when complete.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx (SHA1_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA1 multi-buffer manager structure.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha1_ctx_mgr_init_avx2 (SHA1_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA1 job to the multi-buffer manager.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx2 (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA1 jobs and return when complete.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx2 (SHA1_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA1 multi-buffer manager structure.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha1_ctx_mgr_init_avx512 (SHA1_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA1 job to the multi-buffer manager.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_submit_avx512 (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA1 jobs and return when complete.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_flush_avx512 (SHA1_HASH_CTX_MGR* mgr);
+
+/******************** multibinary function prototypes **********************/
+
+/**
+ * @brief Initialize the SHA1 multi-buffer manager structure.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha1_ctx_mgr_init (SHA1_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA1 job to the multi-buffer manager.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_submit (SHA1_HASH_CTX_MGR* mgr, SHA1_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA1 jobs and return when complete.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA1_HASH_CTX* sha1_ctx_mgr_flush (SHA1_HASH_CTX_MGR* mgr);
+
+
+/*******************************************************************
+ * Scheduler (internal) level out-of-order function prototypes
+ ******************************************************************/
+
+void sha1_mb_mgr_init_sse (SHA1_MB_JOB_MGR *state);
+SHA1_JOB* sha1_mb_mgr_submit_sse (SHA1_MB_JOB_MGR *state, SHA1_JOB* job);
+SHA1_JOB* sha1_mb_mgr_flush_sse (SHA1_MB_JOB_MGR *state);
+
+#define sha1_mb_mgr_init_avx sha1_mb_mgr_init_sse
+SHA1_JOB* sha1_mb_mgr_submit_avx (SHA1_MB_JOB_MGR *state, SHA1_JOB* job);
+SHA1_JOB* sha1_mb_mgr_flush_avx (SHA1_MB_JOB_MGR *state);
+
+void sha1_mb_mgr_init_avx2 (SHA1_MB_JOB_MGR *state);
+SHA1_JOB* sha1_mb_mgr_submit_avx2 (SHA1_MB_JOB_MGR *state, SHA1_JOB* job);
+SHA1_JOB* sha1_mb_mgr_flush_avx2 (SHA1_MB_JOB_MGR *state);
+
+void sha1_mb_mgr_init_avx512 (SHA1_MB_JOB_MGR *state);
+SHA1_JOB* sha1_mb_mgr_submit_avx512 (SHA1_MB_JOB_MGR *state, SHA1_JOB* job);
+SHA1_JOB* sha1_mb_mgr_flush_avx512 (SHA1_MB_JOB_MGR *state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _SHA1_MB_H_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h
new file mode 100644
index 000000000..cd48508d8
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/sha256_mb.h
@@ -0,0 +1,376 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _SHA256_MB_H_
+#define _SHA256_MB_H_
+
+/**
+ * @file sha256_mb.h
+ * @brief Multi-buffer CTX API SHA256 function prototypes and structures
+ *
+ * Interface for multi-buffer SHA256 functions
+ *
+ * <b> Multi-buffer SHA256 Entire or First-Update..Update-Last </b>
+ *
+ * The interface to this multi-buffer hashing code is carried out through the
+ * context-level (CTX) init, submit and flush functions and the SHA256_HASH_CTX_MGR and
+ * SHA256_HASH_CTX objects. Numerous SHA256_HASH_CTX objects may be instantiated by the
+ * application for use with a single SHA256_HASH_CTX_MGR.
+ *
+ * The CTX interface functions carry out the initialization and padding of the jobs
+ * entered by the user and add them to the multi-buffer manager. The lower level "scheduler"
+ * layer then processes the jobs in an out-of-order manner. The scheduler layer functions
+ * are internal and are not intended to be invoked directly. Jobs can be submitted
+ * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial
+ * jobs which can be started using the HASH_FIRST flag, and later resumed or finished
+ * using the HASH_UPDATE and HASH_LAST flags respectively.
+ *
+ * <b>Note:</b> The submit function does not require data buffers to be block sized.
+ *
+ * The SHA256 CTX interface functions are available for 4 architectures: SSE, AVX, AVX2 and
+ * AVX512. In addition, a multibinary interface is provided, which selects the appropriate
+ * architecture-specific function at runtime.
+ *
+ * <b>Usage:</b> The application creates a SHA256_HASH_CTX_MGR object and initializes it
+ * with a call to sha256_ctx_mgr_init*() function, where henceforth "*" stands for the
+ * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512(or no suffix for the
+ * multibinary version). The SHA256_HASH_CTX_MGR object will be used to schedule processor
+ * resources, with up to 4 SHA256_HASH_CTX objects (or 8 in the AVX2 case, 16 in the AVX512)
+ * being processed at a time.
+ *
+ * Each SHA256_HASH_CTX must be initialized before first use by the hash_ctx_init macro
+ * defined in multi_buffer.h. After initialization, the application may begin computing
+ * a hash by giving the SHA256_HASH_CTX to a SHA256_HASH_CTX_MGR using the submit functions
+ * sha256_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA256_HASH_CTX is
+ * returned to the application (via this or a later call to sha256_ctx_mgr_submit*() or
+ * sha256_ctx_mgr_flush*()), the application can then re-submit it with another call to
+ * sha256_ctx_mgr_submit*(), but without the HASH_FIRST flag set.
+ *
+ * Ideally, on the last buffer for that hash, sha256_ctx_mgr_submit_sse is called with
+ * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero
+ * length if necessary. When a SHA256_HASH_CTX is returned after having been submitted with
+ * HASH_LAST, it will contain a valid hash. The SHA256_HASH_CTX can be reused immediately
+ * by submitting with HASH_FIRST.
+ *
+ * For example, you would submit hashes with the following flags for the following numbers
+ * of buffers:
+ * <ul>
+ * <li> one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE)
+ * <li> two buffers: HASH_FIRST, HASH_LAST
+ * <li> three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST
+ * etc.
+ * </ul>
+ *
+ * The order in which SHA256_CTX objects are returned is in general different from the order
+ * in which they are submitted.
+ *
+ * A few possible error conditions exist:
+ * <ul>
+ * <li> Submitting flags other than the allowed entire/first/update/last values
+ * <li> Submitting a context that is currently being managed by a SHA256_HASH_CTX_MGR.
+ * <li> Submitting a context after HASH_LAST is used but before HASH_FIRST is set.
+ * </ul>
+ *
+ * These error conditions are reported by returning the SHA256_HASH_CTX immediately after
+ * a submit with its error member set to a non-zero error code (defined in
+ * multi_buffer.h). No changes are made to the SHA256_HASH_CTX_MGR in the case of an
+ * error; no processing is done for other hashes.
+ *
+ */
+
+#include <stdint.h>
+#include "multi_buffer.h"
+#include "types.h"
+
+#ifndef _MSC_VER
+#include <stdbool.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Hash Constants and Typedefs
+#define SHA256_DIGEST_NWORDS 8
+#define SHA256_MAX_LANES 16
+#define SHA256_X8_LANES 8
+#define SHA256_MIN_LANES 4
+#define SHA256_BLOCK_SIZE 64
+#define SHA256_LOG2_BLOCK_SIZE 6
+#define SHA256_PADLENGTHFIELD_SIZE 8
+#define SHA256_INITIAL_DIGEST \
+ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, \
+ 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19
+
+typedef uint32_t sha256_digest_array[SHA256_DIGEST_NWORDS][SHA256_MAX_LANES];
+typedef uint32_t SHA256_WORD_T;
+
+/** @brief Scheduler layer - Holds info describing a single SHA256 job for the multi-buffer manager */
+
+typedef struct {
+ uint8_t* buffer; //!< pointer to data buffer for this job
+ uint64_t len; //!< length of buffer for this job in blocks.
+ DECLARE_ALIGNED(uint32_t result_digest[SHA256_DIGEST_NWORDS], 64);
+ JOB_STS status; //!< output job status
+ void* user_data; //!< pointer for user's job-related data
+} SHA256_JOB;
+
+/** @brief Scheduler layer - Holds arguments for submitted SHA256 job */
+
+typedef struct {
+ sha256_digest_array digest;
+ uint8_t* data_ptr[SHA256_MAX_LANES];
+} SHA256_MB_ARGS_X16;
+
+/** @brief Scheduler layer - Lane data */
+
+typedef struct {
+ SHA256_JOB *job_in_lane;
+} SHA256_LANE_DATA;
+
+/** @brief Scheduler layer - Holds state for multi-buffer SHA256 jobs */
+
+typedef struct {
+ SHA256_MB_ARGS_X16 args;
+ uint32_t lens[SHA256_MAX_LANES];
+ uint64_t unused_lanes; //!< each nibble is index (0...3 or 0...7) of unused lanes, nibble 4 or 8 is set to F as a flag
+ SHA256_LANE_DATA ldata[SHA256_MAX_LANES];
+ uint32_t num_lanes_inuse;
+} SHA256_MB_JOB_MGR;
+
+/** @brief Context layer - Holds state for multi-buffer SHA256 jobs */
+
+typedef struct {
+ SHA256_MB_JOB_MGR mgr;
+} SHA256_HASH_CTX_MGR;
+
+/** @brief Context layer - Holds info describing a single SHA256 job for the multi-buffer CTX manager */
+
+typedef struct {
+ SHA256_JOB job; // Must be at struct offset 0.
+ HASH_CTX_STS status; //!< Context status flag
+ HASH_CTX_ERROR error; //!< Context error flag
+ uint32_t total_length; //!< Running counter of length processed for this CTX's job
+ const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job
+ uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes.
+ uint8_t partial_block_buffer[SHA256_BLOCK_SIZE * 2]; //!< CTX partial blocks
+ uint32_t partial_block_buffer_length;
+ void* user_data; //!< pointer for user to keep any job-related data
+} SHA256_HASH_CTX;
+
+/*******************************************************************
+ * CTX level API function prototypes
+ ******************************************************************/
+
+/**
+ * @brief Initialize the context level SHA256 multi-buffer manager structure.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha256_ctx_mgr_init_sse (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA256 job to the context level multi-buffer manager.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_submit_sse (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA256 jobs and return when complete.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_flush_sse (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA256 multi-buffer manager structure.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha256_ctx_mgr_init_avx (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA256 job to the multi-buffer manager.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA256 jobs and return when complete.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA256 multi-buffer manager structure.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha256_ctx_mgr_init_avx2 (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA256 job to the multi-buffer manager.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx2 (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA256 jobs and return when complete.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx2 (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA256 multi-buffer manager structure.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha256_ctx_mgr_init_avx512 (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA256 job to the multi-buffer manager.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_submit_avx512 (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA256 jobs and return when complete.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_flush_avx512 (SHA256_HASH_CTX_MGR* mgr);
+/******************** multibinary function prototypes **********************/
+
+/**
+ * @brief Initialize the SHA256 multi-buffer manager structure.
+ * @requires SSE4.1 or AVX or AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha256_ctx_mgr_init (SHA256_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA256 job to the multi-buffer manager.
+ * @requires SSE4.1 or AVX or AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_submit (SHA256_HASH_CTX_MGR* mgr, SHA256_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA256 jobs and return when complete.
+ * @requires SSE4.1 or AVX or AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA256_HASH_CTX* sha256_ctx_mgr_flush (SHA256_HASH_CTX_MGR* mgr);
+
+
+/*******************************************************************
+ * Scheduler (internal) level out-of-order function prototypes
+ ******************************************************************/
+
+void sha256_mb_mgr_init_sse(SHA256_MB_JOB_MGR *state);
+SHA256_JOB* sha256_mb_mgr_submit_sse(SHA256_MB_JOB_MGR *state, SHA256_JOB* job);
+SHA256_JOB* sha256_mb_mgr_flush_sse(SHA256_MB_JOB_MGR *state);
+
+#define sha256_mb_mgr_init_avx sha256_mb_mgr_init_sse
+SHA256_JOB* sha256_mb_mgr_submit_avx (SHA256_MB_JOB_MGR *state, SHA256_JOB* job);
+SHA256_JOB* sha256_mb_mgr_flush_avx (SHA256_MB_JOB_MGR *state);
+
+void sha256_mb_mgr_init_avx2 (SHA256_MB_JOB_MGR *state);
+SHA256_JOB* sha256_mb_mgr_submit_avx2 (SHA256_MB_JOB_MGR *state, SHA256_JOB* job);
+SHA256_JOB* sha256_mb_mgr_flush_avx2 (SHA256_MB_JOB_MGR *state);
+
+void sha256_mb_mgr_init_avx512 (SHA256_MB_JOB_MGR *state);
+SHA256_JOB* sha256_mb_mgr_submit_avx512 (SHA256_MB_JOB_MGR *state, SHA256_JOB* job);
+SHA256_JOB* sha256_mb_mgr_flush_avx512 (SHA256_MB_JOB_MGR *state);
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _SHA256_MB_H_
diff --git a/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h b/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h
new file mode 100644
index 000000000..aee6156f1
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/sha512_mb.h
@@ -0,0 +1,422 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+#ifndef _SHA512_MB_H_
+#define _SHA512_MB_H_
+
+/**
+ * @file sha512_mb.h
+ * @brief Single/Multi-buffer CTX API SHA512 function prototypes and structures
+ *
+ * Interface for single and multi-buffer SHA512 functions
+ *
+ * <b> Single/Multi-buffer SHA512 Entire or First-Update..Update-Last </b>
+ *
+ * The interface to this single/multi-buffer hashing code is carried out through the
+ * context-level (CTX) init, submit and flush functions and the SHA512_HASH_CTX_MGR and
+ * SHA512_HASH_CTX objects. Numerous SHA512_HASH_CTX objects may be instantiated by the
+ * application for use with a single SHA512_HASH_CTX_MGR.
+ *
+ * The CTX interface functions carry out the initialization and padding of the jobs
+ * entered by the user and add them to the multi-buffer manager. The lower level "scheduler"
+ * layer then processes the jobs in an out-of-order manner. The scheduler layer functions
+ * are internal and are not intended to be invoked directly. Jobs can be submitted
+ * to a CTX as a complete buffer to be hashed, using the HASH_ENTIRE flag, or as partial
+ * jobs which can be started using the HASH_FIRST flag, and later resumed or finished
+ * using the HASH_UPDATE and HASH_LAST flags respectively.
+ *
+ * <b>Note:</b> The submit function does not require data buffers to be block sized.
+ *
+ * The SHA512 CTX interface functions are available for 5 architectures: multi-buffer SSE,
+ * AVX, AVX2, AVX512 and single-buffer SSE4 (which is used in the same way as the
+ * multi-buffer code). In addition, a multibinary interface is provided, which selects the
+ * appropriate architecture-specific function at runtime. This multibinary interface
+ * selects the single buffer SSE4 functions when the platform is detected to be Silvermont.
+ *
+ * <b>Usage:</b> The application creates a SHA512_HASH_CTX_MGR object and initializes it
+ * with a call to sha512_ctx_mgr_init*() function, where henceforth "*" stands for the
+ * relevant suffix for each architecture; _sse, _avx, _avx2, _avx512(or no suffix for the
+ * multibinary version). The SHA512_HASH_CTX_MGR object will be used to schedule processor
+ * resources, with up to 2 SHA512_HASH_CTX objects (or 4 in the AVX2 case, 8 in the AVX512
+ * case) being processed at a time.
+ *
+ * Each SHA512_HASH_CTX must be initialized before first use by the hash_ctx_init macro
+ * defined in multi_buffer.h. After initialization, the application may begin computing
+ * a hash by giving the SHA512_HASH_CTX to a SHA512_HASH_CTX_MGR using the submit functions
+ * sha512_ctx_mgr_submit*() with the HASH_FIRST flag set. When the SHA512_HASH_CTX is
+ * returned to the application (via this or a later call to sha512_ctx_mgr_submit*() or
+ * sha512_ctx_mgr_flush*()), the application can then re-submit it with another call to
+ * sha512_ctx_mgr_submit*(), but without the HASH_FIRST flag set.
+ *
+ * Ideally, on the last buffer for that hash, sha512_ctx_mgr_submit_sse is called with
+ * HASH_LAST, although it is also possible to submit the hash with HASH_LAST and a zero
+ * length if necessary. When a SHA512_HASH_CTX is returned after having been submitted with
+ * HASH_LAST, it will contain a valid hash. The SHA512_HASH_CTX can be reused immediately
+ * by submitting with HASH_FIRST.
+ *
+ * For example, you would submit hashes with the following flags for the following numbers
+ * of buffers:
+ * <ul>
+ * <li> one buffer: HASH_FIRST | HASH_LAST (or, equivalently, HASH_ENTIRE)
+ * <li> two buffers: HASH_FIRST, HASH_LAST
+ * <li> three buffers: HASH_FIRST, HASH_UPDATE, HASH_LAST
+ * etc.
+ * </ul>
+ *
+ * The order in which SHA512_CTX objects are returned is in general different from the order
+ * in which they are submitted.
+ *
+ * A few possible error conditions exist:
+ * <ul>
+ * <li> Submitting flags other than the allowed entire/first/update/last values
+ * <li> Submitting a context that is currently being managed by a SHA512_HASH_CTX_MGR. (Note:
+ * This error case is not applicable to the single buffer SSE4 version)
+ * <li> Submitting a context after HASH_LAST is used but before HASH_FIRST is set.
+ * </ul>
+ *
+ * These error conditions are reported by returning the SHA512_HASH_CTX immediately after
+ * a submit with its error member set to a non-zero error code (defined in
+ * multi_buffer.h). No changes are made to the SHA512_HASH_CTX_MGR in the case of an
+ * error; no processing is done for other hashes.
+ *
+ */
+
+#include <stdint.h>
+#include "multi_buffer.h"
+#include "types.h"
+
+#ifndef _MSC_VER
+#include <stdbool.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Hash Constants and Typedefs
+#define SHA512_DIGEST_NWORDS 8
+#define SHA512_MAX_LANES 8
+#define SHA512_X4_LANES 4
+#define SHA512_MIN_LANES 2
+#define SHA512_BLOCK_SIZE 128
+#define SHA512_LOG2_BLOCK_SIZE 7
+#define SHA512_PADLENGTHFIELD_SIZE 16
+#define SHA512_INITIAL_DIGEST \
+ 0x6a09e667f3bcc908,0xbb67ae8584caa73b,0x3c6ef372fe94f82b,0xa54ff53a5f1d36f1, \
+ 0x510e527fade682d1,0x9b05688c2b3e6c1f,0x1f83d9abfb41bd6b,0x5be0cd19137e2179
+
+
+typedef uint64_t sha512_digest_array[SHA512_DIGEST_NWORDS][SHA512_MAX_LANES];
+typedef uint64_t SHA512_WORD_T;
+
+/** @brief Scheduler layer - Holds info describing a single SHA512 job for the multi-buffer manager */
+
+typedef struct {
+ uint8_t* buffer; //!< pointer to data buffer for this job
+ uint64_t len; //!< length of buffer for this job in blocks.
+ DECLARE_ALIGNED(uint64_t result_digest[SHA512_DIGEST_NWORDS], 64);
+ JOB_STS status; //!< output job status
+ void* user_data; //!< pointer for user's job-related data
+} SHA512_JOB;
+
+/** @brief Scheduler layer - Holds arguments for submitted SHA512 job */
+
+typedef struct {
+ sha512_digest_array digest;
+ uint8_t* data_ptr[SHA512_MAX_LANES];
+} SHA512_MB_ARGS_X8;
+
+/** @brief Scheduler layer - Lane data */
+
+typedef struct {
+ SHA512_JOB *job_in_lane;
+} SHA512_LANE_DATA;
+
+/** @brief Scheduler layer - Holds state for multi-buffer SHA512 jobs */
+
+typedef struct {
+ SHA512_MB_ARGS_X8 args;
+ uint64_t lens[SHA512_MAX_LANES];
+ uint64_t unused_lanes; //!< each byte is index (00, 01 or 00...03) of unused lanes, byte 2 or 4 is set to FF as a flag
+ SHA512_LANE_DATA ldata[SHA512_MAX_LANES];
+ uint32_t num_lanes_inuse;
+} SHA512_MB_JOB_MGR;
+
+/** @brief Context layer - Holds state for multi-buffer SHA512 jobs */
+
+typedef struct {
+ SHA512_MB_JOB_MGR mgr;
+} SHA512_HASH_CTX_MGR;
+
+/** @brief Context layer - Holds info describing a single SHA512 job for the multi-buffer CTX manager */
+
+typedef struct {
+ SHA512_JOB job; // Must be at struct offset 0.
+ HASH_CTX_STS status; //!< Context status flag
+ HASH_CTX_ERROR error; //!< Context error flag
+ uint32_t total_length; //!< Running counter of length processed for this CTX's job
+ const void* incoming_buffer; //!< pointer to data input buffer for this CTX's job
+ uint32_t incoming_buffer_length; //!< length of buffer for this job in bytes.
+ uint8_t partial_block_buffer[SHA512_BLOCK_SIZE * 2]; //!< CTX partial blocks
+ uint32_t partial_block_buffer_length;
+ void* user_data; //!< pointer for user to keep any job-related data
+} SHA512_HASH_CTX;
+
+/*******************************************************************
+ * Context level API function prototypes
+ ******************************************************************/
+
+/**
+ * @brief Initialize the context level SHA512 multi-buffer manager structure.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha512_ctx_mgr_init_sse (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA512 job to the context level multi-buffer manager.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_submit_sse (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA512 jobs and return when complete.
+ * @requires SSE4.1
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_flush_sse (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA512 multi-buffer manager structure.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha512_ctx_mgr_init_avx (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA512 job to the multi-buffer manager.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA512 jobs and return when complete.
+ * @requires AVX
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA512 multi-buffer manager structure.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha512_ctx_mgr_init_avx2 (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA512 job to the multi-buffer manager.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx2 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA512 jobs and return when complete.
+ * @requires AVX2
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx2 (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA512 multi-buffer manager structure.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha512_ctx_mgr_init_avx512 (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA512 job to the multi-buffer manager.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_submit_avx512 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA512 jobs and return when complete.
+ * @requires AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_flush_avx512 (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Initialize the SHA512 multi-buffer manager structure.
+ * @requires SSE4
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha512_ctx_mgr_init_sb_sse4 (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA512 job to the multi-buffer manager.
+ * @requires SSE4
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_submit_sb_sse4 (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA512 jobs and return when complete.
+ * @requires SSE4
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_flush_sb_sse4 (SHA512_HASH_CTX_MGR* mgr);
+
+/******************** multibinary function prototypes **********************/
+
+/**
+ * @brief Initialize the SHA512 multi-buffer manager structure.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns void
+ */
+void sha512_ctx_mgr_init (SHA512_HASH_CTX_MGR* mgr);
+
+/**
+ * @brief Submit a new SHA512 job to the multi-buffer manager.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @param ctx Structure holding ctx job info
+ * @param buffer Pointer to buffer to be processed
+ * @param len Length of buffer (in bytes) to be processed
+ * @param flags Input flag specifying job type (first, update, last or entire)
+ * @returns NULL if no jobs complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_submit (SHA512_HASH_CTX_MGR* mgr, SHA512_HASH_CTX* ctx,
+ const void* buffer, uint32_t len, HASH_CTX_FLAG flags);
+
+/**
+ * @brief Finish all submitted SHA512 jobs and return when complete.
+ * @requires SSE4.1 or AVX or AVX2 or AVX512
+ *
+ * @param mgr Structure holding context level state info
+ * @returns NULL if no jobs to complete or pointer to jobs structure.
+ */
+SHA512_HASH_CTX* sha512_ctx_mgr_flush (SHA512_HASH_CTX_MGR* mgr);
+
+/*******************************************************************
+ * Scheduler (internal) level out-of-order function prototypes
+ ******************************************************************/
+
+void sha512_mb_mgr_init_sse (SHA512_MB_JOB_MGR *state);
+SHA512_JOB* sha512_mb_mgr_submit_sse (SHA512_MB_JOB_MGR *state, SHA512_JOB* job);
+SHA512_JOB* sha512_mb_mgr_flush_sse (SHA512_MB_JOB_MGR *state);
+
+#define sha512_mb_mgr_init_avx sha512_mb_mgr_init_sse
+SHA512_JOB* sha512_mb_mgr_submit_avx (SHA512_MB_JOB_MGR *state, SHA512_JOB* job);
+SHA512_JOB* sha512_mb_mgr_flush_avx (SHA512_MB_JOB_MGR *state);
+
+void sha512_mb_mgr_init_avx2 (SHA512_MB_JOB_MGR *state);
+SHA512_JOB* sha512_mb_mgr_submit_avx2 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job);
+SHA512_JOB* sha512_mb_mgr_flush_avx2 (SHA512_MB_JOB_MGR *state);
+
+void sha512_mb_mgr_init_avx512 (SHA512_MB_JOB_MGR *state);
+SHA512_JOB* sha512_mb_mgr_submit_avx512 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job);
+SHA512_JOB* sha512_mb_mgr_flush_avx512 (SHA512_MB_JOB_MGR *state);
+
+// Single buffer SHA512 APIs, optimized for SLM.
+void sha512_sse4 (const void* M, void* D, uint64_t L);
+// Note that these APIs comply with multi-buffer APIs' high level usage
+void sha512_sb_mgr_init_sse4 (SHA512_MB_JOB_MGR *state);
+SHA512_JOB* sha512_sb_mgr_submit_sse4 (SHA512_MB_JOB_MGR *state, SHA512_JOB* job);
+SHA512_JOB* sha512_sb_mgr_flush_sse4 (SHA512_MB_JOB_MGR *state);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _SHA512_MB_H_
+
+
diff --git a/src/crypto/isa-l/isa-l_crypto/include/test.h b/src/crypto/isa-l/isa-l_crypto/include/test.h
new file mode 100644
index 000000000..41a21626e
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/test.h
@@ -0,0 +1,81 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+#ifndef _TEST_H
+#define _TEST_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Use sys/time.h functions for time
+
+#include <sys/time.h>
+
+struct perf{
+ struct timeval tv;
+};
+
+
+inline int perf_start(struct perf *p)
+{
+ return gettimeofday(&(p->tv), 0);
+}
+inline int perf_stop(struct perf *p)
+{
+ return gettimeofday(&(p->tv), 0);
+}
+
+inline void perf_print(struct perf stop, struct perf start, long long dsize)
+{
+ long long secs = stop.tv.tv_sec - start.tv.tv_sec;
+ long long usecs = secs * 1000000 + stop.tv.tv_usec - start.tv.tv_usec;
+
+ printf("runtime = %10lld usecs", usecs);
+ if (dsize != 0) {
+#if 1 // not bug in printf for 32-bit
+ printf(", bandwidth %lld MB in %.4f sec = %.2f MB/s\n", dsize/(1024*1024),
+ ((double) usecs)/1000000, ((double) dsize) / (double)usecs);
+#else
+ printf(", bandwidth %lld MB ", dsize/(1024*1024));
+ printf("in %.4f sec ",(double)usecs/1000000);
+ printf("= %.2f MB/s\n", (double)dsize/usecs);
+#endif
+ }
+ else
+ printf("\n");
+}
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _TEST_H
diff --git a/src/crypto/isa-l/isa-l_crypto/include/types.h b/src/crypto/isa-l/isa-l_crypto/include/types.h
new file mode 100644
index 000000000..caf3bac59
--- /dev/null
+++ b/src/crypto/isa-l/isa-l_crypto/include/types.h
@@ -0,0 +1,71 @@
+/**********************************************************************
+ Copyright(c) 2011-2016 Intel Corporation All rights reserved.
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in
+ the documentation and/or other materials provided with the
+ distribution.
+ * Neither the name of Intel Corporation nor the names of its
+ contributors may be used to endorse or promote products derived
+ from this software without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+**********************************************************************/
+
+
+/**
+ * @file types.h
+ * @brief Defines common align and debug macros
+ *
+ */
+
+#ifndef __TYPES_H
+#define __TYPES_H
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+#if defined __unix__ || defined __APPLE__
+# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+# define __forceinline static inline
+# define aligned_free(x) free(x)
+#else
+# ifdef __MINGW32__
+# define DECLARE_ALIGNED(decl, alignval) decl __attribute__((aligned(alignval)))
+# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
+# define aligned_free(x) _aligned_free(x)
+# else
+# define DECLARE_ALIGNED(decl, alignval) __declspec(align(alignval)) decl
+# define posix_memalign(p, algn, len) (NULL == (*((char**)(p)) = (void*) _aligned_malloc(len, algn)))
+# define aligned_free(x) _aligned_free(x)
+# endif
+#endif
+
+#ifdef DEBUG
+# define DEBUG_PRINT(x) printf x
+#else
+# define DEBUG_PRINT(x) do {} while (0)
+#endif
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif //__TYPES_H