diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c')
-rwxr-xr-x | src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c | 554 |
1 files changed, 554 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c b/src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c new file mode 100755 index 000000000..5a4eb98c5 --- /dev/null +++ b/src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c @@ -0,0 +1,554 @@ +/******************************************************************************* + Copyright (c) 2009-2019, Intel Corporation + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + * Neither the name of Intel Corporation nor the names of its contributors + may be used to endorse or promote products derived from this software + without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE + FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*******************************************************************************/ + +/*----------------------------------------------------------------------- +* zuc_sse.c +*----------------------------------------------------------------------- +* An implementation of ZUC, the core algorithm for the +* 3GPP Confidentiality and Integrity algorithms. +* +*-----------------------------------------------------------------------*/ + +#include <string.h> + +#include "include/zuc_internal.h" +#include "include/wireless_common.h" +#include "include/save_xmms.h" +#include "include/clear_regs_mem.h" +#include "intel-ipsec-mb.h" + +#define SAVE_XMMS save_xmms +#define RESTORE_XMMS restore_xmms +#define CLEAR_SCRATCH_SIMD_REGS clear_scratch_xmms_sse + +static inline +void _zuc_eea3_1_buffer_sse(const void *pKey, + const void *pIv, + const void *pBufferIn, + void *pBufferOut, + const uint32_t length) +{ + DECLARE_ALIGNED(ZucState_t zucState, 64); + DECLARE_ALIGNED(uint8_t keyStream[64], 64); + /* buffer to store 64 bytes of keystream */ + DECLARE_ALIGNED(uint8_t tempSrc[64], 64); + DECLARE_ALIGNED(uint8_t tempDst[64], 64); + + const uint64_t *pIn64 = NULL; + const uint8_t *pIn8 = NULL; + uint8_t *pOut8 = NULL; + uint64_t *pOut64 = NULL, *pKeyStream64 = NULL; + uint64_t *pTemp64 = NULL, *pdstTemp64 = NULL; + + uint32_t numKeyStreamsPerPkt = length/ ZUC_KEYSTR_LEN; + uint32_t numBytesLeftOver = length % ZUC_KEYSTR_LEN; + + /* need to set the LFSR state to zero */ + memset(&zucState, 0, sizeof(ZucState_t)); + + /* initialize the zuc state */ + asm_ZucInitialization(pKey, pIv, &(zucState)); + + /* Loop Over all the Quad-Words in input buffer and XOR with the 64bits + * of generated keystream */ + pOut64 = (uint64_t *) pBufferOut; + pIn64 = (const uint64_t *) pBufferIn; + + while (numKeyStreamsPerPkt--) { + /* Generate the key stream 64 bytes at a time */ + asm_ZucGenKeystream64B((uint32_t *) &keyStream[0], &zucState); + + /* XOR The Keystream generated with the input buffer here */ + pKeyStream64 = (uint64_t *) keyStream; + asm_XorKeyStream64B_sse(pIn64, pOut64, pKeyStream64); + pIn64 += 8; + pOut64 += 8; + } + + /* Check for remaining 0 to 63 bytes */ + pIn8 = (const uint8_t *) pBufferIn; + pOut8 = (uint8_t *) pBufferOut; + if(numBytesLeftOver) { + asm_ZucGenKeystream64B((uint32_t *) &keyStream[0], &zucState); + + /* copy the remaining bytes into temporary buffer and XOR with + * the 64-bytes of keystream. Then copy on the valid bytes back + * to the output buffer */ + + memcpy(&tempSrc[0], &pIn8[length - numBytesLeftOver], + numBytesLeftOver); + pKeyStream64 = (uint64_t *) &keyStream[0]; + pTemp64 = (uint64_t *) &tempSrc[0]; + pdstTemp64 = (uint64_t *) &tempDst[0]; + + asm_XorKeyStream64B_sse(pTemp64, pdstTemp64, pKeyStream64); + memcpy(&pOut8[length - numBytesLeftOver], &tempDst[0], + numBytesLeftOver); + + } +#ifdef SAFE_DATA + /* Clear sensitive data in stack */ + clear_mem(keyStream, sizeof(keyStream)); + clear_mem(&zucState, sizeof(zucState)); +#endif +} + +static inline +void _zuc_eea3_4_buffer_sse(const void * const pKey[4], + const void * const pIv[4], + const void * const pBufferIn[4], + void *pBufferOut[4], + const uint32_t length[4]) +{ + + DECLARE_ALIGNED(ZucState4_t state, 64); + DECLARE_ALIGNED(ZucState_t singlePktState, 64); + + unsigned int i = 0; + /* Calculate the minimum input packet size */ + uint32_t bytes1 = (length[0] < length[1] ? + length[0] : length[1]); + uint32_t bytes2 = (length[2] < length[3] ? + length[2] : length[3]); + /* min number of bytes */ + uint32_t bytes = (bytes1 < bytes2) ? bytes1 : bytes2; + uint32_t numKeyStreamsPerPkt = bytes/ZUC_KEYSTR_LEN; + uint32_t remainBytes[4] = {0}; + DECLARE_ALIGNED(uint8_t keyStr1[64], 64); + DECLARE_ALIGNED(uint8_t keyStr2[64], 64); + DECLARE_ALIGNED(uint8_t keyStr3[64], 64); + DECLARE_ALIGNED(uint8_t keyStr4[64], 64); + DECLARE_ALIGNED(uint8_t tempSrc[64], 64); + DECLARE_ALIGNED(uint8_t tempDst[64], 64); + /* structure to store the 4 keys */ + DECLARE_ALIGNED(ZucKey4_t keys, 64); + /* structure to store the 4 IV's */ + DECLARE_ALIGNED(ZucIv4_t ivs, 64); + uint32_t numBytesLeftOver = 0; + const uint8_t *pTempBufInPtr = NULL; + uint8_t *pTempBufOutPtr = NULL; + + const uint64_t *pIn64_0 = NULL; + const uint64_t *pIn64_1 = NULL; + const uint64_t *pIn64_2 = NULL; + const uint64_t *pIn64_3 = NULL; + uint64_t *pOut64_0 = NULL; + uint64_t *pOut64_1 = NULL; + uint64_t *pOut64_2 = NULL; + uint64_t *pOut64_3 = NULL; + uint64_t *pTempSrc64 = NULL; + uint64_t *pTempDst64 = NULL; + uint64_t *pKeyStream64 = NULL; + + /* rounded down minimum length */ + bytes = numKeyStreamsPerPkt * ZUC_KEYSTR_LEN; + + /* Need to set the LFSR state to zero */ + memset(&state, 0, sizeof(ZucState4_t)); + + /* Calculate the number of bytes left over for each packet */ + for (i=0; i< 4; i++) + remainBytes[i] = length[i] - bytes; + + /* Setup the Keys */ + keys.pKey1 = pKey[0]; + keys.pKey2 = pKey[1]; + keys.pKey3 = pKey[2]; + keys.pKey4 = pKey[3]; + + /* setup the IV's */ + ivs.pIv1 = pIv[0]; + ivs.pIv2 = pIv[1]; + ivs.pIv3 = pIv[2]; + ivs.pIv4 = pIv[3]; + + asm_ZucInitialization_4_sse( &keys, &ivs, &state); + + pOut64_0 = (uint64_t *) pBufferOut[0]; + pOut64_1 = (uint64_t *) pBufferOut[1]; + pOut64_2 = (uint64_t *) pBufferOut[2]; + pOut64_3 = (uint64_t *) pBufferOut[3]; + + pIn64_0 = (const uint64_t *) pBufferIn[0]; + pIn64_1 = (const uint64_t *) pBufferIn[1]; + pIn64_2 = (const uint64_t *) pBufferIn[2]; + pIn64_3 = (const uint64_t *) pBufferIn[3]; + + /* Loop for 64 bytes at a time generating 4 key-streams per loop */ + while (numKeyStreamsPerPkt) { + /* Generate 64 bytes at a time */ + asm_ZucGenKeystream64B_4_sse(&state, + (uint32_t *) keyStr1, + (uint32_t *) keyStr2, + (uint32_t *) keyStr3, + (uint32_t *) keyStr4); + + /* XOR the KeyStream with the input buffers and store in output + * buffer*/ + pKeyStream64 = (uint64_t *) keyStr1; + asm_XorKeyStream64B_sse(pIn64_0, pOut64_0, pKeyStream64); + pIn64_0 += 8; + pOut64_0 += 8; + + pKeyStream64 = (uint64_t *) keyStr2; + asm_XorKeyStream64B_sse(pIn64_1, pOut64_1, pKeyStream64); + pIn64_1 += 8; + pOut64_1 += 8; + + pKeyStream64 = (uint64_t *) keyStr3; + asm_XorKeyStream64B_sse(pIn64_2, pOut64_2, pKeyStream64); + pIn64_2 += 8; + pOut64_2 += 8; + + pKeyStream64 = (uint64_t *) keyStr4; + asm_XorKeyStream64B_sse(pIn64_3, pOut64_3, pKeyStream64); + pIn64_3 += 8; + pOut64_3 += 8; + + /* Update keystream count */ + numKeyStreamsPerPkt--; + + } + + /* process each packet separately for the remaining bytes */ + for (i = 0; i < 4; i++) { + if (remainBytes[i]) { + /* need to copy the zuc state to single packet state */ + singlePktState.lfsrState[0] = state.lfsrState[0][i]; + singlePktState.lfsrState[1] = state.lfsrState[1][i]; + singlePktState.lfsrState[2] = state.lfsrState[2][i]; + singlePktState.lfsrState[3] = state.lfsrState[3][i]; + singlePktState.lfsrState[4] = state.lfsrState[4][i]; + singlePktState.lfsrState[5] = state.lfsrState[5][i]; + singlePktState.lfsrState[6] = state.lfsrState[6][i]; + singlePktState.lfsrState[7] = state.lfsrState[7][i]; + singlePktState.lfsrState[8] = state.lfsrState[8][i]; + singlePktState.lfsrState[9] = state.lfsrState[9][i]; + singlePktState.lfsrState[10] = state.lfsrState[10][i]; + singlePktState.lfsrState[11] = state.lfsrState[11][i]; + singlePktState.lfsrState[12] = state.lfsrState[12][i]; + singlePktState.lfsrState[13] = state.lfsrState[13][i]; + singlePktState.lfsrState[14] = state.lfsrState[14][i]; + singlePktState.lfsrState[15] = state.lfsrState[15][i]; + + singlePktState.fR1 = state.fR1[i]; + singlePktState.fR2 = state.fR2[i]; + + singlePktState.bX0 = state.bX0[i]; + singlePktState.bX1 = state.bX1[i]; + singlePktState.bX2 = state.bX2[i]; + singlePktState.bX3 = state.bX3[i]; + + numKeyStreamsPerPkt = remainBytes[i] / ZUC_KEYSTR_LEN; + numBytesLeftOver = remainBytes[i] % ZUC_KEYSTR_LEN; + + pTempBufInPtr = pBufferIn[i]; + pTempBufOutPtr = pBufferOut[i]; + + /* update the output and input pointers here to point + * to the i'th buffers */ + pOut64_0 = (uint64_t *) &pTempBufOutPtr[length[i] - + remainBytes[i]]; + pIn64_0 = (const uint64_t *) &pTempBufInPtr[length[i] - + remainBytes[i]]; + + while (numKeyStreamsPerPkt--) { + /* Generate the key stream 64 bytes at a time */ + asm_ZucGenKeystream64B((uint32_t *) keyStr1, + &singlePktState); + pKeyStream64 = (uint64_t *) keyStr1; + asm_XorKeyStream64B_sse(pIn64_0, pOut64_0, + pKeyStream64); + pIn64_0 += 8; + pOut64_0 += 8; + } + + + /* Check for remaining 0 to 63 bytes */ + if (numBytesLeftOver) { + asm_ZucGenKeystream64B((uint32_t *) &keyStr1, + &singlePktState); + uint32_t offset = length[i] - numBytesLeftOver; + + /* copy the remaining bytes into temporary + * buffer and XOR with the 64-bytes of + * keystream. Then copy on the valid bytes back + * to the output buffer */ + memcpy(&tempSrc[0], &pTempBufInPtr[offset], + numBytesLeftOver); + memset(&tempSrc[numBytesLeftOver], 0, + 64 - numBytesLeftOver); + + pKeyStream64 = (uint64_t *) &keyStr1[0]; + pTempSrc64 = (uint64_t *) &tempSrc[0]; + pTempDst64 = (uint64_t *) &tempDst[0]; + asm_XorKeyStream64B_sse(pTempSrc64, pTempDst64, + pKeyStream64); + + memcpy(&pTempBufOutPtr[offset], + &tempDst[0], numBytesLeftOver); + } + } + } +#ifdef SAFE_DATA + /* Clear sensitive data in stack */ + clear_mem(keyStr1, sizeof(keyStr1)); + clear_mem(keyStr2, sizeof(keyStr2)); + clear_mem(keyStr3, sizeof(keyStr3)); + clear_mem(keyStr4, sizeof(keyStr4)); + clear_mem(&singlePktState, sizeof(singlePktState)); + clear_mem(&state, sizeof(state)); + clear_mem(&keys, sizeof(keys)); + clear_mem(&ivs, sizeof(ivs)); +#endif +} + +void zuc_eea3_1_buffer_sse(const void *pKey, + const void *pIv, + const void *pBufferIn, + void *pBufferOut, + const uint32_t length) +{ +#ifndef LINUX + DECLARE_ALIGNED(uint128_t xmm_save[10], 16); + + SAVE_XMMS(xmm_save); +#endif +#ifdef SAFE_PARAM + /* Check for NULL pointers */ + if (pKey == NULL || pIv == NULL || pBufferIn == NULL || + pBufferOut == NULL) + return; + + /* Check input data is in range of supported length */ + if (length < ZUC_MIN_LEN || length > ZUC_MAX_LEN) + return; +#endif + + _zuc_eea3_1_buffer_sse(pKey, pIv, pBufferIn, pBufferOut, length); + +#ifdef SAFE_DATA + /* Clear sensitive data in registers */ + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif +#ifndef LINUX + RESTORE_XMMS(xmm_save); +#endif +} + +void zuc_eea3_4_buffer_sse(const void * const pKey[4], + const void * const pIv[4], + const void * const pBufferIn[4], + void *pBufferOut[4], + const uint32_t length[4]) +{ +#ifndef LINUX + DECLARE_ALIGNED(uint128_t xmm_save[10], 16); + + SAVE_XMMS(xmm_save); +#endif +#ifdef SAFE_PARAM + unsigned int i; + + /* Check for NULL pointers */ + if (pKey == NULL || pIv == NULL || pBufferIn == NULL || + pBufferOut == NULL || length == NULL) + return; + + for (i = 0; i < 4; i++) { + if (pKey[i] == NULL || pIv[i] == NULL || + pBufferIn[i] == NULL || pBufferOut[i] == NULL) + return; + + /* Check input data is in range of supported length */ + if (length[i] < ZUC_MIN_LEN || length[i] > ZUC_MAX_LEN) + return; + } +#endif + + _zuc_eea3_4_buffer_sse(pKey, pIv, pBufferIn, pBufferOut, length); + +#ifdef SAFE_DATA + /* Clear sensitive data in registers */ + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif +#ifndef LINUX + RESTORE_XMMS(xmm_save); +#endif +} + + +void zuc_eea3_n_buffer_sse(const void * const pKey[], const void * const pIv[], + const void * const pBufferIn[], void *pBufferOut[], + const uint32_t length[], + const uint32_t numBuffers) +{ +#ifndef LINUX + DECLARE_ALIGNED(uint128_t xmm_save[10], 16); + + SAVE_XMMS(xmm_save); +#endif + + unsigned int i; + unsigned int packetCount = numBuffers; + +#ifdef SAFE_PARAM + /* Check for NULL pointers */ + if (pKey == NULL || pIv == NULL || pBufferIn == NULL || + pBufferOut == NULL || length == NULL) + return; + + for (i = 0; i < numBuffers; i++) { + if (pKey[i] == NULL || pIv[i] == NULL || + pBufferIn[i] == NULL || pBufferOut[i] == NULL) + return; + + /* Check input data is in range of supported length */ + if (length[i] < ZUC_MIN_LEN || length[i] > ZUC_MAX_LEN) + return; + } +#endif + i = 0; + + while(packetCount >= 4) { + packetCount -=4; + _zuc_eea3_4_buffer_sse(&pKey[i], + &pIv[i], + &pBufferIn[i], + &pBufferOut[i], + &length[i]); + i+=4; + } + + while(packetCount--) { + _zuc_eea3_1_buffer_sse(pKey[i], + pIv[i], + pBufferIn[i], + pBufferOut[i], + length[i]); + i++; + } + +#ifdef SAFE_DATA + /* Clear sensitive data in registers */ + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif +#ifndef LINUX + RESTORE_XMMS(xmm_save); +#endif +} + +static inline uint64_t rotate_left(uint64_t u, size_t r) +{ + return (((u) << (r)) | ((u) >> (64 - (r)))); +} + +static inline uint64_t load_uint64(const void *ptr) +{ + return *((const uint64_t *)ptr); +} + +void zuc_eia3_1_buffer_sse(const void *pKey, + const void *pIv, + const void *pBufferIn, + const uint32_t lengthInBits, + uint32_t *pMacI) +{ +#ifndef LINUX + DECLARE_ALIGNED(uint128_t xmm_save[10], 16); + + SAVE_XMMS(xmm_save); +#endif + DECLARE_ALIGNED(ZucState_t zucState, 64); + DECLARE_ALIGNED(uint32_t keyStream[16 * 2], 64); + const uint32_t keyStreamLengthInBits = ZUC_KEYSTR_LEN * 8; + /* generate a key-stream 2 words longer than the input message */ + const uint32_t N = lengthInBits + (2 * ZUC_WORD); + uint32_t L = (N + 31) / ZUC_WORD; + uint32_t *pZuc = (uint32_t *) &keyStream[0]; + uint32_t remainingBits = lengthInBits; + uint32_t T = 0; + const uint8_t *pIn8 = (const uint8_t *) pBufferIn; + +#ifdef SAFE_PARAM + /* Check for NULL pointers */ + if (pKey == NULL || pIv == NULL || pBufferIn == NULL || pMacI == NULL) + return; + + /* Check input data is in range of supported length */ + if (lengthInBits < ZUC_MIN_LEN || lengthInBits > ZUC_MAX_LEN) + return; +#endif + + memset(&zucState, 0, sizeof(ZucState_t)); + + asm_ZucInitialization(pKey, pIv, &(zucState)); + asm_ZucGenKeystream64B(pZuc, &zucState); + + /* loop over the message bits */ + while (remainingBits >= keyStreamLengthInBits) { + remainingBits -= keyStreamLengthInBits; + L -= (keyStreamLengthInBits / 32); + + /* Generate the next key stream 8 bytes or 64 bytes */ + if (!remainingBits) + asm_ZucGenKeystream8B(&keyStream[16], &zucState); + else + asm_ZucGenKeystream64B(&keyStream[16], &zucState); + T = asm_Eia3Round64BSSE(T, &keyStream[0], pIn8); + memcpy(&keyStream[0], &keyStream[16], 16 * sizeof(uint32_t)); + pIn8 = &pIn8[ZUC_KEYSTR_LEN]; + } + + /* + * If remaining bits has more than 14 ZUC WORDS (double words), + * keystream needs to have up to another 2 ZUC WORDS (8B) + */ + if (remainingBits > (14 * 32)) + asm_ZucGenKeystream8B(&keyStream[16], &zucState); + T ^= asm_Eia3RemainderSSE(&keyStream[0], pIn8, remainingBits); + T ^= rotate_left(load_uint64(&keyStream[remainingBits / 32]), + remainingBits % 32); + + /* save the final MAC-I result */ + uint32_t keyBlock = keyStream[L - 1]; + *pMacI = bswap4(T ^ keyBlock); + +#ifdef SAFE_DATA + /* Clear sensitive data (in registers and stack) */ + clear_mem(keyStream, sizeof(keyStream)); + clear_mem(&zucState, sizeof(zucState)); + CLEAR_SCRATCH_GPS(); + CLEAR_SCRATCH_SIMD_REGS(); +#endif +#ifndef LINUX + RESTORE_XMMS(xmm_save); +#endif +} |