summaryrefslogtreecommitdiffstats
path: root/src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c')
-rwxr-xr-xsrc/spdk/intel-ipsec-mb/sse/zuc_sse_top.c554
1 files changed, 554 insertions, 0 deletions
diff --git a/src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c b/src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c
new file mode 100755
index 000000000..5a4eb98c5
--- /dev/null
+++ b/src/spdk/intel-ipsec-mb/sse/zuc_sse_top.c
@@ -0,0 +1,554 @@
+/*******************************************************************************
+ Copyright (c) 2009-2019, Intel Corporation
+
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions are met:
+
+ * Redistributions of source code must retain the above copyright notice,
+ this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of Intel Corporation nor the names of its contributors
+ may be used to endorse or promote products derived from this software
+ without specific prior written permission.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*******************************************************************************/
+
+/*-----------------------------------------------------------------------
+* zuc_sse_top.c
+*-----------------------------------------------------------------------
+* An implementation of ZUC, the core algorithm for the
+* 3GPP Confidentiality and Integrity algorithms.
+*
+*-----------------------------------------------------------------------*/
+
+#include <string.h>
+
+#include "include/zuc_internal.h"
+#include "include/wireless_common.h"
+#include "include/save_xmms.h"
+#include "include/clear_regs_mem.h"
+#include "intel-ipsec-mb.h"
+
+/*
+ * Local aliases for the SSE register helpers used by the public entry points
+ * in this file: SAVE_XMMS/RESTORE_XMMS bracket each entry point when LINUX is
+ * not defined, and CLEAR_SCRATCH_SIMD_REGS is invoked when SAFE_DATA is
+ * defined.
+ */
+#define SAVE_XMMS save_xmms
+#define RESTORE_XMMS restore_xmms
+#define CLEAR_SCRATCH_SIMD_REGS clear_scratch_xmms_sse
+
+/*
+ * XOR a single buffer with ZUC keystream (same operation for encrypt and
+ * decrypt).
+ *
+ * The ZUC state is zeroed, initialised from pKey/pIv and then used to
+ * generate keystream 64 bytes at a time. Whole 64-byte blocks are XORed
+ * directly between the caller's buffers. A trailing partial block is staged
+ * through zero-padded stack buffers so that only the valid bytes are read
+ * from pBufferIn and written to pBufferOut.
+ *
+ * pKey       - key handed to asm_ZucInitialization()
+ * pIv        - IV handed to asm_ZucInitialization()
+ * pBufferIn  - input data, length bytes
+ * pBufferOut - output data, length bytes
+ * length     - number of bytes to process
+ */
+static inline
+void _zuc_eea3_1_buffer_sse(const void *pKey,
+                            const void *pIv,
+                            const void *pBufferIn,
+                            void *pBufferOut,
+                            const uint32_t length)
+{
+        DECLARE_ALIGNED(ZucState_t zucState, 64);
+        /* buffer to store 64 bytes of keystream */
+        DECLARE_ALIGNED(uint8_t keyStream[64], 64);
+        /* staging buffers for the final partial block */
+        DECLARE_ALIGNED(uint8_t tempSrc[64], 64);
+        DECLARE_ALIGNED(uint8_t tempDst[64], 64);
+
+        const uint8_t *pIn8 = (const uint8_t *) pBufferIn;
+        uint8_t *pOut8 = (uint8_t *) pBufferOut;
+        const uint64_t *pIn64 = (const uint64_t *) pBufferIn;
+        uint64_t *pOut64 = (uint64_t *) pBufferOut;
+
+        uint32_t numKeyStreamsPerPkt = length / ZUC_KEYSTR_LEN;
+        const uint32_t numBytesLeftOver = length % ZUC_KEYSTR_LEN;
+
+        /* need to set the LFSR state to zero */
+        memset(&zucState, 0, sizeof(ZucState_t));
+
+        /* initialize the zuc state */
+        asm_ZucInitialization(pKey, pIv, &(zucState));
+
+        /* Full blocks: generate 64 bytes of keystream and XOR in place */
+        while (numKeyStreamsPerPkt--) {
+                asm_ZucGenKeystream64B((uint32_t *) &keyStream[0], &zucState);
+                asm_XorKeyStream64B_sse(pIn64, pOut64,
+                                        (uint64_t *) keyStream);
+                pIn64 += 8;
+                pOut64 += 8;
+        }
+
+        /* Remaining 1 to 63 bytes, if any */
+        if (numBytesLeftOver) {
+                const uint32_t offset = length - numBytesLeftOver;
+
+                asm_ZucGenKeystream64B((uint32_t *) &keyStream[0], &zucState);
+
+                /* Copy the tail into a temporary buffer and zero the rest so
+                 * the 64-byte XOR never consumes uninitialised stack (this
+                 * matches the 4-buffer implementation). Only the valid bytes
+                 * are copied back to the output buffer. */
+                memcpy(&tempSrc[0], &pIn8[offset], numBytesLeftOver);
+                memset(&tempSrc[numBytesLeftOver], 0,
+                       sizeof(tempSrc) - numBytesLeftOver);
+
+                asm_XorKeyStream64B_sse((uint64_t *) &tempSrc[0],
+                                        (uint64_t *) &tempDst[0],
+                                        (uint64_t *) &keyStream[0]);
+                memcpy(&pOut8[offset], &tempDst[0], numBytesLeftOver);
+        }
+#ifdef SAFE_DATA
+        /* Clear sensitive data in stack. The staging buffers hold input bytes
+         * and keystream-derived bytes, so they are wiped as well. */
+        clear_mem(keyStream, sizeof(keyStream));
+        clear_mem(tempSrc, sizeof(tempSrc));
+        clear_mem(tempDst, sizeof(tempDst));
+        clear_mem(&zucState, sizeof(zucState));
+#endif
+}
+
+/*
+ * XOR four buffers with four independent ZUC keystreams.
+ *
+ * All four lanes are initialised together and processed in lock-step, 64
+ * bytes per lane per iteration, for as many whole 64-byte blocks as the
+ * shortest buffer contains. Whatever is left in each buffer is then finished
+ * one lane at a time: the lane's state is copied out of the 4-lane state into
+ * a single-packet state and the single-buffer keystream generator is used.
+ *
+ * pKey       - four keys
+ * pIv        - four IVs
+ * pBufferIn  - four input buffers
+ * pBufferOut - four output buffers
+ * length     - length in bytes of each buffer
+ */
+static inline
+void _zuc_eea3_4_buffer_sse(const void * const pKey[4],
+                            const void * const pIv[4],
+                            const void * const pBufferIn[4],
+                            void *pBufferOut[4],
+                            const uint32_t length[4])
+{
+        DECLARE_ALIGNED(ZucState4_t state, 64);
+        DECLARE_ALIGNED(ZucState_t singlePktState, 64);
+        DECLARE_ALIGNED(uint8_t keyStr1[64], 64);
+        DECLARE_ALIGNED(uint8_t keyStr2[64], 64);
+        DECLARE_ALIGNED(uint8_t keyStr3[64], 64);
+        DECLARE_ALIGNED(uint8_t keyStr4[64], 64);
+        DECLARE_ALIGNED(uint8_t tempSrc[64], 64);
+        DECLARE_ALIGNED(uint8_t tempDst[64], 64);
+        /* structure to store the 4 keys */
+        DECLARE_ALIGNED(ZucKey4_t keys, 64);
+        /* structure to store the 4 IV's */
+        DECLARE_ALIGNED(ZucIv4_t ivs, 64);
+
+        uint8_t * const keyStr[4] = { keyStr1, keyStr2, keyStr3, keyStr4 };
+        const uint64_t *pIn64[4];
+        uint64_t *pOut64[4];
+        uint32_t remainBytes[4] = {0};
+        unsigned int i = 0;
+        unsigned int j = 0;
+
+        /* Calculate the minimum input packet size */
+        const uint32_t bytes1 = (length[0] < length[1] ?
+                                 length[0] : length[1]);
+        const uint32_t bytes2 = (length[2] < length[3] ?
+                                 length[2] : length[3]);
+        const uint32_t minBytes = (bytes1 < bytes2) ? bytes1 : bytes2;
+        /* number of whole keystream blocks common to all four lanes */
+        const uint32_t numCommonBlocks = minBytes / ZUC_KEYSTR_LEN;
+        /* rounded down minimum length */
+        const uint32_t bytes = numCommonBlocks * ZUC_KEYSTR_LEN;
+        uint32_t blk = 0;
+
+        /* Need to set the LFSR state to zero */
+        memset(&state, 0, sizeof(ZucState4_t));
+
+        /* Per-lane bookkeeping: bytes left after the common part and the
+         * running 64-bit input/output cursors */
+        for (i = 0; i < 4; i++) {
+                remainBytes[i] = length[i] - bytes;
+                pOut64[i] = (uint64_t *) pBufferOut[i];
+                pIn64[i] = (const uint64_t *) pBufferIn[i];
+        }
+
+        /* Setup the Keys */
+        keys.pKey1 = pKey[0];
+        keys.pKey2 = pKey[1];
+        keys.pKey3 = pKey[2];
+        keys.pKey4 = pKey[3];
+
+        /* setup the IV's */
+        ivs.pIv1 = pIv[0];
+        ivs.pIv2 = pIv[1];
+        ivs.pIv3 = pIv[2];
+        ivs.pIv4 = pIv[3];
+
+        asm_ZucInitialization_4_sse(&keys, &ivs, &state);
+
+        /* Loop for 64 bytes at a time generating 4 key-streams per loop */
+        for (blk = 0; blk < numCommonBlocks; blk++) {
+                asm_ZucGenKeystream64B_4_sse(&state,
+                                             (uint32_t *) keyStr1,
+                                             (uint32_t *) keyStr2,
+                                             (uint32_t *) keyStr3,
+                                             (uint32_t *) keyStr4);
+
+                /* XOR each lane's keystream with its input buffer and store
+                 * the result in its output buffer */
+                for (i = 0; i < 4; i++) {
+                        asm_XorKeyStream64B_sse(pIn64[i], pOut64[i],
+                                                (uint64_t *) keyStr[i]);
+                        pIn64[i] += 8;
+                        pOut64[i] += 8;
+                }
+        }
+
+        /* process each packet separately for the remaining bytes */
+        for (i = 0; i < 4; i++) {
+                const uint8_t *pLaneIn = pBufferIn[i];
+                uint8_t *pLaneOut = pBufferOut[i];
+                const uint64_t *pSrc64 = NULL;
+                uint64_t *pDst64 = NULL;
+                uint32_t numBlocks = 0;
+                uint32_t numBytesLeftOver = 0;
+
+                if (!remainBytes[i])
+                        continue;
+
+                /* need to copy the zuc state to single packet state */
+                for (j = 0; j < 16; j++)
+                        singlePktState.lfsrState[j] = state.lfsrState[j][i];
+
+                singlePktState.fR1 = state.fR1[i];
+                singlePktState.fR2 = state.fR2[i];
+
+                singlePktState.bX0 = state.bX0[i];
+                singlePktState.bX1 = state.bX1[i];
+                singlePktState.bX2 = state.bX2[i];
+                singlePktState.bX3 = state.bX3[i];
+
+                numBlocks = remainBytes[i] / ZUC_KEYSTR_LEN;
+                numBytesLeftOver = remainBytes[i] % ZUC_KEYSTR_LEN;
+
+                /* Resume right after the part handled by the 4-lane loop
+                 * (length[i] - remainBytes[i] == bytes) */
+                pDst64 = (uint64_t *) &pLaneOut[bytes];
+                pSrc64 = (const uint64_t *) &pLaneIn[bytes];
+
+                while (numBlocks--) {
+                        /* Generate the key stream 64 bytes at a time */
+                        asm_ZucGenKeystream64B((uint32_t *) keyStr1,
+                                               &singlePktState);
+                        asm_XorKeyStream64B_sse(pSrc64, pDst64,
+                                                (uint64_t *) keyStr1);
+                        pSrc64 += 8;
+                        pDst64 += 8;
+                }
+
+                /* Check for remaining 0 to 63 bytes */
+                if (numBytesLeftOver) {
+                        const uint32_t offset = length[i] - numBytesLeftOver;
+
+                        asm_ZucGenKeystream64B((uint32_t *) keyStr1,
+                                               &singlePktState);
+
+                        /* copy the remaining bytes into a zero-padded
+                         * temporary buffer and XOR with the 64 bytes of
+                         * keystream. Then copy only the valid bytes back
+                         * to the output buffer */
+                        memcpy(&tempSrc[0], &pLaneIn[offset],
+                               numBytesLeftOver);
+                        memset(&tempSrc[numBytesLeftOver], 0,
+                               64 - numBytesLeftOver);
+
+                        asm_XorKeyStream64B_sse((uint64_t *) &tempSrc[0],
+                                                (uint64_t *) &tempDst[0],
+                                                (uint64_t *) &keyStr1[0]);
+
+                        memcpy(&pLaneOut[offset], &tempDst[0],
+                               numBytesLeftOver);
+                }
+        }
+#ifdef SAFE_DATA
+        /* Clear sensitive data in stack. tempSrc holds input bytes and the
+         * zero-padded part of tempDst holds raw keystream, so both staging
+         * buffers are wiped together with the keystream buffers. */
+        clear_mem(keyStr1, sizeof(keyStr1));
+        clear_mem(keyStr2, sizeof(keyStr2));
+        clear_mem(keyStr3, sizeof(keyStr3));
+        clear_mem(keyStr4, sizeof(keyStr4));
+        clear_mem(tempSrc, sizeof(tempSrc));
+        clear_mem(tempDst, sizeof(tempDst));
+        clear_mem(&singlePktState, sizeof(singlePktState));
+        clear_mem(&state, sizeof(state));
+        clear_mem(&keys, sizeof(keys));
+        clear_mem(&ivs, sizeof(ivs));
+#endif
+}
+
+/*
+ * Public single-buffer ZUC-EEA3 entry point.
+ *
+ * Wraps _zuc_eea3_1_buffer_sse() with the build-dependent housekeeping:
+ * XMM save/restore when LINUX is not defined, argument validation when
+ * SAFE_PARAM is defined, and scratch register clearing when SAFE_DATA is
+ * defined. With SAFE_PARAM, invalid arguments make the call return without
+ * touching the output buffer.
+ */
+void zuc_eea3_1_buffer_sse(const void *pKey,
+                           const void *pIv,
+                           const void *pBufferIn,
+                           void *pBufferOut,
+                           const uint32_t length)
+{
+#ifndef LINUX
+        DECLARE_ALIGNED(uint128_t xmm_save[10], 16);
+
+        SAVE_XMMS(xmm_save);
+#endif
+#ifdef SAFE_PARAM
+        /* Every pointer argument must be non-NULL */
+        if (!pKey || !pIv || !pBufferIn || !pBufferOut)
+                return;
+
+        /* Length must lie within [ZUC_MIN_LEN, ZUC_MAX_LEN] */
+        if (!(length >= ZUC_MIN_LEN && length <= ZUC_MAX_LEN))
+                return;
+#endif
+
+        _zuc_eea3_1_buffer_sse(pKey, pIv, pBufferIn, pBufferOut, length);
+
+#ifdef SAFE_DATA
+        /* Do not leave key material behind in registers */
+        CLEAR_SCRATCH_GPS();
+        CLEAR_SCRATCH_SIMD_REGS();
+#endif
+#ifndef LINUX
+        RESTORE_XMMS(xmm_save);
+#endif
+}
+
+/*
+ * Public four-buffer ZUC-EEA3 entry point.
+ *
+ * Wraps _zuc_eea3_4_buffer_sse() with the build-dependent housekeeping:
+ * XMM save/restore when LINUX is not defined, argument validation when
+ * SAFE_PARAM is defined, and scratch register clearing when SAFE_DATA is
+ * defined. With SAFE_PARAM, the call returns without processing anything if
+ * any array or any of its four entries is invalid.
+ */
+void zuc_eea3_4_buffer_sse(const void * const pKey[4],
+                           const void * const pIv[4],
+                           const void * const pBufferIn[4],
+                           void *pBufferOut[4],
+                           const uint32_t length[4])
+{
+#ifndef LINUX
+        DECLARE_ALIGNED(uint128_t xmm_save[10], 16);
+
+        SAVE_XMMS(xmm_save);
+#endif
+#ifdef SAFE_PARAM
+        unsigned int lane;
+
+        /* The arrays themselves must be present */
+        if (!pKey || !pIv || !pBufferIn || !pBufferOut || !length)
+                return;
+
+        for (lane = 0; lane < 4; lane++) {
+                /* Each lane needs a key, an IV and both buffers */
+                if (!pKey[lane] || !pIv[lane] ||
+                    !pBufferIn[lane] || !pBufferOut[lane])
+                        return;
+
+                /* Length must lie within [ZUC_MIN_LEN, ZUC_MAX_LEN] */
+                if (!(length[lane] >= ZUC_MIN_LEN &&
+                      length[lane] <= ZUC_MAX_LEN))
+                        return;
+        }
+#endif
+
+        _zuc_eea3_4_buffer_sse(pKey, pIv, pBufferIn, pBufferOut, length);
+
+#ifdef SAFE_DATA
+        /* Do not leave key material behind in registers */
+        CLEAR_SCRATCH_GPS();
+        CLEAR_SCRATCH_SIMD_REGS();
+#endif
+#ifndef LINUX
+        RESTORE_XMMS(xmm_save);
+#endif
+}
+
+
+/*
+ * Public N-buffer ZUC-EEA3 entry point.
+ *
+ * Buffers are consumed in index order: groups of four go through
+ * _zuc_eea3_4_buffer_sse(), and the final zero to three buffers go through
+ * _zuc_eea3_1_buffer_sse() one at a time. With SAFE_PARAM defined, every
+ * array and every entry is validated first and the call returns without
+ * processing anything if a check fails.
+ */
+void zuc_eea3_n_buffer_sse(const void * const pKey[], const void * const pIv[],
+                           const void * const pBufferIn[], void *pBufferOut[],
+                           const uint32_t length[],
+                           const uint32_t numBuffers)
+{
+#ifndef LINUX
+        DECLARE_ALIGNED(uint128_t xmm_save[10], 16);
+
+        SAVE_XMMS(xmm_save);
+#endif
+
+        unsigned int i;
+        unsigned int packetCount = numBuffers;
+
+#ifdef SAFE_PARAM
+        /* The arrays themselves must be present */
+        if (!pKey || !pIv || !pBufferIn || !pBufferOut || !length)
+                return;
+
+        for (i = 0; i < numBuffers; i++) {
+                /* Each buffer needs a key, an IV and both data pointers */
+                if (!pKey[i] || !pIv[i] ||
+                    !pBufferIn[i] || !pBufferOut[i])
+                        return;
+
+                /* Length must lie within [ZUC_MIN_LEN, ZUC_MAX_LEN] */
+                if (!(length[i] >= ZUC_MIN_LEN && length[i] <= ZUC_MAX_LEN))
+                        return;
+        }
+#endif
+
+        /* Four buffers at a time while at least four remain */
+        for (i = 0; packetCount >= 4; packetCount -= 4, i += 4)
+                _zuc_eea3_4_buffer_sse(&pKey[i],
+                                       &pIv[i],
+                                       &pBufferIn[i],
+                                       &pBufferOut[i],
+                                       &length[i]);
+
+        /* Whatever is left (0 to 3 buffers), one at a time */
+        for (; packetCount != 0; packetCount--, i++)
+                _zuc_eea3_1_buffer_sse(pKey[i],
+                                       pIv[i],
+                                       pBufferIn[i],
+                                       pBufferOut[i],
+                                       length[i]);
+
+#ifdef SAFE_DATA
+        /* Do not leave key material behind in registers */
+        CLEAR_SCRATCH_GPS();
+        CLEAR_SCRATCH_SIMD_REGS();
+#endif
+#ifndef LINUX
+        RESTORE_XMMS(xmm_save);
+#endif
+}
+
+/*
+ * Rotate a 64-bit value left by r bits.
+ *
+ * The rotate count is reduced modulo 64 and both shift counts are kept in
+ * the range 0..63, so r == 0 (and any multiple of 64) returns u unchanged
+ * instead of shifting a 64-bit value by 64 bits, which is undefined in C.
+ */
+static inline uint64_t rotate_left(uint64_t u, size_t r)
+{
+        const unsigned int left = (unsigned int) (r & 63);
+        const unsigned int right = (64 - left) & 63;
+
+        return (u << left) | (u >> right);
+}
+
+/*
+ * Read 8 bytes from ptr as a uint64_t in host byte order.
+ *
+ * memcpy is used instead of dereferencing a cast pointer so the load is
+ * valid for any alignment and does not depend on the effective type of the
+ * object at ptr (the caller passes an address inside a uint32_t array).
+ */
+static inline uint64_t load_uint64(const void *ptr)
+{
+        uint64_t value;
+
+        memcpy(&value, ptr, sizeof(value));
+        return value;
+}
+
+/*
+ * Compute a 32-bit ZUC-EIA3 MAC over a single buffer.
+ *
+ * The ZUC state is zeroed and initialised from pKey/pIv. The message is then
+ * consumed in 64-byte chunks through asm_Eia3Round64BSSE(), with the tail
+ * handled by asm_Eia3RemainderSSE(). keyStream[] is a two-block window:
+ * words 0..15 are the block currently being consumed and words 16..31 hold
+ * the next block (or just its first two words when only 8 bytes are
+ * generated).
+ *
+ * pKey         - key handed to asm_ZucInitialization()
+ * pIv          - IV handed to asm_ZucInitialization()
+ * pBufferIn    - message to authenticate
+ * lengthInBits - message length in bits
+ * pMacI        - receives the byte-swapped 32-bit result
+ */
+void zuc_eia3_1_buffer_sse(const void *pKey,
+ const void *pIv,
+ const void *pBufferIn,
+ const uint32_t lengthInBits,
+ uint32_t *pMacI)
+{
+#ifndef LINUX
+ DECLARE_ALIGNED(uint128_t xmm_save[10], 16);
+
+ SAVE_XMMS(xmm_save);
+#endif
+ DECLARE_ALIGNED(ZucState_t zucState, 64);
+ DECLARE_ALIGNED(uint32_t keyStream[16 * 2], 64);
+ const uint32_t keyStreamLengthInBits = ZUC_KEYSTR_LEN * 8;
+ /* generate a key-stream 2 words longer than the input message */
+ const uint32_t N = lengthInBits + (2 * ZUC_WORD);
+ /* L: keystream length in words, rounded up; reduced by one block's worth
+ * of words per loop iteration below so that keyStream[L - 1] ends up
+ * indexing into the two-block window */
+ uint32_t L = (N + 31) / ZUC_WORD;
+ uint32_t *pZuc = (uint32_t *) &keyStream[0];
+ uint32_t remainingBits = lengthInBits;
+ /* running MAC accumulator */
+ uint32_t T = 0;
+ const uint8_t *pIn8 = (const uint8_t *) pBufferIn;
+
+#ifdef SAFE_PARAM
+ /* Check for NULL pointers */
+ if (pKey == NULL || pIv == NULL || pBufferIn == NULL || pMacI == NULL)
+ return;
+
+ /* Check input data is in range of supported length */
+ /* NOTE(review): lengthInBits is compared against ZUC_MIN_LEN/ZUC_MAX_LEN,
+ * the same limits the EEA3 entry points apply to byte lengths - confirm
+ * the units are intended */
+ if (lengthInBits < ZUC_MIN_LEN || lengthInBits > ZUC_MAX_LEN)
+ return;
+#endif
+
+ /* need to set the LFSR state to zero before initialization */
+ memset(&zucState, 0, sizeof(ZucState_t));
+
+ asm_ZucInitialization(pKey, pIv, &(zucState));
+ /* first 64 bytes of keystream go into words 0..15 */
+ asm_ZucGenKeystream64B(pZuc, &zucState);
+
+ /* loop over the message bits */
+ while (remainingBits >= keyStreamLengthInBits) {
+ remainingBits -= keyStreamLengthInBits;
+ L -= (keyStreamLengthInBits / 32);
+
+ /* Generate the next key stream 8 bytes or 64 bytes */
+ /* (only 8 bytes when this was the last chunk of the message) */
+ if (!remainingBits)
+ asm_ZucGenKeystream8B(&keyStream[16], &zucState);
+ else
+ asm_ZucGenKeystream64B(&keyStream[16], &zucState);
+ T = asm_Eia3Round64BSSE(T, &keyStream[0], pIn8);
+ /* slide the window: the next block becomes the current block */
+ memcpy(&keyStream[0], &keyStream[16], 16 * sizeof(uint32_t));
+ pIn8 = &pIn8[ZUC_KEYSTR_LEN];
+ }
+
+ /*
+ * If remaining bits has more than 14 ZUC WORDS (double words),
+ * keystream needs to have up to another 2 ZUC WORDS (8B)
+ */
+ if (remainingBits > (14 * 32))
+ asm_ZucGenKeystream8B(&keyStream[16], &zucState);
+ T ^= asm_Eia3RemainderSSE(&keyStream[0], pIn8, remainingBits);
+ /* XOR in the low 32 bits of the 64-bit keystream window that starts at
+ * word remainingBits / 32, rotated left by remainingBits % 32 (the
+ * assignment to the 32-bit T truncates the result).
+ * NOTE(review): the rotate count is 0 whenever remainingBits is a multiple
+ * of 32, so rotate_left() must be well defined for a zero count */
+ T ^= rotate_left(load_uint64(&keyStream[remainingBits / 32]),
+ remainingBits % 32);
+
+ /* save the final MAC-I result */
+ uint32_t keyBlock = keyStream[L - 1];
+ *pMacI = bswap4(T ^ keyBlock);
+
+#ifdef SAFE_DATA
+ /* Clear sensitive data (in registers and stack) */
+ clear_mem(keyStream, sizeof(keyStream));
+ clear_mem(&zucState, sizeof(zucState));
+ CLEAR_SCRATCH_GPS();
+ CLEAR_SCRATCH_SIMD_REGS();
+#endif
+#ifndef LINUX
+ RESTORE_XMMS(xmm_save);
+#endif
+}