/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "secerr.h" #include "rijndael.h" #if ((defined(__clang__) || \ (defined(__GNUC__) && defined(__GNUC_MINOR__) && \ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 8)))) && \ defined(IS_LITTLE_ENDIAN)) #ifndef __ARM_FEATURE_CRYPTO #error "Compiler option is invalid" #endif #include SECStatus arm_aes_encrypt_ecb_128(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; /* Rounds */ state = vaeseq_u8(state, key1); state = vaesmcq_u8(state); state = vaeseq_u8(state, key2); state = vaesmcq_u8(state); state = vaeseq_u8(state, key3); state = vaesmcq_u8(state); state = vaeseq_u8(state, key4); state = vaesmcq_u8(state); state = vaeseq_u8(state, key5); state = vaesmcq_u8(state); state = vaeseq_u8(state, key6); state = vaesmcq_u8(state); state = vaeseq_u8(state, key7); state = vaesmcq_u8(state); state = vaeseq_u8(state, key8); state = vaesmcq_u8(state); state = vaeseq_u8(state, key9); state = vaesmcq_u8(state); state = vaeseq_u8(state, key10); /* AddRoundKey */ state = veorq_u8(state, key11); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; } return SECSuccess; } SECStatus arm_aes_decrypt_ecb_128(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (inputLen == 0) { return SECSuccess; } key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; /* Rounds */ state = vaesdq_u8(state, key11); state = vaesimcq_u8(state); state = vaesdq_u8(state, key10); state = vaesimcq_u8(state); state = vaesdq_u8(state, key9); state = vaesimcq_u8(state); state = vaesdq_u8(state, key8); state = vaesimcq_u8(state); state = vaesdq_u8(state, key7); state = vaesimcq_u8(state); state = vaesdq_u8(state, key6); state = vaesimcq_u8(state); state = vaesdq_u8(state, key5); state = vaesimcq_u8(state); state = vaesdq_u8(state, key4); state = vaesimcq_u8(state); state = vaesdq_u8(state, key3); state = vaesimcq_u8(state); state = vaesdq_u8(state, key2); /* AddRoundKey */ state = veorq_u8(state, key1); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; } return SECSuccess; } SECStatus arm_aes_encrypt_cbc_128(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11; uint8x16_t iv; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } /* iv */ iv = vld1q_u8(cx->iv); /* expanedKey */ key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; state = veorq_u8(state, iv); /* Rounds */ state = vaeseq_u8(state, key1); state = vaesmcq_u8(state); state = vaeseq_u8(state, key2); state = vaesmcq_u8(state); state = vaeseq_u8(state, key3); state = vaesmcq_u8(state); state = vaeseq_u8(state, key4); state = vaesmcq_u8(state); state = vaeseq_u8(state, key5); state = vaesmcq_u8(state); state = vaeseq_u8(state, key6); state = vaesmcq_u8(state); state = vaeseq_u8(state, key7); state = vaesmcq_u8(state); state = vaeseq_u8(state, key8); state = vaesmcq_u8(state); state = vaeseq_u8(state, key9); state = vaesmcq_u8(state); state = vaeseq_u8(state, key10); /* AddRoundKey */ state = veorq_u8(state, key11); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; iv = state; } vst1q_u8(cx->iv, iv); return SECSuccess; } SECStatus arm_aes_decrypt_cbc_128(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t iv; uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } /* iv */ iv = vld1q_u8(cx->iv); /* expanedKey */ key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); while (inputLen > 0) { uint8x16_t state, old_state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif old_state = state; input += 16; inputLen -= 16; /* Rounds */ state = vaesdq_u8(state, key11); state = vaesimcq_u8(state); state = vaesdq_u8(state, key10); state = vaesimcq_u8(state); state = vaesdq_u8(state, key9); state = vaesimcq_u8(state); state = vaesdq_u8(state, key8); state = vaesimcq_u8(state); state = vaesdq_u8(state, key7); state = vaesimcq_u8(state); state = vaesdq_u8(state, key6); state = vaesimcq_u8(state); state = vaesdq_u8(state, key5); state = vaesimcq_u8(state); state = vaesdq_u8(state, key4); state = vaesimcq_u8(state); state = vaesdq_u8(state, key3); state = vaesimcq_u8(state); state = vaesdq_u8(state, key2); /* AddRoundKey */ state = veorq_u8(state, key1); state = veorq_u8(state, iv); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; iv = old_state; } vst1q_u8(cx->iv, iv); return SECSuccess; } SECStatus arm_aes_encrypt_ecb_192(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11, key12, key13; PRUint8 *key = (PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); key12 = vld1q_u8(key + 176); key13 = vld1q_u8(key + 192); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; /* Rounds */ state = vaeseq_u8(state, key1); state = vaesmcq_u8(state); state = vaeseq_u8(state, key2); state = vaesmcq_u8(state); state = vaeseq_u8(state, key3); state = vaesmcq_u8(state); state = vaeseq_u8(state, key4); state = vaesmcq_u8(state); state = vaeseq_u8(state, key5); state = vaesmcq_u8(state); state = vaeseq_u8(state, key6); state = vaesmcq_u8(state); state = vaeseq_u8(state, key7); state = vaesmcq_u8(state); state = vaeseq_u8(state, key8); state = vaesmcq_u8(state); state = vaeseq_u8(state, key9); state = vaesmcq_u8(state); state = vaeseq_u8(state, key10); state = vaesmcq_u8(state); state = vaeseq_u8(state, key11); state = vaesmcq_u8(state); state = vaeseq_u8(state, key12); /* AddRoundKey */ state = veorq_u8(state, key13); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; } return SECSuccess; } SECStatus arm_aes_decrypt_ecb_192(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11, key12, key13; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); key12 = vld1q_u8(key + 176); key13 = vld1q_u8(key + 192); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; /* Rounds */ state = vaesdq_u8(state, key13); state = vaesimcq_u8(state); state = vaesdq_u8(state, key12); state = vaesimcq_u8(state); state = vaesdq_u8(state, key11); state = vaesimcq_u8(state); state = vaesdq_u8(state, key10); state = vaesimcq_u8(state); state = vaesdq_u8(state, key9); state = vaesimcq_u8(state); state = vaesdq_u8(state, key8); state = vaesimcq_u8(state); state = vaesdq_u8(state, key7); state = vaesimcq_u8(state); state = vaesdq_u8(state, key6); state = vaesimcq_u8(state); state = vaesdq_u8(state, key5); state = vaesimcq_u8(state); state = vaesdq_u8(state, key4); state = vaesimcq_u8(state); state = vaesdq_u8(state, key3); state = vaesimcq_u8(state); state = vaesdq_u8(state, key2); /* AddRoundKey */ state = veorq_u8(state, key1); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; } return SECSuccess; } SECStatus arm_aes_encrypt_cbc_192(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11, key12, key13; uint8x16_t iv; PRUint8 *key = (PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } /* iv */ iv = vld1q_u8(cx->iv); /* expanedKey */ key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); key12 = vld1q_u8(key + 176); key13 = vld1q_u8(key + 192); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; state = veorq_u8(state, iv); /* Rounds */ state = vaeseq_u8(state, key1); state = vaesmcq_u8(state); state = vaeseq_u8(state, key2); state = vaesmcq_u8(state); state = vaeseq_u8(state, key3); state = vaesmcq_u8(state); state = vaeseq_u8(state, key4); state = vaesmcq_u8(state); state = vaeseq_u8(state, key5); state = vaesmcq_u8(state); state = vaeseq_u8(state, key6); state = vaesmcq_u8(state); state = vaeseq_u8(state, key7); state = vaesmcq_u8(state); state = vaeseq_u8(state, key8); state = vaesmcq_u8(state); state = vaeseq_u8(state, key9); state = vaesmcq_u8(state); state = vaeseq_u8(state, key10); state = vaesmcq_u8(state); state = vaeseq_u8(state, key11); state = vaesmcq_u8(state); state = vaeseq_u8(state, key12); state = veorq_u8(state, key13); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; iv = state; } vst1q_u8(cx->iv, iv); return SECSuccess; } SECStatus arm_aes_decrypt_cbc_192(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t iv; uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11, key12, key13; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } /* iv */ iv = vld1q_u8(cx->iv); /* expanedKey */ key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); key12 = vld1q_u8(key + 176); key13 = vld1q_u8(key + 192); while (inputLen > 0) { uint8x16_t state, old_state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif old_state = state; input += 16; inputLen -= 16; /* Rounds */ state = vaesdq_u8(state, key13); state = vaesimcq_u8(state); state = vaesdq_u8(state, key12); state = vaesimcq_u8(state); state = vaesdq_u8(state, key11); state = vaesimcq_u8(state); state = vaesdq_u8(state, key10); state = vaesimcq_u8(state); state = vaesdq_u8(state, key9); state = vaesimcq_u8(state); state = vaesdq_u8(state, key8); state = vaesimcq_u8(state); state = vaesdq_u8(state, key7); state = vaesimcq_u8(state); state = vaesdq_u8(state, key6); state = vaesimcq_u8(state); state = vaesdq_u8(state, key5); state = vaesimcq_u8(state); state = vaesdq_u8(state, key4); state = vaesimcq_u8(state); state = vaesdq_u8(state, key3); state = vaesimcq_u8(state); state = vaesdq_u8(state, key2); /* AddRoundKey */ state = veorq_u8(state, key1); state = veorq_u8(state, iv); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; iv = old_state; } vst1q_u8(cx->iv, iv); return SECSuccess; } SECStatus arm_aes_encrypt_ecb_256(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11, key12, key13, key14, key15; PRUint8 *key = (PRUint8 *)cx->k.expandedKey; if (inputLen == 0) { return SECSuccess; } key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); key12 = vld1q_u8(key + 176); key13 = vld1q_u8(key + 192); key14 = vld1q_u8(key + 208); key15 = vld1q_u8(key + 224); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; /* Rounds */ state = vaeseq_u8(state, key1); state = vaesmcq_u8(state); state = vaeseq_u8(state, key2); state = vaesmcq_u8(state); state = vaeseq_u8(state, key3); state = vaesmcq_u8(state); state = vaeseq_u8(state, key4); state = vaesmcq_u8(state); state = vaeseq_u8(state, key5); state = vaesmcq_u8(state); state = vaeseq_u8(state, key6); state = vaesmcq_u8(state); state = vaeseq_u8(state, key7); state = vaesmcq_u8(state); state = vaeseq_u8(state, key8); state = vaesmcq_u8(state); state = vaeseq_u8(state, key9); state = vaesmcq_u8(state); state = vaeseq_u8(state, key10); state = vaesmcq_u8(state); state = vaeseq_u8(state, key11); state = vaesmcq_u8(state); state = vaeseq_u8(state, key12); state = vaesmcq_u8(state); state = vaeseq_u8(state, key13); state = vaesmcq_u8(state); state = vaeseq_u8(state, key14); /* AddRoundKey */ state = veorq_u8(state, key15); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; } return SECSuccess; } SECStatus arm_aes_decrypt_ecb_256(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11, key12, key13, key14, key15; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); key12 = vld1q_u8(key + 176); key13 = vld1q_u8(key + 192); key14 = vld1q_u8(key + 208); key15 = vld1q_u8(key + 224); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; /* Rounds */ state = vaesdq_u8(state, key15); state = vaesimcq_u8(state); state = vaesdq_u8(state, key14); state = vaesimcq_u8(state); state = vaesdq_u8(state, key13); state = vaesimcq_u8(state); state = vaesdq_u8(state, key12); state = vaesimcq_u8(state); state = vaesdq_u8(state, key11); state = vaesimcq_u8(state); state = vaesdq_u8(state, key10); state = vaesimcq_u8(state); state = vaesdq_u8(state, key9); state = vaesimcq_u8(state); state = vaesdq_u8(state, key8); state = vaesimcq_u8(state); state = vaesdq_u8(state, key7); state = vaesimcq_u8(state); state = vaesdq_u8(state, key6); state = vaesimcq_u8(state); state = vaesdq_u8(state, key5); state = vaesimcq_u8(state); state = vaesdq_u8(state, key4); state = vaesimcq_u8(state); state = vaesdq_u8(state, key3); state = vaesimcq_u8(state); state = vaesdq_u8(state, key2); /* AddRoundKey */ state = veorq_u8(state, key1); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; } return SECSuccess; } SECStatus arm_aes_encrypt_cbc_256(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11, key12, key13, key14, key15; uint8x16_t iv; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } /* iv */ iv = vld1q_u8(cx->iv); /* expanedKey */ key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); key12 = vld1q_u8(key + 176); key13 = vld1q_u8(key + 192); key14 = vld1q_u8(key + 208); key15 = vld1q_u8(key + 224); while (inputLen > 0) { uint8x16_t state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif input += 16; inputLen -= 16; state = veorq_u8(state, iv); /* Rounds */ state = vaeseq_u8(state, key1); state = vaesmcq_u8(state); state = vaeseq_u8(state, key2); state = vaesmcq_u8(state); state = vaeseq_u8(state, key3); state = vaesmcq_u8(state); state = vaeseq_u8(state, key4); state = vaesmcq_u8(state); state = vaeseq_u8(state, key5); state = vaesmcq_u8(state); state = vaeseq_u8(state, key6); state = vaesmcq_u8(state); state = vaeseq_u8(state, key7); state = vaesmcq_u8(state); state = vaeseq_u8(state, key8); state = vaesmcq_u8(state); state = vaeseq_u8(state, key9); state = vaesmcq_u8(state); state = vaeseq_u8(state, key10); state = vaesmcq_u8(state); state = vaeseq_u8(state, key11); state = vaesmcq_u8(state); state = vaeseq_u8(state, key12); state = vaesmcq_u8(state); state = vaeseq_u8(state, key13); state = vaesmcq_u8(state); state = vaeseq_u8(state, key14); /* AddRoundKey */ state = veorq_u8(state, key15); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; iv = state; } vst1q_u8(cx->iv, iv); return SECSuccess; } SECStatus arm_aes_decrypt_cbc_256(AESContext *cx, unsigned char *output, unsigned int *outputLen, unsigned int maxOutputLen, const unsigned char *input, unsigned int inputLen, unsigned int blocksize) { #if !defined(HAVE_UNALIGNED_ACCESS) pre_align unsigned char buf[16] post_align; #endif uint8x16_t iv; uint8x16_t key1, key2, key3, key4, key5, key6, key7, key8, key9, key10; uint8x16_t key11, key12, key13, key14, key15; const PRUint8 *key = (const PRUint8 *)cx->k.expandedKey; if (!inputLen) { return SECSuccess; } /* iv */ iv = vld1q_u8(cx->iv); /* expanedKey */ key1 = vld1q_u8(key); key2 = vld1q_u8(key + 16); key3 = vld1q_u8(key + 32); key4 = vld1q_u8(key + 48); key5 = vld1q_u8(key + 64); key6 = vld1q_u8(key + 80); key7 = vld1q_u8(key + 96); key8 = vld1q_u8(key + 112); key9 = vld1q_u8(key + 128); key10 = vld1q_u8(key + 144); key11 = vld1q_u8(key + 160); key12 = vld1q_u8(key + 176); key13 = vld1q_u8(key + 192); key14 = vld1q_u8(key + 208); key15 = vld1q_u8(key + 224); while (inputLen > 0) { uint8x16_t state, old_state; #if defined(HAVE_UNALIGNED_ACCESS) state = vld1q_u8(input); #else if ((uintptr_t)input & 0x7) { memcpy(buf, input, 16); state = vld1q_u8(__builtin_assume_aligned(buf, 16)); } else { state = vld1q_u8(__builtin_assume_aligned(input, 8)); } #endif old_state = state; input += 16; inputLen -= 16; /* Rounds */ state = vaesdq_u8(state, key15); state = vaesimcq_u8(state); state = vaesdq_u8(state, key14); state = vaesimcq_u8(state); state = vaesdq_u8(state, key13); state = vaesimcq_u8(state); state = vaesdq_u8(state, key12); state = vaesimcq_u8(state); state = vaesdq_u8(state, key11); state = vaesimcq_u8(state); state = vaesdq_u8(state, key10); state = vaesimcq_u8(state); state = vaesdq_u8(state, key9); state = vaesimcq_u8(state); state = vaesdq_u8(state, key8); state = vaesimcq_u8(state); state = vaesdq_u8(state, key7); state = vaesimcq_u8(state); state = vaesdq_u8(state, key6); state = vaesimcq_u8(state); state = vaesdq_u8(state, key5); state = vaesimcq_u8(state); state = vaesdq_u8(state, key4); state = vaesimcq_u8(state); state = vaesdq_u8(state, key3); state = vaesimcq_u8(state); state = vaesdq_u8(state, key2); /* AddRoundKey */ state = veorq_u8(state, key1); state = veorq_u8(state, iv); #if defined(HAVE_UNALIGNED_ACCESS) vst1q_u8(output, state); #else if ((uintptr_t)output & 0x7) { vst1q_u8(__builtin_assume_aligned(buf, 16), state); memcpy(output, buf, 16); } else { vst1q_u8(__builtin_assume_aligned(output, 8), state); } #endif output += 16; iv = old_state; } vst1q_u8(cx->iv, iv); return SECSuccess; } #endif