/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #ifdef FREEBL_NO_DEPEND #include "stubs.h" #endif #include "rijndael.h" #include "secerr.h" #include /* aes-ni */ #define EXPAND_KEY128(k, rcon, res) \ tmp_key = _mm_aeskeygenassist_si128(k, rcon); \ tmp_key = _mm_shuffle_epi32(tmp_key, 0xFF); \ tmp = _mm_xor_si128(k, _mm_slli_si128(k, 4)); \ tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ tmp = _mm_xor_si128(tmp, _mm_slli_si128(tmp, 4)); \ res = _mm_xor_si128(tmp, tmp_key) static void native_key_expansion128(AESContext *cx, const unsigned char *key) { __m128i *keySchedule = cx->k.keySchedule; pre_align __m128i tmp_key post_align; pre_align __m128i tmp post_align; keySchedule[0] = _mm_loadu_si128((__m128i *)key); EXPAND_KEY128(keySchedule[0], 0x01, keySchedule[1]); EXPAND_KEY128(keySchedule[1], 0x02, keySchedule[2]); EXPAND_KEY128(keySchedule[2], 0x04, keySchedule[3]); EXPAND_KEY128(keySchedule[3], 0x08, keySchedule[4]); EXPAND_KEY128(keySchedule[4], 0x10, keySchedule[5]); EXPAND_KEY128(keySchedule[5], 0x20, keySchedule[6]); EXPAND_KEY128(keySchedule[6], 0x40, keySchedule[7]); EXPAND_KEY128(keySchedule[7], 0x80, keySchedule[8]); EXPAND_KEY128(keySchedule[8], 0x1B, keySchedule[9]); EXPAND_KEY128(keySchedule[9], 0x36, keySchedule[10]); } #define EXPAND_KEY192_PART1(res, k0, kt, rcon) \ tmp2 = _mm_slli_si128(k0, 4); \ tmp1 = _mm_xor_si128(k0, tmp2); \ tmp2 = _mm_slli_si128(tmp2, 4); \ tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ tmp2 = _mm_aeskeygenassist_si128(kt, rcon); \ res = _mm_xor_si128(tmp1, _mm_shuffle_epi32(tmp2, 0x55)) #define EXPAND_KEY192_PART2(res, k1, k2) \ tmp2 = _mm_xor_si128(k1, _mm_slli_si128(k1, 4)); \ res = _mm_xor_si128(tmp2, _mm_shuffle_epi32(k2, 0xFF)) #define EXPAND_KEY192(k0, res1, res2, res3, carry, rcon1, rcon2) \ EXPAND_KEY192_PART1(tmp3, k0, res1, rcon1); \ EXPAND_KEY192_PART2(carry, res1, tmp3); \ res1 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(res1), \ _mm_castsi128_pd(tmp3), 0)); \ res2 = _mm_castpd_si128(_mm_shuffle_pd(_mm_castsi128_pd(tmp3), \ _mm_castsi128_pd(carry), 1)); \ EXPAND_KEY192_PART1(res3, tmp3, carry, rcon2) static void native_key_expansion192(AESContext *cx, const unsigned char *key) { __m128i *keySchedule = cx->k.keySchedule; pre_align __m128i tmp1 post_align; pre_align __m128i tmp2 post_align; pre_align __m128i tmp3 post_align; pre_align __m128i carry post_align; keySchedule[0] = _mm_loadu_si128((__m128i *)key); keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); EXPAND_KEY192(keySchedule[0], keySchedule[1], keySchedule[2], keySchedule[3], carry, 0x1, 0x2); EXPAND_KEY192_PART2(keySchedule[4], carry, keySchedule[3]); EXPAND_KEY192(keySchedule[3], keySchedule[4], keySchedule[5], keySchedule[6], carry, 0x4, 0x8); EXPAND_KEY192_PART2(keySchedule[7], carry, keySchedule[6]); EXPAND_KEY192(keySchedule[6], keySchedule[7], keySchedule[8], keySchedule[9], carry, 0x10, 0x20); EXPAND_KEY192_PART2(keySchedule[10], carry, keySchedule[9]); EXPAND_KEY192(keySchedule[9], keySchedule[10], keySchedule[11], keySchedule[12], carry, 0x40, 0x80); } #define EXPAND_KEY256_PART(res, rconx, k1x, k2x, X) \ tmp_key = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(k2x, rconx), X); \ tmp2 = _mm_slli_si128(k1x, 4); \ tmp1 = _mm_xor_si128(k1x, tmp2); \ tmp2 = _mm_slli_si128(tmp2, 4); \ tmp1 = _mm_xor_si128(_mm_xor_si128(tmp1, tmp2), _mm_slli_si128(tmp2, 4)); \ res = _mm_xor_si128(tmp1, tmp_key); #define EXPAND_KEY256(res1, res2, k1, k2, rcon) \ EXPAND_KEY256_PART(res1, rcon, k1, k2, 0xFF); \ EXPAND_KEY256_PART(res2, 0x00, k2, res1, 0xAA) static void native_key_expansion256(AESContext *cx, const unsigned char *key) { __m128i *keySchedule = cx->k.keySchedule; pre_align __m128i tmp_key post_align; pre_align __m128i tmp1 post_align; pre_align __m128i tmp2 post_align; keySchedule[0] = _mm_loadu_si128((__m128i *)key); keySchedule[1] = _mm_loadu_si128((__m128i *)(key + 16)); EXPAND_KEY256(keySchedule[2], keySchedule[3], keySchedule[0], keySchedule[1], 0x01); EXPAND_KEY256(keySchedule[4], keySchedule[5], keySchedule[2], keySchedule[3], 0x02); EXPAND_KEY256(keySchedule[6], keySchedule[7], keySchedule[4], keySchedule[5], 0x04); EXPAND_KEY256(keySchedule[8], keySchedule[9], keySchedule[6], keySchedule[7], 0x08); EXPAND_KEY256(keySchedule[10], keySchedule[11], keySchedule[8], keySchedule[9], 0x10); EXPAND_KEY256(keySchedule[12], keySchedule[13], keySchedule[10], keySchedule[11], 0x20); EXPAND_KEY256_PART(keySchedule[14], 0x40, keySchedule[12], keySchedule[13], 0xFF); } /* * AES key expansion using aes-ni instructions. */ void rijndael_native_key_expansion(AESContext *cx, const unsigned char *key, unsigned int Nk) { switch (Nk) { case 4: native_key_expansion128(cx, key); return; case 6: native_key_expansion192(cx, key); return; case 8: native_key_expansion256(cx, key); return; default: /* This shouldn't happen (checked by the caller). */ return; } } void rijndael_native_encryptBlock(AESContext *cx, unsigned char *output, const unsigned char *input) { unsigned int i; pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); m = _mm_xor_si128(m, cx->k.keySchedule[0]); for (i = 1; i < cx->Nr; ++i) { m = _mm_aesenc_si128(m, cx->k.keySchedule[i]); } m = _mm_aesenclast_si128(m, cx->k.keySchedule[cx->Nr]); _mm_storeu_si128((__m128i *)output, m); } void rijndael_native_decryptBlock(AESContext *cx, unsigned char *output, const unsigned char *input) { int i; pre_align __m128i m post_align = _mm_loadu_si128((__m128i *)input); m = _mm_xor_si128(m, cx->k.keySchedule[cx->Nr]); for (i = cx->Nr - 1; i > 0; --i) { m = _mm_aesdec_si128(m, cx->k.keySchedule[i]); } m = _mm_aesdeclast_si128(m, cx->k.keySchedule[0]); _mm_storeu_si128((__m128i *)output, m); } // out = a ^ b void native_xorBlock(unsigned char *out, const unsigned char *a, const unsigned char *b) { pre_align __m128i post_align in1 = _mm_loadu_si128((__m128i *)(a)); pre_align __m128i post_align in2 = _mm_loadu_si128((__m128i *)(b)); in1 = _mm_xor_si128(in1, in2); _mm_storeu_si128((__m128i *)(out), in1); }