From 6e7a315eb67cb6c113cf37e1d66c4f11a51a2b3e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 18:29:51 +0200 Subject: Adding upstream version 2.06. Signed-off-by: Daniel Baumann --- grub-core/lib/libgcrypt/cipher/rijndael.c | 2092 +++++++++++++++++++++++++++++ 1 file changed, 2092 insertions(+) create mode 100644 grub-core/lib/libgcrypt/cipher/rijndael.c (limited to 'grub-core/lib/libgcrypt/cipher/rijndael.c') diff --git a/grub-core/lib/libgcrypt/cipher/rijndael.c b/grub-core/lib/libgcrypt/cipher/rijndael.c new file mode 100644 index 0000000..559550b --- /dev/null +++ b/grub-core/lib/libgcrypt/cipher/rijndael.c @@ -0,0 +1,2092 @@ +/* Rijndael (AES) for GnuPG + * Copyright (C) 2000, 2001, 2002, 2003, 2007, + * 2008, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + ******************************************************************* + * The code here is based on the optimized implementation taken from + * http://www.esat.kuleuven.ac.be/~rijmen/rijndael/ on Oct 2, 2000, + * which carries this notice: + *------------------------------------------ + * rijndael-alg-fst.c v2.3 April '2000 + * + * Optimised ANSI C code + * + * authors: v1.0: Antoon Bosselaers + * v2.0: Vincent Rijmen + * v2.3: Paulo Barreto + * + * This code is placed in the public domain. + *------------------------------------------ + * + * The SP800-38a document is available at: + * http://csrc.nist.gov/publications/nistpubs/800-38a/sp800-38a.pdf + * + */ + +#include +#include +#include +#include /* for memcmp() */ + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" + +#define MAXKC (256/32) +#define MAXROUNDS 14 +#define BLOCKSIZE (128/8) + + +/* Helper macro to force alignment to 16 bytes. */ +#ifdef __GNUC__ +# define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) +#else +# define ATTR_ALIGNED_16 +#endif + + +/* USE_PADLOCK indicates whether to compile the padlock specific + code. */ +#undef USE_PADLOCK +#ifdef ENABLE_PADLOCK_SUPPORT +# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && defined (__GNUC__) +# define USE_PADLOCK 1 +# endif +#endif /*ENABLE_PADLOCK_SUPPORT*/ + +/* USE_AESNI inidicates whether to compile with Intel AES-NI code. We + need the vector-size attribute which seems to be available since + gcc 3. However, to be on the safe side we require at least gcc 4. */ +#undef USE_AESNI +#ifdef ENABLE_AESNI_SUPPORT +# if defined (__i386__) && SIZEOF_UNSIGNED_LONG == 4 && __GNUC__ >= 4 +# define USE_AESNI 1 +# endif +#endif /* ENABLE_AESNI_SUPPORT */ + +#ifdef USE_AESNI + typedef int m128i_t __attribute__ ((__vector_size__ (16))); +#endif /*USE_AESNI*/ + +/* Define an u32 variant for the sake of gcc 4.4's strict aliasing. */ +#if __GNUC__ > 4 || ( __GNUC__ == 4 && __GNUC_MINOR__ >= 4 ) +typedef u32 __attribute__ ((__may_alias__)) u32_a_t; +#else +typedef u32 u32_a_t; +#endif + + + +/* Our context object. */ +typedef struct +{ + /* The first fields are the keyschedule arrays. This is so that + they are aligned on a 16 byte boundary if using gcc. This + alignment is required for the AES-NI code and a good idea in any + case. The alignment is guaranteed due to the way cipher.c + allocates the space for the context. The PROPERLY_ALIGNED_TYPE + hack is used to force a minimal alignment if not using gcc of if + the alignment requirement is higher that 16 bytes. */ + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte keyschedule[MAXROUNDS+1][4][4]; +#ifdef USE_PADLOCK + /* The key as passed to the padlock engine. It is only used if + the padlock engine is used (USE_PADLOCK, below). */ + unsigned char padlock_key[16] __attribute__ ((aligned (16))); +#endif /*USE_PADLOCK*/ + } u1; + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte keyschedule[MAXROUNDS+1][4][4]; + } u2; + int rounds; /* Key-length-dependent number of rounds. */ + int decryption_prepared; /* The decryption key schedule is available. */ +#ifdef USE_PADLOCK + int use_padlock; /* Padlock shall be used. */ +#endif /*USE_PADLOCK*/ +#ifdef USE_AESNI + int use_aesni; /* AES-NI shall be used. */ +#endif /*USE_AESNI*/ +} RIJNDAEL_context ATTR_ALIGNED_16; + +/* Macros defining alias for the keyschedules. */ +#define keyschenc u1.keyschedule +#define keyschdec u2.keyschedule +#define padlockkey u1.padlock_key + +/* Two macros to be called prior and after the use of AESNI + instructions. There should be no external function calls between + the use of these macros. There purpose is to make sure that the + SSE regsiters are cleared and won't reveal any information about + the key or the data. */ +#ifdef USE_AESNI +# define aesni_prepare() do { } while (0) +# define aesni_cleanup() \ + do { asm volatile ("pxor %%xmm0, %%xmm0\n\t" \ + "pxor %%xmm1, %%xmm1\n" :: ); \ + } while (0) +# define aesni_cleanup_2_4() \ + do { asm volatile ("pxor %%xmm2, %%xmm2\n\t" \ + "pxor %%xmm3, %%xmm3\n" \ + "pxor %%xmm4, %%xmm4\n":: ); \ + } while (0) +#else +# define aesni_prepare() do { } while (0) +# define aesni_cleanup() do { } while (0) +#endif + + +/* All the numbers. */ +#include "rijndael-tables.h" + + + +/* Function prototypes. */ +#ifdef USE_AESNI +/* We don't want to inline these functions to help gcc allocate enough + registers. */ +static void do_aesni_ctr (const RIJNDAEL_context *ctx, unsigned char *ctr, + unsigned char *b, const unsigned char *a) + __attribute__ ((__noinline__)); +static void do_aesni_ctr_4 (const RIJNDAEL_context *ctx, unsigned char *ctr, + unsigned char *b, const unsigned char *a) + __attribute__ ((__noinline__)); +#endif /*USE_AESNI*/ + +static const char *selftest(void); + + + +/* Perform the key setup. */ +static gcry_err_code_t +do_setkey (RIJNDAEL_context *ctx, const byte *key, const unsigned keylen) +{ + static int initialized = 0; + static const char *selftest_failed=0; + int rounds; + int i,j, r, t, rconpointer = 0; + int KC; + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte k[MAXKC][4]; + } k; +#define k k.k + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte tk[MAXKC][4]; + } tk; +#define tk tk.tk + + /* The on-the-fly self tests are only run in non-fips mode. In fips + mode explicit self-tests are required. Actually the on-the-fly + self-tests are not fully thread-safe and it might happen that a + failed self-test won't get noticed in another thread. + + FIXME: We might want to have a central registry of succeeded + self-tests. */ + if (!fips_mode () && !initialized) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("%s\n", selftest_failed ); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + ctx->decryption_prepared = 0; +#ifdef USE_PADLOCK + ctx->use_padlock = 0; +#endif +#ifdef USE_AESNI + ctx->use_aesni = 0; +#endif + + if( keylen == 128/8 ) + { + rounds = 10; + KC = 4; + + if (0) + ; +#ifdef USE_PADLOCK + else if ((_gcry_get_hw_features () & HWF_PADLOCK_AES)) + { + ctx->use_padlock = 1; + memcpy (ctx->padlockkey, key, keylen); + } +#endif +#ifdef USE_AESNI + else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) + { + ctx->use_aesni = 1; + } +#endif + } + else if ( keylen == 192/8 ) + { + rounds = 12; + KC = 6; + + if (0) + { + ; + } +#ifdef USE_AESNI + else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) + { + ctx->use_aesni = 1; + } +#endif + } + else if ( keylen == 256/8 ) + { + rounds = 14; + KC = 8; + + if (0) + { + ; + } +#ifdef USE_AESNI + else if ((_gcry_get_hw_features () & HWF_INTEL_AESNI)) + { + ctx->use_aesni = 1; + } +#endif + } + else + return GPG_ERR_INV_KEYLEN; + + ctx->rounds = rounds; + + /* NB: We don't yet support Padlock hardware key generation. */ + + if (0) + ; +#ifdef USE_AESNI_is_disabled_here + else if (ctx->use_aesni && ctx->rounds == 10) + { + /* Note: This code works for AES-128 but it is not much better + than using the standard key schedule. We disable it for + now and don't put any effort into implementing this for + AES-192 and AES-256. */ + asm volatile ("movl %[key], %%esi\n\t" + "movdqu (%%esi), %%xmm1\n\t" /* xmm1 := key */ + "movl %[ksch], %%esi\n\t" + "movdqa %%xmm1, (%%esi)\n\t" /* ksch[0] := xmm1 */ + "aeskeygenassist $0x01, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x10(%%esi)\n\t" /* ksch[1] := xmm1 */ + "aeskeygenassist $0x02, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x20(%%esi)\n\t" /* ksch[2] := xmm1 */ + "aeskeygenassist $0x04, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x30(%%esi)\n\t" /* ksch[3] := xmm1 */ + "aeskeygenassist $0x08, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x40(%%esi)\n\t" /* ksch[4] := xmm1 */ + "aeskeygenassist $0x10, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x50(%%esi)\n\t" /* ksch[5] := xmm1 */ + "aeskeygenassist $0x20, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x60(%%esi)\n\t" /* ksch[6] := xmm1 */ + "aeskeygenassist $0x40, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x70(%%esi)\n\t" /* ksch[7] := xmm1 */ + "aeskeygenassist $0x80, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x80(%%esi)\n\t" /* ksch[8] := xmm1 */ + "aeskeygenassist $0x1b, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0x90(%%esi)\n\t" /* ksch[9] := xmm1 */ + "aeskeygenassist $0x36, %%xmm1, %%xmm2\n\t" + "call .Lexpand128_%=\n\t" + "movdqa %%xmm1, 0xa0(%%esi)\n\t" /* ksch[10] := xmm1 */ + "jmp .Lleave%=\n" + + ".Lexpand128_%=:\n\t" + "pshufd $0xff, %%xmm2, %%xmm2\n\t" + "movdqa %%xmm1, %%xmm3\n\t" + "pslldq $4, %%xmm3\n\t" + "pxor %%xmm3, %%xmm1\n\t" + "pslldq $4, %%xmm3\n\t" + "pxor %%xmm3, %%xmm1\n\t" + "pslldq $4, %%xmm3\n\t" + "pxor %%xmm3, %%xmm2\n\t" + "pxor %%xmm2, %%xmm1\n\t" + "ret\n" + + ".Lleave%=:\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n" + : + : [key] "g" (key), [ksch] "g" (ctx->keyschenc) + : "%esi", "cc", "memory" ); + } +#endif /*USE_AESNI*/ + else + { +#define W (ctx->keyschenc) + for (i = 0; i < keylen; i++) + { + k[i >> 2][i & 3] = key[i]; + } + + for (j = KC-1; j >= 0; j--) + { + *((u32_a_t*)tk[j]) = *((u32_a_t*)k[j]); + } + r = 0; + t = 0; + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + *((u32_a_t*)W[r][t]) = *((u32_a_t*)tk[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + + while (r < rounds + 1) + { + /* While not enough round key material calculated calculate + new values. */ + tk[0][0] ^= S[tk[KC-1][1]]; + tk[0][1] ^= S[tk[KC-1][2]]; + tk[0][2] ^= S[tk[KC-1][3]]; + tk[0][3] ^= S[tk[KC-1][0]]; + tk[0][0] ^= rcon[rconpointer++]; + + if (KC != 8) + { + for (j = 1; j < KC; j++) + { + *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]); + } + } + else + { + for (j = 1; j < KC/2; j++) + { + *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]); + } + tk[KC/2][0] ^= S[tk[KC/2 - 1][0]]; + tk[KC/2][1] ^= S[tk[KC/2 - 1][1]]; + tk[KC/2][2] ^= S[tk[KC/2 - 1][2]]; + tk[KC/2][3] ^= S[tk[KC/2 - 1][3]]; + for (j = KC/2 + 1; j < KC; j++) + { + *((u32_a_t*)tk[j]) ^= *((u32_a_t*)tk[j-1]); + } + } + + /* Copy values into round key array. */ + for (j = 0; (j < KC) && (r < rounds + 1); ) + { + for (; (j < KC) && (t < 4); j++, t++) + { + *((u32_a_t*)W[r][t]) = *((u32_a_t*)tk[j]); + } + if (t == 4) + { + r++; + t = 0; + } + } + } +#undef W + } + + return 0; +#undef tk +#undef k +} + + +static gcry_err_code_t +rijndael_setkey (void *context, const byte *key, const unsigned keylen) +{ + RIJNDAEL_context *ctx = context; + + int rc = do_setkey (ctx, key, keylen); + _gcry_burn_stack ( 100 + 16*sizeof(int)); + return rc; +} + + +/* Make a decryption key from an encryption key. */ +static void +prepare_decryption( RIJNDAEL_context *ctx ) +{ + int r; + +#ifdef USE_AESNI + if (ctx->use_aesni) + { + /* The AES-NI decrypt instructions use the Equivalent Inverse + Cipher, thus we can't use the the standard decrypt key + preparation. */ + m128i_t *ekey = (m128i_t*)ctx->keyschenc; + m128i_t *dkey = (m128i_t*)ctx->keyschdec; + int rr; + + dkey[0] = ekey[ctx->rounds]; + for (r=1, rr=ctx->rounds-1; r < ctx->rounds; r++, rr--) + { + asm volatile + ("movdqu %[ekey], %%xmm1\n\t" + /*"aesimc %%xmm1, %%xmm1\n\t"*/ + ".byte 0x66, 0x0f, 0x38, 0xdb, 0xc9\n\t" + "movdqu %%xmm1, %[dkey]" + : [dkey] "=m" (dkey[r]) + : [ekey] "m" (ekey[rr]) ); + } + dkey[r] = ekey[0]; + } + else +#endif /*USE_AESNI*/ + { + union + { + PROPERLY_ALIGNED_TYPE dummy; + byte *w; + } w; +#define w w.w + + for (r=0; r < MAXROUNDS+1; r++ ) + { + *((u32_a_t*)ctx->keyschdec[r][0]) = *((u32_a_t*)ctx->keyschenc[r][0]); + *((u32_a_t*)ctx->keyschdec[r][1]) = *((u32_a_t*)ctx->keyschenc[r][1]); + *((u32_a_t*)ctx->keyschdec[r][2]) = *((u32_a_t*)ctx->keyschenc[r][2]); + *((u32_a_t*)ctx->keyschdec[r][3]) = *((u32_a_t*)ctx->keyschenc[r][3]); + } +#define W (ctx->keyschdec) + for (r = 1; r < ctx->rounds; r++) + { + w = W[r][0]; + *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]]) + ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]); + + w = W[r][1]; + *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]]) + ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]); + + w = W[r][2]; + *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]]) + ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]); + + w = W[r][3]; + *((u32_a_t*)w) = *((u32_a_t*)U1[w[0]]) ^ *((u32_a_t*)U2[w[1]]) + ^ *((u32_a_t*)U3[w[2]]) ^ *((u32_a_t*)U4[w[3]]); + } +#undef W +#undef w + } +} + + +/* Encrypt one block. A and B need to be aligned on a 4 byte + boundary. A and B may be the same. */ +static void +do_encrypt_aligned (const RIJNDAEL_context *ctx, + unsigned char *b, const unsigned char *a) +{ +#define rk (ctx->keyschenc) + int rounds = ctx->rounds; + int r; + union + { + u32 tempu32[4]; /* Force correct alignment. */ + byte temp[4][4]; + } u; + + *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a )) ^ *((u32_a_t*)rk[0][0]); + *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[0][1]); + *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[0][2]); + *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[0][3]); + *((u32_a_t*)(b )) = (*((u32_a_t*)T1[u.temp[0][0]]) + ^ *((u32_a_t*)T2[u.temp[1][1]]) + ^ *((u32_a_t*)T3[u.temp[2][2]]) + ^ *((u32_a_t*)T4[u.temp[3][3]])); + *((u32_a_t*)(b + 4)) = (*((u32_a_t*)T1[u.temp[1][0]]) + ^ *((u32_a_t*)T2[u.temp[2][1]]) + ^ *((u32_a_t*)T3[u.temp[3][2]]) + ^ *((u32_a_t*)T4[u.temp[0][3]])); + *((u32_a_t*)(b + 8)) = (*((u32_a_t*)T1[u.temp[2][0]]) + ^ *((u32_a_t*)T2[u.temp[3][1]]) + ^ *((u32_a_t*)T3[u.temp[0][2]]) + ^ *((u32_a_t*)T4[u.temp[1][3]])); + *((u32_a_t*)(b +12)) = (*((u32_a_t*)T1[u.temp[3][0]]) + ^ *((u32_a_t*)T2[u.temp[0][1]]) + ^ *((u32_a_t*)T3[u.temp[1][2]]) + ^ *((u32_a_t*)T4[u.temp[2][3]])); + + for (r = 1; r < rounds-1; r++) + { + *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[r][0]); + *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]); + *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]); + *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]); + + *((u32_a_t*)(b )) = (*((u32_a_t*)T1[u.temp[0][0]]) + ^ *((u32_a_t*)T2[u.temp[1][1]]) + ^ *((u32_a_t*)T3[u.temp[2][2]]) + ^ *((u32_a_t*)T4[u.temp[3][3]])); + *((u32_a_t*)(b + 4)) = (*((u32_a_t*)T1[u.temp[1][0]]) + ^ *((u32_a_t*)T2[u.temp[2][1]]) + ^ *((u32_a_t*)T3[u.temp[3][2]]) + ^ *((u32_a_t*)T4[u.temp[0][3]])); + *((u32_a_t*)(b + 8)) = (*((u32_a_t*)T1[u.temp[2][0]]) + ^ *((u32_a_t*)T2[u.temp[3][1]]) + ^ *((u32_a_t*)T3[u.temp[0][2]]) + ^ *((u32_a_t*)T4[u.temp[1][3]])); + *((u32_a_t*)(b +12)) = (*((u32_a_t*)T1[u.temp[3][0]]) + ^ *((u32_a_t*)T2[u.temp[0][1]]) + ^ *((u32_a_t*)T3[u.temp[1][2]]) + ^ *((u32_a_t*)T4[u.temp[2][3]])); + } + + /* Last round is special. */ + *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[rounds-1][0]); + *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[rounds-1][1]); + *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[rounds-1][2]); + *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[rounds-1][3]); + b[ 0] = T1[u.temp[0][0]][1]; + b[ 1] = T1[u.temp[1][1]][1]; + b[ 2] = T1[u.temp[2][2]][1]; + b[ 3] = T1[u.temp[3][3]][1]; + b[ 4] = T1[u.temp[1][0]][1]; + b[ 5] = T1[u.temp[2][1]][1]; + b[ 6] = T1[u.temp[3][2]][1]; + b[ 7] = T1[u.temp[0][3]][1]; + b[ 8] = T1[u.temp[2][0]][1]; + b[ 9] = T1[u.temp[3][1]][1]; + b[10] = T1[u.temp[0][2]][1]; + b[11] = T1[u.temp[1][3]][1]; + b[12] = T1[u.temp[3][0]][1]; + b[13] = T1[u.temp[0][1]][1]; + b[14] = T1[u.temp[1][2]][1]; + b[15] = T1[u.temp[2][3]][1]; + *((u32_a_t*)(b )) ^= *((u32_a_t*)rk[rounds][0]); + *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[rounds][1]); + *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[rounds][2]); + *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[rounds][3]); +#undef rk +} + + +static void +do_encrypt (const RIJNDAEL_context *ctx, + unsigned char *bx, const unsigned char *ax) +{ + /* BX and AX are not necessary correctly aligned. Thus we might + need to copy them here. We try to align to a 16 bytes. */ + if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f)) + { + union + { + u32 dummy[4]; + byte a[16] ATTR_ALIGNED_16; + } a; + union + { + u32 dummy[4]; + byte b[16] ATTR_ALIGNED_16; + } b; + + memcpy (a.a, ax, 16); + do_encrypt_aligned (ctx, b.b, a.a); + memcpy (bx, b.b, 16); + } + else + { + do_encrypt_aligned (ctx, bx, ax); + } +} + + +/* Encrypt or decrypt one block using the padlock engine. A and B may + be the same. */ +#ifdef USE_PADLOCK +static void +do_padlock (const RIJNDAEL_context *ctx, int decrypt_flag, + unsigned char *bx, const unsigned char *ax) +{ + /* BX and AX are not necessary correctly aligned. Thus we need to + copy them here. */ + unsigned char a[16] __attribute__ ((aligned (16))); + unsigned char b[16] __attribute__ ((aligned (16))); + unsigned int cword[4] __attribute__ ((aligned (16))); + + /* The control word fields are: + 127:12 11:10 9 8 7 6 5 4 3:0 + RESERVED KSIZE CRYPT INTER KEYGN CIPHR ALIGN DGEST ROUND */ + cword[0] = (ctx->rounds & 15); /* (The mask is just a safeguard.) */ + cword[1] = 0; + cword[2] = 0; + cword[3] = 0; + if (decrypt_flag) + cword[0] |= 0x00000200; + + memcpy (a, ax, 16); + + asm volatile + ("pushfl\n\t" /* Force key reload. */ + "popfl\n\t" + "xchg %3, %%ebx\n\t" /* Load key. */ + "movl $1, %%ecx\n\t" /* Init counter for just one block. */ + ".byte 0xf3, 0x0f, 0xa7, 0xc8\n\t" /* REP XSTORE ECB. */ + "xchg %3, %%ebx\n" /* Restore GOT register. */ + : /* No output */ + : "S" (a), "D" (b), "d" (cword), "r" (ctx->padlockkey) + : "%ecx", "cc", "memory" + ); + + memcpy (bx, b, 16); + +} +#endif /*USE_PADLOCK*/ + + +#ifdef USE_AESNI +/* Encrypt one block using the Intel AES-NI instructions. A and B may + be the same; they need to be properly aligned to 16 bytes. + + Our problem here is that gcc does not allow the "x" constraint for + SSE registers in asm unless you compile with -msse. The common + wisdom is to use a separate file for SSE instructions and build it + separately. This would require a lot of extra build system stuff, + similar to what we do in mpi/ for the asm stuff. What we do + instead is to use standard registers and a bit more of plain asm + which copies the data and key stuff to the SSE registers and later + back. If we decide to implement some block modes with parallelized + AES instructions, it might indeed be better to use plain asm ala + mpi/. */ +static void +do_aesni_enc_aligned (const RIJNDAEL_context *ctx, + unsigned char *b, const unsigned char *a) +{ +#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" +#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" + /* Note: For now we relax the alignment requirement for A and B: It + does not make much difference because in many case we would need + to memcpy them to an extra buffer; using the movdqu is much faster + that memcpy and movdqa. For CFB we know that the IV is properly + aligned but that is a special case. We should better implement + CFB direct in asm. */ + asm volatile ("movdqu %[src], %%xmm0\n\t" /* xmm0 := *a */ + "movl %[key], %%esi\n\t" /* esi := keyschenc */ + "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "movdqa 0x10(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x20(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x30(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x40(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x50(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x60(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x70(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x80(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x90(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xa0(%%esi), %%xmm1\n\t" + "cmp $10, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xb0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xc0(%%esi), %%xmm1\n\t" + "cmp $12, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xd0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xe0(%%esi), %%xmm1\n" + + ".Lenclast%=:\n\t" + aesenclast_xmm1_xmm0 + "movdqu %%xmm0, %[dst]\n" + : [dst] "=m" (*b) + : [src] "m" (*a), + [key] "r" (ctx->keyschenc), + [rounds] "r" (ctx->rounds) + : "%esi", "cc", "memory"); +#undef aesenc_xmm1_xmm0 +#undef aesenclast_xmm1_xmm0 +} + + +static void +do_aesni_dec_aligned (const RIJNDAEL_context *ctx, + unsigned char *b, const unsigned char *a) +{ +#define aesdec_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xde, 0xc1\n\t" +#define aesdeclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdf, 0xc1\n\t" + asm volatile ("movdqu %[src], %%xmm0\n\t" /* xmm0 := *a */ + "movl %[key], %%esi\n\t" + "movdqa (%%esi), %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "movdqa 0x10(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x20(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x30(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x40(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x50(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x60(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x70(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x80(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0x90(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xa0(%%esi), %%xmm1\n\t" + "cmp $10, %[rounds]\n\t" + "jz .Ldeclast%=\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xb0(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xc0(%%esi), %%xmm1\n\t" + "cmp $12, %[rounds]\n\t" + "jz .Ldeclast%=\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xd0(%%esi), %%xmm1\n\t" + aesdec_xmm1_xmm0 + "movdqa 0xe0(%%esi), %%xmm1\n" + + ".Ldeclast%=:\n\t" + aesdeclast_xmm1_xmm0 + "movdqu %%xmm0, %[dst]\n" + : [dst] "=m" (*b) + : [src] "m" (*a), + [key] "r" (ctx->keyschdec), + [rounds] "r" (ctx->rounds) + : "%esi", "cc", "memory"); +#undef aesdec_xmm1_xmm0 +#undef aesdeclast_xmm1_xmm0 +} + + +/* Perform a CFB encryption or decryption round using the + initialization vector IV and the input block A. Write the result + to the output block B and update IV. IV needs to be 16 byte + aligned. */ +static void +do_aesni_cfb (const RIJNDAEL_context *ctx, int decrypt_flag, + unsigned char *iv, unsigned char *b, const unsigned char *a) +{ +#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" +#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" + asm volatile ("movdqa %[iv], %%xmm0\n\t" /* xmm0 := IV */ + "movl %[key], %%esi\n\t" /* esi := keyschenc */ + "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "movdqa 0x10(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x20(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x30(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x40(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x50(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x60(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x70(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x80(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x90(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xa0(%%esi), %%xmm1\n\t" + "cmp $10, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xb0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xc0(%%esi), %%xmm1\n\t" + "cmp $12, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xd0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xe0(%%esi), %%xmm1\n" + + ".Lenclast%=:\n\t" + aesenclast_xmm1_xmm0 + "movdqu %[src], %%xmm1\n\t" /* Save input. */ + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 = input ^ IV */ + + "cmp $1, %[decrypt]\n\t" + "jz .Ldecrypt_%=\n\t" + "movdqa %%xmm0, %[iv]\n\t" /* [encrypt] Store IV. */ + "jmp .Lleave_%=\n" + ".Ldecrypt_%=:\n\t" + "movdqa %%xmm1, %[iv]\n" /* [decrypt] Store IV. */ + ".Lleave_%=:\n\t" + "movdqu %%xmm0, %[dst]\n" /* Store output. */ + : [iv] "+m" (*iv), [dst] "=m" (*b) + : [src] "m" (*a), + [key] "g" (ctx->keyschenc), + [rounds] "g" (ctx->rounds), + [decrypt] "m" (decrypt_flag) + : "%esi", "cc", "memory"); +#undef aesenc_xmm1_xmm0 +#undef aesenclast_xmm1_xmm0 +} + +/* Perform a CTR encryption round using the counter CTR and the input + block A. Write the result to the output block B and update CTR. + CTR needs to be a 16 byte aligned little-endian value. */ +static void +do_aesni_ctr (const RIJNDAEL_context *ctx, + unsigned char *ctr, unsigned char *b, const unsigned char *a) +{ +#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" +#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" + static unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + asm volatile ("movdqa %[ctr], %%xmm0\n\t" /* xmm0, xmm2 := CTR */ + "movaps %%xmm0, %%xmm2\n\t" + "mov $1, %%esi\n\t" /* xmm2++ (big-endian) */ + "movd %%esi, %%xmm1\n\t" + "pshufb %[mask], %%xmm2\n\t" + "paddq %%xmm1, %%xmm2\n\t" + "pshufb %[mask], %%xmm2\n\t" + "movdqa %%xmm2, %[ctr]\n" /* Update CTR. */ + + "movl %[key], %%esi\n\t" /* esi := keyschenc */ + "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "movdqa 0x10(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x20(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x30(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x40(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x50(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x60(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x70(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x80(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0x90(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xa0(%%esi), %%xmm1\n\t" + "cmp $10, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xb0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xc0(%%esi), %%xmm1\n\t" + "cmp $12, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xd0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + "movdqa 0xe0(%%esi), %%xmm1\n" + + ".Lenclast%=:\n\t" + aesenclast_xmm1_xmm0 + "movdqu %[src], %%xmm1\n\t" /* xmm1 := input */ + "pxor %%xmm1, %%xmm0\n\t" /* EncCTR ^= input */ + "movdqu %%xmm0, %[dst]" /* Store EncCTR. */ + + : [ctr] "+m" (*ctr), [dst] "=m" (*b) + : [src] "m" (*a), + [key] "g" (ctx->keyschenc), + [rounds] "g" (ctx->rounds), + [mask] "m" (*be_mask) + : "%esi", "cc", "memory"); +#undef aesenc_xmm1_xmm0 +#undef aesenclast_xmm1_xmm0 +} + + +/* Four blocks at a time variant of do_aesni_ctr. */ +static void +do_aesni_ctr_4 (const RIJNDAEL_context *ctx, + unsigned char *ctr, unsigned char *b, const unsigned char *a) +{ +#define aesenc_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xc1\n\t" +#define aesenc_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd1\n\t" +#define aesenc_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xd9\n\t" +#define aesenc_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdc, 0xe1\n\t" +#define aesenclast_xmm1_xmm0 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xc1\n\t" +#define aesenclast_xmm1_xmm2 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd1\n\t" +#define aesenclast_xmm1_xmm3 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xd9\n\t" +#define aesenclast_xmm1_xmm4 ".byte 0x66, 0x0f, 0x38, 0xdd, 0xe1\n\t" + + static unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + /* Register usage: + esi keyschedule + xmm0 CTR-0 + xmm1 temp / round key + xmm2 CTR-1 + xmm3 CTR-2 + xmm4 CTR-3 + xmm5 temp + */ + + asm volatile ("movdqa %[ctr], %%xmm0\n\t" /* xmm0, xmm2 := CTR */ + "movaps %%xmm0, %%xmm2\n\t" + "mov $1, %%esi\n\t" /* xmm1 := 1 */ + "movd %%esi, %%xmm1\n\t" + "pshufb %[mask], %%xmm2\n\t" /* xmm2 := le(xmm2) */ + "paddq %%xmm1, %%xmm2\n\t" /* xmm2++ */ + "movaps %%xmm2, %%xmm3\n\t" /* xmm3 := xmm2 */ + "paddq %%xmm1, %%xmm3\n\t" /* xmm3++ */ + "movaps %%xmm3, %%xmm4\n\t" /* xmm4 := xmm3 */ + "paddq %%xmm1, %%xmm4\n\t" /* xmm4++ */ + "movaps %%xmm4, %%xmm5\n\t" /* xmm5 := xmm4 */ + "paddq %%xmm1, %%xmm5\n\t" /* xmm5++ */ + "pshufb %[mask], %%xmm2\n\t" /* xmm2 := be(xmm2) */ + "pshufb %[mask], %%xmm3\n\t" /* xmm3 := be(xmm3) */ + "pshufb %[mask], %%xmm4\n\t" /* xmm4 := be(xmm4) */ + "pshufb %[mask], %%xmm5\n\t" /* xmm5 := be(xmm5) */ + "movdqa %%xmm5, %[ctr]\n" /* Update CTR. */ + + "movl %[key], %%esi\n\t" /* esi := keyschenc */ + "movdqa (%%esi), %%xmm1\n\t" /* xmm1 := key[0] */ + "pxor %%xmm1, %%xmm0\n\t" /* xmm0 ^= key[0] */ + "pxor %%xmm1, %%xmm2\n\t" /* xmm2 ^= key[0] */ + "pxor %%xmm1, %%xmm3\n\t" /* xmm3 ^= key[0] */ + "pxor %%xmm1, %%xmm4\n\t" /* xmm4 ^= key[0] */ + "movdqa 0x10(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x20(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x30(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x40(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x50(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x60(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x70(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x80(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0x90(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xa0(%%esi), %%xmm1\n\t" + "cmp $10, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xb0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xc0(%%esi), %%xmm1\n\t" + "cmp $12, %[rounds]\n\t" + "jz .Lenclast%=\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xd0(%%esi), %%xmm1\n\t" + aesenc_xmm1_xmm0 + aesenc_xmm1_xmm2 + aesenc_xmm1_xmm3 + aesenc_xmm1_xmm4 + "movdqa 0xe0(%%esi), %%xmm1\n" + + ".Lenclast%=:\n\t" + aesenclast_xmm1_xmm0 + aesenclast_xmm1_xmm2 + aesenclast_xmm1_xmm3 + aesenclast_xmm1_xmm4 + + "movdqu %[src], %%xmm1\n\t" /* Get block 1. */ + "pxor %%xmm1, %%xmm0\n\t" /* EncCTR-1 ^= input */ + "movdqu %%xmm0, %[dst]\n\t" /* Store block 1 */ + + "movdqu (16)%[src], %%xmm1\n\t" /* Get block 2. */ + "pxor %%xmm1, %%xmm2\n\t" /* EncCTR-2 ^= input */ + "movdqu %%xmm2, (16)%[dst]\n\t" /* Store block 2. */ + + "movdqu (32)%[src], %%xmm1\n\t" /* Get block 3. */ + "pxor %%xmm1, %%xmm3\n\t" /* EncCTR-3 ^= input */ + "movdqu %%xmm3, (32)%[dst]\n\t" /* Store block 3. */ + + "movdqu (48)%[src], %%xmm1\n\t" /* Get block 4. */ + "pxor %%xmm1, %%xmm4\n\t" /* EncCTR-4 ^= input */ + "movdqu %%xmm4, (48)%[dst]" /* Store block 4. */ + + : [ctr] "+m" (*ctr), [dst] "=m" (*b) + : [src] "m" (*a), + [key] "g" (ctx->keyschenc), + [rounds] "g" (ctx->rounds), + [mask] "m" (*be_mask) + : "%esi", "cc", "memory"); +#undef aesenc_xmm1_xmm0 +#undef aesenc_xmm1_xmm2 +#undef aesenc_xmm1_xmm3 +#undef aesenc_xmm1_xmm4 +#undef aesenclast_xmm1_xmm0 +#undef aesenclast_xmm1_xmm2 +#undef aesenclast_xmm1_xmm3 +#undef aesenclast_xmm1_xmm4 +} + + +static void +do_aesni (RIJNDAEL_context *ctx, int decrypt_flag, + unsigned char *bx, const unsigned char *ax) +{ + + if (decrypt_flag) + { + if (!ctx->decryption_prepared ) + { + prepare_decryption ( ctx ); + ctx->decryption_prepared = 1; + } + do_aesni_dec_aligned (ctx, bx, ax); + } + else + do_aesni_enc_aligned (ctx, bx, ax); +} +#endif /*USE_AESNI*/ + + +static void +rijndael_encrypt (void *context, byte *b, const byte *a) +{ + RIJNDAEL_context *ctx = context; + + if (0) + ; +#ifdef USE_PADLOCK + else if (ctx->use_padlock) + { + do_padlock (ctx, 0, b, a); + _gcry_burn_stack (48 + 15 /* possible padding for alignment */); + } +#endif /*USE_PADLOCK*/ +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + aesni_prepare (); + do_aesni (ctx, 0, b, a); + aesni_cleanup (); + } +#endif /*USE_AESNI*/ + else + { + do_encrypt (ctx, b, a); + _gcry_burn_stack (56 + 2*sizeof(int)); + } +} + + +/* Bulk encryption of complete blocks in CFB mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cfb_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *ivp; + int i; + + if (0) + ; +#ifdef USE_PADLOCK + else if (ctx->use_padlock) + { + /* Fixme: Let Padlock do the CFBing. */ + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the IV. */ + do_padlock (ctx, 0, iv, iv); + /* XOR the input with the IV and store input into IV. */ + for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) + *outbuf++ = (*ivp++ ^= *inbuf++); + } + } +#endif /*USE_PADLOCK*/ +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + aesni_prepare (); + for ( ;nblocks; nblocks-- ) + { + do_aesni_cfb (ctx, 0, iv, outbuf, inbuf); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + aesni_cleanup (); + } +#endif /*USE_AESNI*/ + else + { + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the IV. */ + do_encrypt_aligned (ctx, iv, iv); + /* XOR the input with the IV and store input into IV. */ + for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) + *outbuf++ = (*ivp++ ^= *inbuf++); + } + } + + _gcry_burn_stack (48 + 2*sizeof(int)); +} + + +/* Bulk encryption of complete blocks in CBC mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cbc_enc (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks, int cbc_mac) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *ivp; + int i; + +#ifdef USE_AESNI + if (ctx->use_aesni) + aesni_prepare (); +#endif /*USE_AESNI*/ + + for ( ;nblocks; nblocks-- ) + { + for (ivp=iv, i=0; i < BLOCKSIZE; i++ ) + outbuf[i] = inbuf[i] ^ *ivp++; + + if (0) + ; +#ifdef USE_PADLOCK + else if (ctx->use_padlock) + do_padlock (ctx, 0, outbuf, outbuf); +#endif /*USE_PADLOCK*/ +#ifdef USE_AESNI + else if (ctx->use_aesni) + do_aesni (ctx, 0, outbuf, outbuf); +#endif /*USE_AESNI*/ + else + do_encrypt (ctx, outbuf, outbuf ); + + memcpy (iv, outbuf, BLOCKSIZE); + inbuf += BLOCKSIZE; + if (!cbc_mac) + outbuf += BLOCKSIZE; + } + +#ifdef USE_AESNI + if (ctx->use_aesni) + aesni_cleanup (); +#endif /*USE_AESNI*/ + + _gcry_burn_stack (48 + 2*sizeof(int)); +} + + +/* Bulk encryption of complete blocks in CTR mode. Caller needs to + make sure that CTR is aligned on a 16 byte boundary if AESNI; the + minimum alignment is for an u32. This function is only intended + for the bulk encryption feature of cipher.c. CTR is expected to be + of size BLOCKSIZE. */ +void +_gcry_aes_ctr_enc (void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *p; + int i; + + if (0) + ; +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + aesni_prepare (); + for ( ;nblocks > 3 ; nblocks -= 4 ) + { + do_aesni_ctr_4 (ctx, ctr, outbuf, inbuf); + outbuf += 4*BLOCKSIZE; + inbuf += 4*BLOCKSIZE; + } + for ( ;nblocks; nblocks-- ) + { + do_aesni_ctr (ctx, ctr, outbuf, inbuf); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + aesni_cleanup (); + aesni_cleanup_2_4 (); + } +#endif /*USE_AESNI*/ + else + { + union { unsigned char x1[16]; u32 x32[4]; } tmp; + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + do_encrypt_aligned (ctx, tmp.x1, ctr); + /* XOR the input with the encrypted counter and store in output. */ + for (p=tmp.x1, i=0; i < BLOCKSIZE; i++) + *outbuf++ = (*p++ ^= *inbuf++); + /* Increment the counter. */ + for (i = BLOCKSIZE; i > 0; i--) + { + ctr[i-1]++; + if (ctr[i-1]) + break; + } + } + } + + _gcry_burn_stack (48 + 2*sizeof(int)); +} + + + +/* Decrypt one block. A and B need to be aligned on a 4 byte boundary + and the decryption must have been prepared. A and B may be the + same. */ +static void +do_decrypt_aligned (RIJNDAEL_context *ctx, + unsigned char *b, const unsigned char *a) +{ +#define rk (ctx->keyschdec) + int rounds = ctx->rounds; + int r; + union + { + u32 tempu32[4]; /* Force correct alignment. */ + byte temp[4][4]; + } u; + + + *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(a )) ^ *((u32_a_t*)rk[rounds][0]); + *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(a+ 4)) ^ *((u32_a_t*)rk[rounds][1]); + *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(a+ 8)) ^ *((u32_a_t*)rk[rounds][2]); + *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(a+12)) ^ *((u32_a_t*)rk[rounds][3]); + + *((u32_a_t*)(b )) = (*((u32_a_t*)T5[u.temp[0][0]]) + ^ *((u32_a_t*)T6[u.temp[3][1]]) + ^ *((u32_a_t*)T7[u.temp[2][2]]) + ^ *((u32_a_t*)T8[u.temp[1][3]])); + *((u32_a_t*)(b+ 4)) = (*((u32_a_t*)T5[u.temp[1][0]]) + ^ *((u32_a_t*)T6[u.temp[0][1]]) + ^ *((u32_a_t*)T7[u.temp[3][2]]) + ^ *((u32_a_t*)T8[u.temp[2][3]])); + *((u32_a_t*)(b+ 8)) = (*((u32_a_t*)T5[u.temp[2][0]]) + ^ *((u32_a_t*)T6[u.temp[1][1]]) + ^ *((u32_a_t*)T7[u.temp[0][2]]) + ^ *((u32_a_t*)T8[u.temp[3][3]])); + *((u32_a_t*)(b+12)) = (*((u32_a_t*)T5[u.temp[3][0]]) + ^ *((u32_a_t*)T6[u.temp[2][1]]) + ^ *((u32_a_t*)T7[u.temp[1][2]]) + ^ *((u32_a_t*)T8[u.temp[0][3]])); + + for (r = rounds-1; r > 1; r--) + { + *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[r][0]); + *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[r][1]); + *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[r][2]); + *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[r][3]); + *((u32_a_t*)(b )) = (*((u32_a_t*)T5[u.temp[0][0]]) + ^ *((u32_a_t*)T6[u.temp[3][1]]) + ^ *((u32_a_t*)T7[u.temp[2][2]]) + ^ *((u32_a_t*)T8[u.temp[1][3]])); + *((u32_a_t*)(b+ 4)) = (*((u32_a_t*)T5[u.temp[1][0]]) + ^ *((u32_a_t*)T6[u.temp[0][1]]) + ^ *((u32_a_t*)T7[u.temp[3][2]]) + ^ *((u32_a_t*)T8[u.temp[2][3]])); + *((u32_a_t*)(b+ 8)) = (*((u32_a_t*)T5[u.temp[2][0]]) + ^ *((u32_a_t*)T6[u.temp[1][1]]) + ^ *((u32_a_t*)T7[u.temp[0][2]]) + ^ *((u32_a_t*)T8[u.temp[3][3]])); + *((u32_a_t*)(b+12)) = (*((u32_a_t*)T5[u.temp[3][0]]) + ^ *((u32_a_t*)T6[u.temp[2][1]]) + ^ *((u32_a_t*)T7[u.temp[1][2]]) + ^ *((u32_a_t*)T8[u.temp[0][3]])); + } + + /* Last round is special. */ + *((u32_a_t*)u.temp[0]) = *((u32_a_t*)(b )) ^ *((u32_a_t*)rk[1][0]); + *((u32_a_t*)u.temp[1]) = *((u32_a_t*)(b+ 4)) ^ *((u32_a_t*)rk[1][1]); + *((u32_a_t*)u.temp[2]) = *((u32_a_t*)(b+ 8)) ^ *((u32_a_t*)rk[1][2]); + *((u32_a_t*)u.temp[3]) = *((u32_a_t*)(b+12)) ^ *((u32_a_t*)rk[1][3]); + b[ 0] = S5[u.temp[0][0]]; + b[ 1] = S5[u.temp[3][1]]; + b[ 2] = S5[u.temp[2][2]]; + b[ 3] = S5[u.temp[1][3]]; + b[ 4] = S5[u.temp[1][0]]; + b[ 5] = S5[u.temp[0][1]]; + b[ 6] = S5[u.temp[3][2]]; + b[ 7] = S5[u.temp[2][3]]; + b[ 8] = S5[u.temp[2][0]]; + b[ 9] = S5[u.temp[1][1]]; + b[10] = S5[u.temp[0][2]]; + b[11] = S5[u.temp[3][3]]; + b[12] = S5[u.temp[3][0]]; + b[13] = S5[u.temp[2][1]]; + b[14] = S5[u.temp[1][2]]; + b[15] = S5[u.temp[0][3]]; + *((u32_a_t*)(b )) ^= *((u32_a_t*)rk[0][0]); + *((u32_a_t*)(b+ 4)) ^= *((u32_a_t*)rk[0][1]); + *((u32_a_t*)(b+ 8)) ^= *((u32_a_t*)rk[0][2]); + *((u32_a_t*)(b+12)) ^= *((u32_a_t*)rk[0][3]); +#undef rk +} + + +/* Decrypt one block. AX and BX may be the same. */ +static void +do_decrypt (RIJNDAEL_context *ctx, byte *bx, const byte *ax) +{ + if ( !ctx->decryption_prepared ) + { + prepare_decryption ( ctx ); + _gcry_burn_stack (64); + ctx->decryption_prepared = 1; + } + + /* BX and AX are not necessary correctly aligned. Thus we might + need to copy them here. We try to align to a 16 bytes. */ + if (((size_t)ax & 0x0f) || ((size_t)bx & 0x0f)) + { + union + { + u32 dummy[4]; + byte a[16] ATTR_ALIGNED_16; + } a; + union + { + u32 dummy[4]; + byte b[16] ATTR_ALIGNED_16; + } b; + + memcpy (a.a, ax, 16); + do_decrypt_aligned (ctx, b.b, a.a); + memcpy (bx, b.b, 16); + } + else + { + do_decrypt_aligned (ctx, bx, ax); + } +} + + + + +static void +rijndael_decrypt (void *context, byte *b, const byte *a) +{ + RIJNDAEL_context *ctx = context; + + if (0) + ; +#ifdef USE_PADLOCK + else if (ctx->use_padlock) + { + do_padlock (ctx, 1, b, a); + _gcry_burn_stack (48 + 2*sizeof(int) /* FIXME */); + } +#endif /*USE_PADLOCK*/ +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + aesni_prepare (); + do_aesni (ctx, 1, b, a); + aesni_cleanup (); + } +#endif /*USE_AESNI*/ + else + { + do_decrypt (ctx, b, a); + _gcry_burn_stack (56+2*sizeof(int)); + } +} + + +/* Bulk decryption of complete blocks in CFB mode. Caller needs to + make sure that IV is aligned on an unisgned lonhg boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cfb_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *ivp; + unsigned char temp; + int i; + + if (0) + ; +#ifdef USE_PADLOCK + else if (ctx->use_padlock) + { + /* Fixme: Let Padlock do the CFBing. */ + for ( ;nblocks; nblocks-- ) + { + do_padlock (ctx, 0, iv, iv); + for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) + { + temp = *inbuf++; + *outbuf++ = *ivp ^ temp; + *ivp++ = temp; + } + } + } +#endif /*USE_PADLOCK*/ +#ifdef USE_AESNI + else if (ctx->use_aesni) + { + aesni_prepare (); + for ( ;nblocks; nblocks-- ) + { + do_aesni_cfb (ctx, 1, iv, outbuf, inbuf); + outbuf += BLOCKSIZE; + inbuf += BLOCKSIZE; + } + aesni_cleanup (); + } +#endif /*USE_AESNI*/ + else + { + for ( ;nblocks; nblocks-- ) + { + do_encrypt_aligned (ctx, iv, iv); + for (ivp=iv,i=0; i < BLOCKSIZE; i++ ) + { + temp = *inbuf++; + *outbuf++ = *ivp ^ temp; + *ivp++ = temp; + } + } + } + + _gcry_burn_stack (48 + 2*sizeof(int)); +} + + +/* Bulk decryption of complete blocks in CBC mode. Caller needs to + make sure that IV is aligned on an unsigned long boundary. This + function is only intended for the bulk encryption feature of + cipher.c. */ +void +_gcry_aes_cbc_dec (void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + unsigned int nblocks) +{ + RIJNDAEL_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char *ivp; + int i; + unsigned char savebuf[BLOCKSIZE]; + +#ifdef USE_AESNI + if (ctx->use_aesni) + aesni_prepare (); +#endif /*USE_AESNI*/ + + for ( ;nblocks; nblocks-- ) + { + /* We need to save INBUF away because it may be identical to + OUTBUF. */ + memcpy (savebuf, inbuf, BLOCKSIZE); + + if (0) + ; +#ifdef USE_PADLOCK + else if (ctx->use_padlock) + do_padlock (ctx, 1, outbuf, inbuf); +#endif /*USE_PADLOCK*/ +#ifdef USE_AESNI + else if (ctx->use_aesni) + do_aesni (ctx, 1, outbuf, inbuf); +#endif /*USE_AESNI*/ + else + do_decrypt (ctx, outbuf, inbuf); + + for (ivp=iv, i=0; i < BLOCKSIZE; i++ ) + outbuf[i] ^= *ivp++; + memcpy (iv, savebuf, BLOCKSIZE); + inbuf += BLOCKSIZE; + outbuf += BLOCKSIZE; + } + +#ifdef USE_AESNI + if (ctx->use_aesni) + aesni_cleanup (); +#endif /*USE_AESNI*/ + + _gcry_burn_stack (48 + 2*sizeof(int) + BLOCKSIZE + 4*sizeof (char*)); +} + + + + +/* Run the self-tests for AES 128. Returns NULL on success. */ +static const char* +selftest_basic_128 (void) +{ + RIJNDAEL_context ctx; + unsigned char scratch[16]; + + /* The test vectors are from the AES supplied ones; more or less + randomly taken from ecb_tbl.txt (I=42,81,14) */ +#if 1 + static const unsigned char plaintext_128[16] = + { + 0x01,0x4B,0xAF,0x22,0x78,0xA6,0x9D,0x33, + 0x1D,0x51,0x80,0x10,0x36,0x43,0xE9,0x9A + }; + static const unsigned char key_128[16] = + { + 0xE8,0xE9,0xEA,0xEB,0xED,0xEE,0xEF,0xF0, + 0xF2,0xF3,0xF4,0xF5,0xF7,0xF8,0xF9,0xFA + }; + static const unsigned char ciphertext_128[16] = + { + 0x67,0x43,0xC3,0xD1,0x51,0x9A,0xB4,0xF2, + 0xCD,0x9A,0x78,0xAB,0x09,0xA5,0x11,0xBD + }; +#else + /* Test vectors from fips-197, appendix C. */ +# warning debug test vectors in use + static const unsigned char plaintext_128[16] = + { + 0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77, + 0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff + }; + static const unsigned char key_128[16] = + { + 0x00,0x01,0x02,0x03,0x04,0x05,0x06,0x07, + 0x08,0x09,0x0a,0x0b,0x0c,0x0d,0x0e,0x0f + /* 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, */ + /* 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c */ + }; + static const unsigned char ciphertext_128[16] = + { + 0x69,0xc4,0xe0,0xd8,0x6a,0x7b,0x04,0x30, + 0xd8,0xcd,0xb7,0x80,0x70,0xb4,0xc5,0x5a + }; +#endif + + rijndael_setkey (&ctx, key_128, sizeof (key_128)); + rijndael_encrypt (&ctx, scratch, plaintext_128); + if (memcmp (scratch, ciphertext_128, sizeof (ciphertext_128))) + return "AES-128 test encryption failed."; + rijndael_decrypt (&ctx, scratch, scratch); + if (memcmp (scratch, plaintext_128, sizeof (plaintext_128))) + return "AES-128 test decryption failed."; + + return NULL; +} + +/* Run the self-tests for AES 192. Returns NULL on success. */ +static const char* +selftest_basic_192 (void) +{ + RIJNDAEL_context ctx; + unsigned char scratch[16]; + + static unsigned char plaintext_192[16] = + { + 0x76,0x77,0x74,0x75,0xF1,0xF2,0xF3,0xF4, + 0xF8,0xF9,0xE6,0xE7,0x77,0x70,0x71,0x72 + }; + static unsigned char key_192[24] = + { + 0x04,0x05,0x06,0x07,0x09,0x0A,0x0B,0x0C, + 0x0E,0x0F,0x10,0x11,0x13,0x14,0x15,0x16, + 0x18,0x19,0x1A,0x1B,0x1D,0x1E,0x1F,0x20 + }; + static const unsigned char ciphertext_192[16] = + { + 0x5D,0x1E,0xF2,0x0D,0xCE,0xD6,0xBC,0xBC, + 0x12,0x13,0x1A,0xC7,0xC5,0x47,0x88,0xAA + }; + + rijndael_setkey (&ctx, key_192, sizeof(key_192)); + rijndael_encrypt (&ctx, scratch, plaintext_192); + if (memcmp (scratch, ciphertext_192, sizeof (ciphertext_192))) + return "AES-192 test encryption failed."; + rijndael_decrypt (&ctx, scratch, scratch); + if (memcmp (scratch, plaintext_192, sizeof (plaintext_192))) + return "AES-192 test decryption failed."; + + return NULL; +} + + +/* Run the self-tests for AES 256. Returns NULL on success. */ +static const char* +selftest_basic_256 (void) +{ + RIJNDAEL_context ctx; + unsigned char scratch[16]; + + static unsigned char plaintext_256[16] = + { + 0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, + 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21 + }; + static unsigned char key_256[32] = + { + 0x08,0x09,0x0A,0x0B,0x0D,0x0E,0x0F,0x10, + 0x12,0x13,0x14,0x15,0x17,0x18,0x19,0x1A, + 0x1C,0x1D,0x1E,0x1F,0x21,0x22,0x23,0x24, + 0x26,0x27,0x28,0x29,0x2B,0x2C,0x2D,0x2E + }; + static const unsigned char ciphertext_256[16] = + { + 0x08,0x0E,0x95,0x17,0xEB,0x16,0x77,0x71, + 0x9A,0xCF,0x72,0x80,0x86,0x04,0x0A,0xE3 + }; + + rijndael_setkey (&ctx, key_256, sizeof(key_256)); + rijndael_encrypt (&ctx, scratch, plaintext_256); + if (memcmp (scratch, ciphertext_256, sizeof (ciphertext_256))) + return "AES-256 test encryption failed."; + rijndael_decrypt (&ctx, scratch, scratch); + if (memcmp (scratch, plaintext_256, sizeof (plaintext_256))) + return "AES-256 test decryption failed."; + + return NULL; +} + +/* Run all the self-tests and return NULL on success. This function + is used for the on-the-fly self-tests. */ +static const char * +selftest (void) +{ + const char *r; + + if ( (r = selftest_basic_128 ()) + || (r = selftest_basic_192 ()) + || (r = selftest_basic_256 ()) ) + return r; + + return r; +} + + +/* SP800-38a.pdf for AES-128. */ +static const char * +selftest_fips_128_38a (int requested_mode) +{ + struct tv + { + int mode; + const unsigned char key[16]; + const unsigned char iv[16]; + struct + { + const unsigned char input[16]; + const unsigned char output[16]; + } data[4]; + } tv[2] = + { + { + GCRY_CIPHER_MODE_CFB, /* F.3.13, CFB128-AES128 */ + { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, + 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + { + { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, + 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, + { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, + 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, + + { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, + 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, + { 0xc8, 0xa6, 0x45, 0x37, 0xa0, 0xb3, 0xa9, 0x3f, + 0xcd, 0xe3, 0xcd, 0xad, 0x9f, 0x1c, 0xe5, 0x8b } }, + + { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, + 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, + { 0x26, 0x75, 0x1f, 0x67, 0xa3, 0xcb, 0xb1, 0x40, + 0xb1, 0x80, 0x8c, 0xf1, 0x87, 0xa4, 0xf4, 0xdf } }, + + { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, + 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, + { 0xc0, 0x4b, 0x05, 0x35, 0x7c, 0x5d, 0x1c, 0x0e, + 0xea, 0xc4, 0xc6, 0x6f, 0x9f, 0xf7, 0xf2, 0xe6 } } + } + }, + { + GCRY_CIPHER_MODE_OFB, + { 0x2b, 0x7e, 0x15, 0x16, 0x28, 0xae, 0xd2, 0xa6, + 0xab, 0xf7, 0x15, 0x88, 0x09, 0xcf, 0x4f, 0x3c }, + { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f }, + { + { { 0x6b, 0xc1, 0xbe, 0xe2, 0x2e, 0x40, 0x9f, 0x96, + 0xe9, 0x3d, 0x7e, 0x11, 0x73, 0x93, 0x17, 0x2a }, + { 0x3b, 0x3f, 0xd9, 0x2e, 0xb7, 0x2d, 0xad, 0x20, + 0x33, 0x34, 0x49, 0xf8, 0xe8, 0x3c, 0xfb, 0x4a } }, + + { { 0xae, 0x2d, 0x8a, 0x57, 0x1e, 0x03, 0xac, 0x9c, + 0x9e, 0xb7, 0x6f, 0xac, 0x45, 0xaf, 0x8e, 0x51 }, + { 0x77, 0x89, 0x50, 0x8d, 0x16, 0x91, 0x8f, 0x03, + 0xf5, 0x3c, 0x52, 0xda, 0xc5, 0x4e, 0xd8, 0x25 } }, + + { { 0x30, 0xc8, 0x1c, 0x46, 0xa3, 0x5c, 0xe4, 0x11, + 0xe5, 0xfb, 0xc1, 0x19, 0x1a, 0x0a, 0x52, 0xef }, + { 0x97, 0x40, 0x05, 0x1e, 0x9c, 0x5f, 0xec, 0xf6, + 0x43, 0x44, 0xf7, 0xa8, 0x22, 0x60, 0xed, 0xcc } }, + + { { 0xf6, 0x9f, 0x24, 0x45, 0xdf, 0x4f, 0x9b, 0x17, + 0xad, 0x2b, 0x41, 0x7b, 0xe6, 0x6c, 0x37, 0x10 }, + { 0x30, 0x4c, 0x65, 0x28, 0xf6, 0x59, 0xc7, 0x78, + 0x66, 0xa5, 0x10, 0xd9, 0xc1, 0xd6, 0xae, 0x5e } }, + } + } + }; + unsigned char scratch[16]; + gpg_error_t err; + int tvi, idx; + gcry_cipher_hd_t hdenc = NULL; + gcry_cipher_hd_t hddec = NULL; + +#define Fail(a) do { \ + _gcry_cipher_close (hdenc); \ + _gcry_cipher_close (hddec); \ + return a; \ + } while (0) + + gcry_assert (sizeof tv[0].data[0].input == sizeof scratch); + gcry_assert (sizeof tv[0].data[0].output == sizeof scratch); + + for (tvi=0; tvi < DIM (tv); tvi++) + if (tv[tvi].mode == requested_mode) + break; + if (tvi == DIM (tv)) + Fail ("no test data for this mode"); + + err = _gcry_cipher_open (&hdenc, GCRY_CIPHER_AES, tv[tvi].mode, 0); + if (err) + Fail ("open"); + err = _gcry_cipher_open (&hddec, GCRY_CIPHER_AES, tv[tvi].mode, 0); + if (err) + Fail ("open"); + err = _gcry_cipher_setkey (hdenc, tv[tvi].key, sizeof tv[tvi].key); + if (!err) + err = _gcry_cipher_setkey (hddec, tv[tvi].key, sizeof tv[tvi].key); + if (err) + Fail ("set key"); + err = _gcry_cipher_setiv (hdenc, tv[tvi].iv, sizeof tv[tvi].iv); + if (!err) + err = _gcry_cipher_setiv (hddec, tv[tvi].iv, sizeof tv[tvi].iv); + if (err) + Fail ("set IV"); + for (idx=0; idx < DIM (tv[tvi].data); idx++) + { + err = _gcry_cipher_encrypt (hdenc, scratch, sizeof scratch, + tv[tvi].data[idx].input, + sizeof tv[tvi].data[idx].input); + if (err) + Fail ("encrypt command"); + if (memcmp (scratch, tv[tvi].data[idx].output, sizeof scratch)) + Fail ("encrypt mismatch"); + err = _gcry_cipher_decrypt (hddec, scratch, sizeof scratch, + tv[tvi].data[idx].output, + sizeof tv[tvi].data[idx].output); + if (err) + Fail ("decrypt command"); + if (memcmp (scratch, tv[tvi].data[idx].input, sizeof scratch)) + Fail ("decrypt mismatch"); + } + +#undef Fail + _gcry_cipher_close (hdenc); + _gcry_cipher_close (hddec); + return NULL; +} + + +/* Complete selftest for AES-128 with all modes and driver code. */ +static gpg_err_code_t +selftest_fips_128 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + what = "low-level"; + errtxt = selftest_basic_128 (); + if (errtxt) + goto failed; + + if (extended) + { + what = "cfb"; + errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_CFB); + if (errtxt) + goto failed; + + what = "ofb"; + errtxt = selftest_fips_128_38a (GCRY_CIPHER_MODE_OFB); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cipher", GCRY_CIPHER_AES128, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + +/* Complete selftest for AES-192. */ +static gpg_err_code_t +selftest_fips_192 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + (void)extended; /* No extended tests available. */ + + what = "low-level"; + errtxt = selftest_basic_192 (); + if (errtxt) + goto failed; + + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cipher", GCRY_CIPHER_AES192, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Complete selftest for AES-256. */ +static gpg_err_code_t +selftest_fips_256 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + (void)extended; /* No extended tests available. */ + + what = "low-level"; + errtxt = selftest_basic_256 (); + if (errtxt) + goto failed; + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cipher", GCRY_CIPHER_AES256, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_CIPHER_AES128: + ec = selftest_fips_128 (extended, report); + break; + case GCRY_CIPHER_AES192: + ec = selftest_fips_192 (extended, report); + break; + case GCRY_CIPHER_AES256: + ec = selftest_fips_256 (extended, report); + break; + default: + ec = GPG_ERR_CIPHER_ALGO; + break; + + } + return ec; +} + + + + +static const char *rijndael_names[] = + { + "RIJNDAEL", + "AES128", + "AES-128", + NULL + }; + +static gcry_cipher_oid_spec_t rijndael_oids[] = + { + { "2.16.840.1.101.3.4.1.1", GCRY_CIPHER_MODE_ECB }, + { "2.16.840.1.101.3.4.1.2", GCRY_CIPHER_MODE_CBC }, + { "2.16.840.1.101.3.4.1.3", GCRY_CIPHER_MODE_OFB }, + { "2.16.840.1.101.3.4.1.4", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_aes = + { + "AES", rijndael_names, rijndael_oids, 16, 128, sizeof (RIJNDAEL_context), + rijndael_setkey, rijndael_encrypt, rijndael_decrypt + }; +cipher_extra_spec_t _gcry_cipher_extraspec_aes = + { + run_selftests + }; + +static const char *rijndael192_names[] = + { + "RIJNDAEL192", + "AES-192", + NULL + }; + +static gcry_cipher_oid_spec_t rijndael192_oids[] = + { + { "2.16.840.1.101.3.4.1.21", GCRY_CIPHER_MODE_ECB }, + { "2.16.840.1.101.3.4.1.22", GCRY_CIPHER_MODE_CBC }, + { "2.16.840.1.101.3.4.1.23", GCRY_CIPHER_MODE_OFB }, + { "2.16.840.1.101.3.4.1.24", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_aes192 = + { + "AES192", rijndael192_names, rijndael192_oids, 16, 192, sizeof (RIJNDAEL_context), + rijndael_setkey, rijndael_encrypt, rijndael_decrypt + }; +cipher_extra_spec_t _gcry_cipher_extraspec_aes192 = + { + run_selftests + }; + +static const char *rijndael256_names[] = + { + "RIJNDAEL256", + "AES-256", + NULL + }; + +static gcry_cipher_oid_spec_t rijndael256_oids[] = + { + { "2.16.840.1.101.3.4.1.41", GCRY_CIPHER_MODE_ECB }, + { "2.16.840.1.101.3.4.1.42", GCRY_CIPHER_MODE_CBC }, + { "2.16.840.1.101.3.4.1.43", GCRY_CIPHER_MODE_OFB }, + { "2.16.840.1.101.3.4.1.44", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_aes256 = + { + "AES256", rijndael256_names, rijndael256_oids, 16, 256, + sizeof (RIJNDAEL_context), + rijndael_setkey, rijndael_encrypt, rijndael_decrypt + }; + +cipher_extra_spec_t _gcry_cipher_extraspec_aes256 = + { + run_selftests + }; -- cgit v1.2.3