 comm/third_party/libgcrypt/cipher/rijndael-s390x.c | 1155 ++++++++++++++++++
 1 file changed, 1155 insertions, 0 deletions
diff --git a/comm/third_party/libgcrypt/cipher/rijndael-s390x.c b/comm/third_party/libgcrypt/cipher/rijndael-s390x.c
new file mode 100644
index 0000000000..aea65c5a3d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/rijndael-s390x.c
@@ -0,0 +1,1155 @@
+/* Rijndael (AES) for GnuPG - s390x/zSeries AES implementation
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#include "rijndael-internal.h"
+#include "cipher-internal.h"
+#include "bufhelp.h"
+
+#ifdef USE_S390X_CRYPTO
+
+#include "asm-inline-s390x.h"
+
+#define NO_INLINE __attribute__((noinline))
+
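+/* Parameter block for the KMA (cipher message with authentication)
+ * instruction operating in GCM mode.  The layout appears to follow the
+ * CPACF KMA-GCM-AES parameter block: reserved words, the 32-bit counter
+ * value, the current tag, the GHASH subkey, the total AAD and ciphertext
+ * lengths, the initial counter (J0) and the AES key. */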
+struct aes_s390x_gcm_params_s
+{
+ u32 reserved[3];
+ u32 counter_value;
+ u64 tag[2];
+ u64 hash_subkey[2];
+ u64 total_aad_length;
+ u64 total_cipher_length;
+ u32 initial_counter_value[4];
+ u64 key[4];
+};
+
+#define DECL_QUERY_FUNC(instruction, opcode) \
+ static u128_t instruction ##_query(void) \
+ { \
+ static u128_t function_codes = 0; \
+ static int initialized = 0; \
+ register unsigned long reg0 asm("0") = 0; \
+ register void *reg1 asm("1") = &function_codes; \
+ u128_t r1, r2; \
+ \
+ if (initialized) \
+ return function_codes; \
+ \
+ asm volatile ("0: .insn rre," #opcode " << 16, %[r1], %[r2]\n\t" \
+ " brc 1,0b\n\t" \
+ : [r1] "=a" (r1), [r2] "=a" (r2) \
+ : [reg0] "r" (reg0), [reg1] "r" (reg1) \
+ : "cc", "memory"); \
+ \
+ initialized = 1; \
+ return function_codes; \
+ }
+
+#define DECL_EXECUTE_FUNC(instruction, opcode, param_const) \
+ static ALWAYS_INLINE size_t \
+ instruction ##_execute(unsigned int func, param_const void *param_block, \
+ void *dst, const void *src, size_t src_len) \
+ { \
+ register unsigned long reg0 asm("0") = func; \
+ register param_const byte *reg1 asm("1") = param_block; \
+ u128_t r1 = ((u128_t)(uintptr_t)dst << 64); \
+ u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len; \
+ \
+ asm volatile ("0: .insn rre," #opcode " << 16, %[r1], %[r2]\n\t" \
+ " brc 1,0b\n\t" \
+ : [r1] "+a" (r1), [r2] "+a" (r2) \
+ : [func] "r" (reg0), [param_ptr] "r" (reg1) \
+ : "cc", "memory"); \
+ \
+ return (u64)r2; \
+ }
+
+DECL_QUERY_FUNC(km, 0xb92e);
+DECL_QUERY_FUNC(kmc, 0xb92f);
+DECL_QUERY_FUNC(kmac, 0xb91e);
+DECL_QUERY_FUNC(kmf, 0xb92a);
+DECL_QUERY_FUNC(kmo, 0xb92b);
+
+DECL_EXECUTE_FUNC(km, 0xb92e, const);
+DECL_EXECUTE_FUNC(kmc, 0xb92f, );
+DECL_EXECUTE_FUNC(kmac, 0xb91e, );
+DECL_EXECUTE_FUNC(kmf, 0xb92a, );
+DECL_EXECUTE_FUNC(kmo, 0xb92b, );
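+
+/* The wrappers above expose the query and execute forms of each CPACF
+ * instruction.  As a rough illustration of their use (mirroring the calls
+ * further below), a single-block KM-AES-128 encryption would look like:
+ *
+ *   if (km_query () & km_function_to_mask (KM_FUNCTION_AES_128))
+ *     km_execute (KM_FUNCTION_AES_128 | KM_ENCRYPT, keysched, dst, src, 16);
+ *
+ * where 'keysched', 'dst' and 'src' stand for the key schedule and the
+ * 16-byte output/input buffers. */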
+
+static u128_t kma_query(void)
+{
+ static u128_t function_codes = 0;
+ static int initialized = 0;
+ register unsigned long reg0 asm("0") = 0;
+ register void *reg1 asm("1") = &function_codes;
+ u128_t r1, r2, r3;
+
+ if (initialized)
+ return function_codes;
+
+ asm volatile ("0: .insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
+ " brc 1,0b\n\t"
+ : [r1] "=a" (r1), [r2] "=a" (r2), [r3] "=a" (r3)
+ : [reg0] "r" (reg0), [reg1] "r" (reg1)
+ : "cc", "memory");
+
+ initialized = 1;
+ return function_codes;
+}
+
+static ALWAYS_INLINE void
+kma_execute(unsigned int func, void *param_block, byte *dst, const byte *src,
+ size_t src_len, const byte *aad, size_t aad_len)
+{
+ register unsigned long reg0 asm("0") = func;
+ register byte *reg1 asm("1") = param_block;
+ u128_t r1 = ((u128_t)(uintptr_t)dst << 64);
+ u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len;
+ u128_t r3 = ((u128_t)(uintptr_t)aad << 64) | (u64)aad_len;
+
+ asm volatile ("0: .insn rrf,0xb929 << 16, %[r1], %[r2], %[r3], 0\n\t"
+ " brc 1,0b\n\t"
+ : [r1] "+a" (r1), [r2] "+a" (r2), [r3] "+a" (r3),
+ [func] "+r" (reg0)
+ : [param_ptr] "r" (reg1)
+ : "cc", "memory");
+}
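+
+/* In the execute helpers the u128_t values hold an operand address in the
+ * upper 64 bits and the remaining byte length in the lower 64 bits; on
+ * s390x a 128-bit value occupies an even/odd general register pair, which
+ * is the form the km/kmc/kma instructions consume. */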
+
+unsigned int _gcry_aes_s390x_encrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src)
+{
+ km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, dst, src,
+ BLOCKSIZE);
+ return 0;
+}
+
+unsigned int _gcry_aes_s390x_decrypt(const RIJNDAEL_context *ctx,
+ unsigned char *dst,
+ const unsigned char *src)
+{
+ km_execute (ctx->km_func | KM_DECRYPT, ctx->keyschenc, dst, src,
+ BLOCKSIZE);
+ return 0;
+}
+
+static void aes_s390x_cbc_enc(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int cbc_mac)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ u128_t params[3];
+
+ /* Prepare parameter block. */
+ memcpy (&params[0], iv, BLOCKSIZE);
+ memcpy (&params[1], ctx->keyschenc, 32);
+
+ if (cbc_mac)
+ {
+ kmac_execute (ctx->kmac_func | KM_ENCRYPT, &params, NULL, in,
+ nblocks * BLOCKSIZE);
+ memcpy (out, &params[0], BLOCKSIZE);
+ }
+ else
+ {
+ kmc_execute (ctx->kmc_func | KM_ENCRYPT, &params, out, in,
+ nblocks * BLOCKSIZE);
+ }
+
+ /* Update IV with OCV. */
+ memcpy (iv, &params[0], BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cbc_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ u128_t params[3];
+
+ /* Prepare parameter block (ICV & key). */
+ memcpy (&params[0], iv, BLOCKSIZE);
+ memcpy (&params[1], ctx->keyschenc, 32);
+
+ kmc_execute (ctx->kmc_func | KM_DECRYPT, &params, out, in,
+ nblocks * BLOCKSIZE);
+
+ /* Update IV with OCV. */
+ memcpy (iv, &params[0], BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cfb128_enc(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ unsigned int function;
+ u128_t params[3];
+
+ /* Prepare parameter block. */
+ memcpy (&params[0], iv, BLOCKSIZE);
+ memcpy (&params[1], ctx->keyschenc, 32);
+
+ function = ctx->kmf_func | KM_ENCRYPT | KMF_LCFB_16;
+ kmf_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+ /* Update IV with OCV. */
+ memcpy (iv, &params[0], BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_cfb128_dec(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ u128_t blocks[64];
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ size_t max_blocks_used = 0;
+
+  /* AES128-CFB128 decryption speed using KMF was observed to be the same as
+   * KMF encryption, ~1.03 cpb.  The expectation was to see performance
+   * similar to AES128-CBC decryption, since decryption in both modes should
+   * be parallelizable (CBC shows ~0.22 cpb).  There is therefore quite a bit
+   * of room for improvement, and the implementation below, which uses the KM
+   * instruction, runs at ~0.70 cpb, a ~30% improvement over KMF.
+   */
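+
+  /* In CFB mode each plaintext block is recovered as
+   *   P_i = C_i ^ E_K(C_{i-1})        (with C_0 = IV),
+   * so the block cipher calls needed for decryption depend only on the
+   * ciphertext and can be batched through plain KM (ECB) invocations,
+   * as done below. */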
+
+ while (nblocks >= 64)
+ {
+ /* Copy IV to encrypt buffer, copy (nblocks - 1) input blocks to
+ * encrypt buffer and update IV. */
+ asm volatile ("mvc 0(16, %[blocks]), 0(%[iv])\n\t"
+ "mvc 16(240, %[blocks]), 0(%[in])\n\t"
+ "mvc 256(256, %[blocks]), 240(%[in])\n\t"
+ "mvc 512(256, %[blocks]), 496(%[in])\n\t"
+ "mvc 768(256, %[blocks]), 752(%[in])\n\t"
+ "mvc 0(16, %[iv]), 1008(%[in])\n\t"
+ :
+ : [in] "a" (in), [out] "a" (out), [blocks] "a" (blocks),
+ [iv] "a" (iv)
+ : "memory");
+
+ /* Perform encryption of temporary buffer. */
+ km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, blocks, blocks,
+ 64 * BLOCKSIZE);
+
+ /* Xor encrypt buffer with input blocks and store to output blocks. */
+ asm volatile ("xc 0(256, %[blocks]), 0(%[in])\n\t"
+ "xc 256(256, %[blocks]), 256(%[in])\n\t"
+ "xc 512(256, %[blocks]), 512(%[in])\n\t"
+ "xc 768(256, %[blocks]), 768(%[in])\n\t"
+ "mvc 0(256, %[out]), 0(%[blocks])\n\t"
+ "mvc 256(256, %[out]), 256(%[blocks])\n\t"
+ "mvc 512(256, %[out]), 512(%[blocks])\n\t"
+ "mvc 768(256, %[out]), 768(%[blocks])\n\t"
+ :
+ : [in] "a" (in), [out] "a" (out), [blocks] "a" (blocks)
+ : "memory");
+
+ max_blocks_used = 64;
+ in += 64 * BLOCKSIZE;
+ out += 64 * BLOCKSIZE;
+ nblocks -= 64;
+ }
+
+ if (nblocks)
+ {
+ unsigned int pos = 0;
+ size_t in_nblocks = nblocks;
+ size_t num_in = 0;
+
+ max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+ /* Copy IV to encrypt buffer. */
+ asm volatile ("mvc 0(16, %[blocks]), 0(%[iv])\n\t"
+ :
+ : [blocks] "a" (blocks), [iv] "a" (iv)
+ : "memory");
+ pos += 1;
+
+#define CFB_MOVE_BLOCKS(block_oper, move_nbytes) \
+ block_oper (in_nblocks - 1 >= move_nbytes / BLOCKSIZE) \
+ { \
+ unsigned int move_nblocks = move_nbytes / BLOCKSIZE; \
+ asm volatile ("mvc 0(" #move_nbytes ", %[blocks_x]), 0(%[in])\n\t" \
+ : \
+ : [blocks_x] "a" (&blocks[pos]), [in] "a" (in) \
+ : "memory"); \
+ num_in += move_nblocks; \
+ in += move_nblocks * BLOCKSIZE; \
+ pos += move_nblocks; \
+ in_nblocks -= move_nblocks; \
+ }
+
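+      /* MVC takes its length as an immediate of at most 256 bytes, so the
+       * byte counts stringized into the macro above have to be compile-time
+       * constants; hence the fixed-size instantiations below. */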
+ /* Copy (nblocks - 1) input blocks to encrypt buffer. */
+ CFB_MOVE_BLOCKS(while, 256);
+ CFB_MOVE_BLOCKS(if, 128);
+ CFB_MOVE_BLOCKS(if, 64);
+ CFB_MOVE_BLOCKS(if, 32);
+ CFB_MOVE_BLOCKS(if, 16);
+
+#undef CFB_MOVE_BLOCKS
+
+ /* Update IV. */
+ asm volatile ("mvc 0(16, %[iv]), 0(%[in])\n\t"
+ :
+ : [iv] "a" (iv), [in] "a" (in)
+ : "memory");
+ num_in += 1;
+ in += BLOCKSIZE;
+
+ /* Perform encryption of temporary buffer. */
+ km_execute (ctx->km_func | KM_ENCRYPT, ctx->keyschenc, blocks, blocks,
+ nblocks * BLOCKSIZE);
+
+ /* Xor encrypt buffer with input blocks and store to output blocks. */
+ pos = 0;
+ in -= nblocks * BLOCKSIZE;
+
+#define CFB_XOR_BLOCKS(block_oper, xor_nbytes) \
+ block_oper (nblocks >= xor_nbytes / BLOCKSIZE) \
+ { \
+ unsigned int xor_nblocks = xor_nbytes / BLOCKSIZE; \
+ asm volatile ("xc 0(" #xor_nbytes ", %[blocks_x]), 0(%[in])\n\t" \
+ "mvc 0(" #xor_nbytes ", %[out]), 0(%[blocks_x])\n\t" \
+ : \
+ : [blocks_x] "a" (&blocks[pos]), [out] "a" (out), \
+ [in] "a" (in) \
+ : "memory"); \
+ out += xor_nblocks * BLOCKSIZE; \
+ in += xor_nblocks * BLOCKSIZE; \
+ nblocks -= xor_nblocks; \
+ pos += xor_nblocks; \
+ }
+
+ CFB_XOR_BLOCKS(while, 256);
+ CFB_XOR_BLOCKS(if, 128);
+ CFB_XOR_BLOCKS(if, 64);
+ CFB_XOR_BLOCKS(if, 32);
+ CFB_XOR_BLOCKS(if, 16);
+
+#undef CFB_XOR_BLOCKS
+ }
+
+ if (max_blocks_used)
+ wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+}
+
+static void aes_s390x_ofb_enc(void *context, unsigned char *iv,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ unsigned int function;
+ u128_t params[3];
+
+ /* Prepare parameter block. */
+ memcpy (&params[0], iv, BLOCKSIZE);
+ memcpy (&params[1], ctx->keyschenc, 32);
+
+ function = ctx->kmo_func | KM_ENCRYPT;
+ kmo_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+ /* Update IV with OCV. */
+ memcpy (iv, &params[0], BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
+static void aes_s390x_ctr128_enc(void *context, unsigned char *ctr,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ unsigned int function;
+ struct aes_s390x_gcm_params_s params;
+
+ memset (&params.hash_subkey, 0, sizeof(params.hash_subkey));
+ memcpy (&params.key, ctx->keyschenc, 32);
+
+ function = ctx->kma_func | KM_DECRYPT | KMA_HS | KMA_LAAD;
+
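+  /* The GCM hash subkey is left zero and no AAD is passed, so only the CTR
+   * keystream part of KMA is used.  KMA itself steps only the rightmost
+   * 32 bits of the counter, so the work is split at every 32-bit counter
+   * wrap and the full 128-bit counter is advanced with cipher_block_add. */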
+ while (nblocks)
+ {
+ u64 to_overflow = (u64)0xFFFFFFFFU + 1 - buf_get_be32 (ctr + 12);
+ u64 ncurr = nblocks > to_overflow ? to_overflow : nblocks;
+
+ /* Prepare parameter block. */
+ memset (&params.reserved, 0, sizeof(params.reserved));
+ buf_put_be32 (&params.counter_value, buf_get_be32(ctr + 12) - 1);
+ memcpy (&params.initial_counter_value, ctr, 16);
+ params.initial_counter_value[3] = params.counter_value;
+ memset (&params.tag, 0, sizeof(params.tag));
+ params.total_aad_length = 0;
+ params.total_cipher_length = 0;
+
+ /* Update counter. */
+ cipher_block_add (ctr, ncurr, BLOCKSIZE);
+ if (ncurr == (u64)0xFFFFFFFFU + 1)
+ cipher_block_add (ctr, 1, BLOCKSIZE);
+
+ /* Perform CTR using KMA-GCM. */
+ kma_execute (function, &params, out, in, ncurr * BLOCKSIZE, NULL, 0);
+
+ out += ncurr * BLOCKSIZE;
+ in += ncurr * BLOCKSIZE;
+ nblocks -= ncurr;
+ }
+
+ wipememory (&params, sizeof(params));
+}
+
+static size_t aes_s390x_gcm_crypt(gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks,
+ int encrypt)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ byte *ctr = c->u_ctr.ctr;
+ unsigned int function;
+ struct aes_s390x_gcm_params_s params;
+
+ function = ctx->kma_func | (encrypt ? KM_ENCRYPT : KM_DECRYPT)
+ | KMA_HS | KMA_LAAD;
+
+ /* Prepare parameter block. */
+ memset (&params.reserved, 0, sizeof(params.reserved));
+ buf_put_be32 (&params.counter_value, buf_get_be32(ctr + 12) - 1);
+ memcpy (&params.tag, c->u_mode.gcm.u_tag.tag, 16);
+ memcpy (&params.hash_subkey, c->u_mode.gcm.u_ghash_key.key, 16);
+ params.total_aad_length = 0;
+ params.total_cipher_length = 0;
+ memcpy (&params.initial_counter_value, ctr, 12);
+ params.initial_counter_value[3] = params.counter_value;
+ memcpy (&params.key, ctx->keyschenc, 32);
+
+ /* Update counter (CTR32). */
+ buf_put_be32(ctr + 12, buf_get_be32(ctr + 12) + nblocks);
+
+ /* Perform KMA-GCM. */
+ kma_execute (function, &params, out, in, nblocks * BLOCKSIZE, NULL, 0);
+
+ /* Update tag. */
+ memcpy (c->u_mode.gcm.u_tag.tag, &params.tag, 16);
+
+ wipememory (&params, sizeof(params));
+
+ return 0;
+}
+
+static void aes_s390x_xts_crypt(void *context, unsigned char *tweak,
+ void *outbuf_arg, const void *inbuf_arg,
+ size_t nblocks, int encrypt)
+{
+ RIJNDAEL_context *ctx = context;
+ byte *out = outbuf_arg;
+ const byte *in = inbuf_arg;
+ unsigned int function;
+ u128_t params[3];
+ u128_t *params_tweak;
+
+ if (ctx->rounds < 12)
+ {
+ memcpy (&params[0], ctx->keyschenc, 16);
+ params_tweak = &params[1];
+ memcpy (params_tweak, tweak, BLOCKSIZE);
+ }
+ else if (ctx->rounds == 12)
+ {
+ BUG(); /* KM-XTS-AES-192 not defined. */
+ }
+ else
+ {
+ memcpy (&params[0], ctx->keyschenc, 32);
+ params_tweak = &params[2];
+ memcpy (params_tweak, tweak, BLOCKSIZE);
+ }
+
+ function = ctx->km_func_xts | (encrypt ? KM_ENCRYPT : KM_DECRYPT);
+ km_execute (function, &params, out, in, nblocks * BLOCKSIZE);
+
+ /* Update tweak with XTSP. */
+ memcpy (tweak, params_tweak, BLOCKSIZE);
+
+ wipememory (&params, sizeof(params));
+}
+
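+/* Fill a table of 64 OCB offset pointers.  The repeating pattern
+ * L[0], L[1], L[0], L[2], L[0], L[1], L[0], ... follows the number of
+ * trailing zero bits of the running block index, as OCB requires; the
+ * entry for the last block of each 64-block chunk is patched in by the
+ * callers through the returned *pl pointer using ocb_get_l(). */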
+static NO_INLINE void
+aes_s390x_ocb_prepare_Ls (gcry_cipher_hd_t c, u64 blkn, const void *Ls[64],
+ const void ***pl)
+{
+ unsigned int n = 64 - (blkn % 64);
+ int i;
+
+ /* Prepare L pointers. */
+ *pl = &Ls[(63 + n) % 64];
+ for (i = 0; i < 64; i += 8, n = (n + 8) % 64)
+ {
+ static const int lastL[8] = { 3, 4, 3, 5, 3, 4, 3, 0 };
+
+ Ls[(0 + n) % 64] = c->u_mode.ocb.L[0];
+ Ls[(1 + n) % 64] = c->u_mode.ocb.L[1];
+ Ls[(2 + n) % 64] = c->u_mode.ocb.L[0];
+ Ls[(3 + n) % 64] = c->u_mode.ocb.L[2];
+ Ls[(4 + n) % 64] = c->u_mode.ocb.L[0];
+ Ls[(5 + n) % 64] = c->u_mode.ocb.L[1];
+ Ls[(6 + n) % 64] = c->u_mode.ocb.L[0];
+ Ls[(7 + n) % 64] = c->u_mode.ocb.L[lastL[i / 8]];
+ }
+}
+
+static NO_INLINE void
+aes_s390x_ocb_checksum (unsigned char *checksum, const void *plainbuf_arg,
+ size_t nblks)
+{
+ const char *plainbuf = plainbuf_arg;
+ u64 tmp0[2];
+ u64 tmp1[2] = { 0, 0 };
+ u64 tmp2[2] = { 0, 0 };
+ u64 tmp3[2] = { 0, 0 };
+
+ cipher_block_cpy (tmp0, checksum, BLOCKSIZE);
+
+ if (nblks >= 4)
+ {
+ while (nblks >= 4)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ cipher_block_xor_1 (tmp0, plainbuf + 0 * BLOCKSIZE, BLOCKSIZE);
+ cipher_block_xor_1 (tmp1, plainbuf + 1 * BLOCKSIZE, BLOCKSIZE);
+ cipher_block_xor_1 (tmp2, plainbuf + 2 * BLOCKSIZE, BLOCKSIZE);
+ cipher_block_xor_1 (tmp3, plainbuf + 3 * BLOCKSIZE, BLOCKSIZE);
+
+ plainbuf += 4 * BLOCKSIZE;
+ nblks -= 4;
+ }
+
+ cipher_block_xor_1 (tmp0, tmp1, BLOCKSIZE);
+ cipher_block_xor_1 (tmp2, tmp3, BLOCKSIZE);
+ cipher_block_xor_1 (tmp0, tmp2, BLOCKSIZE);
+
+ wipememory (tmp1, sizeof(tmp1));
+ wipememory (tmp2, sizeof(tmp2));
+ wipememory (tmp3, sizeof(tmp3));
+ }
+
+ while (nblks > 0)
+ {
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ cipher_block_xor_1 (tmp0, plainbuf, BLOCKSIZE);
+
+ plainbuf += BLOCKSIZE;
+ nblks--;
+ }
+
+ cipher_block_cpy (checksum, tmp0, BLOCKSIZE);
+
+ wipememory (tmp0, sizeof(tmp0));
+}
+
+static NO_INLINE size_t
+aes_s390x_ocb_enc (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks_arg)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ size_t nblocks = nblocks_arg;
+ u128_t blocks[64];
+ u128_t offset;
+ size_t max_blocks_used = 0;
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+ unsigned int function = ctx->km_func | KM_ENCRYPT;
+ const void *Ls[64];
+ const void **pl;
+
+ aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+ /* Checksumming could be done inline in OCB_INPUT macros, but register
+ * pressure becomes too heavy and performance would end up being worse.
+ * For decryption, checksumming is part of OCB_OUTPUT macros as
+ * output handling is less demanding and can handle the additional
+ * computation. */
+ aes_s390x_ocb_checksum (c->u_ctr.ctr, inbuf_arg, nblocks_arg);
+
+ cipher_block_cpy (&offset, &c->u_iv.iv, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+ cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+ cipher_block_xor (outbuf + (n) * BLOCKSIZE, inbuf + (n) * BLOCKSIZE, \
+ &offset, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+ OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+ OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+ OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+ OCB_INPUT_4((n) + 12);
+
+#define OCB_OUTPUT(n) \
+ cipher_block_xor_1 (outbuf + (n) * BLOCKSIZE, &blocks[n], BLOCKSIZE)
+
+#define OCB_OUTPUT_4(n) \
+ OCB_OUTPUT((n) + 0); OCB_OUTPUT((n) + 1); OCB_OUTPUT((n) + 2); \
+ OCB_OUTPUT((n) + 3)
+
+#define OCB_OUTPUT_16(n) \
+ OCB_OUTPUT_4((n) + 0); OCB_OUTPUT_4((n) + 4); OCB_OUTPUT_4((n) + 8); \
+ OCB_OUTPUT_4((n) + 12);
+
+ while (nblocks >= 64)
+ {
+ blkn += 64;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ OCB_INPUT_16(0);
+ OCB_INPUT_16(16);
+ OCB_INPUT_16(32);
+ OCB_INPUT_16(48);
+
+ km_execute (function, ctx->keyschenc, outbuf, outbuf, 64 * BLOCKSIZE);
+
+ asm volatile ("xc 0(256, %[out]), 0(%[blocks])\n\t"
+ "xc 256(256, %[out]), 256(%[blocks])\n\t"
+ "xc 512(256, %[out]), 512(%[blocks])\n\t"
+ "xc 768(256, %[out]), 768(%[blocks])\n\t"
+ :
+ : [out] "a" (outbuf), [blocks] "a" (blocks)
+ : "memory");
+
+ max_blocks_used = 64;
+ inbuf += 64 * BLOCKSIZE;
+ outbuf += 64 * BLOCKSIZE;
+ nblocks -= 64;
+ }
+
+ if (nblocks)
+ {
+ unsigned int pos = 0;
+
+ max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+ blkn += nblocks;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ while (nblocks >= 16)
+ {
+ OCB_INPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_INPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_INPUT(pos + 0);
+ OCB_INPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_INPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+
+ nblocks = pos;
+ pos = 0;
+ km_execute (function, ctx->keyschenc, outbuf, outbuf,
+ nblocks * BLOCKSIZE);
+
+ while (nblocks >= 16)
+ {
+ OCB_OUTPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_OUTPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_OUTPUT(pos + 0);
+ OCB_OUTPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_OUTPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+ }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+#undef OCB_OUTPUT
+#undef OCB_OUTPUT_4
+#undef OCB_OUTPUT_16
+
+ c->u_mode.ocb.data_nblocks = blkn;
+ cipher_block_cpy (&c->u_iv.iv, &offset, BLOCKSIZE);
+
+ if (max_blocks_used)
+ wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+ return 0;
+}
+
+static NO_INLINE size_t
+aes_s390x_ocb_dec (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks_arg)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ unsigned char *outbuf = outbuf_arg;
+ const unsigned char *inbuf = inbuf_arg;
+ size_t nblocks = nblocks_arg;
+ u128_t blocks[64];
+ u128_t offset;
+ size_t max_blocks_used = 0;
+ u64 blkn = c->u_mode.ocb.data_nblocks;
+ unsigned int function = ctx->km_func | KM_DECRYPT;
+ const void *Ls[64];
+ const void **pl;
+
+ aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+ cipher_block_cpy (&offset, &c->u_iv.iv, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+ cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+ cipher_block_xor (outbuf + (n) * BLOCKSIZE, inbuf + (n) * BLOCKSIZE, \
+ &offset, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+ OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+ OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+ OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+ OCB_INPUT_4((n) + 12);
+
+#define OCB_OUTPUT(n) \
+ cipher_block_xor_1 (&blocks[n], outbuf + (n) * BLOCKSIZE, BLOCKSIZE); \
+ cipher_block_xor_1 (c->u_ctr.ctr, &blocks[n], BLOCKSIZE); \
+ cipher_block_cpy (outbuf + (n) * BLOCKSIZE, &blocks[n], BLOCKSIZE);
+
+#define OCB_OUTPUT_4(n) \
+ OCB_OUTPUT((n) + 0); OCB_OUTPUT((n) + 1); OCB_OUTPUT((n) + 2); \
+ OCB_OUTPUT((n) + 3)
+
+#define OCB_OUTPUT_16(n) \
+ OCB_OUTPUT_4((n) + 0); OCB_OUTPUT_4((n) + 4); OCB_OUTPUT_4((n) + 8); \
+ OCB_OUTPUT_4((n) + 12);
+
+ while (nblocks >= 64)
+ {
+ blkn += 64;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ OCB_INPUT_16(0);
+ OCB_INPUT_16(16);
+ OCB_INPUT_16(32);
+ OCB_INPUT_16(48);
+
+ km_execute (function, ctx->keyschenc, outbuf, outbuf, 64 * BLOCKSIZE);
+
+ asm volatile ("xc 0(256, %[out]), 0(%[blocks])\n\t"
+ "xc 256(256, %[out]), 256(%[blocks])\n\t"
+ "xc 512(256, %[out]), 512(%[blocks])\n\t"
+ "xc 768(256, %[out]), 768(%[blocks])\n\t"
+ :
+ : [out] "a" (outbuf), [blocks] "a" (blocks)
+ : "memory");
+
+ max_blocks_used = 64;
+ inbuf += 64 * BLOCKSIZE;
+ outbuf += 64 * BLOCKSIZE;
+ nblocks -= 64;
+ }
+
+ if (nblocks)
+ {
+ unsigned int pos = 0;
+
+ max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+ blkn += nblocks;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ while (nblocks >= 16)
+ {
+ OCB_INPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_INPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_INPUT(pos + 0);
+ OCB_INPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_INPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+
+ nblocks = pos;
+ pos = 0;
+ km_execute (function, ctx->keyschenc, outbuf, outbuf,
+ nblocks * BLOCKSIZE);
+
+ while (nblocks >= 16)
+ {
+ OCB_OUTPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_OUTPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_OUTPUT(pos + 0);
+ OCB_OUTPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_OUTPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+ }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+#undef OCB_OUTPUT
+#undef OCB_OUTPUT_4
+#undef OCB_OUTPUT_16
+
+ c->u_mode.ocb.data_nblocks = blkn;
+ cipher_block_cpy (&c->u_iv.iv, &offset, BLOCKSIZE);
+
+ if (max_blocks_used)
+ wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+ return 0;
+}
+
+static size_t
+aes_s390x_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+ const void *inbuf_arg, size_t nblocks_arg, int encrypt)
+{
+ if (encrypt)
+ return aes_s390x_ocb_enc (c, outbuf_arg, inbuf_arg, nblocks_arg);
+ else
+ return aes_s390x_ocb_dec (c, outbuf_arg, inbuf_arg, nblocks_arg);
+}
+
+static size_t
+aes_s390x_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+ size_t nblocks_arg)
+{
+ RIJNDAEL_context *ctx = (void *)&c->context.c;
+ const unsigned char *abuf = abuf_arg;
+ u128_t blocks[64];
+ u128_t offset;
+ size_t max_blocks_used = 0;
+ u64 blkn = c->u_mode.ocb.aad_nblocks;
+ unsigned int function = ctx->km_func | KM_ENCRYPT;
+ const void *Ls[64];
+ const void **pl;
+
+ aes_s390x_ocb_prepare_Ls (c, blkn, Ls, &pl);
+
+ cipher_block_cpy (&offset, c->u_mode.ocb.aad_offset, BLOCKSIZE);
+
+#define OCB_INPUT(n) \
+ cipher_block_xor_2dst (&blocks[n], &offset, Ls[n], BLOCKSIZE); \
+ cipher_block_xor_1 (&blocks[n], abuf + (n) * BLOCKSIZE, BLOCKSIZE)
+
+#define OCB_INPUT_4(n) \
+ OCB_INPUT((n) + 0); OCB_INPUT((n) + 1); OCB_INPUT((n) + 2); \
+ OCB_INPUT((n) + 3)
+
+#define OCB_INPUT_16(n) \
+ OCB_INPUT_4((n) + 0); OCB_INPUT_4((n) + 4); OCB_INPUT_4((n) + 8); \
+ OCB_INPUT_4((n) + 12);
+
+ while (nblocks_arg >= 64)
+ {
+ blkn += 64;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ OCB_INPUT_16(0);
+ OCB_INPUT_16(16);
+ OCB_INPUT_16(32);
+ OCB_INPUT_16(48);
+
+ km_execute (function, ctx->keyschenc, blocks, blocks, 64 * BLOCKSIZE);
+
+ aes_s390x_ocb_checksum (c->u_mode.ocb.aad_sum, blocks, 64);
+
+ max_blocks_used = 64;
+ abuf += 64 * BLOCKSIZE;
+ nblocks_arg -= 64;
+ }
+
+ if (nblocks_arg > 0)
+ {
+ size_t nblocks = nblocks_arg;
+ unsigned int pos = 0;
+
+ max_blocks_used = max_blocks_used < nblocks ? nblocks : max_blocks_used;
+
+ blkn += nblocks;
+ *pl = ocb_get_l(c, blkn - blkn % 64);
+
+ while (nblocks >= 16)
+ {
+ OCB_INPUT_16(pos + 0);
+ pos += 16;
+ nblocks -= 16;
+ }
+ while (nblocks >= 4)
+ {
+ OCB_INPUT_4(pos + 0);
+ pos += 4;
+ nblocks -= 4;
+ }
+ if (nblocks >= 2)
+ {
+ OCB_INPUT(pos + 0);
+ OCB_INPUT(pos + 1);
+ pos += 2;
+ nblocks -= 2;
+ }
+ if (nblocks >= 1)
+ {
+ OCB_INPUT(pos + 0);
+ pos += 1;
+ nblocks -= 1;
+ }
+
+ nblocks = pos;
+ nblocks_arg -= pos;
+ pos = 0;
+ km_execute (function, ctx->keyschenc, blocks, blocks,
+ nblocks * BLOCKSIZE);
+
+ aes_s390x_ocb_checksum (c->u_mode.ocb.aad_sum, blocks, nblocks);
+ }
+
+#undef OCB_INPUT
+#undef OCB_INPUT_4
+#undef OCB_INPUT_16
+
+ c->u_mode.ocb.aad_nblocks = blkn;
+ cipher_block_cpy (c->u_mode.ocb.aad_offset, &offset, BLOCKSIZE);
+
+ if (max_blocks_used)
+ wipememory (&blocks, max_blocks_used * BLOCKSIZE);
+
+ return 0;
+}
+
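+/* Probe the CPACF facilities and wire up the bulk routines accordingly:
+ * KM backs the single-block, XTS, OCB and CFB-decryption paths, KMC backs
+ * CBC, KMAC backs CBC-MAC, KMF backs CFB encryption, KMO backs OFB, and
+ * KMA (MSA8) backs CTR and, together with KIMD GHASH, GCM. */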
+int _gcry_aes_s390x_setup_acceleration(RIJNDAEL_context *ctx,
+ unsigned int keylen,
+ unsigned int hwfeatures,
+ cipher_bulk_ops_t *bulk_ops)
+{
+ unsigned int func;
+ unsigned int func_xts;
+ u128_t func_mask;
+ u128_t func_xts_mask;
+
+ if (!(hwfeatures & HWF_S390X_MSA))
+ return 0;
+
+ switch (keylen)
+ {
+ default:
+ case 16:
+ func = KM_FUNCTION_AES_128;
+ func_xts = KM_FUNCTION_XTS_AES_128;
+ func_mask = km_function_to_mask(KM_FUNCTION_AES_128);
+ func_xts_mask = km_function_to_mask(KM_FUNCTION_XTS_AES_128);
+ break;
+ case 24:
+ func = KM_FUNCTION_AES_192;
+ func_xts = 0;
+ func_mask = km_function_to_mask(KM_FUNCTION_AES_192);
+ func_xts_mask = 0; /* XTS-AES192 not available. */
+ break;
+ case 32:
+ func = KM_FUNCTION_AES_256;
+ func_xts = KM_FUNCTION_XTS_AES_256;
+ func_mask = km_function_to_mask(KM_FUNCTION_AES_256);
+      func_xts_mask = km_function_to_mask(KM_FUNCTION_XTS_AES_256);
+ break;
+ }
+
+  /* Query KM for supported algorithms and check whether acceleration for
+   * the requested key length is available. */
+ if (!(km_query () & func_mask))
+ return 0;
+
+ ctx->km_func = func;
+
+ /* Query KM for supported XTS algorithms. */
+ if (km_query () & func_xts_mask)
+ ctx->km_func_xts = func_xts;
+
+ /* Query KMC for supported algorithms. */
+ if (kmc_query () & func_mask)
+ ctx->kmc_func = func;
+
+ /* Query KMAC for supported algorithms. */
+ if (kmac_query () & func_mask)
+ ctx->kmac_func = func;
+
+ if (hwfeatures & HWF_S390X_MSA_4)
+ {
+ /* Query KMF for supported algorithms. */
+ if (kmf_query () & func_mask)
+ ctx->kmf_func = func;
+
+ /* Query KMO for supported algorithms. */
+ if (kmo_query () & func_mask)
+ ctx->kmo_func = func;
+ }
+
+ if (hwfeatures & HWF_S390X_MSA_8)
+ {
+ /* Query KMA for supported algorithms. */
+ if (kma_query () & func_mask)
+ ctx->kma_func = func;
+ }
+
+ /* Setup zSeries bulk encryption/decryption routines. */
+
+ if (ctx->km_func)
+ {
+ bulk_ops->ocb_crypt = aes_s390x_ocb_crypt;
+ bulk_ops->ocb_auth = aes_s390x_ocb_auth;
+
+      /* CFB128 decryption uses the KM instruction instead of KMF. */
+ bulk_ops->cfb_dec = aes_s390x_cfb128_dec;
+ }
+
+ if (ctx->km_func_xts)
+ {
+ bulk_ops->xts_crypt = aes_s390x_xts_crypt;
+ }
+
+ if (ctx->kmc_func)
+ {
+ if(ctx->kmac_func)
+ {
+          /* Either KMC or KMAC is used, depending on the 'cbc_mac'
+           * parameter. */
+ bulk_ops->cbc_enc = aes_s390x_cbc_enc;
+ }
+
+ bulk_ops->cbc_dec = aes_s390x_cbc_dec;
+ }
+
+ if (ctx->kmf_func)
+ {
+ bulk_ops->cfb_enc = aes_s390x_cfb128_enc;
+ }
+
+ if (ctx->kmo_func)
+ {
+ bulk_ops->ofb_enc = aes_s390x_ofb_enc;
+ }
+
+ if (ctx->kma_func)
+ {
+ bulk_ops->ctr_enc = aes_s390x_ctr128_enc;
+
+ if (kimd_query () & km_function_to_mask (KMID_FUNCTION_GHASH))
+ {
+ /* KIMD based GHASH implementation is required with AES-GCM
+ * acceleration. */
+ bulk_ops->gcm_crypt = aes_s390x_gcm_crypt;
+ }
+ }
+
+ return 1;
+}
+
+void _gcry_aes_s390x_setkey(RIJNDAEL_context *ctx, const byte *key)
+{
+  /* The hardware uses the raw AES key; derive its length from the round
+   * count (10/12/14 rounds -> 16/24/32 byte key). */
+  unsigned int keylen = 16 + (ctx->rounds - 10) * 4;
+ memcpy (ctx->keyschenc, key, keylen);
+}
+
+void _gcry_aes_s390x_prepare_decryption(RIJNDAEL_context *ctx)
+{
+ /* Do nothing. */
+ (void)ctx;
+}
+
+#endif /* USE_S390X_CRYPTO */