From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Apr 2024 19:32:43 +0200 Subject: Adding upstream version 1:115.7.0. Signed-off-by: Daniel Baumann --- comm/third_party/libgcrypt/cipher/salsa20.c | 600 ++++++++++++++++++++++++++++ 1 file changed, 600 insertions(+) create mode 100644 comm/third_party/libgcrypt/cipher/salsa20.c (limited to 'comm/third_party/libgcrypt/cipher/salsa20.c') diff --git a/comm/third_party/libgcrypt/cipher/salsa20.c b/comm/third_party/libgcrypt/cipher/salsa20.c new file mode 100644 index 0000000000..d8c5c81f30 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/salsa20.c @@ -0,0 +1,600 @@ +/* salsa20.c - Bernstein's Salsa20 cipher + * Copyright (C) 2012 Simon Josefsson, Niels Möller + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + * + * For a description of the algorithm, see: + * http://cr.yp.to/snuffle/spec.pdf + * http://cr.yp.to/snuffle/design.pdf + */ + +/* The code is based on the code in Nettle + (git commit id 9d2d8ddaee35b91a4e1a32ae77cba04bea3480e7) + which in turn is based on + salsa20-ref.c version 20051118 + D. J. Bernstein + Public domain. +*/ + + +#include +#include +#include +#include +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-internal.h" + + +/* USE_AMD64 indicates whether to compile with AMD64 code. */ +#undef USE_AMD64 +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AMD64 1 +#endif + +/* USE_ARM_NEON_ASM indicates whether to enable ARM NEON assembly code. */ +#undef USE_ARM_NEON_ASM +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_ARM_NEON_ASM 1 +# endif +#endif /*ENABLE_NEON_SUPPORT*/ + + +#define SALSA20_MIN_KEY_SIZE 16 /* Bytes. */ +#define SALSA20_MAX_KEY_SIZE 32 /* Bytes. */ +#define SALSA20_BLOCK_SIZE 64 /* Bytes. */ +#define SALSA20_IV_SIZE 8 /* Bytes. */ +#define SALSA20_INPUT_LENGTH 16 /* Bytes. */ + +/* Number of rounds. The standard uses 20 rounds. In any case the + number of rounds must be even. */ +#define SALSA20_ROUNDS 20 +#define SALSA20R12_ROUNDS 12 + + +struct SALSA20_context_s; + +typedef unsigned int (*salsa20_core_t) (u32 *dst, struct SALSA20_context_s *ctx, + unsigned int rounds); +typedef void (* salsa20_keysetup_t)(struct SALSA20_context_s *ctx, + const byte *key, int keylen); +typedef void (* salsa20_ivsetup_t)(struct SALSA20_context_s *ctx, + const byte *iv); + +typedef struct SALSA20_context_s +{ + /* Indices 1-4 and 11-14 holds the key (two identical copies for the + shorter key size), indices 0, 5, 10, 15 are constant, indices 6, 7 + are the IV, and indices 8, 9 are the block counter: + + C K K K + K C I I + B B C K + K K K C + */ + u32 input[SALSA20_INPUT_LENGTH]; + u32 pad[SALSA20_INPUT_LENGTH]; + unsigned int unused; /* bytes in the pad. */ +#ifdef USE_ARM_NEON_ASM + int use_neon; +#endif + salsa20_keysetup_t keysetup; + salsa20_ivsetup_t ivsetup; + salsa20_core_t core; +} SALSA20_context_t; + + +/* The masking of the right shift is needed to allow n == 0 (using + just 32 - n and 64 - n results in undefined behaviour). Most uses + of these macros use a constant and non-zero rotation count. */ +#define ROTL32(n,x) (((x)<<(n)) | ((x)>>((-(n)&31)))) + + +#define LE_SWAP32(v) le_bswap32(v) + +#define LE_READ_UINT32(p) buf_get_le32(p) + + +static void salsa20_setiv (void *context, const byte *iv, size_t ivlen); +static const char *selftest (void); + + +#ifdef USE_AMD64 + +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. */ +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_EXTRA_STACK (10 * 16) +#else +# define ASM_FUNC_ABI +# define ASM_EXTRA_STACK 0 +#endif + +/* AMD64 assembly implementations of Salsa20. */ +void _gcry_salsa20_amd64_keysetup(u32 *ctxinput, const void *key, int keybits) + ASM_FUNC_ABI; +void _gcry_salsa20_amd64_ivsetup(u32 *ctxinput, const void *iv) + ASM_FUNC_ABI; +unsigned int +_gcry_salsa20_amd64_encrypt_blocks(u32 *ctxinput, const void *src, void *dst, + size_t len, int rounds) ASM_FUNC_ABI; + +static void +salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen) +{ + _gcry_salsa20_amd64_keysetup(ctx->input, key, keylen * 8); +} + +static void +salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv) +{ + _gcry_salsa20_amd64_ivsetup(ctx->input, iv); +} + +static unsigned int +salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds) +{ + memset(dst, 0, SALSA20_BLOCK_SIZE); + return _gcry_salsa20_amd64_encrypt_blocks(ctx->input, dst, dst, 1, rounds) + + ASM_EXTRA_STACK; +} + +#else /* USE_AMD64 */ + + + +#if 0 +# define SALSA20_CORE_DEBUG(i) do { \ + unsigned debug_j; \ + for (debug_j = 0; debug_j < 16; debug_j++) \ + { \ + if (debug_j == 0) \ + fprintf(stderr, "%2d:", (i)); \ + else if (debug_j % 4 == 0) \ + fprintf(stderr, "\n "); \ + fprintf(stderr, " %8x", pad[debug_j]); \ + } \ + fprintf(stderr, "\n"); \ + } while (0) +#else +# define SALSA20_CORE_DEBUG(i) +#endif + +#define QROUND(x0, x1, x2, x3) \ + do { \ + x1 ^= ROTL32 ( 7, x0 + x3); \ + x2 ^= ROTL32 ( 9, x1 + x0); \ + x3 ^= ROTL32 (13, x2 + x1); \ + x0 ^= ROTL32 (18, x3 + x2); \ + } while(0) + +static unsigned int +salsa20_core (u32 *dst, SALSA20_context_t *ctx, unsigned rounds) +{ + u32 pad[SALSA20_INPUT_LENGTH], *src = ctx->input; + unsigned int i; + + memcpy (pad, src, sizeof(pad)); + for (i = 0; i < rounds; i += 2) + { + SALSA20_CORE_DEBUG (i); + QROUND (pad[0], pad[4], pad[8], pad[12]); + QROUND (pad[5], pad[9], pad[13], pad[1] ); + QROUND (pad[10], pad[14], pad[2], pad[6] ); + QROUND (pad[15], pad[3], pad[7], pad[11]); + + SALSA20_CORE_DEBUG (i+1); + QROUND (pad[0], pad[1], pad[2], pad[3] ); + QROUND (pad[5], pad[6], pad[7], pad[4] ); + QROUND (pad[10], pad[11], pad[8], pad[9] ); + QROUND (pad[15], pad[12], pad[13], pad[14]); + } + SALSA20_CORE_DEBUG (i); + + for (i = 0; i < SALSA20_INPUT_LENGTH; i++) + { + u32 t = pad[i] + src[i]; + dst[i] = LE_SWAP32 (t); + } + + /* Update counter. */ + if (!++src[8]) + src[9]++; + + /* burn_stack */ + return ( 3*sizeof (void*) \ + + 2*sizeof (void*) \ + + 64 \ + + sizeof (unsigned int) \ + + sizeof (u32) ); +} +#undef QROUND +#undef SALSA20_CORE_DEBUG + +static void +salsa20_keysetup(SALSA20_context_t *ctx, const byte *key, int keylen) +{ + /* These constants are the little endian encoding of the string + "expand 32-byte k". For the 128 bit variant, the "32" in that + string will be fixed up to "16". */ + ctx->input[0] = 0x61707865; /* "apxe" */ + ctx->input[5] = 0x3320646e; /* "3 dn" */ + ctx->input[10] = 0x79622d32; /* "yb-2" */ + ctx->input[15] = 0x6b206574; /* "k et" */ + + ctx->input[1] = LE_READ_UINT32(key + 0); + ctx->input[2] = LE_READ_UINT32(key + 4); + ctx->input[3] = LE_READ_UINT32(key + 8); + ctx->input[4] = LE_READ_UINT32(key + 12); + if (keylen == SALSA20_MAX_KEY_SIZE) /* 256 bits */ + { + ctx->input[11] = LE_READ_UINT32(key + 16); + ctx->input[12] = LE_READ_UINT32(key + 20); + ctx->input[13] = LE_READ_UINT32(key + 24); + ctx->input[14] = LE_READ_UINT32(key + 28); + } + else /* 128 bits */ + { + ctx->input[11] = ctx->input[1]; + ctx->input[12] = ctx->input[2]; + ctx->input[13] = ctx->input[3]; + ctx->input[14] = ctx->input[4]; + + ctx->input[5] -= 0x02000000; /* Change to "1 dn". */ + ctx->input[10] += 0x00000004; /* Change to "yb-6". */ + } +} + +static void salsa20_ivsetup(SALSA20_context_t *ctx, const byte *iv) +{ + ctx->input[6] = LE_READ_UINT32(iv + 0); + ctx->input[7] = LE_READ_UINT32(iv + 4); + /* Reset the block counter. */ + ctx->input[8] = 0; + ctx->input[9] = 0; +} + +#endif /*!USE_AMD64*/ + +#ifdef USE_ARM_NEON_ASM + +/* ARM NEON implementation of Salsa20. */ +unsigned int +_gcry_arm_neon_salsa20_encrypt(void *c, const void *m, unsigned int nblks, + void *k, unsigned int rounds); + +static unsigned int +salsa20_core_neon (u32 *dst, SALSA20_context_t *ctx, unsigned int rounds) +{ + return _gcry_arm_neon_salsa20_encrypt(dst, NULL, 1, ctx->input, rounds); +} + +static void salsa20_ivsetup_neon(SALSA20_context_t *ctx, const byte *iv) +{ + memcpy(ctx->input + 8, iv, 8); + /* Reset the block counter. */ + memset(ctx->input + 10, 0, 8); +} + +static void +salsa20_keysetup_neon(SALSA20_context_t *ctx, const byte *key, int klen) +{ + static const unsigned char sigma32[16] = "expand 32-byte k"; + static const unsigned char sigma16[16] = "expand 16-byte k"; + + if (klen == 16) + { + memcpy (ctx->input, key, 16); + memcpy (ctx->input + 4, key, 16); /* Duplicate 128-bit key. */ + memcpy (ctx->input + 12, sigma16, 16); + } + else + { + /* 32-byte key */ + memcpy (ctx->input, key, 32); + memcpy (ctx->input + 12, sigma32, 16); + } +} + +#endif /*USE_ARM_NEON_ASM*/ + + +static gcry_err_code_t +salsa20_do_setkey (SALSA20_context_t *ctx, + const byte *key, unsigned int keylen) +{ + static int initialized; + static const char *selftest_failed; + + if (!initialized ) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("SALSA20 selftest failed (%s)\n", selftest_failed ); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + if (keylen != SALSA20_MIN_KEY_SIZE + && keylen != SALSA20_MAX_KEY_SIZE) + return GPG_ERR_INV_KEYLEN; + + /* Default ops. */ + ctx->keysetup = salsa20_keysetup; + ctx->ivsetup = salsa20_ivsetup; + ctx->core = salsa20_core; + +#ifdef USE_ARM_NEON_ASM + ctx->use_neon = (_gcry_get_hw_features () & HWF_ARM_NEON) != 0; + if (ctx->use_neon) + { + /* Use ARM NEON ops instead. */ + ctx->keysetup = salsa20_keysetup_neon; + ctx->ivsetup = salsa20_ivsetup_neon; + ctx->core = salsa20_core_neon; + } +#endif + + ctx->keysetup (ctx, key, keylen); + + /* We default to a zero nonce. */ + salsa20_setiv (ctx, NULL, 0); + + return 0; +} + + +static gcry_err_code_t +salsa20_setkey (void *context, const byte *key, unsigned int keylen, + cipher_bulk_ops_t *bulk_ops) +{ + SALSA20_context_t *ctx = (SALSA20_context_t *)context; + gcry_err_code_t rc = salsa20_do_setkey (ctx, key, keylen); + (void)bulk_ops; + _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); + return rc; +} + + +static void +salsa20_setiv (void *context, const byte *iv, size_t ivlen) +{ + SALSA20_context_t *ctx = (SALSA20_context_t *)context; + byte tmp[SALSA20_IV_SIZE]; + + if (iv && ivlen != SALSA20_IV_SIZE) + log_info ("WARNING: salsa20_setiv: bad ivlen=%u\n", (u32)ivlen); + + if (!iv || ivlen != SALSA20_IV_SIZE) + memset (tmp, 0, sizeof(tmp)); + else + memcpy (tmp, iv, SALSA20_IV_SIZE); + + ctx->ivsetup (ctx, tmp); + + /* Reset the unused pad bytes counter. */ + ctx->unused = 0; + + wipememory (tmp, sizeof(tmp)); +} + + + +/* Note: This function requires LENGTH > 0. */ +static void +salsa20_do_encrypt_stream (SALSA20_context_t *ctx, + byte *outbuf, const byte *inbuf, + size_t length, unsigned rounds) +{ + unsigned int nburn, burn = 0; + + if (ctx->unused) + { + unsigned char *p = (void*)ctx->pad; + size_t n; + + gcry_assert (ctx->unused < SALSA20_BLOCK_SIZE); + + n = ctx->unused; + if (n > length) + n = length; + buf_xor (outbuf, inbuf, p + SALSA20_BLOCK_SIZE - ctx->unused, n); + length -= n; + outbuf += n; + inbuf += n; + ctx->unused -= n; + if (!length) + return; + gcry_assert (!ctx->unused); + } + +#ifdef USE_AMD64 + if (length >= SALSA20_BLOCK_SIZE) + { + size_t nblocks = length / SALSA20_BLOCK_SIZE; + burn = _gcry_salsa20_amd64_encrypt_blocks(ctx->input, inbuf, outbuf, + nblocks, rounds); + burn += ASM_EXTRA_STACK; + length -= SALSA20_BLOCK_SIZE * nblocks; + outbuf += SALSA20_BLOCK_SIZE * nblocks; + inbuf += SALSA20_BLOCK_SIZE * nblocks; + } +#endif + +#ifdef USE_ARM_NEON_ASM + if (ctx->use_neon && length >= SALSA20_BLOCK_SIZE) + { + unsigned int nblocks = length / SALSA20_BLOCK_SIZE; + _gcry_arm_neon_salsa20_encrypt (outbuf, inbuf, nblocks, ctx->input, + rounds); + length -= SALSA20_BLOCK_SIZE * nblocks; + outbuf += SALSA20_BLOCK_SIZE * nblocks; + inbuf += SALSA20_BLOCK_SIZE * nblocks; + } +#endif + + while (length > 0) + { + /* Create the next pad and bump the block counter. Note that it + is the user's duty to change to another nonce not later than + after 2^70 processed bytes. */ + nburn = ctx->core (ctx->pad, ctx, rounds); + burn = nburn > burn ? nburn : burn; + + if (length <= SALSA20_BLOCK_SIZE) + { + buf_xor (outbuf, inbuf, ctx->pad, length); + ctx->unused = SALSA20_BLOCK_SIZE - length; + break; + } + buf_xor (outbuf, inbuf, ctx->pad, SALSA20_BLOCK_SIZE); + length -= SALSA20_BLOCK_SIZE; + outbuf += SALSA20_BLOCK_SIZE; + inbuf += SALSA20_BLOCK_SIZE; + } + + _gcry_burn_stack (burn); +} + + +static void +salsa20_encrypt_stream (void *context, + byte *outbuf, const byte *inbuf, size_t length) +{ + SALSA20_context_t *ctx = (SALSA20_context_t *)context; + + if (length) + salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20_ROUNDS); +} + + +static void +salsa20r12_encrypt_stream (void *context, + byte *outbuf, const byte *inbuf, size_t length) +{ + SALSA20_context_t *ctx = (SALSA20_context_t *)context; + + if (length) + salsa20_do_encrypt_stream (ctx, outbuf, inbuf, length, SALSA20R12_ROUNDS); +} + + +static const char* +selftest (void) +{ + byte ctxbuf[sizeof(SALSA20_context_t) + 15]; + SALSA20_context_t *ctx; + byte scratch[8+1]; + byte buf[256+64+4]; + int i; + + static byte key_1[] = + { 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const byte nonce_1[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const byte plaintext_1[] = + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }; + static const byte ciphertext_1[] = + { 0xE3, 0xBE, 0x8F, 0xDD, 0x8B, 0xEC, 0xA2, 0xE3}; + + /* 16-byte alignment required for amd64 implementation. */ + ctx = (SALSA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); + + salsa20_setkey (ctx, key_1, sizeof key_1, NULL); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + scratch[8] = 0; + salsa20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); + if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) + return "Salsa20 encryption test 1 failed."; + if (scratch[8]) + return "Salsa20 wrote too much."; + salsa20_setkey( ctx, key_1, sizeof(key_1), NULL); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + salsa20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); + if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) + return "Salsa20 decryption test 1 failed."; + + for (i = 0; i < sizeof buf; i++) + buf[i] = i; + salsa20_setkey (ctx, key_1, sizeof key_1, NULL); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + /*encrypt*/ + salsa20_encrypt_stream (ctx, buf, buf, sizeof buf); + /*decrypt*/ + salsa20_setkey (ctx, key_1, sizeof key_1, NULL); + salsa20_setiv (ctx, nonce_1, sizeof nonce_1); + salsa20_encrypt_stream (ctx, buf, buf, 1); + salsa20_encrypt_stream (ctx, buf+1, buf+1, (sizeof buf)-1-1); + salsa20_encrypt_stream (ctx, buf+(sizeof buf)-1, buf+(sizeof buf)-1, 1); + for (i = 0; i < sizeof buf; i++) + if (buf[i] != (byte)i) + return "Salsa20 encryption test 2 failed."; + + return NULL; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_salsa20 = + { + GCRY_CIPHER_SALSA20, + {0, 0}, /* flags */ + "SALSA20", /* name */ + NULL, /* aliases */ + NULL, /* oids */ + 1, /* blocksize in bytes. */ + SALSA20_MAX_KEY_SIZE*8, /* standard key length in bits. */ + sizeof (SALSA20_context_t), + salsa20_setkey, + NULL, + NULL, + salsa20_encrypt_stream, + salsa20_encrypt_stream, + NULL, + NULL, + salsa20_setiv + }; + +gcry_cipher_spec_t _gcry_cipher_spec_salsa20r12 = + { + GCRY_CIPHER_SALSA20R12, + {0, 0}, /* flags */ + "SALSA20R12", /* name */ + NULL, /* aliases */ + NULL, /* oids */ + 1, /* blocksize in bytes. */ + SALSA20_MAX_KEY_SIZE*8, /* standard key length in bits. */ + sizeof (SALSA20_context_t), + salsa20_setkey, + NULL, + NULL, + salsa20r12_encrypt_stream, + salsa20r12_encrypt_stream, + NULL, + NULL, + salsa20_setiv + }; -- cgit v1.2.3