diff options
Diffstat (limited to '')
-rw-r--r-- | web/server/h2o/libh2o/deps/picotls/deps/cifra/src/norx.c | 410 |
1 files changed, 410 insertions, 0 deletions
diff --git a/web/server/h2o/libh2o/deps/picotls/deps/cifra/src/norx.c b/web/server/h2o/libh2o/deps/picotls/deps/cifra/src/norx.c new file mode 100644 index 000000000..8f0aba6b0 --- /dev/null +++ b/web/server/h2o/libh2o/deps/picotls/deps/cifra/src/norx.c @@ -0,0 +1,410 @@ +/* + * cifra - embedded cryptography library + * Written in 2014 by Joseph Birr-Pixton <jpixton@gmail.com> + * + * To the extent possible under law, the author(s) have dedicated all + * copyright and related and neighboring rights to this software to the + * public domain worldwide. This software is distributed without any + * warranty. + * + * You should have received a copy of the CC0 Public Domain Dedication + * along with this software. If not, see + * <http://creativecommons.org/publicdomain/zero/1.0/>. + */ + + +#include "norx.h" +#include "bitops.h" +#include "handy.h" +#include "blockwise.h" +#include "tassert.h" + +#include <string.h> + +typedef struct +{ + uint32_t s[16]; +} norx32_ctx; + +/* Domain separation constants */ +#define DOMAIN_HEADER 0x01 +#define DOMAIN_PAYLOAD 0x02 +#define DOMAIN_TRAILER 0x04 +#define DOMAIN_TAG 0x08 + +#define WORD_BYTES 4 +#define WORD_BITS 32 +#define ROUNDS 4 +#define DEGREE 1 +#define TAG_BITS 128 +#define RATE_BYTES 48 +#define RATE_WORDS 12 + +static void permute(norx32_ctx *ctx) +{ +#ifdef CORTEX_M0 + /* Register usage: A-D r2, r3, r4, r5. + * Temps: r6 */ + +#define in(xx) "%[" #xx "]" + + /* Load numbered slots of S into r2-r5 */ +#define LOAD(u, v, w, x) \ + " ldr r2, [%[S], " in(u) "]\n" \ + " ldr r3, [%[S], " in(v) "]\n" \ + " ldr r4, [%[S], " in(w) "]\n" \ + " ldr r5, [%[S], " in(x) "]\n" + + /* Store r2-r5 into numbered slots of S */ +#define STORE(u, v, w, x) \ + " str r2, [%[S], " in(u) "]\n" \ + " str r3, [%[S], " in(v) "]\n" \ + " str r4, [%[S], " in(w) "]\n" \ + " str r5, [%[S], " in(x) "]\n" + + /* This is H() plus the xor and rotate in one step of G. + * rx is the register containing x (read/write) + * ry is the register containing y (read) + * rw is the register containing d (read/write) + * rot is the rotation constant r_n */ +#define P(rx, ry, rw, rot) \ + " mov r6, " #rx "\n" \ + " and " #rx ", " #ry "\n" \ + " lsl " #rx ", #1\n" \ + " eor " #rx ", r6\n" \ + " eor " #rx ", " #ry "\n" \ + " mov r6, #" #rot "\n" \ + " eor " #rw ", " #rx "\n" \ + " ror " #rw ", r6\n" + + /* The function G. s is the state array, a-d are indices + * into it. */ +#define G(s, a, b, c, d) \ + __asm__ ( \ + LOAD(A, B, C, D) \ + P(r2, r3, r5, 8) \ + P(r4, r5, r3, 11) \ + P(r2, r3, r5, 16) \ + P(r4, r5, r3, 31) \ + STORE(A, B, C, D) \ + : \ + : [S] "r" (s), \ + [A] "i" (a << 2), \ + [B] "i" (b << 2), \ + [C] "i" (c << 2), \ + [D] "i" (d << 2) \ + : "memory", "cc", "r2", "r3", "r4", "r5", "r6"); +#else + + /* This is one quarter of G; the function H plus xor/rotate. */ +#define P(u, v, w, rr) \ + (u) = ((u) ^ (v)) ^ (((u) & (v)) << 1); \ + (w) = rotr32((u) ^ (w), rr); + +#define G(s, a, b, c, d) \ + P(s[a], s[b], s[d], 8) \ + P(s[c], s[d], s[b], 11) \ + P(s[a], s[b], s[d], 16) \ + P(s[c], s[d], s[b], 31) +#endif + + for (int i = 0; i < ROUNDS; i++) + { + /* columns */ + G(ctx->s, 0, 4, 8, 12); + G(ctx->s, 1, 5, 9, 13); + G(ctx->s, 2, 6, 10, 14); + G(ctx->s, 3, 7, 11, 15); + + /* diagonals */ + G(ctx->s, 0, 5, 10, 15); + G(ctx->s, 1, 6, 11, 12); + G(ctx->s, 2, 7, 8, 13); + G(ctx->s, 3, 4, 9, 14); + } + +#undef G +#undef P +} + +static void init(norx32_ctx *ctx, + const uint8_t key[16], + const uint8_t nonce[8]) +{ + /* 1. Basic setup */ + ctx->s[0] = read32_le(nonce + 0); + ctx->s[1] = read32_le(nonce + 4); + ctx->s[2] = 0xb707322f; + ctx->s[3] = 0xa0c7c90d; + + ctx->s[4] = read32_le(key + 0); + ctx->s[5] = read32_le(key + 4); + ctx->s[6] = read32_le(key + 8); + ctx->s[7] = read32_le(key + 12); + + ctx->s[8] = 0xa3d8d930; + ctx->s[9] = 0x3fa8b72c; + ctx->s[10] = 0xed84eb49; + ctx->s[11] = 0xedca4787; + + ctx->s[12] = 0x335463eb; + ctx->s[13] = 0xf994220b; + ctx->s[14] = 0xbe0bf5c9; + ctx->s[15] = 0xd7c49104; + + /* 2. Parameter integration + * w = 32 + * l = 4 + * p = 1 + * t = 128 + */ + ctx->s[12] ^= WORD_BITS; + ctx->s[13] ^= ROUNDS; + ctx->s[14] ^= DEGREE; + ctx->s[15] ^= TAG_BITS; + + permute(ctx); +} + +/* Input domain separation constant for next step, and final permutation of + * preceeding step. */ +static void switch_domain(norx32_ctx *ctx, uint32_t constant) +{ + ctx->s[15] ^= constant; + permute(ctx); +} + +typedef struct +{ + norx32_ctx *ctx; + uint32_t type; +} blockctx; + +static void input_block_final(void *vctx, const uint8_t *data) +{ + blockctx *bctx = vctx; + norx32_ctx *ctx = bctx->ctx; + + /* just xor-in data. */ + for (int i = 0; i < RATE_WORDS; i++) + { + ctx->s[i] ^= read32_le(data); + data += WORD_BYTES; + } +} + +static void input_block(void *vctx, const uint8_t *data) +{ + /* Process block, then prepare for the next one. */ + blockctx *bctx = vctx; + input_block_final(vctx, data); + switch_domain(bctx->ctx, bctx->type); +} + +static void input(norx32_ctx *ctx, uint32_t type, + const uint8_t *buf, size_t nbuf) +{ + uint8_t partial[RATE_BYTES]; + size_t npartial = 0; + blockctx bctx = { ctx, type }; + + /* Process input. */ + cf_blockwise_accumulate(partial, &npartial, sizeof partial, + buf, nbuf, + input_block, + &bctx); + + /* Now pad partial. This contains the trailing portion of buf. */ + memset(partial + npartial, 0, sizeof(partial) - npartial); + partial[npartial] = 0x01; + partial[sizeof(partial) - 1] ^= 0x80; + + input_block_final(&bctx, partial); +} + +static void do_header(norx32_ctx *ctx, const uint8_t *buf, size_t nbuf) +{ + if (nbuf) + { + switch_domain(ctx, DOMAIN_HEADER); + input(ctx, DOMAIN_HEADER, buf, nbuf); + } +} + +static void do_trailer(norx32_ctx *ctx, const uint8_t *buf, size_t nbuf) +{ + if (nbuf) + { + switch_domain(ctx, DOMAIN_TRAILER); + input(ctx, DOMAIN_TRAILER, buf, nbuf); + } +} + +static void body_block_encrypt(norx32_ctx *ctx, + const uint8_t plain[RATE_BYTES], + uint8_t cipher[RATE_BYTES]) +{ + for (int i = 0; i < RATE_WORDS; i++) + { + ctx->s[i] ^= read32_le(plain); + write32_le(ctx->s[i], cipher); + plain += WORD_BYTES; + cipher += WORD_BYTES; + } +} + +static void encrypt_body(norx32_ctx *ctx, + const uint8_t *plain, uint8_t *cipher, size_t nbytes) +{ + if (nbytes == 0) + return; + + /* Process full blocks: easy */ + while (nbytes >= RATE_BYTES) + { + switch_domain(ctx, DOMAIN_PAYLOAD); + body_block_encrypt(ctx, plain, cipher); + plain += RATE_BYTES; + cipher += RATE_BYTES; + nbytes -= RATE_BYTES; + } + + /* Final padded block. */ + uint8_t partial[RATE_BYTES]; + memset(partial, 0, sizeof partial); + memcpy(partial, plain, nbytes); + partial[nbytes] ^= 0x01; + partial[sizeof(partial) - 1] ^= 0x80; + + switch_domain(ctx, DOMAIN_PAYLOAD); + body_block_encrypt(ctx, partial, partial); + + memcpy(cipher, partial, nbytes); +} + +static void body_block_decrypt(norx32_ctx *ctx, + const uint8_t cipher[RATE_BYTES], + uint8_t plain[RATE_BYTES], + size_t start, size_t end) +{ + for (size_t i = start; i < end; i++) + { + uint32_t ct = read32_le(cipher); + write32_le(ctx->s[i] ^ ct, plain); + ctx->s[i] = ct; + plain += WORD_BYTES; + cipher += WORD_BYTES; + } +} + +static void undo_padding(norx32_ctx *ctx, size_t bytes) +{ + assert(bytes < RATE_BYTES); + ctx->s[bytes / WORD_BYTES] ^= 0x01 << ((bytes % WORD_BYTES) * 8); + ctx->s[RATE_WORDS - 1] ^= 0x80000000; +} + +static void decrypt_body(norx32_ctx *ctx, + const uint8_t *cipher, uint8_t *plain, size_t nbytes) +{ + if (nbytes == 0) + return; + + /* Process full blocks. */ + while (nbytes >= RATE_BYTES) + { + switch_domain(ctx, DOMAIN_PAYLOAD); + body_block_decrypt(ctx, cipher, plain, 0, RATE_WORDS); + plain += RATE_BYTES; + cipher += RATE_BYTES; + nbytes -= RATE_BYTES; + } + + /* Then partial blocks. */ + size_t offset = 0; + switch_domain(ctx, DOMAIN_PAYLOAD); + + undo_padding(ctx, nbytes); + + /* In units of whole words. */ + while (nbytes >= WORD_BYTES) + { + body_block_decrypt(ctx, cipher, plain, offset, offset + 1); + plain += WORD_BYTES; + cipher += WORD_BYTES; + nbytes -= WORD_BYTES; + offset += 1; + } + + /* And then, finally, bytewise. */ + uint8_t tmp[WORD_BYTES]; + write32_le(ctx->s[offset], tmp); + + for (size_t i = 0; i < nbytes; i++) + { + uint8_t c = cipher[i]; + plain[i] = tmp[i] ^ c; + tmp[i] = c; + } + + ctx->s[offset] = read32_le(tmp); +} + +static void get_tag(norx32_ctx *ctx, uint8_t tag[16]) +{ + switch_domain(ctx, DOMAIN_TAG); + permute(ctx); + write32_le(ctx->s[0], tag + 0); + write32_le(ctx->s[1], tag + 4); + write32_le(ctx->s[2], tag + 8); + write32_le(ctx->s[3], tag + 12); +} + +void cf_norx32_encrypt(const uint8_t key[16], + const uint8_t nonce[8], + const uint8_t *header, size_t nheader, + const uint8_t *plaintext, size_t nbytes, + const uint8_t *trailer, size_t ntrailer, + uint8_t *ciphertext, + uint8_t tag[16]) +{ + norx32_ctx ctx; + + init(&ctx, key, nonce); + do_header(&ctx, header, nheader); + encrypt_body(&ctx, plaintext, ciphertext, nbytes); + do_trailer(&ctx, trailer, ntrailer); + get_tag(&ctx, tag); + + mem_clean(&ctx, sizeof ctx); +} + +int cf_norx32_decrypt(const uint8_t key[16], + const uint8_t nonce[8], + const uint8_t *header, size_t nheader, + const uint8_t *ciphertext, size_t nbytes, + const uint8_t *trailer, size_t ntrailer, + const uint8_t tag[16], + uint8_t *plaintext) +{ + norx32_ctx ctx; + uint8_t ourtag[16]; + + init(&ctx, key, nonce); + do_header(&ctx, header, nheader); + decrypt_body(&ctx, ciphertext, plaintext, nbytes); + do_trailer(&ctx, trailer, ntrailer); + get_tag(&ctx, ourtag); + + int err = 0; + + if (!mem_eq(ourtag, tag, sizeof ourtag)) + { + err = 1; + mem_clean(plaintext, nbytes); + mem_clean(ourtag, sizeof ourtag); + } + + mem_clean(&ctx, sizeof ctx); + return err; +} |