From 6bf0a5cb5034a7e684dcc3500e841785237ce2dd Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Sun, 7 Apr 2024 19:32:43 +0200
Subject: Adding upstream version 1:115.7.0.

Signed-off-by: Daniel Baumann
---
 comm/third_party/libgcrypt/cipher/ChangeLog-2011 | 4279 ++++++++++++++++++++
 comm/third_party/libgcrypt/cipher/Makefile.am | 258 ++
 comm/third_party/libgcrypt/cipher/Makefile.in | 1445 +++++++
 comm/third_party/libgcrypt/cipher/arcfour-amd64.S | 108 +
 comm/third_party/libgcrypt/cipher/arcfour.c | 216 +
 .../libgcrypt/cipher/asm-common-aarch64.h | 104 +
 .../libgcrypt/cipher/asm-common-amd64.h | 189 +
 .../libgcrypt/cipher/asm-common-s390x.h | 90 +
 .../libgcrypt/cipher/asm-inline-s390x.h | 157 +
 .../libgcrypt/cipher/asm-poly1305-aarch64.h | 245 ++
 .../libgcrypt/cipher/asm-poly1305-amd64.h | 171 +
 .../libgcrypt/cipher/asm-poly1305-s390x.h | 140 +
 comm/third_party/libgcrypt/cipher/bithelp.h | 123 +
 comm/third_party/libgcrypt/cipher/blake2.c | 996 +++++
 .../libgcrypt/cipher/blake2b-amd64-avx2.S | 300 ++
 .../libgcrypt/cipher/blake2s-amd64-avx.S | 278 ++
 comm/third_party/libgcrypt/cipher/blowfish-amd64.S | 601 +++
 comm/third_party/libgcrypt/cipher/blowfish-arm.S | 743 ++++
 comm/third_party/libgcrypt/cipher/blowfish.c | 1142 ++++++
 comm/third_party/libgcrypt/cipher/bufhelp.h | 385 ++
 .../libgcrypt/cipher/camellia-aarch64.S | 586 +++
 .../libgcrypt/cipher/camellia-aesni-avx-amd64.S | 2618 ++++++++++++
 .../libgcrypt/cipher/camellia-aesni-avx2-amd64.S | 1782 ++++++++
 comm/third_party/libgcrypt/cipher/camellia-arm.S | 626 +++
 comm/third_party/libgcrypt/cipher/camellia-glue.c | 1097 +++++
 comm/third_party/libgcrypt/cipher/camellia.c | 1413 +++++++
 comm/third_party/libgcrypt/cipher/camellia.h | 95 +
 comm/third_party/libgcrypt/cipher/cast5-amd64.S | 663 +++
 comm/third_party/libgcrypt/cipher/cast5-arm.S | 728 ++++
 comm/third_party/libgcrypt/cipher/cast5.c | 1238 ++++++
 .../libgcrypt/cipher/chacha20-aarch64.S | 648 +++
 .../libgcrypt/cipher/chacha20-amd64-avx2.S | 601 +++
 .../libgcrypt/cipher/chacha20-amd64-ssse3.S | 1012 +++++
 .../libgcrypt/cipher/chacha20-armv7-neon.S | 393 ++
 comm/third_party/libgcrypt/cipher/chacha20-ppc.c | 646 +++
 comm/third_party/libgcrypt/cipher/chacha20-s390x.S | 1561 +++++++
 comm/third_party/libgcrypt/cipher/chacha20.c | 1306 ++++++
 comm/third_party/libgcrypt/cipher/cipher-aeswrap.c | 209 +
 comm/third_party/libgcrypt/cipher/cipher-cbc.c | 292 ++
 comm/third_party/libgcrypt/cipher/cipher-ccm.c | 415 ++
 comm/third_party/libgcrypt/cipher/cipher-cfb.c | 317 ++
 comm/third_party/libgcrypt/cipher/cipher-cmac.c | 292 ++
 comm/third_party/libgcrypt/cipher/cipher-ctr.c | 120 +
 comm/third_party/libgcrypt/cipher/cipher-eax.c | 289 ++
 .../libgcrypt/cipher/cipher-gcm-armv7-neon.S | 341 ++
 .../libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S | 433 ++
 .../libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S | 424 ++
 .../libgcrypt/cipher/cipher-gcm-intel-pclmul.c | 712 ++++
 comm/third_party/libgcrypt/cipher/cipher-gcm.c | 1207 ++++++
 .../third_party/libgcrypt/cipher/cipher-internal.h | 809 ++++
 comm/third_party/libgcrypt/cipher/cipher-ocb.c | 761 ++++
 comm/third_party/libgcrypt/cipher/cipher-ofb.c | 108 +
 .../third_party/libgcrypt/cipher/cipher-poly1305.c | 375 ++
 .../third_party/libgcrypt/cipher/cipher-selftest.c | 512 +++
 .../third_party/libgcrypt/cipher/cipher-selftest.h | 69 +
 comm/third_party/libgcrypt/cipher/cipher-xts.c | 189 +
 comm/third_party/libgcrypt/cipher/cipher.c | 1767 ++++++++
 .../libgcrypt/cipher/crc-armv8-aarch64-ce.S | 497 +++
 comm/third_party/libgcrypt/cipher/crc-armv8-ce.c | 229 ++
 .../libgcrypt/cipher/crc-intel-pclmul.c | 939 +++++
 comm/third_party/libgcrypt/cipher/crc-ppc.c | 656 +++
 comm/third_party/libgcrypt/cipher/crc.c | 955 +++++
 comm/third_party/libgcrypt/cipher/des-amd64.S | 1111 +++++
 comm/third_party/libgcrypt/cipher/des.c | 1507 +++++++
 comm/third_party/libgcrypt/cipher/dsa-common.c | 418 ++
 comm/third_party/libgcrypt/cipher/dsa.c | 1394 +++++++
 comm/third_party/libgcrypt/cipher/ecc-common.h | 140 +
 comm/third_party/libgcrypt/cipher/ecc-curves.c | 1603 ++++++++
 comm/third_party/libgcrypt/cipher/ecc-ecdh.c | 127 +
 comm/third_party/libgcrypt/cipher/ecc-ecdsa.c | 248 ++
 comm/third_party/libgcrypt/cipher/ecc-eddsa.c | 1182 ++++++
 comm/third_party/libgcrypt/cipher/ecc-gost.c | 218 +
 comm/third_party/libgcrypt/cipher/ecc-misc.c | 438 ++
 comm/third_party/libgcrypt/cipher/ecc-sm2.c | 569 +++
 comm/third_party/libgcrypt/cipher/ecc.c | 1779 ++++++++
 comm/third_party/libgcrypt/cipher/elgamal.c | 1149 ++++++
 comm/third_party/libgcrypt/cipher/gost-s-box.c | 266 ++
 comm/third_party/libgcrypt/cipher/gost.h | 34 +
 comm/third_party/libgcrypt/cipher/gost28147.c | 553 +++
 comm/third_party/libgcrypt/cipher/gostr3411-94.c | 383 ++
 comm/third_party/libgcrypt/cipher/hash-common.c | 193 +
 comm/third_party/libgcrypt/cipher/hash-common.h | 62 +
 comm/third_party/libgcrypt/cipher/idea.c | 382 ++
 comm/third_party/libgcrypt/cipher/kdf-internal.h | 40 +
 comm/third_party/libgcrypt/cipher/kdf.c | 503 +++
 .../libgcrypt/cipher/keccak-armv7-neon.S | 945 +++++
 comm/third_party/libgcrypt/cipher/keccak.c | 1577 ++++++++
 .../libgcrypt/cipher/keccak_permute_32.h | 536 +++
 .../libgcrypt/cipher/keccak_permute_64.h | 385 ++
 comm/third_party/libgcrypt/cipher/mac-cmac.c | 524 +++
 comm/third_party/libgcrypt/cipher/mac-gmac.c | 187 +
 comm/third_party/libgcrypt/cipher/mac-hmac.c | 1495 +++++++
 comm/third_party/libgcrypt/cipher/mac-internal.h | 275 ++
 comm/third_party/libgcrypt/cipher/mac-poly1305.c | 364 ++
 comm/third_party/libgcrypt/cipher/mac.c | 808 ++++
 comm/third_party/libgcrypt/cipher/md.c | 1639 ++++++++
 comm/third_party/libgcrypt/cipher/md4.c | 296 ++
 comm/third_party/libgcrypt/cipher/md5.c | 322 ++
 .../libgcrypt/cipher/poly1305-internal.h | 64 +
 comm/third_party/libgcrypt/cipher/poly1305-s390x.S | 87 +
 comm/third_party/libgcrypt/cipher/poly1305.c | 740 ++++
 comm/third_party/libgcrypt/cipher/primegen.c | 1878 +++++++++
 .../third_party/libgcrypt/cipher/pubkey-internal.h | 105 +
 comm/third_party/libgcrypt/cipher/pubkey-util.c | 1160 ++++++
 comm/third_party/libgcrypt/cipher/pubkey.c | 970 +++++
 comm/third_party/libgcrypt/cipher/rfc2268.c | 378 ++
 .../libgcrypt/cipher/rijndael-aarch64.S | 514 +++
 comm/third_party/libgcrypt/cipher/rijndael-aesni.c | 3965 ++++++++++++++++++
 comm/third_party/libgcrypt/cipher/rijndael-amd64.S | 477 +++
 comm/third_party/libgcrypt/cipher/rijndael-arm.S | 581 +++
 .../libgcrypt/cipher/rijndael-armv8-aarch32-ce.S | 1867 +++++++++
 .../libgcrypt/cipher/rijndael-armv8-aarch64-ce.S | 1613 ++++++++
 .../libgcrypt/cipher/rijndael-armv8-ce.c | 414 ++
 .../libgcrypt/cipher/rijndael-internal.h | 194 +
 .../libgcrypt/cipher/rijndael-padlock.c | 110 +
 .../libgcrypt/cipher/rijndael-ppc-common.h | 342 ++
 .../libgcrypt/cipher/rijndael-ppc-functions.h | 2020 +++++++++
 comm/third_party/libgcrypt/cipher/rijndael-ppc.c | 259 ++
 .../third_party/libgcrypt/cipher/rijndael-ppc9le.c | 102 +
 comm/third_party/libgcrypt/cipher/rijndael-s390x.c | 1155 ++++++
 .../libgcrypt/cipher/rijndael-ssse3-amd64-asm.S | 874 ++++
 .../libgcrypt/cipher/rijndael-ssse3-amd64.c | 743 ++++
 .../third_party/libgcrypt/cipher/rijndael-tables.h | 227 ++
 comm/third_party/libgcrypt/cipher/rijndael.c | 2032 ++++++++++
 comm/third_party/libgcrypt/cipher/rmd160.c | 529 +++
 comm/third_party/libgcrypt/cipher/rsa-common.c | 1038 +++++
 comm/third_party/libgcrypt/cipher/rsa.c | 2035 ++++++++++
 comm/third_party/libgcrypt/cipher/salsa20-amd64.S | 940 +++++
 .../libgcrypt/cipher/salsa20-armv7-neon.S | 899 ++++
 comm/third_party/libgcrypt/cipher/salsa20.c | 600 +++
 comm/third_party/libgcrypt/cipher/scrypt.c | 322 ++
 comm/third_party/libgcrypt/cipher/seed.c | 478 +++
 .../libgcrypt/cipher/serpent-armv7-neon.S | 1124 +++++
 .../libgcrypt/cipher/serpent-avx2-amd64.S | 1160 ++++++
 .../libgcrypt/cipher/serpent-sse2-amd64.S | 1211 ++++++
 comm/third_party/libgcrypt/cipher/serpent.c | 1807 +++++++++
 .../third_party/libgcrypt/cipher/sha1-armv7-neon.S | 526 +++
 .../libgcrypt/cipher/sha1-armv8-aarch32-ce.S | 220 +
 .../libgcrypt/cipher/sha1-armv8-aarch64-ce.S | 201 +
 comm/third_party/libgcrypt/cipher/sha1-avx-amd64.S | 429 ++
 .../libgcrypt/cipher/sha1-avx-bmi2-amd64.S | 441 ++
 .../libgcrypt/cipher/sha1-avx2-bmi2-amd64.S | 573 +++
 .../libgcrypt/cipher/sha1-intel-shaext.c | 292 ++
 .../libgcrypt/cipher/sha1-ssse3-amd64.S | 437 ++
 comm/third_party/libgcrypt/cipher/sha1.c | 765 ++++
 comm/third_party/libgcrypt/cipher/sha1.h | 47 +
 .../libgcrypt/cipher/sha256-armv8-aarch32-ce.S | 231 ++
 .../libgcrypt/cipher/sha256-armv8-aarch64-ce.S | 215 +
 .../libgcrypt/cipher/sha256-avx-amd64.S | 506 +++
 .../libgcrypt/cipher/sha256-avx2-bmi2-amd64.S | 527 +++
 .../libgcrypt/cipher/sha256-intel-shaext.c | 363 ++
 comm/third_party/libgcrypt/cipher/sha256-ppc.c | 795 ++++
 .../libgcrypt/cipher/sha256-ssse3-amd64.S | 528 +++
 comm/third_party/libgcrypt/cipher/sha256.c | 857 ++++
 comm/third_party/libgcrypt/cipher/sha512-arm.S | 464 +++
 .../libgcrypt/cipher/sha512-armv7-neon.S | 450 ++
 .../libgcrypt/cipher/sha512-avx-amd64.S | 461 +++
 .../libgcrypt/cipher/sha512-avx2-bmi2-amd64.S | 502 +++
 comm/third_party/libgcrypt/cipher/sha512-ppc.c | 969 +++++
 .../libgcrypt/cipher/sha512-ssse3-amd64.S | 467 +++
 .../libgcrypt/cipher/sha512-ssse3-i386.c | 404 ++
 comm/third_party/libgcrypt/cipher/sha512.c | 1316 ++++++
 comm/third_party/libgcrypt/cipher/sm3.c | 473 +++
 .../libgcrypt/cipher/sm4-aesni-avx-amd64.S | 987 +++++
 .../libgcrypt/cipher/sm4-aesni-avx2-amd64.S | 851 ++++
 comm/third_party/libgcrypt/cipher/sm4.c | 1251 ++++++
 comm/third_party/libgcrypt/cipher/stribog.c | 1362 +++++++
 comm/third_party/libgcrypt/cipher/tiger.c | 860 ++++
 .../third_party/libgcrypt/cipher/twofish-aarch64.S | 321 ++
 comm/third_party/libgcrypt/cipher/twofish-amd64.S | 1184 ++++++
 comm/third_party/libgcrypt/cipher/twofish-arm.S | 363 ++
 .../libgcrypt/cipher/twofish-avx2-amd64.S | 1048 +++++
 comm/third_party/libgcrypt/cipher/twofish.c | 1793 ++++++++
 .../libgcrypt/cipher/whirlpool-sse2-amd64.S | 348 ++
 comm/third_party/libgcrypt/cipher/whirlpool.c | 1535 +++++++
 175 files changed, 125928 insertions(+)
 create mode 100644 comm/third_party/libgcrypt/cipher/ChangeLog-2011
 create mode 100644 comm/third_party/libgcrypt/cipher/Makefile.am
 create mode 100644 comm/third_party/libgcrypt/cipher/Makefile.in
 create mode 100644 comm/third_party/libgcrypt/cipher/arcfour-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/arcfour.c
 create mode 100644 comm/third_party/libgcrypt/cipher/asm-common-aarch64.h
 create mode 100644 comm/third_party/libgcrypt/cipher/asm-common-amd64.h
 create mode 100644 comm/third_party/libgcrypt/cipher/asm-common-s390x.h
 create mode 100644 comm/third_party/libgcrypt/cipher/asm-inline-s390x.h
 create mode 100644 comm/third_party/libgcrypt/cipher/asm-poly1305-aarch64.h
 create mode 100644 comm/third_party/libgcrypt/cipher/asm-poly1305-amd64.h
 create mode 100644 comm/third_party/libgcrypt/cipher/asm-poly1305-s390x.h
 create mode 100644 comm/third_party/libgcrypt/cipher/bithelp.h
 create mode 100644 comm/third_party/libgcrypt/cipher/blake2.c
 create mode 100644 comm/third_party/libgcrypt/cipher/blake2b-amd64-avx2.S
 create mode 100644 comm/third_party/libgcrypt/cipher/blake2s-amd64-avx.S
 create mode 100644 comm/third_party/libgcrypt/cipher/blowfish-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/blowfish-arm.S
 create mode 100644 comm/third_party/libgcrypt/cipher/blowfish.c
 create mode 100644 comm/third_party/libgcrypt/cipher/bufhelp.h
 create mode 100644 comm/third_party/libgcrypt/cipher/camellia-aarch64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/camellia-aesni-avx-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/camellia-aesni-avx2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/camellia-arm.S
 create mode 100644 comm/third_party/libgcrypt/cipher/camellia-glue.c
 create mode 100644 comm/third_party/libgcrypt/cipher/camellia.c
 create mode 100644 comm/third_party/libgcrypt/cipher/camellia.h
 create mode 100644 comm/third_party/libgcrypt/cipher/cast5-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/cast5-arm.S
 create mode 100644 comm/third_party/libgcrypt/cipher/cast5.c
 create mode 100644 comm/third_party/libgcrypt/cipher/chacha20-aarch64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/chacha20-amd64-avx2.S
 create mode 100644 comm/third_party/libgcrypt/cipher/chacha20-amd64-ssse3.S
 create mode 100644 comm/third_party/libgcrypt/cipher/chacha20-armv7-neon.S
 create mode 100644 comm/third_party/libgcrypt/cipher/chacha20-ppc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/chacha20-s390x.S
 create mode 100644 comm/third_party/libgcrypt/cipher/chacha20.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-aeswrap.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-cbc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-ccm.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-cfb.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-cmac.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-ctr.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-eax.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-gcm-armv7-neon.S
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-gcm-intel-pclmul.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-gcm.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-internal.h
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-ocb.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-ofb.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-poly1305.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-selftest.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-selftest.h
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher-xts.c
 create mode 100644 comm/third_party/libgcrypt/cipher/cipher.c
 create mode 100644 comm/third_party/libgcrypt/cipher/crc-armv8-aarch64-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/crc-armv8-ce.c
 create mode 100644 comm/third_party/libgcrypt/cipher/crc-intel-pclmul.c
 create mode 100644 comm/third_party/libgcrypt/cipher/crc-ppc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/crc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/des-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/des.c
 create mode 100644 comm/third_party/libgcrypt/cipher/dsa-common.c
 create mode 100644 comm/third_party/libgcrypt/cipher/dsa.c
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc-common.h
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc-curves.c
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc-ecdh.c
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc-ecdsa.c
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc-eddsa.c
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc-gost.c
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc-misc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc-sm2.c
 create mode 100644 comm/third_party/libgcrypt/cipher/ecc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/elgamal.c
 create mode 100644 comm/third_party/libgcrypt/cipher/gost-s-box.c
 create mode 100644 comm/third_party/libgcrypt/cipher/gost.h
 create mode 100644 comm/third_party/libgcrypt/cipher/gost28147.c
 create mode 100644 comm/third_party/libgcrypt/cipher/gostr3411-94.c
 create mode 100644 comm/third_party/libgcrypt/cipher/hash-common.c
 create mode 100644 comm/third_party/libgcrypt/cipher/hash-common.h
 create mode 100644 comm/third_party/libgcrypt/cipher/idea.c
 create mode 100644 comm/third_party/libgcrypt/cipher/kdf-internal.h
 create mode 100644 comm/third_party/libgcrypt/cipher/kdf.c
 create mode 100644 comm/third_party/libgcrypt/cipher/keccak-armv7-neon.S
 create mode 100644 comm/third_party/libgcrypt/cipher/keccak.c
 create mode 100644 comm/third_party/libgcrypt/cipher/keccak_permute_32.h
 create mode 100644 comm/third_party/libgcrypt/cipher/keccak_permute_64.h
 create mode 100644 comm/third_party/libgcrypt/cipher/mac-cmac.c
 create mode 100644 comm/third_party/libgcrypt/cipher/mac-gmac.c
 create mode 100644 comm/third_party/libgcrypt/cipher/mac-hmac.c
 create mode 100644 comm/third_party/libgcrypt/cipher/mac-internal.h
 create mode 100644 comm/third_party/libgcrypt/cipher/mac-poly1305.c
 create mode 100644 comm/third_party/libgcrypt/cipher/mac.c
 create mode 100644 comm/third_party/libgcrypt/cipher/md.c
 create mode 100644 comm/third_party/libgcrypt/cipher/md4.c
 create mode 100644 comm/third_party/libgcrypt/cipher/md5.c
 create mode 100644 comm/third_party/libgcrypt/cipher/poly1305-internal.h
 create mode 100644 comm/third_party/libgcrypt/cipher/poly1305-s390x.S
 create mode 100644 comm/third_party/libgcrypt/cipher/poly1305.c
 create mode 100644 comm/third_party/libgcrypt/cipher/primegen.c
 create mode 100644 comm/third_party/libgcrypt/cipher/pubkey-internal.h
 create mode 100644 comm/third_party/libgcrypt/cipher/pubkey-util.c
 create mode 100644 comm/third_party/libgcrypt/cipher/pubkey.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rfc2268.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-aarch64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-aesni.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-arm.S
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch32-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-armv8-aarch64-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-armv8-ce.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-internal.h
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-padlock.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-ppc-common.h
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-ppc-functions.h
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-ppc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-ppc9le.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-s390x.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64-asm.S
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-ssse3-amd64.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael-tables.h
 create mode 100644 comm/third_party/libgcrypt/cipher/rijndael.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rmd160.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rsa-common.c
 create mode 100644 comm/third_party/libgcrypt/cipher/rsa.c
 create mode 100644 comm/third_party/libgcrypt/cipher/salsa20-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/salsa20-armv7-neon.S
 create mode 100644 comm/third_party/libgcrypt/cipher/salsa20.c
 create mode 100644 comm/third_party/libgcrypt/cipher/scrypt.c
 create mode 100644 comm/third_party/libgcrypt/cipher/seed.c
 create mode 100644 comm/third_party/libgcrypt/cipher/serpent-armv7-neon.S
 create mode 100644 comm/third_party/libgcrypt/cipher/serpent-avx2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/serpent-sse2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/serpent.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1-armv7-neon.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1-armv8-aarch32-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1-armv8-aarch64-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1-avx-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1-avx-bmi2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1-avx2-bmi2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1-intel-shaext.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1-ssse3-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sha1.h
 create mode 100644 comm/third_party/libgcrypt/cipher/sha256-armv8-aarch32-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha256-armv8-aarch64-ce.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha256-avx-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha256-avx2-bmi2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha256-intel-shaext.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sha256-ppc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sha256-ssse3-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha256.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sha512-arm.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha512-armv7-neon.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha512-avx-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha512-avx2-bmi2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha512-ppc.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sha512-ssse3-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sha512-ssse3-i386.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sha512.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sm3.c
 create mode 100644 comm/third_party/libgcrypt/cipher/sm4-aesni-avx-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sm4-aesni-avx2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/sm4.c
 create mode 100644 comm/third_party/libgcrypt/cipher/stribog.c
 create mode 100644 comm/third_party/libgcrypt/cipher/tiger.c
 create mode 100644 comm/third_party/libgcrypt/cipher/twofish-aarch64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/twofish-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/twofish-arm.S
 create mode 100644 comm/third_party/libgcrypt/cipher/twofish-avx2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/twofish.c
 create mode 100644 comm/third_party/libgcrypt/cipher/whirlpool-sse2-amd64.S
 create mode 100644 comm/third_party/libgcrypt/cipher/whirlpool.c

(limited to 'comm/third_party/libgcrypt/cipher')

diff --git a/comm/third_party/libgcrypt/cipher/ChangeLog-2011 b/comm/third_party/libgcrypt/cipher/ChangeLog-2011
new file mode 100644
index 0000000000..1ce6bd1e68
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ChangeLog-2011
@@ -0,0 +1,4279 @@
+2011-12-01 Werner Koch
+
+	NB: ChangeLog files are no longer manually maintained. Starting
+	on December 1st, 2011 we put change information only in the GIT
+	commit log, and generate a top-level ChangeLog file from logs at
+	"make dist". See doc/HACKING for details.
+
+2011-09-16 Werner Koch
+
+	* primegen.c (_gcry_primegen_init): New.
+
+2011-09-15 Werner Koch
+
+	* cipher-cbc.c, cipher-cfb.c, cipher-ofb.c, cipher-ctr.c: New.
+	* cipher-aeswrap.c: New.
+	* cipher-internal.h: New.
+	* cipher.c (cipher_context_alignment_t, struct gcry_cipher_handle)
+	(CTX_MAGIC_NORMAL, CTX_MAGIC_SECURE, NEED_16BYTE_ALIGNED_CONTEXT)
+	(MAX_BLOCKSIZE): Move to cipher-internal.h.
+	(do_aeswrap_encrypt, do_aeswrap_encrypt)
+	(do_cbc_encrypt, do_cbc_decrypt, do_ctr_encrypt, do_ctr_decrypt)
+	(do_ofb_encrypt, do_ofb_decrypt, do_ctr_encrypt): Move to the
+	respective new cipher-foo.c files.
+	(do_ctr_decrypt): Remove.
+
+2011-09-15 Werner Koch
+
+	* pubkey.c (gcry_pk_list): Remove.
+	(gcry_pk_unregister): Remove.
+	* md.c (gcry_md_list): Remove.
+	(gcry_md_unregister): Remove.
+	* cipher.c (gcry_cipher_list): Remove.
+	(gcry_cipher_unregister): Remove.
+	* ac.c: Remove.
+
+2011-06-29 Werner Koch
+
+	* cipher.c (cipher_get_keylen): Return zero for an invalid algorithm.
+	(cipher_get_blocksize): Ditto.
+
+2011-06-13 Werner Koch
+
+	* dsa.c (selftest_sign_1024): Use the raw and not the pkcs1 flag.
+
+	* pubkey.c (gcry_pk_sign): Special case output generation for PKCS1.
+	(sexp_data_to_mpi): Parse "random-override" for pkcs1 encryption.
+	(pkcs1_encode_for_encryption): Add args RANDOM_OVERRIDE and
+	RANDOM_OVERRIDE_LEN.
+	(gcry_pk_encrypt): Special case output generation for PKCS1.
+	(sexp_data_to_mpi): Use GCRYMPI_FMT_USG for raw encoding.
+
+2011-06-10 Werner Koch
+
+	* pubkey.c (gcry_pk_sign): Use format specifier '%M' to avoid
+	leading zeroes. Special case output generation for PSS.
+	(gcry_pk_encrypt): Special case output generation for OAEP.
+	(sexp_data_to_mpi): Use GCRYMPI_FMT_USG for PSS verify.
+
+2011-06-09 Werner Koch
+
+	* pubkey.c (oaep_decode): Make use of octet_string_from_mpi.
+	(sexp_to_enc): Skip "random-override".
+
+	* pubkey.c (oaep_encode, pss_encode): Add args RANDOM_OVERRIDE and
+	RANDOM_OVERRIDE_LEN.
+	(sexp_data_to_mpi): Extract new random-override parameter.
+
+	* pubkey.c (pss_encode, pss_verify): Use VALUE verbatim for MHASH.
+	(octet_string_from_mpi): Add arg SPACE.
+
+2011-06-08 Werner Koch
+
+	* pubkey.c (pss_encode, pss_verify): Restructure and comment code
+	to match rfc-3447. Replace secure allocs by plain allocs and
+	wipememory. Use gcry_md_hash_buffer.
+	(octet_string_from_mpi): New.
+
+2011-06-03 Werner Koch
+
+	* pubkey.c (oaep_decode): Add more comments and restructure to
+	match the description in RFC-3447.
+	(oaep_encode): Check for mgf1 error. s/dlen/hlen/.
+
+2011-05-31 Werner Koch
+
+	* pubkey.c (mgf1): Optimize by using gcry_md_reset. Re-implement
+	for easier readability.
+	(oaep_encode): Add more comments and restructure to match the
+	description in RFC-3447.
+
+	* pubkey.c (pkcs1_encode_for_signature, oaep_decode): Change
+	return value from one MPI to a buffer.
+	(gcry_pk_decrypt): Adjust for this change.
+
+2011-05-30 Werner Koch
+
+	* pubkey.c (pkcs1_decode_for_encryption): Change handling of
+	leading zero byte.
+
+2011-05-27 Daiki Ueno
+
+	* pubkey.c (gcry_pk_decrypt): Fix double-free when un-padding
+	invalid data. Thanks to Tom Ritter.
+
+2011-05-24 Daiki Ueno
+
+	* rsa.c (rsa_verify): Use CMP if given, to check the decrypted
+	sig.
+
+	* pubkey.c (sexp_to_enc, sexp_data_to_mpi): Factor out
+	CTX initialization to ...
+	(init_encoding_ctx): .. new.
+	(gcry_pk_verify): Pass verify func and the arg to pubkey_verify.
+	(pss_encode, pss_verify, pss_verify_cmp): New.
+
+2011-05-23 Daiki Ueno
+
+	* pubkey.c (pkcs1_decode_for_encryption, oaep_decode): Fix memleak
+	when gcry_mpi_print fails.
+
+2011-05-18 Daiki Ueno
+
+	* pubkey.c (sexp_data_to_mpi): Factor some code out to ...
+	(pkcs1_encode_for_encryption): .. new,
+	(pkcs1_encode_for_signature): .. new.
+	(pkcs1_decode_for_encryption): New.
+	(gcry_pk_decrypt): Do un-padding for PKCS#1 as well as OAEP.
+	(sexp_to_enc): Abolish "unpad" flag, which is not necessary since
+	we can do un-padding implicitly when "pkcs1" or "oaep" is given.
+
+2011-05-11 Werner Koch
+
+	* pubkey.c (sexp_to_enc, sexp_data_to_mpi): Set LABEL to NULL
+	after free.
+	(sexp_to_enc, sexp_data_to_mpi): Do not allow multiple encoding
+	flags.
+	(oaep_encode, oaep_decode, sexp_to_key, sexp_to_sig)
+	(sexp_to_enc, sexp_data_to_mpi, gcry_pk_encrypt, gcry_pk_sign)
+	(gcry_pk_genkey, _gcry_pk_get_elements): Replace access to ERRNO
+	by gpg_err_code_from_syserror.
+
+2011-05-11 Daiki Ueno
+
+	* pubkey.c (sexp_data_to_mpi): Factor some code out to ...
+	(get_hash_algo): .. new.
+	(mgf1, oaep_encode, oaep_decode): New.
+	(sexp_to_enc): Add arg CTX. Remove arg RET_WANT_PKCS1. Support
+	OAEP.
+	(sexp_data_to_mpi): Add arg CTX. Support OAEP.
+	(gcry_pk_encrypt): Pass a CTX to sexp_data_to_mpi.
+	(gcry_pk_decrypt): Pass a CTX tp sexp_to_enc and replace
+	WANT_PKCS1. Implement unpadding for OAEP.
+	(gcry_pk_sign): Pass NULL for CTX arg of sexp_data_to_mpi.
+	(gcry_pk_verify): Ditto.
+
+2011-04-19 Werner Koch
+
+	* cipher.c (gcry_cipher_open): Replace gpg_err_code_from_errno by
+	gpg_err_code_from_syserror.
+
+2011-04-11 Werner Koch
+
+	* pubkey.c (gcry_pk_get_keygrip): Avoid double free of L2.
+
+	* cipher.c (_gcry_cipher_setctr): Clear unused lastiv info.
+	(gcry_cipher_ctl) : Implement by calling
+	_gcry_cipher_setctr.
+	(do_ctr_encrypt): Save last counter and reuse it.
+
+	* cipher.c (do_ctr_encrypt): Allow arbitrary length inputs to
+	match the 1.4 behaviour.
+
+2011-04-04 Werner Koch
+
+	* ecc.c (compute_keygrip): Release L1 while parsing "curve".
+
+	* pubkey.c (gcry_pk_get_keygrip): Always release NAME and L2.
+	Reported by Ben Kibbey.
+
+2011-03-28 Werner Koch
+
+	* primegen.c (_gcry_generate_elg_prime): Make sure that PRIME is
+	NULL if the called func ever returns an error.
+
+	* pubkey.c (gcry_pk_decrypt): Remove unused var PUBKEY.
+
+2011-03-09 Werner Koch
+
+	* kdf.c: New.
+
+2011-02-22 Werner Koch
+
+	* rijndael.c (aesni_cleanup_2_4): New.
+	(aesenc_xmm1_xmm0, do_aesni_ctr_4): New.
+	(_gcry_aes_ctr_enc): New.
+	* cipher.c (struct gcry_cipher_handle): Add CTR_ENC. Move field
+	CTR into an u_ctr union and adjust all users.
+	(gcry_cipher_open): Use _gcry_aes_ctr_enc.
+	(do_ctr_encrypt): Use bulk mode.
+
+2011-02-18 Werner Koch
+
+	* rijndael.c (u32_a_t): New.
+	(do_encrypt_aligned, do_encrypt_aligned): Use the new type to
+	avoid problems with strict aliasing rules.
+
+2011-02-16 Werner Koch
+
+	* rijndael.c (do_aesni_cfb) [USE_AESNI]: New.
+	(_gcry_aes_cfb_enc, _gcry_aes_cfb_dec) [USE_AESNI]: Use new fucntion.
+
+2011-02-15 Werner Koch
+
+	* rijndael.c (do_aesni_enc_aligned, do_aesni_dec_aligned): Use
+	movdqa for the key but keep using movdqu for the data.
+	(do_aesni): Remove alignment detection. Don't burn the stack.
+	(aesni_prepare, aesni_cleanup): New macros.
+	(rijndael_encrypt, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+	(rijndael_decrypt, _gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Use
+	these macros. Don't burn the stack in the USE_AESNI case.
+	(do_setkey): Add disabled code to use aeskeygenassist.
+
+2011-02-14 Werner Koch
+
+	* rijndael.c (ATTR_ALIGNED_16): New
+	(do_aesni): Do not copy if already aligned.
+	(do_encrypt, do_decrypt): Ditto.
+	(rijndael_decrypt, rijndael_encrypt): Increase stack burning amount.
+
+	* rijndael.c (RIJNDAEL_context): Reorder fields. Change fieldname
+	ROUNDS to rounds. Move padlock_key into u1.
+	(keySched, keySched2): Rename macros to keyscherr and keyschdec
+	and change all users.
+	(padlockkey): New macro. Change all users of padlock_key.
+	* cipher.c (NEED_16BYTE_ALIGNED_CONTEXT): Always define if using gcc.
+	(struct gcry_cipher_handle): Align U_IV to at least 16 byte.
+
+2011-02-13 Werner Koch
+
+	* rijndael.c (USE_AESNI): New. Define for ia32 and gcc >= 4.
+	(m128i_t) [USE_AESNI]: New.
+	(RIJNDAEL_context) [USE_AESNI]: Add field use_aesni.
+	(do_setkey): Set USE_AESNI for all key lengths.
+	(prepare_decryption) [USE_AESNI]: Use aesimc instn if requested.
+	(do_aesni_enc_aligned, do_aesni_dec_aligned)
+	(do_aesni) [USE_AESNI]: New.
+	(rijndael_encrypt, _gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+	(rijndael_decrypt, _gcry_aes_cfb_dec)
+	(_gcry_aes_cbc_dec) [USE_AESNI]: Use do_aesni.
+
+2011-02-01 Werner Koch
+
+	* pubkey.c (gcry_pk_get_curve): New.
+	(sexp_to_key): Add arg OVERRIDE_ELEMS.
+	(sexp_elements_extract_ecc): Allow for params only.
+	(gcry_pk_get_param): New.
+	* ecc.c (ecc_get_curve): New.
+	(ecc_get_param_sexp): New.
+
+2011-01-28 Werner Koch
+
+	* pubkey.c (gcry_pk_genkey): Hack to insert the used curve name.
+
+2011-01-27 Werner Koch
+
+	* ecc.c (fill_in_curve): Remove.
+	(generate_curve): Rename to ..
+	(fill_in_curve): this. Remove setting of NAME_OID.
+	(ecc_encrypt_raw): Change name of arg DATA to K for better
+	readability. Use ECC_public_key instead of ECC_secret_key.
+	Require a caller to pass a complete pkey array.
+	(ecc_decrypt_raw): Require a caller to pass a complete skey array.
+	(elliptic_curve_t): Add field NAME.
+	(fill_in_curve): Set field.
+	(generate_key): Add arg R_USED_CURVE.
+	(ecc_generate_ext): Return used curve name.
+
+2011-01-13 Andrey Jivsov (wk)
+
+	* ecc.c (ec2os): Do not free passed parameters X and Y. Adjust
+	callers.
+	(ecc_encrypt_raw, ecc_decrypt_raw): New.
+	(ecdh_names, _gcry_pubkey_spec_ecdh): New.
+	* pubkey.c (pubkey_table): Support ECDH.
+
+2010-08-19 Werner Koch
+
+	* cipher.c (gcry_cipher_open): Remove double release of the module.
+	Fixes bug#1263.
+
+2010-06-10 Jeff Johnson (wk)
+
+	* ecc.c (ecc_generate_ext): Parse transient-key flag.
+	(generate_key): Add arg TRANSIENT_KEY and use it to set the random
+	level.
+
+2010-04-12 Brad Hards (wk)
+
+	Spelling fixes.
+
+2010-03-26 Werner Koch
+
+	* tiger.c (asn): Unfetter the old TIGER from an OID.
+	(TIGER_CONTEXT): Add field VARIANT.
+	(tiger_init): Factor code out to ...
+	(do_init): New.
+	(tiger1_init, tiger2_init): New.
+	(_gcry_digest_spec_tiger1, _gcry_digest_spec_tiger2): New.
+	* md.c (digest_table): Add TIGER1 and TIGER2 variants.
+
+2009-12-11 Werner Koch
+
+	* sha256.c (Cho, Maj, Sum0, Sum1): Turn macros into inline
+	functions.
+	(transform): Partly unroll to interweave the chain variables
+
+	* sha512.c (ROTR, Ch, Maj, Sum0, Sum1): Turn macros into inline
+	functions.
+	(transform): Partly unroll to interweave the chain variables.
+	Suggested by Christian Grothoff.
+
+2009-12-10 Werner Koch
+
+	* Makefile.am (o_flag_munging): New.
+	(tiger.o, tiger.lo): Use it.
+
+	* cipher.c (do_ctr_encrypt): Add arg OUTBUFLEN. Check for
+	suitable value. Add check for valid inputlen. Wipe temporary
+	memory.
+	(do_ctr_decrypt): Likewise.
+	(do_cbc_encrypt, do_cbc_decrypt): Add arg OUTBUFLEN. Check for
+	suitable value. Move check for valid inputlen to here; change
+	returned error from INV_ARG to INV_LENGTH.
+	(do_ecb_encrypt, do_ecb_decrypt): Ditto.
+	(do_cfb_encrypt, do_cfb_decrypt): Ditto.
+	(do_ofb_encrypt, do_ofb_decrypt): Ditto.
+	(cipher_encrypt, cipher_encrypt): Adjust for above changes.
+	(gcry_cipher_encrypt, gcry_cipher_decrypt): Simplify.
+
+2009-12-09 Werner Koch
+
+	* cipher.c (gcry_cipher_open): Allow for GCRY_CIPHER_MODE_AESWRAP.
+	(cipher_encrypt, cipher_decrypt): Ditto.
+	(do_aeswrap_encrypt, do_aeswrap_decrypt): New.
+	(struct gcry_cipher_handle): Add field marks.
+	(cipher_setkey, cipher_setiv): Update marks flags.
+	(cipher_reset): Reset marks.
+	(cipher_encrypt, cipher_decrypt): Add new arg OUTBUFLEN.
+	(gcry_cipher_encrypt, gcry_cipher_decrypt): Pass outbuflen to
+	cipher_encrypt. Replace GPG_ERR_TOO_SHORT by
+	GPG_ERR_BUFFER_TOO_SHORT.
+
+2009-08-21 Werner Koch
+
+	* dsa.c (dsa_generate_ext): Release retfactors array before
+	setting it to NULL. Reported by Daiko Ueno.
+
+2009-07-02 Werner Koch
+
+	* md.c (md_read): Fix incomplete check for NULL.
+	Reported by Fabian Kail.
+
+2009-03-31 Werner Koch
+
+	* rsa.c (rsa_check_secret_key): Return GPG_ERR_BAD_SECKEY and not
+	GPG_ERR_PUBKEY_ALGO.
+
+2009-02-16 Werner Koch
+
+	* rsa.c (generate_x931): Do not initialize TBL with automatic
+	variables.
+	* whirlpool.c, tiger.c, sha256.c, sha1.c, rmd160.c, md5.c
+	* md4.c, crc.c: Remove memory.h. This is garbage from gnupg.
+	Reported by Dan Fandrich.
+
+2009-01-22 Werner Koch
+
+	* ecc.c (compute_keygrip): Remove superfluous const.
+
+2009-01-06 Werner Koch
+
+	* rmd160.c (oid_spec_rmd160): Add TeleTrust identifier.
+
+2008-12-10 Werner Koch
+
+	* dsa.c (generate): Add arg DOMAIN and use it if specified.
+	(generate_fips186): Ditto.
+	(dsa_generate_ext): Parse and check the optional "domain"
+	parameter and pass them to the generate functions.
+
+	* rijndael.c (rijndael_names): Add "AES128" and "AES-128".
+	(rijndael192_names): Add "AES-192".
+	(rijndael256_names): Add "AES-256".
+
+2008-12-05 Werner Koch
+
+	* dsa.c (generate): Add arg TRANSIENT_KEY and use it to detrmine
+	the RNG quality needed.
+	(dsa_generate_ext): Parse the transient-key flag und pass it to
+	generate.
+
+2008-11-28 Werner Koch
+
+	* dsa.c (generate_fips186): Add arg DERIVEPARMS and use the seed
+	value if available.
+
+	* primegen.c (_gcry_generate_fips186_2_prime): Fix inner p loop.
+
+2008-11-26 Werner Koch
+
+	* primegen.c (_gcry_generate_fips186_3_prime): New.
+	* dsa.c (generate_fips186): Add arg USE_FIPS186_2.
+	(dsa_generate_ext): Parse new flag use-fips183-2.
+
+2008-11-25 Werner Koch
+
+	* dsa.c (generate_fips186): New.
+	(dsa_generate_ext): Use new function if derive-parms are given or
+	if in FIPS mode.
+	* primegen.c (_gcry_generate_fips186_2_prime): New.
+
+2008-11-24 Werner Koch
+
+	* pubkey.c (gcry_pk_genkey): Insert code to output extrainfo.
+	(pubkey_generate): Add arg R_EXTRAINFO and pass it to the extended
+	key generation function.
+	* rsa.c (gen_x931_parm_xp, gen_x931_parm_xi): New.
+	(generate_x931): Generate params if not given.
+	(rsa_generate_ext): Parse use-x931 flag. Return p-q-swapped
+	indicator.
+	* dsa.c (dsa_generate_ext): Put RETFACTORS into R_EXTRAINFO if
+	possible.
+
+	* pubkey.c (gcry_pk_genkey): Remove parsing of almost all
+	parameters and pass the parameter S-expression to pubkey_generate.
+	(pubkey_generate): Simplify by requitring modules to parse the
+	parameters. Remove the special cases for Elgamal and ECC.
+	(sexp_elements_extract_ecc): Add arg EXTRASPEC and use it. Fix
+	small memory leak.
+	(sexp_to_key): Pass EXTRASPEC to sexp_elements_extract_ecc.
+	(pubkey_table) [USE_ELGAMAL]: Add real extraspec.
+	* rsa.c (rsa_generate_ext): Adjust for new calling convention.
+	* dsa.c (dsa_generate_ext): Ditto.
+	* elgamal.c (_gcry_elg_generate): Ditto. Rename to elg_generate_ext.
+	(elg_generate): New.
+	(_gcry_elg_generate_using_x): Remove after merging code with
+	elg_generate_ext.
+	(_gcry_pubkey_extraspec_elg): New.
+	(_gcry_elg_check_secret_key, _gcry_elg_encrypt, _gcry_elg_sign)
+	(_gcry_elg_verify, _gcry_elg_get_nbits): Make static and remove
+	_gcry_ prefix.
+	* ecc.c (_gcry_ecc_generate): Rename to ecc_generate_ext and
+	adjust for new calling convention.
+	(_gcry_ecc_get_param): Rename to ecc_get_param and make static.
+	(_gcry_pubkey_extraspec_ecdsa): Add ecc_generate_ext and
+	ecc_get_param.
+
+2008-11-20 Werner Koch
+
+	* pubkey.c (pubkey_generate): Add arg DERIVEPARMS.
+	(gcry_pk_genkey): Parse derive-parms and pass it to above.
+	* rsa.c (generate_x931): New.
+	(rsa_generate_ext): Add arg DERIVEPARMS and call new function in
+	fips mode or if DERIVEPARMS is given.
+	* primegen.c (_gcry_derive_x931_prime, find_x931_prime): New.
+
+2008-11-19 Werner Koch
+
+	* rsa.c (rsa_decrypt): Use gcry_create_nonce for blinding.
+	(generate): Rename to generate_std.
+
+2008-11-05 Werner Koch
+
+	* md.c (md_open): Use a switch to set the Bsize.
+	(prepare_macpads): Fix long key case for SHA384 and SHA512.
+
+	* cipher.c (gcry_cipher_handle): Add field EXTRASPEC.
+	(gcry_cipher_open): Set it.
+	(gcry_cipher_ctl): Add private control code to disable weak key
+	detection and to return the current input block.
+	* des.c (_tripledes_ctx): Add field FLAGS.
+	(do_tripledes_set_extra_info): New.
+	(_gcry_cipher_extraspec_tripledes): Add new function.
+	(do_tripledes_setkey): Disable weak key detection.
+
+2008-10-24 Werner Koch
+
+	* md.c (digest_table): Allow MD5 in fips mode.
+	(md_register_default): Take special action for MD5.
+	(md_enable, gcry_md_hash_buffer): Ditto.
+
+2008-09-30 Werner Koch
+
+	* rijndael.c (do_setkey): Properly align "t" and "tk".
+	(prepare_decryption): Properly align "w". Fixes bug #936.
+
+2008-09-18 Werner Koch
+
+	* pubkey.c (gcry_pk_genkey): Parse domain parameter.
+	(pubkey_generate): Add new arg DOMAIN and remove special case for
+	DSA with qbits.
+	* rsa.c (rsa_generate): Add dummy args QBITS, NAME and DOMAIN and
+	rename to rsa_generate_ext. Change caller.
+	(_gcry_rsa_generate, _gcry_rsa_check_secret_key)
+	(_gcry_rsa_encrypt, _gcry_rsa_decrypt, _gcry_rsa_sign)
+	(_gcry_rsa_verify, _gcry_rsa_get_nbits): Make static and remove
+	_gcry_ prefix.
+	(_gcry_pubkey_spec_rsa, _gcry_pubkey_extraspec_rsa): Adjust names.
+	* dsa.c (dsa_generate_ext): New.
+	(_gcry_dsa_generate): Replace code by a call to dsa_generate.
+	(_gcry_dsa_check_secret_key, _gcry_dsa_sign, _gcry_dsa_verify)
+	(_gcry_dsa_get_nbits): Make static and remove _gcry prefix.
+	(_gcry_dsa_generate2): Remove.
+	(_gcry_pubkey_spec_dsa): Adjust to name changes.
+	(_gcry_pubkey_extraspec_rsa): Add dsa_generate_ext.
+
+2008-09-16 Werner Koch
+
+	* ecc.c (run_selftests): Add arg EXTENDED.
+
+2008-09-12 Werner Koch
+
+	* rsa.c (test_keys): Do a bad case signature check.
+	* dsa.c (test_keys): Do a bad case check.
+
+	* cipher.c (_gcry_cipher_selftest): Add arg EXTENDED and pass it
+	to the called tests.
+	* md.c (_gcry_md_selftest): Ditto.
+	* pubkey.c (_gcry_pk_selftest): Ditto.
+	* rijndael.c (run_selftests): Add arg EXTENDED and pass it to the
+	called tests.
+	(selftest_fips_128): Add arg EXTENDED and run only one test
+	non-extended mode.
+	(selftest_fips_192): Add dummy arg EXTENDED.
+	(selftest_fips_256): Ditto.
+	* hmac-tests.c (_gcry_hmac_selftest): Ditto.
+	(run_selftests): Ditto.
+	(selftests_sha1): Add arg EXTENDED and run only one test
+	non-extended mode.
+	(selftests_sha224, selftests_sha256): Ditto.
+	(selftests_sha384, selftests_sha512): Ditto.
+	* sha1.c (run_selftests): Add arg EXTENDED and pass it to the
+	called test.
+	(selftests_sha1): Add arg EXTENDED and run only one test
+	non-extended mode.
+	* sha256.c (run_selftests): Add arg EXTENDED and pass it to the
+	called tests.
+	(selftests_sha224): Add arg EXTENDED and run only one test
+	non-extended mode.
+	(selftests_sha256): Ditto.
+	* sha512.c (run_selftests): Add arg EXTENDED and pass it to the
+	called tests.
+	(selftests_sha384): Add arg EXTENDED and run only one test
+	non-extended mode.
+	(selftests_sha512): Ditto.
+	* des.c (run_selftests): Add arg EXTENDED and pass it to the
+	called test.
+	(selftest_fips): Add dummy arg EXTENDED.
+	* rsa.c (run_selftests): Add dummy arg EXTENDED.
+
+	* dsa.c (run_selftests): Add dummy arg EXTENDED.
+
+	* rsa.c (extract_a_from_sexp): New.
+	(selftest_encr_1024): Check that the ciphertext does not match the
+	plaintext.
+	(test_keys): Improve tests and return an error status.
+	(generate): Return an error if test_keys fails.
+	* dsa.c (test_keys): Add comments and return an error status.
+	(generate): Return an error if test_keys failed.
+
+2008-09-11 Werner Koch
+
+	* rsa.c (_gcry_rsa_decrypt): Return an error instead of calling
+	BUG in case of a practically impossible condition.
+	(sample_secret_key, sample_public_key): New.
+	(selftest_sign_1024, selftest_encr_1024): New.
+	(selftests_rsa): Implement tests.
+	* dsa.c (sample_secret_key, sample_public_key): New.
+	(selftest_sign_1024): New.
+	(selftests_dsa): Implement tests.
+
+2008-09-09 Werner Koch
+
+	* hmac-tests.c (selftests_sha1): Add tests.
+	(selftests_sha224, selftests_sha384, selftests_sha512): Make up tests.
+
+	* hash-common.c, hash-common.h: New.
+	* sha1.c (selftests_sha1): Add 3 tests.
+	* sha256.c (selftests_sha256, selftests_sha224): Ditto.
+	* sha512.c (selftests_sha512, selftests_sha384): Ditto.
+
+2008-08-29 Werner Koch
+
+	* pubkey.c (gcry_pk_get_keygrip): Remove the special case for RSA
+	and check whether a custom computation function has been setup.
+	* rsa.c (compute_keygrip): New.
+	(_gcry_pubkey_extraspec_rsa): Setup this function.
+	* ecc.c (compute_keygrip): New.
+	(_gcry_pubkey_extraspec_ecdsa): Setup this function.
+
+2008-08-28 Werner Koch
+
+	* cipher.c (cipher_decrypt, cipher_encrypt): Return an error if
+	mode NONE is used.
+	(gcry_cipher_open): Allow mode NONE only with a debug flag set and
+	if not in FIPS mode.
+
+2008-08-26 Werner Koch
+
+	* pubkey.c (pubkey_generate): Add arg KEYGEN_FLAGS.
+	(gcry_pk_genkey): Implement new parameter "transient-key" and
+	pass it as flags to pubkey_generate.
+	(pubkey_generate): Make use of an ext_generate function.
+	* rsa.c (generate): Add new arg transient_key and pass appropriate
+	args to the prime generator.
+	(_gcry_rsa_generate): Factor all code out to ...
+	(rsa_generate): .. new func with extra arg KEYGEN_FLAGS.
+	(_gcry_pubkey_extraspec_ecdsa): Setup rsa_generate.
+	* primegen.c (_gcry_generate_secret_prime)
+	(_gcry_generate_public_prime): Add new arg RANDOM_LEVEL.
+
+2008-08-21 Werner Koch
+
+	* primegen.c (_gcry_generate_secret_prime)
+	(_gcry_generate_public_prime): Use a constant macro for the random
+	level.
+
+2008-08-19 Werner Koch
+
+	* pubkey.c (sexp_elements_extract_ecc) [!USE_ECC]: Do not allow
+	allow "curve" parameter.
+
+2008-08-15 Werner Koch
+
+	* pubkey.c (_gcry_pk_selftest): New.
+	* dsa.c (selftests_dsa, run_selftests): New.
+	* rsa.c (selftests_rsa, run_selftests): New.
+	* ecc.c (selftests_ecdsa, run_selftests): New.
+
+	* md.c (_gcry_md_selftest): New.
+	* sha1.c (run_selftests, selftests_sha1): New.
+	* sha256.c (selftests_sha224, selftests_sha256, run_selftests): New.
+	* sha512.c (selftests_sha384, selftests_sha512, run_selftests): New.
+
+	* des.c (selftest): Remove static variable form selftest.
+	(des_setkey): No on-the-fly self test in fips mode.
+	(tripledes_set3keys): Ditto.
+
+	* cipher.c (_gcry_cipher_setkey, _gcry_cipher_setiv):
+
+	* dsa.c (generate): Bail out in fips mode if NBITS is less than 1024.
+	* rsa.c (generate): Return an error code if the the requested size
+	is less than 1024 and we are in fpis mode.
+	(_gcry_rsa_generate): Take care of that error code.
+
+	* ecc.c (generate_curve): In fips mode enable only NIST curves.
+
+	* cipher.c (_gcry_cipher_selftest): New.
+
+	* sha512.c (_gcry_digest_extraspec_sha384)
+	(_gcry_digest_extraspec_sha512): New.
+	* sha256.c (_gcry_digest_extraspec_sha224)
+	(_gcry_digest_extraspec_sha256): New.
+	* sha1.c (_gcry_digest_extraspec_sha1): New.
+	* ecc.c (_gcry_pubkey_extraspec_ecdsa): New.
+	* dsa.c (_gcry_pubkey_extraspec_dsa): New.
+	* rsa.c (_gcry_pubkey_extraspec_rsa): New.
+	* rijndael.c (_gcry_cipher_extraspec_aes)
+	(_gcry_cipher_extraspec_aes192, _gcry_cipher_extraspec_aes256): New.
+	* des.c (_gcry_cipher_extraspec_tripledes): New.
+
+	* cipher.c (gcry_cipher_register): Rename to _gcry_cipher_register.
+	Add arg EXTRASPEC.
+	(dummy_extra_spec): New.
+	(cipher_table_entry): Add extraspec field.
+	* md.c (_gcry_md_register): Rename to _gcry_md_register. Add
+	arg EXTRASPEC.
+	(dummy_extra_spec): New.
+	(digest_table_entry): Add extraspec field.
+	* pubkey.c (gcry_pk_register): Rename to _gcry_pk_register. Add
+	arg EXTRASPEC.
+	(dummy_extra_spec): New.
+	(pubkey_table_entry): Add extraspec field.
+
+	* ac.c: Let most public functions return GPG_ERR_UNSUPPORTED in
+	fips mode.
+
+	* pubkey.c (pubkey_table_entry): Add field FIPS_ALLOWED and mark
+	appropriate algorithms.
+	(dummy_generate, dummy_check_secret_key, dummy_encrypt)
+	(dummy_decrypt, dummy_sign, dummy_verify, dummy_get_nbits): Signal
+	a fips error when used.
+	(gcry_pk_register): In fips mode do not allow to register new
+	algorithms.
+
+	* md.c (digest_table): Add field FIPS_ALLOWED and mark appropriate
+	algorithms.
+	(md_register_default): In fips mode register only fips algorithms.
+	(gcry_md_register): In fips mode do not allow to register new
+	algorithms.
+	(gcry_md_get): Signal a fips error if called.
+	(gcry_md_hash_buffer): Do not allow rmd160 when not in fips mode.
+	(md_start_debug): Disable in fips_mode.
+
+	* md.c (gcry_md_register_default): Rename to ..
+	(md_register_default): .. this.
+	(md_digest): Remove this commented fucntion.
+	* pubkey.c (gcry_pk_register_default): Rename to ..
+	(pk_register_default): .. this.
+
+	* cipher.c (cipher_table_entry): Add field FIPS_ALLOWED.
+	(gcry_cipher_register_default): Register only fips approved
+	algorithms.
+	(gcry_cipher_register): Do not allow to register new ciphers.
+	(cipher_setiv): Signal fips error.
+
+	* cipher (gcry_cipher_register_default): Rename to ..
+	(cipher_register_default): .. this.
+	(REGISTER_DEFAULT_CIPHERS): Adjust for that change.
+
+2008-07-05 Werner Koch
+
+	* random-daemon.c, random.h, rndhw.c, rndunix.c, rand-internal.h
+	* random.c, rndegd.c, rndlinux.c, rndw32.c: Move to ../cipher.
+	* Makefile.am: Remove random stuff.
+
+2008-06-24 Szakats Istvan (wk)
+
+	* ac.c (_gcry_ac_key_destroy, _gcry_ac_key_pair_generate): Relase
+	some more memory.
+
+2008-04-22 Werner Koch
+
+	* rijndael.c (_gcry_aes_cfb_enc, _gcry_aes_cbc_enc)
+	(_gcry_aes_cfb_dec, _gcry_aes_cbc_dec): Use Padlock if possible.
+
+2008-04-18 Werner Koch
+
+	* sha1.c (transform_aligned): Remove. That is will obviosuly not
+	work because we need a scratch working area and our internal API
+	does not allow to modify the buffers.
+
+	* rijndael.c: Factor tables out to ..
+	* rijndael-tables.h: .. new.
+
+	* ac.c (ac_data_extract): Make static.
+
+	* camellia.h [HAVE_CONFIG_H]: Include config.h.
+
+	* rndw32.c (registry_poll): Only print the performance data
+	problem warning once. Suggested by Simon Josefsson.
+
+2008-03-19 Werner Koch
+
+	* cipher.c (gcry_cipher_open) [USE_AES]: Init bulk encryption only
+	if requested. Suggested by Dirk Stoecker.
+
+2008-03-18 Werner Koch
+
+	* sha1.c: Include stdint.h.
+	(transform): Add arg NBLOCKS so that we can work on more than one
+	block and avoid updates of the chaining variables. Changed all
+	callers to use 1.
+	(sha1_write): Replace loop around transform.
+	(transform_aligned) [WORDS_BIGENDIAN]: New.
+	(TRANSFORM): New macro to replace all direct calls of transform.
+
+2008-03-17 Werner Koch
+
+	* rijndael.c (_gcry_aes_cfb_dec): New.
+	(do_encrypt): Factor code out to ..
+	(do_encrypt_aligned): .. New.
+	(_gcry_aes_cfb_enc, _gcry_aes_cfb_dec): Use new function.
+	(do_decrypt): Factor code out to ..
+	(do_decrypt_aligned): .. new.
+	(_gcry_aes_cbc_enc, _gcry_aes_cbc_dec): New.
+	* cipher.c (struct gcry_cipher_handle): Put field IV into new
+	union U_IV to enforce proper alignment. Change all users.
+	(do_cfb_decrypt): Optimize.
+	(do_cbc_encrypt, do_cbc_decrypt): Optimize.
+
+2008-03-15 Werner Koch
+
+	* rijndael.c (_gcry_aes_cfb_enc): New.
+	* cipher.c (struct gcry_cipher_handle): Add field ALGO and BULK.
+	(gcry_cipher_open): Set ALGO and BULK.
+	(do_cfb_encrypt): Optimize.
+
+2008-02-18 Werner Koch
+
+	* rsa.c (_gcry_rsa_verify) [IS_DEVELOPMENT_VERSION]: Print
+	intermediate results.
+
+2008-01-08 Werner Koch
+
+	* random.c (add_randomness): Do not just increment
+	POOL_FILLED_COUNTER but update it by the actual amount of data.
+
+2007-12-13 Werner Koch
+
+	* pubkey.c (sexp_data_to_mpi): Support SHA-224.
+
+2007-12-05 Werner Koch
+
+	* rijndael.c (USE_PADLOCK): Depend on ENABLE_PADLOCK_SUPPORT.
+	* rndhw.c (USE_PADLOCK): Ditto
+
+	* rsa.c (secret): Fixed condition test for using CRT. Reported by
+	Dean Scarff. Fixes bug#864.
+	(_gcry_rsa_check_secret_key): Return an erro if the optional
+	parameters are missing.
+	* pubkey.c (sexp_elements_extract): Add arg ALGO_NAME. Changed all
+	callers to pass NULL. Add hack to allow for optional RSA
+	parameters.
+	(sexp_to_key): Pass algo name to sexp_elements_extract.
+
+2007-12-03 Werner Koch
+
+	* random.c (gcry_random_add_bytes): Implement it.
+	* rand-internal.h (RANDOM_ORIGIN_EXTERNAL): New.
+
+2007-11-30 Werner Koch
+
+	* rndhw.c: New.
+	* rndlinux.c (_gcry_rndlinux_gather_random): Try to read 50%
+	directly from the hwrng.
+	* random.c (do_fast_random_poll): Also run the hw rng fast poll.
+	(_gcry_random_dump_stats): Tell whether the hw rng failed.
+
+2007-11-29 Werner Koch
+
+	* rijndael.c (USE_PADLOCK): Define new macro used for ia32.
+	(RIJNDAEL_context) [USE_PADLOCK]: Add fields USE_PADLOCK and
+	PADLOCK_KEY.
+	(do_setkey) [USE_PADLOCK]: Enable padlock if available for 128 bit
+	AES.
+	(do_padlock) [USE_PADLOCK]: New.
+	(rijndael_encrypt, rijndael_decrypt) [USE_PADLOCK]: Divert to
+	do_padlock.
+	* cipher.c (cipher_context_alignment_t): New. Use it in this
+	module in place of PROPERLY_ALIGNED_TYPE.
+	(NEED_16BYTE_ALIGNED_CONTEXT): Define macro for ia32.
+	(struct gcry_cipher_handle): Add field HANDLE_OFFSET.
+	(gcry_cipher_open): Take care of increased alignment requirements.
+	(gcry_cipher_close): Ditto.
+
+2007-11-28 Werner Koch
+
+	* sha256.c (asn224): Fixed wrong template. It happened due to a
+	bug in RFC4880. SHA-224 is not in the stable version of libgcrypt
+	so the consequences are limited to users of this devel version.
+
+2007-10-31 Werner Koch
+
+	* ac.c (gcry_ac_data_new): Remove due to the visibility wrapper.
+	(gcry_ac_data_destroy, gcry_ac_data_copy, gcry_ac_data_length)
+	(gcry_ac_data_set, gcry_ac_data_get_name, gcry_ac_data_get_index)
+	(gcry_ac_data_to_sexp, gcry_ac_data_from_sexp)
+	(gcry_ac_data_clear, gcry_ac_io_init, gcry_ac_open)
+	(gcry_ac_close, gcry_ac_key_init, gcry_ac_key_pair_generate)
+	(gcry_ac_key_pair_extract, gcry_ac_key_destroy)
+	(gcry_ac_key_pair_destroy, gcry_ac_key_data_get)
+	(gcry_ac_key_test, gcry_ac_key_get_nbits, gcry_ac_key_get_grip)
+	(gcry_ac_data_encrypt, gcry_ac_data_decrypt, gcry_ac_data_sign)
+	(gcry_ac_data_verify, gcry_ac_data_encode, gcry_ac_data_decode)
+	(gcry_ac_mpi_to_os, gcry_ac_mpi_to_os_alloc, gcry_ac_os_to_mpi)
+	(gcry_ac_data_encrypt_scheme, gcry_ac_data_decrypt_scheme)
+	(gcry_ac_data_sign_scheme, gcry_ac_data_verify_scheme)
+	(gcry_ac_io_init_va): Ditto.
+	(gcry_ac_id_to_name, gcry_ac_name_to_id): Remove as these
+	deprecated functions are now implemented by visibility.c.
+
+2007-10-26 Werner Koch
+
+	* rndw32.c: Disable debug flag.
+
+2007-10-25 Werner Koch
+
+	* rndw32.c: Updated from current cryptlib snapshot and modified
+	for our use. Removed support from pre NT systems.
+	(slow_gatherer_windows95): Remove.
+	(_gcry_rndw32_gather_random): Require an NT platform.
+	(init_system_rng, read_system_rng, read_mbm_data): New.
+	(slow_gatherer_windowsNT): Rename to ...
+	(slow_gatherer): .. this. Read system RNG and MBM.
+	(registry_poll): New with code factored out from slow_gatherer.
+
+2007-08-23 Werner Koch
+
+	* random.c (pool_filled_counter): New.
+	(add_randomness): Use it.
+
+2007-08-22 Werner Koch
+
+	* rndw32.c, rndunix.c: Switched to LGPL.
+
+2007-05-30 Werner Koch
+
+	* camellia.h, camellia.c: Replace by new LGPL version and adjusted
+	camellia.h.
+
+2007-05-09 Marcus Brinkmann
+
+	* ac.c (_gcry_ac_io_init_va, _gcry_ac_io_write, _gcry_ac_io_read):
+	Adjust users of gcry_ac_io_t because union is not anonymous
+	anymore.
+
+2007-05-02 Werner Koch
+
+	* camellia-glue.c (camellia_setkey, camellia_encrypt)
+	(camellia_decrypt): Recalculated used stack size in called
+	functions.
+	* camellia.h: Redefine external symbols.
+
+2007-05-02 David Shaw
+
+	* Makefile.am, cipher.c: Add Camellia.
+
+	* camellia-glue.c: New. The necessary glue to interface libgcrypt
+	to the stock NTT Camellia distribution.
+
+	* camellia.h, camellia.c: The stock NTT Camellia distribution
+	(GPL).
+
+2007-04-30 David Shaw
+
+	* cipher.c: Use #if instead of #ifdef as configure defines the
+	USE_cipher defines as 0 for disabled.
+
+2007-04-30 Werner Koch
+
+	* rndegd.c (_gcry_rndegd_set_socket_name): New.
+
+2007-04-30 Marcus Brinkmann
+
+	* ecc.c (ec2os): Fix relocation of short numbers.
+
+	* ecc.c (generate_key): Do not allocate D, which will be allocated
+	by GEN_K. Remove G. Fix test if g_x, g_y resp. q_x, q_y are
+	requested.
+	(_gcry_ecc_generate): Release unneeded members of SK.
+	* pubkey.c (sexp_to_key): Release NAME.
+
+2007-04-28 Marcus Brinkmann
+
+	* ac.c (gcry_ac_mpi): Remove member NAME_PROVIDED.
+	(ac_data_mpi_copy, _gcry_ac_data_set, _gcry_ac_data_get_name)
+	(_gcry_ac_data_get_index, ac_data_construct): Adjust handling of
+	NAME accordingly.
+
+2007-04-20 Werner Koch
+
+	* ecc.c (domain_parms): Add standard brainpool curves.
+
+2007-04-18 Werner Koch
+
+	* ecc.c (generate_curve): Implement alias mechanism.
+
+	* pubkey.c (sexp_elements_extract_ecc): New.
+	(sexp_to_key): Add special case for ecc.
+	(sexp_to_key, sexp_to_sig, sexp_to_enc, gcry_pk_genkey): Replace
+	name_terminated stuff by a call to _gcry_sexp_nth_string.
+	(gcry_pk_get_keygrip): Ditto.
+
+2007-04-16 Werner Koch
+
+	* ecc.c (_gcry_ecc_generate): Renamed DUMMY to CURVE and use it.
+
+2007-04-13 Marcus Brinkmann
+
+	* ac.c (ac_data_construct): Cast const away to suppress compiler
+	warning.
+
+	* ecc.c (ecc_generate): Avoid compiler warning for unused argument
+	DUMMY.
+	(ecc_verify): Avoid compiler warning for unused arguments CMP and
+	OPAQUEV.
+
+2007-04-06 Werner Koch
+
+	* sha1.c (oid_spec_sha1): Add another oid from X9.62.
+
+2007-03-28 Werner Koch
+
+	* pubkey.c (gcry_pk_genkey): Do not issue misc-key-info if it is
+	empty.
+	(gcry_pk_genkey): New parameter "curve".
+
+	* ecc.c: Entirely rewritten with only a few traces of the old
+	code left.
+	(_gcry_ecc_generate): New.
+	(generate_key) New arg NAME.
+	(generate_curve): Ditto. Return actual number of NBITS.
+
+2007-03-26 Werner Koch
+
+	* pubkey.c (gcry_pk_genkey): Increase size of SKEY array and add a
+	runtime bounds check.
+
+2007-03-23 Werner Koch
+
+	* ecc.c (ecc_ctx_init, ecc_ctx_free, ecc_mod, ecc_mulm): New.
+	(duplicate_point, sum_points, escalar_mult): Don't use a
+	copy of base->p. Replaced all mpi_mulm by ecc_mulm so that we can
+	experiment with different algorithms.
+	(generate_key, check_secret_key, sign, verify): Initialize a
+	computation context for use by ecc_mulm.
+
+2007-03-22 Werner Koch
+
+	* pubkey.c (pubkey_table): Initialize ECC.
+	* Makefile.am (EXTRA_libcipher_la_SOURCES): Add ecc.c.
+	* ecc.c: New. Heavily reformatted and changed for use in libgcrypt.
+	(point_init): New.
+	(escalar_mult): Make arg R the first arg to be similar to the mpi
+	functions.
+	(duplicate_point): Ditto
+	(sum_points): Ditto
+	(sign, verify): Remove unneeded copy operations.
+	(sum_points): Removed memory leaks and optimized some compares.
+	(verify): Simplified input check.
+
+2007-03-14 Werner Koch
+
+	* random.c (MASK_LEVEL): Removed macro as it was used only at one
+	place. Open coded it there.
+	(gcry_randomize, _gcry_update_random_seed_file)
+	(_gcry_fast_random_poll): Factor lock code out to ..
+	(lock_pool, unlock_pool): .. new.
+	(initialize): Look the pool while allocating.
+	(read_random_source, do_fast_random_poll): Moved intialization to ...
+	(initialize): .. here.
+	(_gcry_enable_quick_random_gen): No more need for initialization.
+	(is_initialized): Moved this global flag to ..
+	(initialize): .. here and changed all users to unconditionally call
+	initialize.
+	(add_randomness): Remove initalization here. It simply can't
+	happen.
+
+	* random.c (enum random_origins): Moved to ..
+	* rand-internal.h: .. here.
+	* rndunix.c (_gcry_rndunix_gather_random): Use enum in prototype
+	for ORIGIN and renamed REQUESTOR to ORIGIN.
+	* rndegd.c (_gcry_rndegd_gather_random): Ditto.
+	* rndlinux.c (_gcry_rndlinux_gather_random): Ditto.
+	* rndw32.c (_gcry_rndw32_gather_random): Ditto.
+	(_gcry_rndw32_gather_random_fast): Ditto.
+
+2007-03-13 Werner Koch
+
+	* random.c (enum random_origins): New.
+	(add_randomness): Renamed arg SOURCE to ORIGIN.
+	(read_random_source): Renamed arg REQUESTOR to ORIGIN.
+	(getfnc_gather_random): Removed static variable because this
+	function is only called one and thus we don't need this
+	optimization.
+	(_gcry_quick_random_gen): Removed and replaced by..
+	(_gcry_enable_quick_random_gen): .. this. It is onlyu used to
+	enable it and it does not make sense to disable it later. Changed
+	the only one caller too.
+	(get_random_bytes): Removed.
+	(gcry_random_bytes, gcry_random_bytes_secure): Implement in terms
+	of gcry_randomize.
+	* random-daemon.c (_gcry_daemon_get_random_bytes): Removed.
+
+2007-02-23 Werner Koch
+
+	* elgamal.c (generate): Removed unused variable TEMP.
+	(test_keys): New arg NODIE.
+	(generate_using_x, _gcry_elg_generate_using_x): New.
+	* pubkey.c (pubkey_generate): New arg XVALUE and direct call to
+	the new elgamal generate fucntion.
+	(gcry_pk_genkey): Parse the new "xvalue" tag.
+
+2007-02-22 Werner Koch
+
+	* pubkey.c (sexp_data_to_mpi): Handle dynamically allocated
+	algorithms. Suggested by Neil Dunbar. Fixes bug#596.
+
+	* rndw32.c (_gcry_rndw32_gather_random_fast): Make it return void.
+
+	* cipher.c (gcry_cipher_algo_name): Simplified.
+
+	* random.c: Use the daemon only if compiled with USE_RANDOM_DAEMON.
+
+	* Makefile.am (libcipher_la_SOURCES): Build random-daemon support
+	only if requested.
+
+2007-02-21 Werner Koch
+
+	* random.c (rndpool, keypool): Make unsigned.
+	(mix_pool): Change char* variables to unsigned char*.
+ (gcry_randomize): Make arg BUFFER a void*.
+ (gcry_create_nonce): Ditto.
+
+ * rmd160.c (gcry_rmd160_mixblock): Make BUFFER a void*.
+ (_gcry_rmd160_hash_buffer): Make OUTBUF and BUFFER void*.
+ * sha1.c (_gcry_sha1_hash_buffer): Ditto.
+
+ * cipher.c (gcry_cipher_encrypt, gcry_cipher_decrypt): Change
+ buffer args to void*.
+ (gcry_cipher_register): Make ALGORITHM_ID an int *.
+
+ * md.c (md_start_debug): Make SUFFIX a const char*. Use snprintf.
+ (gcry_md_debug): New.
+ (gcry_md_ctl): Changed arg BUFFER from unsigned char*.
+
+ * md.c (md_write): Make INBUF a const void*.
+ (gcry_md_write): Remove needless cast.
+ * crc.c (crc32_write): Make INBUF a const void*.
+ (update_crc32, crc24rfc2440_write): Ditto.
+ * sha512.c (sha512_write, transform): Ditto.
+ * sha256.c (sha256_write, transform): Ditto.
+ * rmd160.c (rmd160_write, transform): Ditto.
+ * md5.c (md5_write, transform): Ditto.
+ * md4.c (md4_write, transform): Ditto.
+ * sha1.c (sha1_write, transform): Ditto.
+
+ * tiger.c (tiger_write, transform): Ditto.
+ * whirlpool.c (whirlpool_write, whirlpool_add, transform): Ditto.
+
+ * elgamal.c (elg_names): Change to a const*.
+ * dsa.c (dsa_names): Ditto.
+ * rsa.c (rsa_names): Ditto.
+ * pubkey.c (gcry_pk_lookup_func_name): Make ALIASES a const.
+
+2007-02-20 Werner Koch
+
+ * rndlinux.c (open_device): Remove unused arg MINOR.
+
+2007-01-30 Werner Koch
+
+ * sha256.c (oid_spec_sha256): Add alias from pkcs#1.
+ * sha512.c (oid_spec_sha512): Ditto.
+ (oid_spec_sha384): Ditto.
+
+2006-12-18 Werner Koch
+
+ * rndlinux.c (set_cloexec_flag): New.
+ (open_device): Set close-on-exec flag. Suggested by Max
+ Kellermann. Fixes Debian#403613.
+
+ * Makefile.am (AM_CPPFLAGS, AM_CFLAGS): Split and merged
+ Moritz' changes.
+ (INCLUDES): Removed.
+
+2006-11-30 Werner Koch
+
+ * serpent.c (byte_swap_32): Remove trailing semicolon.
+
+2006-11-15 Werner Koch
+
+ * Makefile.am (INCLUDES): Include ../src/
+
+2006-11-03 Werner Koch
+
+ * random.c [HAVE_GETTIMEOFDAY]: Included sys/time.h and not
+ sys/times.h. Reported by Rafaël Carré.
+
+2006-11-05 Moritz Schulte
+
+ * Makefile.am (AM_CFLAGS): Added -I$(top_builddir)/src so that the
+ new gcrypt.h is used, not the one installed in the system.
+
+2006-10-25 Werner Koch
+
+ * primegen.c (prime_generate_internal): Tweaked use of secure
+ memory and entropy use. Save unused primes from the pool. Allocate
+ at least a pool of 30.
+ (save_pool_prime, get_pool_prime): New.
+
+2006-10-23 Werner Koch
+
+ * ac.c (_gcry_ac_data_from_sexp): Reset sexp_tmp for failsafe
+ means. Release sexp_cur if needed. Reported by Dirk Stoecker.
+
+ * pubkey.c (pubkeys_registered_lock): Initialized it. It is not
+ really needed because this is a mere initialization to 0 anyway.
+ Noted by Victor Stinner.
+
+2006-10-17 Werner Koch
+
+ * dsa.c (_gcry_dsa_generate2): New.
+ (generate): New arg QBITS. Add sanity checks for reasonable qbits
+ and nbits.
+ * pubkey.c (gcry_pk_genkey): Parse a qbits element.
+ (pubkey_generate): New arg QBITS. Pass it to the DSA generation.
+
+2006-10-05 Werner Koch
+
+ * md.c (gcry_md_algo_info): Check that the algo is
+ available.
+
+2006-10-04 David Shaw (wk)
+
+ * tiger.c (round): Rename to tiger_round as gcc 4 has a built-in
+ round function that this conflicts with.
+
+2006-09-11 Werner Koch
+
+ * rndw32.c (slow_gatherer_windowsNT): While adding data use the
+ size of the diskPerformance and not its address. Has been fixed in
+ GnuPG more than a year ago. Noted by Lee Fisher.
+
+2006-08-30 Werner Koch
+
+ * pubkey.c (sexp_data_to_mpi): Need to allow "ripemd160" here as
+ this is the canonical name.
+
+2006-08-29 Hye-Shik Chang (wk)
+
+ * seed.c: New.
+
+2006-08-03 Werner Koch
+
+ * random-daemon.c (_gcry_daemon_initialize_basics): Don't
+ initialize the socket. Remove arg SOCKETNAME.
+ (connect_to_socket): Make sure that daemon is set to -1 on error.
+ (call_daemon): Initialize the socket on the first call.
+ (_gcry_daemon_randomize, _gcry_daemon_get_random_bytes)
+ (_gcry_daemon_create_nonce): New arg SOCKETNAME.
+ * random.c (initialize): Call new daemon initializer.
+ (get_random_bytes, gcry_randomize, gcry_create_nonce): Pass socket
+ name to daemon call and reset allow_daemon on failure.
+
+2006-07-26 Werner Koch
+
+ * rmd160.c (_gcry_rmd160_mixblock): Add cast to transform call.
+
+ * blowfish.c (selftest): Cast string to unsigned char*.
+
+ * primegen.c (prime_generate_internal): Cast unsigned/char*
+ mismatch in calling m_out_of_n.
+ (is_prime): Changed COUNT to unsigned int *.
+
+ * ac.c (_gcry_ac_data_copy): Initialize DATA_MPIS.
+
+ * random.c (gcry_create_nonce): Update the pid after a fork.
+ Reported by Uoti Urpala.
+
+2006-07-04 Marcus Brinkmann
+
+ * sha512.c: Fix typo in copyright notice.
+
+2006-06-21 Werner Koch
+
+ * rsa.c (_gcry_rsa_generate): Replace xcalloc by calloc.
+ * pubkey.c (gcry_pk_encrypt, gcry_pk_sign): Ditto.
+ (sexp_to_key, sexp_to_sig, sexp_to_enc, gcry_pk_encrypt)
+ (gcry_pk_sign, gcry_pk_genkey, gcry_pk_get_keygrip): Ditto.
+ * md.c (md_copy): Ditto.
+
+2006-04-22 Moritz Schulte
+
+ * random-daemon.c (_gcry_daemon_initialize_basics): New argument:
+ SOCKETNAME. Pass it on to connect_to_socket() if non-NULL.
+ (connect_to_socket, writen, readn, call_daemon): New functions.
+ (_gcry_daemon_randomize, _gcry_daemon_get_random_bytes)
+ (_gcry_daemon_create_nonce): Call call_daemon().
+ (RANDOM_DAEMON_SOCKET): New symbol.
+ (daemon_socket): New static variable.
+
+ * random.h (_gcry_daemon_initialize_basics): New parameter:
+ SOCKETNAME.
+ (_gcry_set_random_daemon_socket): New declaration.
+
+ * random.c (initialize_basics): Pass DAEMON_SOCKET_NAME to
+ _gcry_daemon_initialize_basics.
+ (_gcry_set_random_daemon_socket): New function, setting
+ DAEMON_SOCKET_NAME.
+
+2006-04-01 Moritz Schulte
+
+ * ac.c (eme_pkcs_v1_5_encode): Use KEY_SIZE directly, no need to
+ call gcry_ac_key_get_nbits.
+ (eme_pkcs_v1_5_decode): Likewise.
+ (ac_es_dencode_prepare_pkcs_v1_5): Fill options_em structure with
+ key_size.
+ (_gcry_ac_data_dump, gcry_ac_data_dump): New functions.
+ (_gcry_ac_data_to_sexp, _gcry_ac_data_from_sexp): More or less
+ rewritten; changed S-Expression format so that it matches the one
+ used in pubkey.c.
+
+2006-03-15 Werner Koch
+
+ * random-daemon.c: New.
+ * random.c (_gcry_use_random_daemon): New.
+ (get_random_bytes, gcry_randomize, gcry_create_nonce): Try
+ diverting to the daemon functions.
+
+2006-03-14 Werner Koch
+
+ * random.c (lock_seed_file): New.
+ (read_seed_file, _gcry_update_random_seed_file): Use it.
+
+ * random.c (gcry_create_nonce): Detect a fork and re-seed.
+ (read_pool): Fixed the fork detection; it used to work only for
+ multi-threaded processes.
+
+2006-03-12 Brad Hards (wk)
+
+ * md.c (md_open): Use new variable macpads_Bsize instead of
+ hardwiring the block size. Changed at all places.
+
+2006-03-10 Brad Hards (wk, patch 2005-04-22)
+
+ * md.c, sha256.c: Add support for SHA-224.
+ (sha224_init): New.
+
+2006-01-18 Brad Hards (wk 2006-03-07)
+
+ * cipher.c (cipher_encrypt, cipher_decrypt, do_ofb_encrypt)
+ (do_ofb_decrypt, gcry_cipher_open): Implement Output Feedback Mode.
+
+2005-11-02 Moritz Schulte
+
+ * pubkey.c (gcry_pk_algo_name): Return "?" instead of NULL for
+ unknown algorithm IDs.
+ * cipher.c (cipher_algo_to_string): Likewise.
+
+2005-11-01 Moritz Schulte
+
+ * pubkey.c (gcry_pk_algo_info): Don't forget to break after switch
+ case.
+
+2005-09-19 Werner Koch
+
+ * dsa.c (generate): Add preliminary support for 2 and 4 keys.
+ Return an error code if the key size is not supported.
+ (_gcry_dsa_generate): Return an error.
+
+2005-08-22 Werner Koch
+
+ * primegen.c (check_prime): New arg RM_ROUNDS.
+ (prime_generate_internal): Call it here with 5 rounds as used
+ before.
+ (gcry_prime_check): But here with 64 rounds.
+ (is_prime): Make sure never to use less than 5 rounds.
+
+2005-04-16 Moritz Schulte
+
+ * ac.c (_gcry_ac_init): New function.
+
+2005-04-12 Moritz Schulte
+
+ * ac.c (_gcry_ac_io_write, _gcry_ac_io_read): Initialize err to
+ make the compiler happy.
+ Always use errno, now that gcry_malloc() is guaranteed to set
+ errno on failure.
+ (_gcry_ac_data_to_sexp): Don't forget to goto out after error in
+ loop.
+ (_gcry_ac_data_to_sexp): Remove unused variable: mpi_list.
+ (_gcry_ac_data_to_sexp): Always deallocate sexp_buffer.
+ (_gcry_ac_data_from_sexp): Don't forget to initialize data_set_new.
+ (_gcry_ac_data_from_sexp): Handle special case, which is
+ necessary, since gcry_sexp_nth() does not distinguish between
+ "element does not exist" and "element is the empty list".
+ (_gcry_ac_io_init_va): Use assert to make sure that mode and type
+ are correct.
+ Use gcry_error_t types where gcry_err_code_t types have been used
+ before.
+
+2005-04-11 Moritz Schulte
+
+ * ac.c (_gcry_ac_data_sign_scheme): Don't forget to initialize
+ buffer.
+
+ * whirlpool.c: New file.
+ * md.c (digest_table): Add whirlpool.
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Added: whirlpool.c.
+
+2005-03-30 Moritz Schulte
+
+ * ac.c (_gcry_ac_data_from_sexp): Use length of SEXP_CUR, not
+ length of SEXP; do not forget to set SEXP_TMP to NULL after it has
+ been released.
+
+ (struct gcry_ac_mpi): New member: name_provided.
+ (_gcry_ac_data_set): Rename variable `name_final' to `name_cp';
+ remove const qualifier; change code to not cast away const
+ qualifiers; use name_provided member as well.
+ (_gcry_ac_data_set, _gcry_ac_data_get_name): Use name_provided
+ member of named mpi structure.
+
+ (gcry_ac_name_to_id): Do not forget to initialize err.
+ (_gcry_ac_data_get_index): Do not forget to initialize mpi_return;
+ use gcry_free() instead of free(); remove unnecessary cast; rename
+ mpi_return and name_return to mpi_cp and name_cp; adjust code.
+ (ac_data_mpi_copy): Do not cast away const qualifier.
+ (ac_data_values_destroy): Likewise.
+ (ac_data_construct): Likewise.
+
+ (ac_data_mpi_copy): Initialize flags to GCRY_AC_FLAG_DEALLOC.
+ (ac_data_extract): Use GCRY_AC_FLAG_DEALLOC instead of
+ GCRY_AC_FLAG_COPY.
+
+ (_gcry_ac_io_init_va, _gcry_ac_io_init, gcry_ac_io_init)
+ (gcry_ac_io_init_va, _gcry_ac_io_write, _gcry_ac_io_read)
+ (_gcry_ac_io_read_all, _gcry_ac_io_process): New functions.
+ (gcry_ac_em_dencode_t): Use gcry_ac_io_t in prototype instead of
+ memory strings directly; adjust encode/decode functions to use io
+ objects.
+ (emsa_pkcs_v1_5_encode_data_cb): New function ...
+ (emsa_pkcs_v1_5_encode): ... use it here.
+ (ac_data_dencode): Use io objects.
+ (_gcry_ac_data_encode, _gcry_ac_data_decode, gcry_ac_data_encode)
+ (gcry_ac_data_decode): Likewise.
+ (_gcry_ac_data_encrypt_scheme, gcry_ac_data_encrypt_scheme)
+ (_gcry_ac_data_decrypt_scheme, gcry_ac_data_decrypt_scheme)
+ (_gcry_ac_data_sign_scheme, gcry_ac_data_sign_scheme)
+ (_gcry_ac_data_verify_scheme, gcry_ac_data_verify_scheme):
+ Likewise.
+
+2005-03-23 Werner Koch
+
+ * rndw32.c (_gcry_rndw32_gather_random_fast): While adding data
+ use the size of the object and not the one of its address. Bug
+ reported by Sascha Kiefer.
+
+2005-03-19 Moritz Schulte
+
+ * cipher.c (do_cbc_encrypt): Be careful to not overwrite data,
+ which is to be used later on. This happened in case CTS is
+ enabled and OUTBUF is equal to INBUF.
+
+2005-02-25 Werner Koch
+
+ * pubkey.c (gcry_pk_get_keygrip): Allow for shadowed-private-key.
+
+2005-02-13 Moritz Schulte
+
+ * serpent.c: Updated from 1.2 branch:
+
+ s/u32_t/u32/ and s/byte_t/byte/. To match what we have always
+ used and are using in all other files too.
+ (serpent_test): Moved prototype out of a function.
+
+2005-02-07 Moritz Schulte
+
+ * ac.c: Major parts rewritten.
+ * pubkey.c (_gcry_pk_get_elements): New function.
+
+2004-12-09 Werner Koch
+
+ * serpent.c (serpent_setkey): Moved prototype of serpent_test to
+ outer scope.
+
+2004-09-11 Moritz Schulte
+
+ * pubkey.c (pubkey_table): Added an alias entry for GCRY_PK_ELG_E.
+
+2004-08-23 Moritz Schulte
+
+ * ac.c: Do not include .
+ * rndegd.c: Likewise.
+ * sha1.c: Likewise.
+ * rndunix.c: Likewise.
+ * rndlinux.c: Likewise.
+ * rmd160.c: Likewise.
+ * md5.c: Likewise.
+ * md4.c: Likewise.
+ * cipher.c: Likewise.
+ * crc.c: Likewise.
+ * blowfish.c: Likewise.
+
+ * pubkey.c (dummy_generate, dummy_check_secret_key)
+ (dummy_encrypt, dummy_decrypt, dummy_sign, dummy_verify): Return
+ err code GPG_ERR_NOT_IMPLEMENTED instead of aborting through
+ log_bug().
+ (dummy_get_nbits): Return 0 instead of aborting through log_bug().
+
+2004-08-19 Werner Koch
+
+ * pubkey.c (sexp_data_to_mpi): Changed the zero random byte
+ substituting code to actually do clever things. Thanks to
+ Matthias Urlichs for noting the implementation problem.
+
+2004-08-09 Moritz Schulte
+
+ * pubkey.c (gcry_pk_sign): Fixed memory leak; fix provided by
+ Modestas Vainius.
+
+2004-07-16 Werner Koch
+
+ * rijndael.c (do_encrypt): Fix alignment problem. Bugs found by
+ Matthias Urlichs.
+ (do_decrypt): Ditto.
+ (keySched, keySched2): Use 2 macros along with unions in the key
+ schedule context.
+
+2004-07-14 Moritz Schulte
+
+ * rsa.c (_gcry_rsa_decrypt): Don't forget to free "a". Thanks to
+ Nikos Mavroyanopoulos.
+
+2004-05-09 Werner Koch
+
+ * random.c (read_pool): Mix the PID in to better protect after a
+ fork.
+
+2004-07-04 Moritz Schulte
+
+ * serpent.c: Use "u32_t" instead of "unsigned long", do not
+ declare S-Box variables as "register". Fixes failure on
+ OpenBSD/sparc64, reported by Nikolay Sturm.
+
+2004-05-07 Werner Koch
+
+ * random.c (initialize): Factored out some code to ..
+ (initialize_basics): .. new function.
+ (_gcry_random_initialize): Just call initialize_basics unless the
+ new arg FULL is set to TRUE.
+ (_gcry_fast_random_poll): Don't do anything unless the random
+ system has been really initialized.
+
+2004-05-07 Moritz Schulte
+
+ * ac.c (gcry_ac_open): Do not dereference NULL pointer. Reported
+ by Umberto Salsi.
+
+2004-02-20 Werner Koch
+
+ * primegen.c (check_prime): New args CB_FUNC and CB_ARG; call them
+ at different stages. Pass these arguments through all callers.
+
+2004-02-06 Werner Koch
+
+ * des.c: Add a new OID as used by pkcs#12.
+
+ * rfc2268.c: New. Taken from libgcrypt.
+ * cipher.c: Setup the rfc2268 algorithm.
+
+2004-01-25 Moritz Schulte
+
+ * primegen.c (prime_generate_internal): Do not forget to free
+ `q_factor'; fixed by Brieuc Jeunhomme.
+ (prime_generate_internal): Do not forget to free `prime'.
+
+2004-01-14 Moritz Schulte
+
+ * ac.c (gcry_ac_data_set): New argument: flags; slightly
+ rewritten.
+ (gcry_ac_data_get_name, gcry_ac_data_get_index): Likewise.
+ (gcry_ac_key_pair_generate): New argument: misc_data; modified
+ order of arguments.
+ (gcry_ac_key_test): New argument: handle.
+ (gcry_ac_key_get_nbits, gcry_ac_key_get_grip): Likewise.
+ Use GCRY_AC_FLAG_NO_BLINDING instead of
+ GCRY_AC_DATA_FLAG_NO_BLINDING.
+ (gcry_ac_mpi): New member: flags.
+ (gcry_ac_data_search, gcry_ac_data_add): Removed functions.
+
+2003-12-22 Werner Koch
+
+ * primegen.c (is_prime): Release A2.
+
+2003-12-19 Werner Koch
+
+ * md.c: Moved a couple of functions down below the data structure
+ definitions.
+ (struct gcry_md_context): New field ACTUAL_HANDLE_SIZE.
+ (md_open): Set it here.
+ (struct gcry_md_list): New field ACTUAL_STRUCT_SIZE.
+ (md_enable): Set it here.
+ (md_close): Wipe the context memory, including secure memory.
+ * cipher.c (struct gcry_cipher_handle): New field ACTUAL_HANDLE_SIZE.
+ (gcry_cipher_open): Set it here.
+ (gcry_cipher_close): Use it to always wipe out the handle data.
+
+ * ac.c (gcry_ac_open): Make sure HANDLE gets initialized even when
+ the function is not successful.
+ (gcry_ac_close): Allow a NULL handle.
+ (gcry_ac_key_destroy, gcry_ac_key_pair_destroy): Ditto.
+ (gcry_ac_key_get_grip): Return INV_OBJ on error.
+
+ * primegen.c (prime_generate_internal): Fixed error code for
+ failed malloc. Replaced the !err if chain by gotos.
+ (gcry_prime_group_generator): Remove the extra sanity check.
+
+ * md.c: Minor code and comment cleanups.
+
+2003-12-16 Werner Koch
+
+ * primegen.c (gen_prime): Doc fix. Thanks to Newton Hammet.
+
+2003-12-11 Werner Koch
+
+ * rndunix.c (slow_poll): Don't use #warning but #error.
+
+ * rndegd.c: Changed indentation.
+ (my_make_filename): Removed the var_arg cruft because we
+ don't need it here. Changed caller.
+
+ * rndlinux.c: Changed indentation.
+ (open_device): Remove the superfluous stat call and clarify
+ comment.
+
+ * rsa.c: Changed indentation.
+ (secret): Use the standard algorithm if p, q and u are not
+ available.
+ (rsa_blind, rsa_unblind): Renamed from _gcry_rsa_blind,
+ _gcry_rsa_unblind and moved more to the top.
+
+ * md4.c: Changed indentation. Removed unnecessary casts.
+ * md5.c, rmd160.c, sha1.c, tiger.c: Ditto.
+ * rijndael.c, twofish.c: Ditto.
+ * serpent.c: Removed unnecessary casts.
+ * sha256.c, sha512.c: Ditto.
+
+2003-12-09 Werner Koch
+
+ * dsa.c: Unified indentation style.
+ * elgamal.c: Ditto.
+ * des.c (des_key_schedule): Code beautifications.
+ * blowfish.c: Changed indentation style.
+ * cast5.c (do_cast_setkey): Ditto.
+
+ * pubkey.c (gcry_pk_encrypt): Replaced the chain of if(!err) tests
+ by straightforward gotos. Other cleanups.
+ (gcry_pk_decrypt): Ditto.
+ (gcry_pk_sign): Ditto.
+ (gcry_pk_verify): Ditto.
+ (gcry_pk_genkey): Ditto. Use strtoul instead of strtol.
+ (gcry_pk_ctl): Use GPG_ERR_INV_ARG to indicate bad arguments.
+
+2003-12-07 Werner Koch
+
+ * pubkey.c (gcry_pk_register_default): Undef the helper macro.
+ (gcry_pk_map_name): Allow NULL for string.
+ (sexp_to_key): Use memcpy and not strncpy. Use gcry_free and not
+ free.
+ (sexp_to_sig): Ditto.
+ (sexp_to_enc): Ditto. Replaced the chain of if(!err) tests by
+ straightforward gotos.
+
+2003-12-05 Werner Koch
+
+ * cipher.c: Documentation cleanups.
+ (gcry_cipher_mode_from_oid): Allow NULL for STRING.
+
+2003-12-03 Werner Koch
+
+ * elgamal.c (sign, do_encrypt, gen_k): Make sure that a small K is
+ only used for encryption.
+
+2003-11-18 Werner Koch
+
+ * random.h (rndw32_set_dll_name): Removed unused prototype.
+
+ * Makefile.am (EXTRA_DIST): Added Manifest.
+
+2003-11-11 Werner Koch
+
+ * Manifest: New.
+
+2003-11-04 Werner Koch
+
+ * md.c (gcry_md_hash_buffer): Use shortcut for SHA1.
+ * sha1.c (_gcry_sha1_hash_buffer): New.
+
+ * random.c: Reformatted most functions.
+ (mix_pool): Moved the failsafe_digest from global
+ scope to here.
+ (do_fast_random_poll): Use the generic functions even if a fast
+ gathering function has been used.
+ (read_pool): Detect a fork and retry.
+ (gcry_randomize, get_random_bytes): Don't distinguish anymore
+ between weak and strong random.
+ (gcry_create_nonce): New.
+
+2003-10-31 Werner Koch
+
+ * rndw32.c (slow_gatherer_windowsNT): Use a plain buffer for the
+ disk performance values and not the W32 API structure.
+
+ * dsa.c (verify): s/exp/ex/ due to shadowing of a builtin.
+ * elgamal.c (verify): Ditto.
+
+ * ac.c (gcry_ac_data_get_index): s/index/idx/
+ (gcry_ac_data_copy_internal): Remove the cast in _gcry_malloc.
+ (gcry_ac_data_add): Must use gcry_realloc instead of realloc.
+ * pubkey.c (sexp_elements_extract): s/index/idx/ as tribute to the
+ forehackers.
+ (gcry_pk_encrypt): Removed shadowed definition of I. Reordered
+ arguments to malloc for clarity.
+ (gcry_pk_sign, gcry_pk_genkey): Ditto.
+ * primegen.c (prime_generate_internal): s/random/randomlevel/.
+
+2003-10-27 Moritz Schulte
+
+ * pubkey.c (gcry_pk_encrypt): Don't forget to deallocate pkey.
+
+2003-10-27 Werner Koch
+
+ * random.c (gcry_random_add_bytes): Return if buflen is zero to
+ avoid gcc warning about unused parameter.
+ (MASK_LEVEL): Simplified; does now work for signed and unsigned
+ w/o warnings.
+
+ * md.c (md_start_debug): Removed the const from SUFFIX, because
+ this function is called from the control function which does not
+ require const.
+
+ Prefixed all {pubkey,digest,cipher}_spec_* global variables with
+ _gcry_.
+
+ * ac.c (ac_key_identifiers): Made static.
+
+ * random.c (getfnc_gather_random, getfnc_fast_random_poll): Move
+ prototypes to ..
+ * rand-internal.h: .. here.
+ * random.c (getfnc_gather_random): Include rndw32 gatherer.
+ * rndunix.c, rndw32.c, rndegd.c: Include them here.
+ * rndlinux.c (_gcry_rndlinux_gather_random): Prepend the _gcry_
+ prefix. Changed all callers.
+ * rndegd.c (_gcry_rndegd_gather_random): Likewise.
+ (_gcry_rndegd_connect_socket): Likewise.
+ * rndunix.c (_gcry_rndunix_gather_random): Likewise.
+ (waitpid): Made static.
+ * rndw32.c: Removed the old and unused winseed.dll cruft.
+ (_gcry_rndw32_gather_random_fast): Renamed from
+ gather_random_fast.
+ (_gcry_rndw32_gather_random): Renamed from gather_random. Note
+ that the changes from 2003-04-08 somehow got lost.
+
+ * sha512.c (sha512_init, sha384_init): Made static.
+
+ * cipher.c (do_ctr_decrypt): Removed "return" from this void
+ function.
+
+2003-10-24 Moritz Schulte
+
+ * serpent.c: Fix an issue on big-endian systems.
+
+ * rndw32.c: Removed IS_MODULE cruft.
+ * rndlinux.c (rndlinux_gather_random): Likewise.
+
+2003-10-10 Werner Koch
+
+ * primegen.c (gen_prime): Bail out if NBITS is less than 16.
+ (prime_generate_internal): Initialize prime variable to suppress
+ compiler warning. Check pbits, initialize qbits when passed as
+ zero.
+
+ * primegen.c (prime_generate_internal): New arg
+ ALL_FACTORS. Changed all callers.
+ (gcry_prime_generate): Make the factors arg optional. Request
+ all_factors. Make sure PRIME is set to NULL even on error.
+ (gcry_prime_group_generator): New.
+ (gcry_prime_release_factors): New.
+
+2003-10-06 Werner Koch
+
+ * primegen.c (gen_prime): Assert that NBITS is never zero, it
+ would cause a segv.
+
+2003-09-28 Moritz Schulte
+
+ * ac.c: Include "cipher.h".
+
+2003-09-27 Moritz Schulte
+
+ * rndegd.c (do_read): Return nread instead of nbytes; thanks to
+ Michael Caerwyn.
+
+2003-09-04 Werner Koch
+
+ * pubkey.c (_gcry_pk_aliased_algo_name): New.
+ * ac.c (gcry_ac_open): Use it here.
+
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Add serpent.c.
+
+2003-09-02 Moritz Schulte
+
+ * primegen.c (gcry_prime_check, gcry_prime_generate): New
+ functions.
+ (prime_generate_internal): New function, based on
+ _gcry_generate_elg_prime.
+ (_gcry_generate_elg_prime): Rewritten as a wrapper for
+ prime_generate_internal.
+
+2003-08-28 Werner Koch
+
+ * pubkey.c (gcry_pk_encrypt): Don't include the flags list in the
+ return value. This does not make sense and breaks any programs
+ parsing the output strictly (e.g. current gpgsm).
+ (gcry_pk_encrypt): If aliases for the algorithm name exist, take
+ the first one instead of the regular name to adhere to SPKI
+ conventions.
+ (gcry_pk_genkey): Ditto.
+ (gcry_pk_sign): Ditto. Removed unused KEY_ALGO_NAME.
+
+2003-08-19 Moritz Schulte
+
+ * cipher.c: Add support for Serpent.
+ * serpent.c: New file.
+
+2003-08-10 Moritz Schulte
+
+ * rsa.c (_gcry_rsa_blind, _gcry_rsa_unblind): Declare static.
+
+2003-08-09 Timo Schulz
+
+ * random.c (getfnc_gather_random): Don't check NAME_OF_DEV_RANDOM
+ twice; also check the NAME_OF_DEV_URANDOM device.
+
+2003-08-08 Moritz Schulte
+
+ * pubkey.c (sexp_to_enc): Fixed extraction of S-Expression: do not
+ fail if no `flags' sub S-Expression is found.
+
+2003-07-27 Werner Koch
+
+ * md.c (gcry_md_lookup_func_oid): Allow for empty OID lists.
+
+2003-07-23 Moritz Schulte
+
+ * ac.c (gcry_ac_data_construct): New argument: include_flags, only
+ include a `flags' S-expression if include_flags is true. Adjust
+ callers. Thanks to Ralf Schneider for triggering a bug caused by a
+ `flags' sub-S-expression where it is not expected.
+
+2003-07-21 Moritz Schulte
+
+ * pubkey.c (gcry_pk_lookup_func_name): Use new member name
+ `aliases' instead of `sexp_names'.
+
+ * ac.c (gcry_ac_key_data_get): New function.
+
+ * cipher.c (gcry_cipher_lookup_func_name): Fix return value.
+
+2003-07-20 Moritz Schulte
+
+ * blowfish.c: Adjusted for new gcry_cipher_spec_t structure.
+ * cast5.c: Likewise.
+ * twofish.c: Likewise.
+ * arcfour.c: Likewise.
+ * rijndael.c (rijndael_oids, rijndael192_oids, rijndael256_oids):
+ New variables, adjust for new gcry_cipher_spec_t structure.
+ * des.c (oids_tripledes): New variable, adjust for new
+ gcry_cipher_spec_t structure.
+
+ * md.c (oid_table): Removed.
+
+ * tiger.c (oid_spec_tiger): New variable.
+ (digest_spec_tiger): Adjusted for new gcry_md_spec_t structure.
+
+ * sha512.c (oid_spec_sha512): New variable.
+ (digest_spec_sha512): Adjusted for new gcry_md_spec_t structure.
+
+ * sha512.c (oid_spec_sha384): New variable.
+ (digest_spec_sha384): Adjusted for new gcry_md_spec_t structure.
+
+ * sha256.c (oid_spec_sha256): New variable.
+ (digest_spec_sha256): Adjusted for new gcry_md_spec_t structure.
+
+ * sha1.c (oid_spec_sha1): New variable.
+ (digest_spec_sha1): Adjusted for new gcry_md_spec_t structure.
+
+ * rmd160.c (oid_spec_rmd160): New variable.
+ (digest_spec_rmd160): Adjusted for new gcry_md_spec_t structure.
+
+ * md5.c (oid_spec_md5): New variable.
+ (digest_spec_md5): Adjusted for new gcry_md_spec_t structure.
+
+ * md4.c (oid_spec_md4): New variable.
+ (digest_spec_md4): Adjusted for new gcry_md_spec_t structure.
+
+ * crc.c (digest_spec_crc32, digest_spec_crc32_rfc1510,
+ digest_spec_crc32_rfc2440): Adjusted for new gcry_md_spec_t
+ structure.
+
+2003-07-19 Moritz Schulte
+
+ * md.c (gcry_md_lookup_func_oid): New function.
+ (search_oid): New function, copied from cipher.c.
+ (gcry_md_map_name): Adjust for the new search_oid interface.
+
+ * cipher.c (oid_table): Removed table.
+ (gcry_cipher_lookup_func_oid): New function.
+ (search_oid): Rewritten to use the module functions.
+ (gcry_cipher_map_name): Adjust for new search_oid interface.
+ (gcry_cipher_mode_from_oid): Likewise.
+
+2003-07-18 Werner Koch
+
+ * md.c (gcry_md_hash_buffer): Convert ERR to gpg_error_t in
+ gpg_strerror.
+
+2003-07-14 Moritz Schulte
+
+ * cipher.c (gcry_cipher_lookup_func_name): Also check the cipher
+ name aliases, not just the primary name.
+ (gcry_cipher_map_name): Remove kludge for aliasing Rijndael to
+ AES.
+
+ * arcfour.c, blowfish.c, cast5.c, des.c, twofish.c: Adjust cipher
+ specification structures.
+
+ * rijndael.c (rijndael_names, rijndael192_names,
+ rijndael256_names): New variables, use them in the cipher
+ specifications.
+
+ * rmd160test.c: Removed file.
+
+ * ac.c, arcfour.c, blowfish.c, cast5.c, cipher.c, des.c, dsa.c,
+ elgamal.c, md.c, pubkey.c, random.c, rijndael.c, rsa.c, twofish.c:
+ Used gcry_err* wrappers for libgpg symbols.
+
+ * primegen.c (gen_prime): Correct the order of arguments to
+ extra_check.
+
+2003-07-12 Moritz Schulte
+
+ * ac.c: Replaced all public occurrences of gpg_error_t with
+ gcry_error_t.
+ * cipher.c: Likewise.
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+ * random.c: Likewise.
+
+ * cipher.c: Added support for TWOFISH128.
+
+2003-07-08 Moritz Schulte
+
+ * ac.c (gcry_ac_data_copy_internal): New function, based on
+ gcry_ac_data_copy.
+ (gcry_ac_data_copy): Made public, use gcry_ac_data_copy_internal.
+ (gcry_ac_key_init): Use gcry_ac_data_copy_internal.
+
+2003-07-07 Moritz Schulte
+
+ * ac.c (gcry_ac_data_set): Only release old MPI value if it is
+ different from the new value. Bug reported by Simon Josefsson.
+
+ * pubkey.c (gcry_pk_list): New function.
+ * md.c (gcry_md_list): New function.
+
+ * ac.c (gcry_ac_key_pair_generate): Fix calculation of format
+ string size.
+
+2003-07-05 Moritz Schulte
+
+ * md.c: Named the struct of digest_table `digest_table_entry'.
+ (digest_table_entry): New member: algorithm; filled in.
+ (digest_table_entry): Removed unused member: flags.
+ (gcry_md_register): New argument: algorithm_id, filled in.
+ (gcry_md_register_default): Used algorithm ID from module
+ structure.
+ (gcry_md_map_name): Likewise.
+ (md_enable): Likewise.
+ (md_read): Likewise.
+ (gcry_md_info): Likewise.
+
+ * pubkey.c: Named the struct for pubkey_table `pubkey_table_entry'.
+ (pubkey_table_entry): New member: algorithm; filled in.
+ (gcry_pk_register_default): Used algorithm ID from pubkey_table.
+ (gcry_pk_register): New argument: algorithm_id, filled in.
+ (gcry_pk_map_name): Used algorithm ID from module structure.
+ (gcry_pk_decrypt): Likewise.
+ (gcry_pk_encrypt): Likewise.
+ (gcry_pk_verify): Likewise.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_testkey): Likewise.
+ (gcry_pk_genkey): Likewise.
+ (gcry_pk_get_nbits): Likewise.
+ (sexp_to_key): Removed unused variable: algo.
+ (sexp_to_sig): Likewise.
+
+ * cipher.c: Named the struct for cipher_table `cipher_table_entry'.
+ (cipher_table_entry): New member: algorithm; filled in.
+ (gcry_cipher_register_default): Used algorithm ID from
+ cipher_table.
+ (gcry_cipher_register): New argument: algorithm_id, filled in.
+ (gcry_cipher_map_name): Used algorithm ID from module structure.
+
+ * arcfour.c (cipher_spec_arcfour): Removed algorithm ID.
+ * blowfish.c (cipher_spec_blowfish): Likewise.
+ * cast5.c (cipher_spec_cast5): Likewise.
+ * crc.c (digest_spec_crc32): Likewise.
+ * crc.c (digest_spec_crc32_rfc1510): Likewise.
+ * crc.c (digest_spec_crc32_rfc2440): Likewise.
+ * des.c (cipher_spec_des): Likewise.
+ * des.c (cipher_spec_tripledes): Likewise.
+ * dsa.c (pubkey_spec_dsa): Likewise.
+ * elgamal.c (pubkey_spec_elg): Likewise.
+ * md4.c (digest_spec_md4): Likewise.
+ * md5.c (digest_spec_md5): Likewise.
+ * aes.c (cipher_spec_aes): Likewise.
+ * aes.c (cipher_spec_aes192): Likewise.
+ * aes.c (cipher_spec_aes256): Likewise.
+ * rsa.c (pubkey_spec_rsa): Likewise.
+ * sha1.c (digest_spec_sha1): Likewise.
+ * sha256.c (digest_spec_sha256): Likewise.
+ * sha512.c (digest_spec_sha512): Likewise.
+ * tiger.c (digest_spec_tiger): Likewise.
+ * twofish.c (cipher_spec_twofish): Likewise.
+ * twofish.c (cipher_spec_twofish128): Likewise.
+
+ * Makefile.am (EXTRA_libcipher_la_SOURCES): Fix list of source
+ files; reported by Simon Josefsson.
+
+ * pubkey.c: Replaced all occurrences of `id' with `algorithm',
+ since `id' is a keyword in obj-c.
+ * md.c: Likewise.
+ * cipher.c: Likewise.
+
+ * crc.c, md4.c, md5.c, rmd160.c, sha1.c, sha256.c, tiger.c:
+ Replaced all occurrences of gcry_digest_spec_t with gcry_md_spec_t.
+
+ * dsa.c, rsa.c, elgamal.c: Replaced all occurrences of
+ gcry_pubkey_spec_t with gcry_pk_spec_t.
+
+ * md.c: Replaced all occurrences of gcry_digest_spec_t with
+ gcry_md_spec_t.
+ (gcry_digest_register_default): Renamed to ...
+ (gcry_md_register_default): ... this; adjusted callers.
+ (gcry_digest_lookup_func_name): Renamed to ...
+ (gcry_md_lookup_func_name): ... this; adjusted callers.
+ (gcry_digest_lookup_name): Renamed to ...
+ (gcry_md_lookup_name): ... this; adjusted callers.
+ (gcry_digest_register): Renamed to ...
+ (gcry_md_register): ... this.
+ (gcry_digest_unregister): Renamed to ...
+ (gcry_md_unregister): ... this.
+
+ * pubkey.c (gcry_pubkey_register): Renamed to ...
+ (gcry_pk_register): ... this.
+ (gcry_pubkey_unregister): Renamed to ...
+ (gcry_pk_unregister): ... this.
+ Replaced all occurrences of gcry_pubkey_spec_t with gcry_pk_spec_t.
+ (gcry_pubkey_register_default): Renamed to ...
+ (gcry_pk_register_default): ... this; adjusted callers.
+ (gcry_pubkey_lookup_func_name): Renamed to ...
+ (gcry_pk_lookup_func_name): ... this; adjusted callers.
+ (gcry_pubkey_lookup_name): Renamed to ...
+ (gcry_pk_lookup_name): ... this; adjusted callers.
+
+ * md.c (gcry_md_hash_buffer): Fix error checking. Thanks to Simon
+ Josefsson.
+
+2003-07-04 Moritz Schulte
+
+ * cipher.c (gcry_cipher_list): New function.
+
+2003-07-01 Moritz Schulte
+
+ * pubkey.c (sexp_to_sig): Accept a `flags' S-expression to be more
+ consistent with sexp_to_enc.
+
+2003-06-30 Moritz Schulte
+
+ * Makefile.am (libcipher_la_SOURCES): Added: ac.c.
+
+ * pubkey.c (_gcry_pk_module_lookup): New function.
+ (_gcry_pk_module_release): New function.
+
+2003-06-29 Moritz Schulte
+
+ * ac.c: New file.
+
+2003-06-26 Werner Koch
+
+ * md.c (gcry_md_hash_buffer): Trigger BUG correctly with new API.
+
+2003-06-19 Werner Koch
+
+ * md.c (gcry_md_is_enabled): Fixed.
+
+2003-06-18 Werner Koch
+
+ * cipher.c (gcry_cipher_get_algo_keylen): New.
+ (gcry_cipher_get_algo_blklen): New.
+
+2003-06-18 Moritz Schulte
+
+ * arcfour.c, cipher.c, blowfish.c, md.c, cast5.c, pubkey.c, crc.c,
+ des.c, dsa.c, elgamal.c, md4.c, md5.c, random.c, rijndael.c,
+ rmd160.c, rsa.c, sha1.c, sha256.c, sha512.c, tiger.c, twofish.c:
+ Replaced older types GcryDigestSpec, GcryCipherSpec and
+ GcryPubkeySpec with newer types: gcry_digest_spec_t,
+ gcry_cipher_spec_t and gcry_pubkey_spec_t.
+
+ * md.c (gcry_digest_id_new): Removed function.
+ (gcry_digest_register): Removed code for generating a new module
+ ID.
+
+ * pubkey.c (gcry_pubkey_id_new): Removed function.
+ (gcry_pubkey_register): Removed code for generating a new module
+ ID.
+
+ * cipher.c, md.c, pubkey.c: Replace old type GcryModule with newer
+ one: gcry_module_t.
+ (gcry_cipher_id_new): Removed function.
+ (gcry_cipher_register): Removed code for generating a new module
+ ID.
+
+ * cipher.c (gcry_cipher_register): Adjust call to
+ _gcry_module_add.
+ (gcry_cipher_register_default): Likewise.
+ * pubkey.c (gcry_pubkey_register_default): Likewise.
+ (gcry_pubkey_register): Likewise.
+ * md.c (gcry_digest_register_default): Likewise.
+ (gcry_digest_register): Likewise.
+
+ * md.c (gcry_digest_lookup_func_id): Removed function.
+ (gcry_digest_lookup_id): Likewise.
+ (gcry_digest_id_new): Use _gcry_module_lookup_id instead of
+ gcry_digest_lookup_id.
+ (digest_algo_to_string): Likewise.
+ (check_digest_algo): Likewise.
+ (md_enable): Likewise.
+ (md_digest_length): Likewise.
+ (md_asn_oid): Likewise.
+
+ * pubkey.c (gcry_pubkey_lookup_id): Removed function.
+ (gcry_pubkey_lookup_func_id): Likewise.
+ (gcry_pubkey_id_new): Use _gcry_module_lookup_id instead of
+ gcry_pubkey_lookup_id.
+ (gcry_pk_algo_name): Likewise.
+ (disable_pubkey_algo): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_get_npkey): Likewise.
+ (pubkey_get_nskey): Likewise.
+ (pubkey_get_nsig): Likewise.
+ (pubkey_get_nenc): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (gcry_pk_algo_info): Likewise.
+
+ * cipher.c (gcry_cipher_lookup_func_id): Removed function.
+ (gcry_cipher_lookup_id): Likewise.
+ (cipher_algo_to_string): Use _gcry_module_lookup_id instead of
+ gcry_cipher_lookup_id.
+ (disable_cipher_algo): Likewise.
+ (check_cipher_algo): Likewise.
+ (cipher_get_blocksize): Likewise.
+ (gcry_cipher_open): Likewise.
+ (gcry_cipher_id_new): Likewise.
+
+2003-06-17 Moritz Schulte
+
+ * Makefile.am (GCRYPT_MODULES): Set to @GCRYPT_CIPHERS@,
+ @GCRYPT_PUBKEY_CIPHERS@, @GCRYPT_DIGESTS@ and @GCRYPT_RANDOM@.
+ (libcipher_la_DEPENDENCIES): Set to $(GCRYPT_MODULES).
+ (libcipher_la_LIBADD): Likewise.
+ (AM_CFLAGS): Added: @GPG_ERROR_CFLAGS@.
+ (EXTRA_libcipher_la_SOURCES): Added all conditional sources.
+
+ * md.c (md_open): Use _gcry_fast_random_poll instead of
+ fast_random_poll.
+ * cipher.c (gcry_cipher_open): Likewise.
+
+ * random.h (fast_random_poll): Removed macro.
+
+ * blowfish.c, md4.c, md5.c, rmd160.c, sha1.c, sha256.c, sha512.c,
+ tiger.c: Use Autoconf's WORDS_BIGENDIAN instead of our own
+ BIG_ENDIAN_HOST.
+
+2003-06-16 Moritz Schulte
+
+ * random.c (getfnc_gather_random): Do not special-case
+ USE_ALL_RANDOM_MODULES, make it the default.
+
+ * dsa.c: Replace last occurrences of old type names with newer
+ names (i.e. replace MPI with gcry_mpi_t).
+ * elgamal.c: Likewise.
+ * primegen.c: Likewise.
+ * pubkey.c: Likewise.
+ * rsa.c: Likewise.
+
+2003-06-14 Moritz Schulte
+
+ * des.c (des_setkey): Add selftest check.
+ (tripledes_set3keys): Likewise.
+ (do_tripledes_setkey): Remove selftest check.
+ (do_des_setkey): Likewise.
+
+2003-06-11 Moritz Schulte
+
+ * md.c (_gcry_md_init): New function.
+ * cipher.c (_gcry_cipher_init): New function.
+ * pubkey.c (_gcry_pk_init): New function.
+
+2003-06-13 Werner Koch
+
+ * md.c (gcry_md_get_algo): Reverted to old API. This is a
+ convenience function anyway and error checking is not appropriate.
+ (gcry_md_is_secure): New.
+ (gcry_md_is_enabled): New.
+
+2003-06-12 Werner Koch
+
+ * cipher.c (gcry_cipher_open): Make sure HANDLE is set to NULL on
+ error.
+
+2003-06-11 Werner Koch
+
+ * md.c (gcry_md_open): Make sure H receives either NULL or a
+ valid handle.
+ (gcry_md_copy): Swapped arguments so that it is more in line with
+ md_open and most other API functions like memcpy (destination
+ comes first). Make sure HANDLE is set to NULL on error.
+
+ * rijndael.c (do_encrypt): Hack to force correct alignment. It
+ seems not to be sufficient, though. We should rework these
+ functions and remove all these ugly casts. Let the compiler
+ optimize or have an assembler implementation.
+
+2003-06-09 Moritz Schulte
+
+ * Makefile.am: Removed rules for serpent, since that is not
+ committed yet.
+
+2003-06-08 Moritz Schulte
+
+ * pubkey.c (gcry_pk_encrypt): Improve calculation for size of the
+ format string.
+
+2003-06-07 Moritz Schulte
+
+ * arcfour.c, bithelp.h, blowfish.c, cast5.c, cipher.c, crc.c,
+ des.c, dsa.c, elgamal.c, md4.c, md5.c, md.c, primegen.c, pubkey.c,
+ rand-internal.h, random.c, random.h, rijndael.c, rmd160.c,
+ rmd160test.c, rmd.h, rndegd.c, rndlinux.c, rndunix.c, rndw32.c,
+ rsa.c, sha1.c, sha256.c, sha512.c, tiger.c, twofish.c: Edited all
+ preprocessor instructions to remove whitespace before the '#'.
+ This is not required by C89, but there are some compilers out
+ there that don't like it. Replaced any occurrence of the now
+ deprecated type names with the new ones.
+
+2003-06-04 Moritz Schulte
+
+ * pubkey.c (gcry_pk_encrypt): Construct an arg_list and use
+ gcry_sexp_build_array instead of gcry_sexp_build.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_genkey): Likewise.
+
+2003-06-01 Moritz Schulte
+
+ * dsa.c (_gcry_dsa_generate): Do not check whether the algorithm ID
+ does indeed belong to DSA.
+ (_gcry_dsa_sign): Likewise.
+ (_gcry_dsa_verify): Likewise.
+ (_gcry_dsa_get_nbits): Likewise.
+
+ * elgamal.c (_gcry_elg_check_secret_key): Do not check whether the
+ algorithm ID does indeed belong to ElGamal.
+ (_gcry_elg_encrypt): Likewise.
+ (_gcry_elg_decrypt): Likewise.
+ (_gcry_elg_sign): Likewise.
+ (_gcry_elg_verify): Likewise.
+ (_gcry_elg_get_nbits): Likewise.
+ (_gcry_elg_generate): Likewise.
+
+ * rsa.c (_gcry_rsa_generate): Do not check whether the algorithm ID
+ does indeed belong to RSA.
+ (_gcry_rsa_encrypt): Likewise.
+ (_gcry_rsa_decrypt): Likewise.
+ (_gcry_rsa_sign): Likewise.
+ (_gcry_rsa_verify): Likewise.
+ (_gcry_rsa_get_nbits): Likewise.
+
+2003-05-30 Moritz Schulte
+
+ * md.c (md_get_algo): Return zero in case no algorithm is enabled.
+
+ * md.c (gcry_md_info): Adjusted for new no-errno-API.
+ (md_final): Likewise.
+ (gcry_md_get_algo): Likewise.
+ * pubkey.c (gcry_pk_get_keygrip): Likewise.
+ (gcry_pk_ctl): Likewise.
+ (gcry_pk_algo_info): Likewise.
+ * des.c (selftest): Likewise.
+
+2003-05-29 Moritz Schulte
+
+ * md.c (md_enable): Do not forget to release module on error.
+ (gcry_md_open): Adjusted for new no-errno-API.
+ (md_open): Likewise.
+ (md_copy): Likewise.
+ (gcry_md_copy): Likewise.
+ (gcry_md_setkey): Likewise.
+ (gcry_md_algo_info): Likewise.
+
+ * cipher.c (gcry_cipher_open): Adjusted for new no-errno-API and
+ also fixed a locking bug.
+ (gcry_cipher_encrypt): Adjusted for new no-errno-API.
+ (gcry_cipher_decrypt): Likewise.
+ (gcry_cipher_ctl): Likewise.
+ (gcry_cipher_info): Likewise.
+ (gcry_cipher_algo_info): Likewise.
+
+2003-05-28 Moritz Schulte
+
+ * md.c (md_enable): Adjusted for libgpg-error.
+ (gcry_md_enable): Likewise.
+ (gcry_digest_register_default): Likewise.
+ (gcry_digest_register): Likewise.
+ (check_digest_algo): Likewise.
+ (prepare_macpads): Likewise.
+ (gcry_md_setkey): Likewise.
+ (gcry_md_ctl): Likewise.
+ (gcry_md_get): Likewise.
+ (gcry_md_algo_info): Likewise.
+ (gcry_md_info): Likewise.
+ * dsa.c (_gcry_dsa_generate): Likewise.
+ (_gcry_dsa_check_secret_key): Likewise.
+ (_gcry_dsa_sign): Likewise.
+ (_gcry_dsa_verify): Likewise.
+ * twofish.c (do_twofish_setkey): Likewise.
+ (twofish_setkey): Likewise.
+ * cipher.c (gcry_cipher_register): Likewise.
+
+2003-05-25 Moritz Schulte
+
+ * rijndael.c (do_setkey): Adjusted for libgpg-error.
+ (rijndael_setkey): Likewise.
+ * random.c (gcry_random_add_bytes): Likewise.
+ * elgamal.c (_gcry_elg_generate): Likewise.
+ (_gcry_elg_check_secret_key): Likewise.
+ (_gcry_elg_encrypt): Likewise.
+ (_gcry_elg_decrypt): Likewise.
+ (_gcry_elg_sign): Likewise.
+ (_gcry_elg_verify): Likewise.
+ * rsa.c (_gcry_rsa_generate): Likewise.
+ (_gcry_rsa_check_secret_key): Likewise.
+ (_gcry_rsa_encrypt): Likewise.
+ (_gcry_rsa_decrypt): Likewise.
+ (_gcry_rsa_sign): Likewise.
+ (_gcry_rsa_verify): Likewise.
+ * pubkey.c (dummy_generate, dummy_check_secret_key, dummy_encrypt,
+ dummy_decrypt, dummy_sign, dummy_verify): Likewise.
+ (gcry_pubkey_register): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (sexp_elements_extract): Likewise.
+ (sexp_to_key): Likewise.
+ (sexp_to_sig): Likewise.
+ (sexp_to_enc): Likewise.
+ (sexp_data_to_mpi): Likewise.
+ (gcry_pk_encrypt): Likewise.
+ (gcry_pk_decrypt): Likewise.
+ (gcry_pk_sign): Likewise.
+ (gcry_pk_verify): Likewise.
+ (gcry_pk_testkey): Likewise.
+ (gcry_pk_genkey): Likewise.
+ (gcry_pk_ctl): Likewise.
+ * cipher.c (dummy_setkey): Likewise.
+ (check_cipher_algo): Likewise.
+ (gcry_cipher_open): Likewise.
+ (cipher_setkey): Likewise.
+ (gcry_cipher_ctl): Likewise.
+ (cipher_encrypt): Likewise.
+ (gcry_cipher_encrypt): Likewise.
+ (cipher_decrypt): Likewise.
+ (gcry_cipher_decrypt): Likewise.
+ (gcry_cipher_info): Likewise.
+ (gcry_cipher_algo_info): Likewise.
+ * cast5.c (cast_setkey): Likewise.
+ (do_cast_setkey): Likewise.
+ * arcfour.c (arcfour_setkey): Likewise.
+ (do_arcfour_setkey): Likewise.
+ * blowfish.c (do_bf_setkey): Likewise.
+ (bf_setkey): Likewise.
+ * des.c (do_des_setkey): Likewise.
+ (do_tripledes_setkey): Likewise.
+
+2003-05-22 Moritz Schulte
+
+ * tiger.c: Merged code using the U64_C macro from GnuPG.
+
+ * sha512.c: Likewise.
+
+2003-05-17 Moritz Schulte
+
+ * pubkey.c (gcry_pk_genkey): Fix typo: acquire a lock, instead of
+ releasing it.
+
+2003-05-11 Moritz Schulte
+
+ * pubkey.c (gcry_pk_testkey): Call REGISTER_DEFAULT_CIPHERS.
+ (gcry_pk_ctl): Likewise.
+
+2003-04-27 Moritz Schulte
+
+ * pubkey.c (gcry_pk_genkey): Release sexp after extracted data has
+ been used.
+
+ * md.c (gcry_md_get_algo_dlen): Simplified, simply call
+ md_digest_length to do the job.
+
+ * des.c (do_des_setkey): Check for selftest failure not only
+ during initialization.
+ (do_tripledes_setkey): Include check for selftest failure.
+
+ * pubkey.c (gcry_pubkey_register_default): New macro
+ `pubkey_use_dummy', use it.
+
+ * elgamal.c (elg_names): New variable.
+ (pubkey_spec_elg): Include elg_names.
+
+ * dsa.c (dsa_names): New variable.
+ (pubkey_spec_dsa): Include dsa_names.
+
+ * rsa.c (rsa_names): New variable.
+ (pubkey_spec_rsa): Include rsa_names.
+
+ * pubkey.c (gcry_pubkey_lookup_func_name): Compare name also with
+ the names listed in `sexp_names'.
+
+2003-04-24 Moritz Schulte
+
+ * pubkey.c (sexp_to_key): New variables: module, pubkey. Adjusted
+ to new module interface.
+ (sexp_to_key): Changed type of argument `retalgo' from `int *' to
+ `GcryModule **'. Adjusted all callers. Removed argument:
+ r_algotblidx.
+ (sexp_to_sig): Changed type of argument `retalgo' from `int *' to
+ `GcryModule **'. Adjusted all callers.
+ (sexp_to_enc): Likewise.
+
+ (pubkey_get_npkey, pubkey_get_nskey, pubkey_get_nsig,
+ pubkey_get_nenc): Use strlen to find out the number.
+
+ * rsa.c: Adjust pubkey_spec_rsa to new internal interface.
+ * dsa.c: Likewise.
+ * elgamal.c: Likewise.
+
+2003-04-17 Moritz Schulte
+
+ * pubkey.c (sexp_elements_extract): New function.
+ * pubkey.c (sexp_to_key): Removed variable `idx', added `err', use
+ sexp_elements_extract.
+ (sexp_to_sig): Likewise.
+ (sexp_to_enc): Likewise.
+
+ * pubkey.c: Terminate list correctly.
+ * md.c: Include sha512/sha384 in digest_table.
+
+2003-04-16 Moritz Schulte
+
+ * Makefile.am: Include support for sha512.c.
+
+ * sha512.c: New file, merged from GnuPG, with a few modifications
+ for libgcrypt.
+
+ * rand-internal.h: Removed declarations for constructor functions.
+
+ * md.c (md_copy): Call _gcry_module_use for incrementing the usage
+ counter of the digest modules.
+
+ * rsa.c: Do not include "rsa.h".
+ * dsa.c: Do not include "dsa.h".
+ * elgamal.c: Do not include "elgamal.h".
+ * des.c: Do not include "des.h".
+ * cast5.c: Do not include "cast5.h".
+ * blowfish.c: Do not include "blowfish.h".
+ * arcfour.c: Do not include "arcfour.h".
+
+ * Makefile.am (libcipher_la_DEPENDENCIES): Removed.
+ (libcipher_la_LIBADD): Removed.
+ Use Automake conditionals for conditional compilation.
+
+2003-04-13 Moritz Schulte
+
+ * cipher.c (gcry_cipher_open): Call REGISTER_DEFAULT_CIPHERS.
+
+ * md.c (gcry_md_list): New member: module.
+ (md_enable): New variable: module, changed use of module and
+ digest.
+ (md_enable): Initialize member: module.
+ (md_close): Call _gcry_module_release.
+
+ * cipher.c (gcry_cipher_open): New variable: module, changed use of
+ module and cipher.
+ (struct gcry_cipher_handle): New member: module.
+ (gcry_cipher_open): Initialize member: module.
+ (gcry_cipher_close): Call _gcry_module_release.
+
+2003-04-09 Moritz Schulte
+
+ * cipher.c: Include "ath.h".
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+
+ * cipher.c (ciphers_registered_lock): New variable.
+ * md.c (digests_registered_lock): New variable.
+ * pubkey.c (pubkeys_registered_lock): New variable.
+
+ * rndlinux.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndlinux_constructor): Removed function.
+
+ * rndegd.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndegd_constructor): Removed function.
+
+ * rndunix.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndunix_constructor): Removed function.
+
+ * rndw32.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rndw32_constructor): Removed function.
+
+ * rndegd.c (rndegd_connect_socket): Simplify code for creating the
+ egd socket address.
+ (rndegd_connect_socket): Call log_fatal instead of
+ g10_log_fatal.
+ (egd_gather_random): Renamed to ...
+ (rndegd_gather_random): ... here.
+
+2003-04-08 Moritz Schulte
+
+ * rndlinux.c: Do not include "dynload.h".
+ * rndunix.c: Likewise.
+ * rndw32.c: Likewise.
+
+ * rndegd.c (rndegd_connect_socket): Factored out from ...
+ (egd_gather_random): here; call it.
+ (egd_socket): New variable.
+ (egd_gather_random): Initialize fd with egd_socket, do not declare
+ fd static.
+ (do_read): Merged a few changes from GnuPG. FIXME - not finished?
+ Do not include "dynload.h".
+
+ * rndw32.c (gather_random): Renamed to rndw32_gather_random, do
+ not declare static.
+ (gather_random_fast): Renamed to rndw32_gather_random_fast, do not
+ declare static.
+
+ * rndunix.c (gather_random): Renamed to rndunix_gather_random, do
+ not declare static.
+ * rndegd.c (gather_random): Renamed to rndegd_gather_random, do
+ not declare static.
+ * rndlinux.c (gather_random): Renamed to rndlinux_gather_random,
+ do not declare static.
+
+2003-04-07 Moritz Schulte
+
+ * Makefile.am (libcipher_la_SOURCES): Removed construct.c.
+ (libcipher_la_SOURCES): Added sha1.c, sha256.c, rmd160.c, md4.c,
+ md5.c, tiger.c and crc.c.
+ (EXTRA_PROGRAMS): Removed sha1, sha256, rmd160, md4, md5, tiger
+ and crc. Removed definitions: EXTRA_md4_SOURCES,
+ EXTRA_md5_SOURCES, EXTRA_rmd160_SOURCES, EXTRA_sha1_SOURCES,
+ EXTRA_sha256_SOURCES, EXTRA_tiger_SOURCES and EXTRA_crc_SOURCES,
+ BUILT_SOURCES, DISTCLEANFILES.
+
+ * pubkey.c: Do not include "elgamal.h", "dsa.h" and "rsa.h".
+
+ * Makefile.am (libcipher_la_SOURCES): Removed rsa.h, elgamal.h,
+ dsa.h, des.h, cast5.h, arcfour.h and blowfish.h.
+
+ * rsa.h: Removed file.
+ * elgamal.h: Removed file.
+ * dsa.h: Removed file.
+ * des.h: Removed file.
+ * cast5.h: Removed file.
+ * arcfour.h: Removed file.
+ * blowfish.h: Removed file.
+
+ * Makefile.am (libcipher_la_SOURCES): Removed dynload.c and
+ dynload.h.
+
+ * rsa.c (pubkey_spec_rsa): New variable.
+ * dsa.c (pubkey_spec_dsa): New variable.
+ * elgamal.c (pubkey_spec_elg): New variable.
+
+ * rsa.c (_gcry_rsa_get_info): Removed function.
+ * elgamal.c (_gcry_elg_get_info): Removed function.
+ * dsa.c (_gcry_dsa_get_info): Removed function.
+
+ * tiger.c (tiger_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_tiger_constructor): Removed function.
+
+ * sha1.c (sha1_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_sha1_constructor): Removed function.
+
+ * sha256.c (sha256_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_sha256_constructor): Removed function.
+
+ * rmd160.c (rmd160_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_rmd160_constructor): Removed function.
+
+ * md5.c (md5_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_md5_constructor): Removed function.
+
+ * md4.c (md4_get_info): Removed function.
+ (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+ (_gcry_md4_constructor): Removed function.
+
+ * crc.c (crc_get_info): Removed function.
+
+ * arcfour.c (do_arcfour_setkey): Changed type of context argument
+ to `void *', added local variable for cast, adjusted callers.
+ (arcfour_setkey): Likewise.
+ (encrypt_stream): Likewise.
+ * cast5.c (cast_setkey): Likewise.
+ (encrypt_block): Likewise.
+ * rijndael.c (rijndael_setkey): Likewise.
+ (rijndael_encrypt): Likewise.
+ (rijndael_decrypt): Likewise.
+ * twofish.c (twofish_setkey): Likewise.
+ (twofish_encrypt): Likewise.
+ (twofish_decrypt): Likewise.
+ * des.c (do_des_setkey): Likewise.
+ (do_des_encrypt): Likewise.
+ (do_des_decrypt): Likewise.
+ (do_tripledes_encrypt): Likewise.
+ (do_tripledes_decrypt): Likewise.
+ * blowfish.c (bf_setkey): Likewise.
+ (encrypt_block): Likewise.
+ (decrypt_block): Likewise.
+
+ * arcfour.c (encrypt_stream): Likewise.
+
+ * rijndael.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+
+ * twofish.c (gnupgext_version, func_table): Removed definitions.
+ (gnupgext_enum_func): Removed function.
+
+ * cast5.c (CIPHER_ALGO_CAST5): Removed.
+
+ * blowfish.c (FNCCAST_SETKEY, FNCCAST_CRYPT): Removed macros.
+ (CIPHER_ALGO_BLOWFISH): Removed symbol.
+ * cast5.c (FNCCAST_SETKEY, FNCCAST_CRYPT): Likewise.
+ * des.c (selftest_failed): Removed.
+ (initialized): New variable.
+ (do_des_setkey): Run selftest, if not yet done.
+ (FNCCAST_SETKEY, FNCCAST_CRYPT): Removed macros.
+
+ * arcfour.c (_gcry_arcfour_get_info): Removed function.
+ * blowfish.c (_gcry_blowfish_get_info): Removed function.
+ * cast5.c (_gcry_cast5_get_info): Removed function.
+ * des.c (_gcry_des_get_info): Removed function.
+ * rijndael.c (_gcry_rijndael_get_info): Removed function.
+ * twofish.c (_gcry_twofish_get_info): Removed function.
+
+ * arcfour.c (cipher_spec_arcfour): New variable.
+ * twofish.c (cipher_spec_twofish, cipher_spec_twofish128): New
+ variables.
+ * rijndael.c (cipher_spec_aes, cipher_spec_aes192,
+ cipher_spec_aes256): New variables.
+ * des.c (cipher_spec_des, cipher_spec_tripledes): New variables.
+ * cast5.c (cipher_spec_cast5): New variable.
+ * blowfish.c (cipher_spec_blowfish): Likewise.
+
+ * twofish.c: Do not include "dynload.h".
+ * rijndael.c: Likewise.
+ * des.c: Likewise.
+ * cast5.c: Likewise.
+ * blowfish.c: Likewise.
+ * cipher.c: Likewise.
+ * crc.c: Likewise.
+ * md4.c: Likewise.
+ * md5.c: Likewise.
+ * md.c: Likewise.
+ * pubkey.c: Likewise.
+ * rijndael.c: Likewise.
+ * sha1.c: Likewise.
+ * sha256.c: Likewise.
+
+ * arcfour.c: Include "cipher.h".
+ * twofish.c: Likewise.
+ * rijndael.c: Likewise.
+ * des.c: Likewise.
+ * cast5.c: Likewise.
+ * blowfish.c: Likewise.
+
+ * twofish.c (twofish_setkey): Declared argument `key' const.
+ (twofish_encrypt): Declared argument `inbuf' const.
+ (twofish_decrypt): Likewise.
+
+ * rijndael.c (rijndael_setkey): Declared argument `key' const.
+ (rijndael_encrypt): Declared argument `inbuf' const.
+ (rijndael_decrypt): Likewise.
+
+ * des.c (do_des_setkey): Declared argument `key' const.
+ (do_tripledes_setkey): Likewise.
+ (do_des_encrypt): Declared argument `inbuf' const.
+ (do_des_decrypt): Likewise.
+ (do_tripledes_encrypt): Likewise.
+ (do_tripledes_decrypt): Likewise.
+
+ * cast5.c (encrypt_block): Declared argument `inbuf' const.
+ (decrypt_block): Likewise.
+ (cast_setkey): Declared argument `key' const.
+
+ * blowfish.c (do_bf_setkey): Declared argument `key' const.
+ (encrypt_block): Declared argument `inbuf' const.
+ (decrypt_block): Likewise.
+
+ * cipher.c: Remove CIPHER_ALGO_DUMMY related code.
+ Removed struct cipher_table_s.
+ Changed definition of cipher_table.
+ Removed definition of disabled_algos.
+ (ciphers_registered, default_ciphers_registered): New variables.
+ (REGISTER_DEFAULT_CIPHERS): New macro.
+ (dummy_setkey): Declared argument `key' const.
+ (dummy_encrypt_block): Declared argument `inbuf' const.
+ (dummy_decrypt_block): Likewise.
+ (dummy_encrypt_stream): Likewise.
+ (dummy_decrypt_stream): Likewise.
+ (dummy_setkey): Use `unsigned char' instead of `byte'.
+ (dummy_encrypt_block): Likewise.
+ (dummy_decrypt_block): Likewise.
+ (dummy_encrypt_stream): Likewise.
+ (dummy_decrypt_stream): Likewise.
+ (gcry_cipher_register_default): New function.
+ (gcry_cipher_lookup_func_id): New function.
+ (gcry_cipher_lookup_func_name): New function.
+ (gcry_cipher_lookup_id): New function.
+ (gcry_cipher_lookup_name): New function.
+ (gcry_cipher_id_new): New function.
+ (gcry_cipher_register): New function.
+ (gcry_cipher_unregister): New function.
+ (setup_cipher_table): Removed function.
+ (load_cipher_modules): Removed function.
+ (gcry_cipher_map_name): Adjusted to use new module management.
+ (cipher_algo_to_string): Likewise.
+ (disable_cipher_algo): Likewise.
+ (check_cipher_algo): Likewise.
+ (cipher_get_keylen): Likewise.
+ (cipher_get_blocksize): Likewise.
+ (gcry_cipher_open): Likewise.
+ (struct gcry_cipher_handle): Replaced members algo, algo_index,
+ blocksize, setkey, encrypt, decrypt, stencrypt, stdecrypt with one
+ member: cipher.
+ (gcry_cipher_open): Adjusted code for new handle structure.
+ (cipher_setkey): Likewise.
+ (cipher_setiv): Likewise.
+ (cipher_reset): Likewise.
+ (do_ecb_encrypt): Likewise.
+ (do_ecb_decrypt): Likewise.
+ (do_cbc_encrypt): Likewise.
+ (do_cbc_decrypt): Likewise.
+ (do_cfb_encrypt): Likewise.
+ (do_cfb_decrypt): Likewise.
+ (do_ctr_encrypt): Likewise.
+ (cipher_encrypt): Likewise.
+ (gcry_cipher_encrypt): Likewise.
+ (cipher_decrypt): Likewise.
+ (gcry_cipher_decrypt): Likewise.
+ (cipher_sync): Likewise.
+ (gcry_cipher_ctl): Likewise.
+
+ * pubkey.c: Removed struct pubkey_table_s.
+ Changed definition of pubkey_table.
+ Removed definition of disabled_algos.
+ (pubkeys_registered, default_pubkeys_registered): New variables.
+ (REGISTER_DEFAULT_PUBKEYS): New macro.
+ (setup_pubkey_table): Removed function.
+ (load_pubkey_modules): Removed function.
+ (gcry_pubkey_register_default): New function.
+ (gcry_pubkey_lookup_func_id): New function.
+ (gcry_pubkey_lookup_func_name): New function.
+ (gcry_pubkey_lookup_id): New function.
+ (gcry_pubkey_lookup_name): New function.
+ (gcry_pubkey_id_new): New function.
+ (gcry_pubkey_register): New function.
+ (gcry_pubkey_unregister): New function.
+ (gcry_pk_map_name): Adjusted to use new module management.
+ (gcry_pk_algo_name): Likewise.
+ (disable_pubkey_algo): Likewise.
+ (check_pubkey_algo): Likewise.
+ (pubkey_get_npkey): Likewise.
+ (pubkey_get_nskey): Likewise.
+ (pubkey_get_nsig): Likewise.
+ (pubkey_get_nenc): Likewise.
+ (pubkey_generate): Likewise.
+ (pubkey_check_secret_key): Likewise.
+ (pubkey_encrypt): Likewise.
+ (pubkey_decrypt): Likewise.
+ (pubkey_sign): Likewise.
+ (pubkey_verify): Likewise.
+ (gcry_pk_get_nbits): Likewise.
+ (gcry_pk_algo_info): Likewise.
+
+ * md.c: Removed struct md_digest_list_s.
+ (digest_list): Changed definition.
+ (digests_registered, default_digests_registered): New variables.
+ (REGISTER_DEFAULT_DIGESTS): New macro.
+ (new_list_item): Removed function.
+ (setup_md_table): Removed function.
+ (load_digest_module): Removed function.
+ (gcry_digest_register_default): New function.
+ (gcry_digest_lookup_func_id): New function.
+ (gcry_digest_lookup_func_name): New function.
+ (gcry_digest_lookup_id): New function.
+ (gcry_digest_lookup_name): New function.
+ (gcry_digest_id_new): New function.
+ (gcry_digest_register): New function.
+ (gcry_digest_unregister): New function.
+ (GcryDigestEntry): New type.
+ (struct gcry_md_context): Adjusted type of `list'.
+ (gcry_md_map_name): Adjusted to use new module management.
+ (digest_algo_to_string): Likewise.
+ (check_digest_algo): Likewise.
+ (md_enable): Likewise.
+ (md_digest_length): Likewise.
+ (md_asn_oid): Likewise.
+
+2003-04-07 Moritz Schulte
+
+ * pubkey.c: Replaced PUBKEY_ALGO_DSA with GCRY_PK_DSA,
+ PUBKEY_ALGO_RSA with GCRY_PK_RSA and PUBKEY_ALGO_ELGAMAL with
+ GCRY_PK_ELG.
+
+ * dsa.c: Replaced PUBKEY_ALGO_DSA with GCRY_PK_DSA.
+
+2003-04-01 Moritz Schulte
+
+ * des.c: Removed checks for GCRY_CIPHER_3DES and GCRY_CIPHER_DES.
+
+2003-03-31 Moritz Schulte
+
+ * tiger.c (tiger_get_info): Do not declare static.
+ * sha256.c (sha256_get_info): Likewise.
+ * sha1.c (sha1_get_info): Likewise.
+ * rmd160.c (rmd160_get_info): Likewise.
+ * md5.c (md5_get_info): Likewise.
+ * md4.c (md4_get_info): Likewise.
+ * crc.c (crc_get_info): Likewise.
+
+ * md.c (load_digest_module): Call setup_md_table during
+ initialization.
+ (new_list_item): Link new element into digest_list.
+
+ * cipher.c (do_ctr_decrypt): Made do_ctr_decrypt act as a wrapper
+ for do_ctr_encrypt, since these functions are identical.
+
+2003-03-30 Simon Josefsson
+
+ * cipher.c (struct gcry_cipher_handle): Add counter field.
+ (gcry_cipher_open): Add CTR.
+ (cipher_reset): Clear counter field.
+ (do_ctr_encrypt, do_ctr_decrypt): New functions.
+ (cipher_encrypt, cipher_decrypt): Call CTR functions.
+ (gcry_cipher_ctl): Add SET_CTR to set counter.
+
+2003-03-30 Moritz Schulte
+
+ * rsa.c (_gcry_rsa_blind): New function.
+ (_gcry_rsa_unblind): New function.
+ (_gcry_rsa_decrypt): Use _gcry_rsa_blind and _gcry_rsa_unblind.
+
+2003-03-26 Moritz Schulte
+
+ * dynload.c (_gcry_enum_gnupgext_pubkeys): Adjust `encrypt' and
+ `decrypt' function arguments.
+ (_gcry_enum_gnupgext_pubkeys): Likewise.
+ * dynload.h: Likewise.
+
+ * pubkey.c (dummy_decrypt): Add argument: int flags.
+ (dummy_encrypt): Likewise.
+
+ * elgamal.c (_gcry_elg_encrypt): Add argument: int flags.
+ (_gcry_elg_decrypt): Likewise.
+
+ * rsa.c (_gcry_rsa_encrypt): Add argument: int flags.
+ (_gcry_rsa_decrypt): Likewise.
+
+ * pubkey.c: Add `flags' argument to members `encrypt' and
+ `decrypt' of struct `pubkey_table_s'.
+
+ * rsa.h: Add `flags' argument to function declarations.
+ * elgamal.h: Likewise.
+
+ * pubkey.c (sexp_data_to_mpi): New variable: int parsed_flags.
+ (sexp_data_to_mpi): Set `parsed_flags'.
+ (sexp_data_to_mpi): New argument: int *flags.
+ (gcry_pk_encrypt): New variable: int flags.
+ (gcry_pk_encrypt): Pass `flags' to pubkey_encrypt.
+ (pubkey_encrypt): New variable: int flags.
+ (pubkey_encrypt): Pass `flags' to pubkey encrypt function.
+ (pubkey_decrypt): Likewise.
+ (pubkey_decrypt): Pass `flags' to pubkey decrypt function.
+ (gcry_pk_encrypt): Include `flags' s-exp in return list.
+ (sexp_to_enc): New argument: int *flags.
+ (gcry_pk_decrypt): New variable: int flags.
+ (gcry_pk_decrypt): Pass `flags' to pubkey_decrypt.
+ (sexp_to_enc): New variable: int parsed_flags.
+ (sexp_to_enc): Set `parsed_flags'.
+
+2003-03-22 Simon Josefsson
+
+ * cipher.c (gcry_cipher_open, do_cbc_encrypt)
+ (gcry_cipher_encrypt): Support GCRY_CIPHER_CBC_MAC.
+ (gcry_cipher_ctl): Support GCRYCTL_SET_CBC_MAC.
+
+2003-03-19 Werner Koch
+
+ * primegen.c (gen_prime): New args EXTRA_CHECK and EXTRA_CHECK_ARG
+ to allow for a user callback. Changed all callers.
+ (_gcry_generate_secret_prime)
+ (_gcry_generate_public_prime): Ditto, pass them to gen_prime.
+ * rsa.c (check_exponent): New.
+ (generate): Use a callback to ensure that a given exponent is
+ actually generated.
+
+2003-03-12 Moritz Schulte
+
+ * primegen.c: Initialize `no_of_small_prime_numbers' statically.
+ (gen_prime): Remove calculation of `no_of_small_prime_numbers'.
+
+2003-03-03 Moritz Schulte
+
+ * md.c (gcry_md_ctl): Rewritten to use the same style as the other
+ function dispatchers.
+
+2003-03-02 Moritz Schulte
+
+ * cipher.c (struct gcry_cipher_handle): New member: algo_index.
+ (gcry_cipher_open): Allocate memory for two cipher contexts.
+ Initialize algo_index.
+ (cipher_setkey): Duplicate context into reserved memory.
+ (cipher_reset): New function, which resets the context and clears
+ the IV.
+ (gcry_cipher_ctl): Call cipher_reset.
+
+2003-02-23 Moritz Schulte
+
+ * cipher.c: Remove (bogus) `digitp' macro definition.
+ * md.c: Likewise.
+
+ * blowfish.c (burn_stack): Removed.
+ * arcfour.c (burn_stack): Likewise.
+ * cast5.c (burn_stack): Likewise.
+ * des.c (burn_stack): Likewise.
+ * md4.c (burn_stack): Likewise.
+ * md5.c (burn_stack): Likewise.
+ * random.c (burn_stack): Likewise.
+ * rijndael.c (burn_stack): Likewise.
+ * rmd160.c (burn_stack): Likewise.
+ * sha1.c (burn_stack): Likewise.
+ * sha256.c (burn_stack): Likewise.
+ * tiger.c (burn_stack): Likewise.
+ * twofish.c (burn_stack): Likewise.
+
+ * blowfish.c: Changed all occurrences of burn_stack to
+ _gcry_burn_stack.
+ * arcfour.c: Likewise.
+ * cast5.c: Likewise.
+ * des.c: Likewise.
+ * md4.c: Likewise.
+ * md5.c: Likewise.
+ * random.c: Likewise.
+ * rijndael.c: Likewise.
+ * rmd160.c: Likewise.
+ * sha1.c: Likewise.
+ * sha256.c: Likewise.
+ * tiger.c: Likewise.
+ * twofish.c: Likewise.
+
+ * arcfour.c (_gcry_arcfour_get_info): Use GCRY_CIPHER_ARCFOUR
+ instead of hard-coded value `301'.
+
+2003-01-24 Werner Koch
+
+ * random.c (_gcry_register_random_progress): New.
+ (_gcry_random_progress): New.
+
+ * rndlinux.c (gather_random): Call the random progress function.
+
+2003-01-23 Werner Koch
+
+ * rsa.c (generate): New arg USE_E to request a specific public
+ exponent.
+ (_gcry_rsa_generate): Ditto.
+ * elgamal.c (_gcry_elg_generate): Must add a dummy argument
+ instead of USE_E.
+ * dsa.c (_gcry_dsa_generate): Ditto.
+ * pubkey.c (dummy_generate): Ditto.
+ (pubkey_generate): Add USE_E arg and pass it down.
+ (gcry_pk_genkey): Detect "rsa-use-e" parameter and pass it to generate.
+
+ * pubkey.c (sexp_to_enc): New arg RET_MODERN.
+ (gcry_pk_decrypt): Make use of it to return a real S-expression.
+ Return better error codes.
+ (gcry_pk_verify): Return better error codes.
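+
+ For the CTR entries above (2003-03-30, and the 2003-03-31 change that
+ turned do_ctr_decrypt into a wrapper): counter mode encrypts a counter
+ and XORs the result into the data, so encryption and decryption are the
+ same operation. A minimal C sketch under invented names and types, not
+ libgcrypt's actual interface:
+
+    /* Illustrative CTR-mode loop; block_encrypt_t and all names are
+       assumptions made for this sketch.  Assumes blocksize <= 16.  */
+    #include <stddef.h>
+
+    typedef void (*block_encrypt_t) (void *ctx, unsigned char *out,
+                                     const unsigned char *in);
+
+    static void
+    ctr_crypt (void *ctx, block_encrypt_t encrypt_block, size_t blocksize,
+               unsigned char *ctr, unsigned char *out,
+               const unsigned char *in, size_t nblocks)
+    {
+      unsigned char keystream[16];
+      size_t n, i;
+
+      for (n = 0; n < nblocks; n++)
+        {
+          /* Encrypt the counter to get one keystream block.  */
+          encrypt_block (ctx, keystream, ctr);
+          /* XOR the keystream into the data; the same XOR undoes it,
+             which is why decrypt can simply call encrypt.  */
+          for (i = 0; i < blocksize; i++)
+            out[n * blocksize + i] = in[n * blocksize + i] ^ keystream[i];
+          /* Increment the counter as a big-endian integer.  */
+          for (i = blocksize; i > 0; i--)
+            if (++ctr[i - 1])
+              break;
+        }
+    }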
+
+2003-01-21 Werner Koch
+
+ * random.c (gcry_random_add_bytes): Add QUALITY argument, let
+ the function return an error code and disable its core for now.
+
+2003-01-21 Timo Schulz
+
+ * random.c (gcry_random_add_bytes): New. Function to add external
+ random to the pool.
+
+2003-01-20 Simon Josefsson
+
+ * crc.c: New.
+ * Makefile.am (EXTRA_PROGRAMS, EXTRA_crc_SOURCES): Add crc.c.
+ * md.c (gcry_md_get_algo_dlen): Add values for CRC.
+
+2003-01-20 Werner Koch
+
+ * sha256.c: New.
+ * bithelp.h (ror): New.
+ * Makefile.am: Add sha256.c.
+ * md.c (oid_table): Add values for SHA256 et al.
+ (gcry_md_get_algo_dlen): Likewise.
+
+2003-01-20 Werner Koch
+
+ * pubkey.c (gcry_pk_get_keygrip): Implemented keygrips for DSA
+ and ElGamal.
+
+2003-01-17 Werner Koch
+
+ * cipher.c (gcry_cipher_encrypt): Reworked so that the output will
+ never contain the plaintext even if the caller did not check the
+ return value.
+
+ * md.c (gcry_md_get_algo): Changed error code to GCRYERR_GENERAL
+ because the problem is not an invalid md algo but that no
+ algorithm is enabled.
+
+ * pubkey.c (gcry_pk_genkey): Changed error code for bounds check
+ of table parameters to GCRYERR_INTERNAL.
+
+ * md.c (gcry_md_open): Partly reverted Timo's change from
+ 2002-10-10 by removing the check for the algorithm. An algorithm
+ of 0 is allowed and anyway we should not double check it or check
+ it using a different function. Also fixed the flags check.
+
+ * pubkey.c (gcry_pk_encrypt): Make sure that R_CIPH points to NULL
+ on error.
+ (gcry_pk_decrypt): Ditto for R_PLAIN.
+ (gcry_pk_sign): Ditto for R_SIG.
+ (gcry_pk_genkey): Ditto for R_KEY.
+
+2003-01-16 Werner Koch
+
+ * md.c (gcry_md_write): Changed 2nd argument type to void*.
+ (gcry_md_hash_buffer): Changed type of both buffers to void*.
+ (gcry_md_setkey): Changed 2nd argument type to void*.
+
+2003-01-15 Werner Koch
+
+ * pubkey.c (sexp_data_to_mpi): New. This handles pkcs1 padding.
+ (gcry_pk_sign, gcry_pk_verify): Use it here.
+ (gcry_pk_encrypt): And here.
+ (pubkey_verify): Add debug code.
+ (sexp_to_enc): Handle flags in the input and return the pkcs1 flag
+ in a new parameter.
+ (gcry_pk_decrypt): Prepare for future pkcs1 handling.
+
+2002-12-19 Werner Koch
+
+ * random.c (_gcry_random_initialize): New.
+
+2002-12-16 Werner Koch
+
+ * cipher.c: Added a Teletrust specific OID for 3DES.
+
+2002-12-12 Werner Koch
+
+ * md.c: Added another oddball OIW OID (sha-1WithRSAEncryption).
+
+2002-11-23 Werner Koch
+
+ * md.c (load_digest_module): Enlarged checked_algos bitmap.
+ * md4.c (func_table): Fixed entry for md4.
+ Both by Simon Josefsson.
+ (transform): Copy data to get the alignment straight. Tested only
+ on i386.
+
+2002-11-10 Simon Josefsson
+
+ * cipher.c (gcry_cipher_open): Don't reject CTS flag.
+ (do_cbc_encrypt, do_cbc_decrypt, cipher_encrypt)
+ (gcry_cipher_encrypt, cipher_decrypt)
+ (gcry_cipher_decrypt): Support CTS flag.
+ (gcry_cipher_ctl): Toggle CTS flag.
+
+2002-11-10 Werner Koch
+
+ * md4.c: New. By Simon Josefsson.
+ * Makefile.am (EXTRA_PROGRAMS): Add md4.c.
+ * md.c (oid_table,gcry_md_get_algo_dlen): MD4 support.
+
+2002-10-14 Werner Koch
+
+ * arcfour.c (do_encrypt_stream): Don't use increment op when
+ assigning to the same variable.
+
+2002-10-10 Timo Schulz
+
+ * pubkey.c (gcry_pk_genkey): Check boundaries.
+
+ * md.c (gcry_md_open): Check that algo is available and only
+ valid flag values are used.
+ (gcry_md_get_algo): Add error handling.
+
+2002-09-26 Werner Koch
+
+ * md.c: Include an OID for TIGER.
+ * tiger.c (tiger_get_info): Use a regular OID.
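+
+ The pkcs1 padding handled by the new sexp_data_to_mpi (2003-01-15
+ above) follows the standard PKCS#1 v1.5 block format. A generic sketch
+ of the block-type-01 (signature) framing; the function name and the
+ 0/-1 return convention are invented for this illustration:
+
+    /* Build 00 01 FF..FF 00 || DigestInfo || digest into FRAME of
+       NFRAME bytes.  Returns 0 on success, -1 if it does not fit.  */
+    #include <stddef.h>
+    #include <string.h>
+
+    static int
+    pkcs1_type1_pad (unsigned char *frame, size_t nframe,
+                     const unsigned char *asn, size_t asnlen,
+                     const unsigned char *digest, size_t digestlen)
+    {
+      size_t pslen;
+
+      if (nframe < asnlen + digestlen + 11)
+        return -1;                      /* Key (frame) too short.  */
+      pslen = nframe - asnlen - digestlen - 3;
+      frame[0] = 0x00;
+      frame[1] = 0x01;                  /* Block type 1 (signing).  */
+      memset (frame + 2, 0xff, pslen);  /* At least 8 padding bytes.  */
+      frame[2 + pslen] = 0x00;
+      memcpy (frame + 3 + pslen, asn, asnlen);   /* DigestInfo ASN.1.  */
+      memcpy (frame + 3 + pslen + asnlen, digest, digestlen);
+      return 0;
+    }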
+
+2002-09-17 Werner Koch
+
+ * random.c: Replaced mutex.h by the new ath.h. Changed all calls.
+
+2002-09-16 Werner Koch
+
+ * arcfour.c (do_encrypt_stream): Use register modifier and modulo.
+ According to Nikos Mavroyanopoulos this increases performance on
+ i386 systems noticeably. And I always thought gcc is clever enough.
+ * md5.c (transform): Use register modifier.
+ * rmd160.c (transform): Ditto.
+ * sha1.c (transform): Ditto. We hope that there are 6 free registers.
+ * random.c (gcry_randomize): Rewrote to avoid malloc calls.
+
+ * rndlinux.c (gather_random): Replaced remaining fprintfs by log_*.
+ * arcfour.c (do_arcfour_setkey): Ditto.
+ * twofish.c (do_twofish_setkey): Ditto.
+ * rndegd.c (gather_random): Ditto.
+ * rijndael.c (do_setkey): Ditto.
+ * random.c (_gcry_random_dump_stats): Ditto.
+ * primegen.c (_gcry_generate_elg_prime): Ditto.
+ * des.c (_gcry_des_get_info): Ditto.
+ * cast5.c (do_cast_setkey): Ditto.
+ * blowfish.c (do_bf_setkey): Ditto.
+
+2002-08-26 Werner Koch
+
+ * des.c (weak_keys): Fixed one entry in the table and compared
+ all entries against the literature.
+ (selftest): Checksum the weak key table.
+
+2002-08-21 Werner Koch
+
+ * pubkey.c: Enable keygrip calculation for "openpgp-rsa".
+
+2002-08-17 Werner Koch
+
+ * cipher.c (setup_cipher_table): Don't overwrite the DES entry
+ with the entry for DUMMY.
+
+2002-08-14 Werner Koch
+
+ * des.c (do_des_setkey,do_des_encrypt, do_des_decrypt): New.
+ (_gcry_des_get_info): Support plain old DES.
+ * cipher.c (setup_cipher_table): Put DES into the table.
+
+2002-07-25 Werner Koch
+
+ * rndunix.c (_gcry_rndunix_constructor): Prefixed with _gcry_.
+ Noted by Stephan Austermuehle.
+
+2002-07-08 Timo Schulz
+
+ * rndw32.c: Replaced the m_ memory functions with the real
+ gcry_ functions. Renamed all g10_ prefixed functions to log_.
+
+2002-06-12 Werner Koch
+
+ * rsa.c (generate): Use e = 65537 for now.
+
+2002-06-11 Werner Koch
+
+ * pubkey.c (gcry_pk_get_keygrip): Allow a "protected-private-key".
+
+2002-06-05 Timo Schulz
+
+ * cipher.c (gcry_cipher_encrypt, gcry_cipher_decrypt):
+ Check that the input size is a multiple of the blocksize.
+
+2002-05-23 Werner Koch
+
+ * md.c (oid_table): Add an rsadsi OID for MD5.
+
+2002-05-21 Werner Koch
+
+ * primegen.c, elgamal.c, dsa.c (progress): Do not print anything
+ by default. Pass an extra identifying string to the callback and
+ reserved 2 arguments for current and total counters. Changed the
+ register function prototype.
+
+2002-05-17 Werner Koch
+
+ * rndegd.c (rndegd_constructor): Fixed name of register function
+ and prefixed the function name with _gcry_.
+ * rndw32.c (rndw32_constructor): Ditto.
+ * tiger.c (tiger_constructor): Ditto.
+
+ * Makefile.am: Removed all dynamic loading stuff.
+ * dynload.c: Ditto. Now only used for the constructor system.
+
+2002-05-15 Werner Koch
+
+ * random.c (gcry_random_bytes,gcry_random_bytes_secure)
+ (gcry_randomize): Make sure we are initialized.
+
+2002-05-14 Werner Koch
+
+ Changed license of most files to the LGPL.
+
+2002-05-02 Werner Koch
+
+ * random.c (_gcry_fast_random_poll): Initialize the module so the
+ mutex can be used.
+
+ * primegen.c (small_prime_numbers): Moved table from smallprime.c
+ * smallprime.c: File removed.
+
+ * des.c (leftkey_swap, rightkey_swap, working_memcmp): Made static.
+
+ * cipher.c (gcry_cipher_map_name): Map "RIJNDAEL" to "AES".
+ * rijndael.c (rijndael_get_info): We do only support a 128 bit
+ blocksize so it makes sense to change the algorithm strings to
+ AES.
+
+ * tiger.c (tiger_final): Removed superfluous token pasting operators.
+ * md5.c (md5_final): Ditto.
+
+2002-04-30 Werner Koch
+
+ * cipher.c: Fixed list of copyright years.
+
+2002-03-18 Werner Koch
+
+ * random.c (initialize): Initialize the new pool lock mutex.
+ (_gcry_fast_random_poll): Add locking and moved main
+ code out to...
+ (do_fast_random_poll): new function.
+ (read_pool): Use the new function here.
+ (get_random_bytes): Add locking.
+ (_gcry_update_random_seed_file): Ditto.
+
+2002-03-11 Werner Koch
+
+ * md.c: Add rsaSignatureWithripemd160 to OID table.
+
+2002-02-20 Werner Koch
+
+ * sha1.c: Removed a left over comment note. The code has been
+ rewritten from scratch in 1998. Thanks to Niels Möller for
+ reporting this misleading comment.
+
+2002-02-18 Werner Koch
+
+ * rndunix.c (rndunix_constructor): Use the new prefixed
+ function name. Reported by Jordi Mallach.
+
+2002-02-10 Werner Koch
+
+ * random.c (mix_pool): Carry an extra failsafe_digest buffer
+ around to make the function more robust.
+
+2002-02-08 Werner Koch
+
+ * random.c (add_randomness): Xor new data into the pool and not
+ just copy it. This avoids any chosen-input attacks which are not
+ serious in our setting because an outsider won't be able to mix
+ data in and even then we keep going with a PRNG. Thanks to Stefan
+ Keller for pointing this out.
+
+2002-01-04 Werner Koch
+
+ * pubkey.c (gcry_pk_genkey): Do not release skey - it is static.
+
+ * primegen.c (gen_prime): Of course we should use set_bit
+ and not set_highbit to set the second high bit.
+
+2001-12-18 Werner Koch
+
+ * rsa.c (generate): Loop until we find the exact modulus size.
+ Changed the exponent to 41.
+ (rsa_get_info): s/usage/r_usage/ to avoid shadow warnings.
+ * primegen.c (gen_prime): Set 2 high order bits for secret primes.
+
+ * Makefile.am (DISTCLEANFILES): Include construct.c.
+
+2001-12-17 Werner Koch
+
+ * pubkey.c (gcry_pk_get_keygrip): New - experimental.
+
+2001-12-11 Werner Koch
+
+ * cipher.c: Added OIDs for AES.
+ (gcry_cipher_mode_from_oid): New.
+ (gcry_cipher_map_name): Moved OID search code to ..
+ (search_oid): .. new function.
+
+2001-12-10 Werner Koch
+
+ * pubkey.c (gcry_pk_encrypt): Find the signature algorithm by name
+ and not by number.
+
+ * pubkey.c (gcry_pk_encrypt,gcry_pk_decrypt,gcry_pk_sign)
+ (gcry_pk_verify,gcry_pk_testkey, gcry_pk_genkey)
+ (gcry_pk_get_nbits): Release the arrays. Noted by Nikos
+ Mavroyanopoulos.
+
+2001-12-06 Werner Koch
+
+ * cipher.c (gcry_cipher_map_name): Look also for OIDs prefixed
+ with "oid." or "OID.".
+
+2001-12-05 Werner Koch
+
+ * pubkey.c (algo_info_table): Fixed entry for openpgp-rsa.
+
+2001-11-24 Werner Koch
+
+ * pubkey.c: Added the rsaEncryption OID to the tables.
+ (sexp_to_key): Add an arg to return the index of the algorithm,
+ changed all callers.
+ (gcry_pk_sign): Find the signature algorithm by name and not by
+ number.
+ (gcry_pk_get_nbits): Fixed so that we can now really pass a secret
+ key to get the result.
+
+ * md.c (gcry_md_map_name): Look also for OIDs prefixed with "oid."
+ or "OID." so that an OID string can be used as an S-Exp token.
+
+2001-11-20 Werner Koch
+
+ * md.c (gcry_md_map_name): Lookup by OID if the name begins
+ with a digit.
+ (oid_table): New.
+
+2001-11-16 Werner Koch
+
+ * md.c (gcry_md_info): New operator GCRYCTL_IS_ALGO_ENABLED.
+
+2001-11-07 Werner Koch
+
+ * md.c (gcry_md_hash_buffer): Close the handle which was left open
+ for algorithms other than rmd160.
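+
+ The add_randomness change above (2002-02-08) is easiest to see in
+ code. A minimal sketch assuming a simple circular pool; POOLSIZE,
+ pool and pool_writepos are illustrative stand-ins, not libgcrypt's
+ real state:
+
+    #include <stddef.h>
+
+    #define POOLSIZE 600
+
+    static unsigned char pool[POOLSIZE];
+    static size_t pool_writepos;
+
+    static void
+    add_randomness (const void *buffer, size_t length)
+    {
+      const unsigned char *p = buffer;
+
+      while (length--)
+        {
+          pool[pool_writepos++] ^= *p++;  /* Mix in, don't overwrite.  */
+          if (pool_writepos >= POOLSIZE)
+            pool_writepos = 0;            /* Wrap around the pool.  */
+        }
+      /* A real pool would also be stirred by a hash-based mix step.  */
+    }
+
+ Because the new data is XORed in, an attacker feeding known input can
+ at worst leave the pool unchanged; copying would have let such input
+ overwrite entropy already gathered.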
+
+2001-08-08 Werner Koch
+
+ * rndw32.c (gather_random): Use toolhelp in addition to the NT
+ gatherer for Windows2000. Suggested by Sami Tolvanen.
+
+ * random.c (read_pool): Fixed length check, this used to be one
+ byte too strict. Made an assert out of it because the caller has
+ already made sure that only poolsize bytes are requested.
+ Reported by Marcus Brinkmann.
+
+2001-08-03 Werner Koch
+
+ * cipher.c (cipher_encrypt, cipher_decrypt): Prepare to return
+ errors. We have to change the interface to all ciphers to make
+ this really work but we should do so to prepare for hardware
+ encryption modules.
+ (gcry_cipher_encrypt, gcry_cipher_decrypt): Return the error and
+ set lasterr.
+ (gcry_cipher_ctl): Make sure that errors from setkey are returned.
+
+2001-08-02 Werner Koch
+
+ * rndlinux.c (gather_random): cast a size_t arg to int so that
+ the format string is correct. Casting is okay here and avoids
+ translation changes.
+
+ * random.c (fast_random_poll): Do not check the return code of
+ getrusage.
+
+ * rndunix.c: Add a signal.h header to avoid warnings on Solaris 7
+ and 8.
+
+ * tiger.c (print_abc,print_data): Removed.
+
+ * rijndael.c, des.c, blowfish.c, twofish.c, cast5.c, arcfour.c
+ (burn_stack): New. Add wrappers for most functions to be able to
+ call burn_stack after the function invocation. This method seems
+ to be the most portable way to zeroise the stack used. It only
+ works on stack frame based machines but it is highly portable
+ and has no side effects. Just setting the automatic variables at
+ the end of a function to zero does not work well because the
+ compiler will optimize them away - marking them as volatile would
+ be bad for performance.
+ * md5.c, sha1.c, rmd160.c, tiger.c (burn_stack): Likewise.
+ * random.c (burn_stack): New.
+ (mix_pool): Use it here to burn the stack of the mixblock function.
+
+ * primegen.c (_gcry_generate_elg_prime): Freed q at 3 places.
+ Thanks to Tommi Komulainen.
+
+ * arcfour.c (arcfour_setkey): Check the minimum keylength against
+ bytes and not bits.
+ (selftest): Must reset the key before decryption.
+
+2001-05-31 Werner Koch
+
+ * sha1.c (sha1_init): Made static.
+
+ Changed all g10_ prefixed function names as well as some mpi_
+ function names to cope with the introduced naming changes.
+
+ * md.c (prepare_macpads): Made key const.
+
+2001-05-28 Werner Koch
+
+ * rndegd.c (gather_random): Removed the use of tty_printf.
+
+2001-03-29 Werner Koch
+
+ * md5.c (md5_final): Fixed calculation of hashed length. Thanks
+ to disastry@saiknes.lv for pointing out that it was horribly wrong
+ for more than 512MB of input.
+ * sha1.c (sha1_final): Ditto.
+ * rmd160.c (rmd160_final): Ditto.
+ * tiger.c (tiger_final): Ditto.
+
+ * blowfish.c (encrypt,do_encrypt): Changed name to do_encrypt to
+ avoid name clashes with an encrypt function in stdlib.h of
+ Dynix/PIX. Thanks to Gene Carter.
+ * elgamal.c (encrypt,do_encrypt): Ditto.
+
+ * twofish.c (gnupgext_enum_func): Use only when compiled as a
+ module.
+ * rijndael.c (gnupgext_enum_func): Ditto.
+
+ * tiger.c (tiger_get_info): Return "TIGER192" and not just
+ "TIGER". By Edwin Woudt.
+
+ * random.c: Always include time.h - standard requirement. Thanks
+ to James Troup.
+
+ * rndw32.c: Fixes to the macros.
+
+2001-01-11 Werner Koch
+
+ * cipher.c (cipher_encrypt,gcry_cipher_encrypt): Use blocksize and
+ not 8.
+
+2000-12-19 Werner Koch
+
+ Major change:
+ Removed all GnuPG stuff and renamed this piece of software
+ to gcrypt.
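+
+ The 2001-08-02 entry above describes the burn_stack technique in some
+ detail. A rough C sketch of that idea, modelled on the pattern the
+ entry describes rather than copied from libgcrypt; the 64-byte step
+ size is an assumption:
+
+    #include <stddef.h>
+    #include <string.h>
+
+    static void
+    burn_stack (int bytes)
+    {
+      char buf[64];
+
+      /* Wipe through a volatile function pointer so the compiler
+         cannot prove the buffer dead and optimize the memset away.  */
+      void *(*volatile do_memset) (void *, int, size_t) = memset;
+      do_memset (buf, 0, sizeof buf);
+      bytes -= (int) sizeof buf;
+      if (bytes > 0)
+        burn_stack (bytes);   /* Recurse to cover deeper frames.  */
+    }
+
+ Each recursion level allocates a fresh 64-byte frame, so successive
+ calls walk down and overwrite roughly the stack region the wrapped
+ crypto function just used, without touching that function's own code.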
+
+2000-11-14 Werner Koch
+
+ * dsa.c (test_keys): Replaced mpi_alloc by gcry_mpi_new and
+ mpi_free by gcry_mpi_release.
+ * elgamal.c (test_keys,generate): Ditto, also for mpi_alloc_secure.
+ * rsa.c (test_keys,generate,rsa_verify): Ditto.
+ * primegen.c (generate_elg_prime): Ditto.
+ (gen_prime): Ditto and removed nlimbs.
+
+ * rsa.c (generate): Allocate 2 more vars in secure memory.
+
+ * Makefile.am (OMIT_DEPENDENCIES): Hack to work around dependency
+ problems.
+
+2000-10-09 Werner Koch
+
+ * arcfour.c, arcfour.h: New.
+ * cipher.c (cipher_encrypt, cipher_decrypt): Add stream mode.
+ (setup_cipher_table): Add Arcfour.
+ (gcry_cipher_open): Kludge to allow stream mode.
+
+Wed Oct 4 13:16:18 CEST 2000 Werner Koch
+
+ * sha1.c (transform): Use rol() macro. Actually this is not needed
+ for a newer gcc but there are still other compilers.
+
+ * rsa.c (test_keys): Use new random function.
+
+ * md.c (gcry_md_setkey): New function to overcome problems with
+ const conflicts.
+ (gcry_md_ctl): Pass set key to the new functions.
+
+ * rijndael.c: New.
+ * cipher.c: Add Rijndael support.
+
+Mon Sep 18 16:35:45 CEST 2000 Werner Koch
+
+ * rndlinux.c (open_device): Loosen random device checking.
+ By Nils Ellmenreich.
+
+ * random.c (fast_random_poll): Check ENOSYS for getrusage.
+ * rndunix.c: Add 2 sources for QNX. By Sam Roberts.
+
+ * pubkey.c (gcry_pk_algo_info): Add GCRYCTL_GET_ALGO_USAGE.
+
+ * rsa.c: Changed the comment about the patent.
+ (secret): Speed up by using the CRT. For 2k keys this
+ is about 3 times faster.
+ (stronger_key_check): New but unused code to check the secret key.
+ * Makefile.am: Included rsa.[ch].
+ * pubkey.c: Enabled RSA support.
+ (pubkey_get_npkey): Removed RSA workaround.
+
+Mon Jul 31 10:04:47 CEST 2000 Werner Koch
+
+ * pubkey.c: Replaced all gcry_sexp_{car,cdr}_{data,mpi} by the new
+ gcry_sexp_nth_{data,mpi} functions.
+
+Tue Jul 25 17:44:15 CEST 2000 Werner Koch
+
+ * pubkey.c (sexp_to_key,sexp_to_sig,sexp_to_enc,gcry_pk_encrypt,
+ gcry_pk_decrypt,gcry_pk_sign,gcry_pk_genkey): Changed to work with
+ the new S-Exp interface.
+
+Mon Jul 17 16:35:47 CEST 2000 Werner Koch
+
+ * random.c (gather_faked): Replaced make_timestamp by time(2) again.
+
+Fri Jul 14 19:38:23 CEST 2000 Werner Koch
+
+ * md.c (gcry_md_ctl): Support GCRYCTL_{START,STOP}_DUMP.
+
+ * Makefile.am: Never compile mingw32 as module.
+
+ * Makefile.am: Tweaked module build and removed libtool
+
+ * Makefile.am: Replaced -O1 by -O. Suggested by Alec Habig.
+
+ * elgamal.c (sign): Removed inactive code.
+
+ * rsa.c, rsa.h: New based on the old module version (only in CVS for now).
+ * pubkey.c (setup_pubkey_table): Added commented support for RSA.
+
+ * rndunix.c (waitpid): New. For UTS 2.1. All by Dave Dykstra.
+ (my_popen): Do the FD_CLOEXEC only if it is available
+ (start_gatherer): Cope with missing _SC_OPEN_MAX
+
+ * rndunix.c: Add some more headers for QNX. By Sam Roberts.
+
+ * rndegd.c (gather_random): Shortcut level 0.
+ * rndunix.c (gather_random): Ditto.
+ * rndw32.c (gather_random): Ditto.
+
+ * rndw32.c: Replaced with code from Cryptlib and commented the old stuff.
+ * rndw32.c: Add some debugging code enabled by an environment variable.
+
+ * random.c (read_seed_file): Binary open for DOSish system
+ (update_random_seed_file): Ditto.
+ * random.c [MINGW32]: Include process.h for getpid.
+ * random.c (fast_random_poll): Add clock_gettime() as fallback for
+ systems which support this POSIX.4 function. By Sam Roberts.
+
+ * random.c (read_seed_file): Removed the S_ISLNK test because it
+ is already covered by !S_ISREG and is not defined in Unixware.
+ Reported by Dave Dykstra.
+ (update_random_seed_file): Silently ignore update request when pool
+ is not filled.
+
+ * random.c (read_seed_file): New.
+ (set_random_seed_file): New.
+ (read_pool): Try to read the seeding file.
+ (update_random_seed_file): New.
+
+ (read_pool): Do an initial extra seeding when level 2 quality random
+ is requested the first time. This requests at least POOLSIZE/2 bytes
+ of entropy. Combined with the seeding file this should make normal
+ random bytes cheaper and increase the quality of the random bytes
+ used for key generation.
+
+ * random.c (read_pool): Print a more friendly error message in
+ cases when too much random is requested in one call.
+
+ * random.c (fast_random_poll): Check whether RUSAGE_SELF is defined;
+ this is not the case for some ESIX and Unixware, although they have
+ getrusage().
+
+ * primegen.c (generate_elg_prime): All primes are now generated with
+ the lowest random quality level. Because they are public anyway we
+ don't need stronger random and by this we do not drain the system's
+ entropy so much.
+
+ * primegen.c (register_primegen_progress): New.
+ * dsa.c (register_pk_dsa_progress): New.
+ * elgamal.c (register_pk_elg_progress): New.
+
+ * elgamal.c (wiener_map): New.
+ (gen_k): Use a much smaller k.
+ (generate): Calculate the qbits using the wiener map and
+ choose an x at a size comparable to the one chosen in gen_k
+
+ * rmd160.c (rmd160_get_info): Moved casting to the left side due to a
+ problem with UTS4.3. Suggested by Dave Dykstra.
+ * sha1.c (sha1_get_info): Ditto.
+ * tiger.c (tiger_get_info): Ditto.
+ * md5.c (md5_get_info): Ditto.
+ * des.c (des_get_info): Ditto.
+ * blowfish.c (blowfish_get_info): Ditto.
+ * cast5.c (cast5_get_info): Ditto.
+ * twofish.c (twofish_get_info): Ditto.
+
+Fri Mar 24 11:25:45 CET 2000 Werner Koch
+
+ * md.c (md_open): Add hmac arg and allocate space for the pads.
+ (md_finalize): Add HMAC support.
+ (md_copy): Ditto.
+ (md_close): Ditto.
+ (gcry_md_reset): Ditto.
+ (gcry_md_ctl): Ditto.
+ (prepare_macpads): New.
+
+Mon Mar 13 19:22:46 CET 2000 Werner Koch
+
+ * md.c (gcry_md_hash_buffer): Add support for the other algorithms.
+
+Mon Jan 31 16:37:34 CET 2000 Werner Koch
+
+ * primegen.c (generate_elg_prime): Fixed returned factors which never
+ worked for non-DSA keys.
+
+Thu Jan 27 18:00:44 CET 2000 Werner Koch
+
+ * pubkey.c (sexp_to_key): Fixed mem leaks in case of errors.
+
+Mon Jan 24 22:24:38 CET 2000 Werner Koch
+
+ * pubkey.c (gcry_pk_decrypt): Implemented.
+ (gcry_pk_encrypt): Implemented.
+ (gcry_pk_testkey): New.
+ (gcry_pk_genkey): New.
+ (pubkey_decrypt): Made static.
+ (pubkey_encrypt): Ditto.
+ (pubkey_check_secret_key): Ditto.
+ (pubkey_generate): Ditto.
+
+Mon Jan 24 13:04:28 CET 2000 Werner Koch
+
+ * pubkey.c (pubkey_nbits): Removed and replaced by ...
+ (gcry_pk_get_nbits): this new one.
+
+Wed Dec 8 21:58:32 CET 1999 Werner Koch
+
+ * dsa.c: s/mpi_powm/gcry_mpi_powm/g
+ * elgamal.c: Ditto.
+ * primegen.c: Ditto.
+
+ * : Replaced g10_opt_verbose by g10_log_verbosity().
+
+ * Makefile.am (INCLUDES): removed intl, add ../gcrypt
+
+Fri Nov 19 17:15:20 CET 1999 Werner Koch
+
+ * dynload.c (cmp_filenames): New to replace compare_filename() in
+ module.
+ (register_cipher_extension): Removed the tilde expansion stuff.
+ * rndegd.c (my_make_filename): New.
+
+ * : Replaced header util.h by g10lib.h
+
+ * random.c (gather_faked): Replaced make_timestamp by time(2).
+ Disabled warning printed with tty_printf.
+ * rndlinux.c (gather_random): Always use fprintf instead of tty_xxx;
+ this should be replaced by a callback function.
+
+ * primegen.c (gen_prime): Use gcry_mpi_randomize.
+ (is_prime): Ditto.
+ * elgamal.c (test_keys): Ditto.
+ * dsa.c (test_keys): Ditto.
+
+ * cipher.c (gcry_cipher_close): Die on invalid handle.
+
+Mon Nov 15 21:36:02 CET 1999 Werner Koch
+
+ * elgamal.c (gen_k): Use the new random API.
+ (generate): Ditto.
+ * dsa.c (gen_k): Ditto.
+ (generate): Ditto.
+
+Sat Nov 13 17:44:23 CET 1999 Werner Koch
+
+ * pubkey.c (disable_pubkey_algo): Made static.
+ (gcry_pk_ctl): New.
+
+ * random.c (get_random_bits): Renamed to ...
+ (get_random_bytes): ... this and made static.
+ (gcry_random_bytes): New.
+ (gcry_random_bytes_secure): New.
+ (randomize_buffer): Renamed to ...
+ (gcry_randomize): ...this.
+
+ * md.c (gcry_md_hash_buffer): New.
+
+ * pubkey.c (gcry_pk_algo_info): 4 new commands.
+ (pubkey_get_npkey): Made static.
+ (pubkey_get_nskey): Made static.
+ (pubkey_get_nsig): Made static.
+ (pubkey_get_nenc): Made static.
+
+ * pubkey.c: Removed all G10ERR_xxx.
+ * cipher.c: Changed all GCRYERR_INV_ALGO to GCRYERR_INV_CIPHER_ALGO.
+ * md.c: Changed all GCRYERR_INV_ALGO to GCRYERR_INV_MD_ALGO.
+ * cast5.c (cast_setkey): Changed error codes to GCRYERR_xxx.
+ * blowfish.c: Ditto.
+ * des.c: Ditto.
+ * twofish.c: Ditto.
+ * dsa.c: Ditto.
+ * elgamal.c: Ditto.
+
+ * g10c.c: Removed.
+
+ * cipher.c (gcry_cipher_open): Replaced alloc functions and return NULL
+ if we are out of core.
+ * dynload.c: Replaced all memory allocation functions.
+ * md.c: Ditto.
+ * primegen.c: Ditto.
+ * pubkey.c: Ditto.
+ * random.c: Ditto.
+ * rndw32.c: Ditto.
+ * elgamal.c: Ditto.
+ * dsa.c: Ditto.
+
+Tue Oct 26 14:10:21 CEST 1999 Werner Koch
+
+ * elgamal.c (sign): Hugh found strange code here. Replaced by BUG().
+
+ * cipher.c: Merged with gcrypt/symapi.c.
+
+ * pubkey.c (string_to_pubkey_algo): Renamed function to ...
+ (gcry_pk_map_name): ... this.
+ (pubkey_algo_to_string): Renamed function to ...
+ (gcry_pk_algo_name): ... this.
+ (gcry_pk_algo_info): New.
+ * pubkey.c: Merged with gcrypt/pkapi.c.
+
+ * md.c (md_reset): Clear finalized; thanks to Ulf Moeller for
+ fixing this bug.
+
+ * md.c: Merged with gcrypt/mdapi.c
+
+Wed Sep 15 14:39:59 CEST 1999 Michael Roth
+
+ * des.c: Various speed improvements: One bit pre rotation
+ trick after initial permutation (Richard Outerbridge).
+ Finished test of SSLeay Triple-DES patterns.
+
+Wed Sep 15 16:22:17 CEST 1999 Werner Koch
+
+ * rndw32.c: New.
+
+Mon Sep 13 10:51:29 CEST 1999 Werner Koch
+
+ * bithelp.h: New.
+ * rmd160.h, sha1.h, md5.h: Use the rol macro from bithelp.h
+
+Tue Sep 7 16:23:36 CEST 1999 Werner Koch
+
+ * Makefile.am: Fixed seds for latest egcc. By Ollivier Robert.
+
+Mon Sep 6 19:59:08 CEST 1999 Werner Koch
+
+ * des.c (selftest): Add some test patterns
+
+Mon Aug 30 20:38:33 CEST 1999 Werner Koch
+
+ * cipher.c (do_cbc_encrypt): Fixed serious bug occurring when not
+ using in-place encryption. Pointed out by Frank Stajano.
+
+Mon Jul 26 09:34:46 CEST 1999 Werner Koch
+
+ * md5.c (md5_final): Fix for a SCO cpp bug.
+
+Thu Jul 15 10:15:35 CEST 1999 Werner Koch
+
+ * elgamal.c (elg_check_secret_key,elg_encrypt
+ elg_decrypt,elg_sign,elg_verify): Sanity check on the args.
+ * dsa.c (dsa_check_secret_key,dsa_sign,dsa_verify): Ditto.
+
+ * pubkey.c (disable_pubkey_algo): New.
+ (check_pubkey_algo2): Look at disabled algo table.
+ * cipher.c (disable_cipher_algo): New.
+ (check_cipher_algo): Look at disabled algo table.
+
+Wed Jul 7 13:08:40 CEST 1999 Werner Koch
+
+ * Makefile.am: Support for libtool.
+
+Fri Jul 2 11:45:54 CEST 1999 Werner Koch
+
+ * dsa.c (gen_k): Changed algorithm to consume fewer random bytes
+ * elgamal.c (gen_k): Ditto.
+
+ * random.c (random_dump_stats): New.
+
+Thu Jul 1 12:47:31 CEST 1999 Werner Koch
+
+ * primegen.c, elgamal.c, dsa.c (progress): New and replaced all
+ fputc with a call to this function.
+
+Sat Jun 26 12:15:59 CEST 1999 Werner Koch
+
+ * rndegd.c (do_write): s/ssize_t/int/ due to SunOS 4.1 probs.
+
+ * cipher.c (do_cbc_encrypt, do_cbc_decrypt): New.
+
+ * dynload.c (HAVE_DL_SHL_LOAD): Map hpux API to dlopen (Dave Dykstra).
+ * Makefile.am (install-exec-hook): Removed.
+
+Sun May 23 14:20:22 CEST 1999 Werner Koch
+
+ * cipher.c (setup_cipher_table): Enable Twofish
+
+ * random.c (fast_random_poll): Disable use of times() for mingw32.
+
+Mon May 17 21:54:43 CEST 1999 Werner Koch
+
+ * dynload.c (register_internal_cipher_extension): Minor init fix.
+
+Tue May 4 15:47:53 CEST 1999 Werner Koch
+
+ * primegen.c (gen_prime): Readded the Fermat test. Fixed the bug
+ that we didn't correct for step when passing the prime to the
+ Rabin-Miller test which led to bad performance (Stefan Keller).
+ (check_prime): Add a first Fermat test.
+
+Sun Apr 18 10:11:28 CEST 1999 Werner Koch
+
+ * cipher.c (cipher_setiv): Add ivlen arg, changed all callers.
+
+ * random.c (randomize_buffer): Always use secure memory because
+ we can't use m_is_secure() on a statically allocated buffer.
+
+ * twofish.c: Replaced some macros by a loop to reduce text size.
+ * Makefile.am (twofish): No more need for sed editing.
+
+Fri Apr 9 12:26:25 CEST 1999 Werner Koch
+
+ * cipher.c (cipher_open): Reversed the changes for AUTO_CFB.
+
+ * blowfish.c: Dropped the Blowfish 160 mode.
+ * cipher.c (cipher_open): Ditto.
+ (setup_cipher_table): Ditto. And removed support for twofish128
+
+Wed Apr 7 20:51:39 CEST 1999 Werner Koch
+
+ * random.c (get_random_bits): Can now handle requests > POOLSIZE
+
+ * cipher.c (cipher_open): Now uses standard CFB for automode if
+ the blocksize is gt 8 (according to rfc2440).
+
+ * twofish.c: Applied Matthew Skala's patches for 256 bit key.
+
+Sat Mar 20 11:44:21 CET 1999 Werner Koch
+
+ * rndlinux.c (tty_printf) [IS_MODULE]: Removed.
+
+ * rndegd.c (gather_random): Some fixes.
+
+Wed Mar 17 13:09:03 CET 1999 Werner Koch
+
+ * rndegd.c (do_read): New.
+ (gather_random): Changed the implementation.
+
+Mon Mar 8 20:47:17 CET 1999 Werner Koch
+
+ * dynload.c (DLSYM_NEEDS_UNDERSCORE): Renamed.
+
+Fri Feb 26 17:55:41 CET 1999 Werner Koch
+
+ * md.c: Nearly a total rewrite.
+
+Wed Feb 24 11:07:27 CET 1999 Werner Koch
+
+ * cipher.c (context): Fixed alignment
+ * md.c: Ditto.
+
+ * rndegd.c: New
+
+Mon Feb 22 20:04:00 CET 1999 Werner Koch
+
+ * rndegd.c: New.
+
+Wed Feb 10 17:15:39 CET 1999 Werner Koch
+
+ * Makefile.am: Modules are now figured out by configure
+ * construct.c: New. Generated by configure. Changed all modules
+ to work with that.
+ * sha1.h: Removed.
+ * md5.h: Removed.
+
+ * twofish.c: Changed interface to allow Twofish/256
+
+ * rndunix.c (start_gatherer): Die on SIGPIPE.
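+
+ Several entries above concern do_cbc_encrypt/do_cbc_decrypt (new on
+ Sat Jun 26 1999, with an in-place bug fixed later). A minimal sketch
+ of the CBC encryption loop, with invented names and types; this is
+ not the libgcrypt code itself:
+
+    #include <stddef.h>
+
+    typedef void (*block_encrypt_t) (void *ctx, unsigned char *out,
+                                     const unsigned char *in);
+
+    static void
+    do_cbc_encrypt (void *ctx, block_encrypt_t encrypt_block,
+                    unsigned char *iv, size_t blocksize,
+                    unsigned char *out, const unsigned char *in,
+                    size_t nblocks)
+    {
+      size_t n, i;
+
+      for (n = 0; n < nblocks; n++)
+        {
+          /* XOR the plaintext block with the chaining value (the IV
+             for the first block)...  */
+          for (i = 0; i < blocksize; i++)
+            out[i] = in[i] ^ iv[i];
+          /* ...encrypt in place, then keep the ciphertext as the
+             next chaining value.  OUT and IN may alias (in-place
+             encryption); reading IN before writing OUT above keeps
+             that case correct, which is the kind of detail the
+             in-place bug fix was about.  */
+          encrypt_block (ctx, out, out);
+          for (i = 0; i < blocksize; i++)
+            iv[i] = out[i];
+          in += blocksize;
+          out += blocksize;
+        }
+    }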
+
+Wed Jan 20 18:59:49 CET 1999 Werner Koch
+
+ * rndunix.c (gather_random): Fix to avoid infinite loop.
+
+Sun Jan 17 11:04:33 CET 1999 Werner Koch
+
+ * des.c (is_weak_key): Replace system memcmp due to bugs
+ in SunOS's memcmp.
+ (des_get_info): Return error on failed selftest.
+ * twofish.c (twofish_setkey): Return error on failed selftest or
+ invalid keylength.
+ * cast5.c (cast_setkey): Ditto.
+ * blowfish.c (bf_setkey): Return error on failed selftest.
+
+Tue Jan 12 11:17:18 CET 1999 Werner Koch
+
+ * random.c (random_is_faked): New.
+
+ * tiger.c: Only compile if we have the u64 type
+
+Sat Jan 9 16:02:23 CET 1999 Werner Koch
+
+ * rndunix.c (gather_random): check for setuid.
+
+ * Makefile.am: Add a way to statically link random modules
+
+Thu Jan 7 18:00:58 CET 1999 Werner Koch
+
+ * md.c (md_stop_debug): Do a flush first.
+ (md_open): size of buffer now depends on the secure parameter
+
+Sun Jan 3 15:28:44 CET 1999 Werner Koch
+
+ * rndunix.c (start_gatherer): Fixed stupid ==/= bug
+
+1998-12-31 Geoff Keating
+
+ * des.c (is_weak_key): Rewrite loop end condition.
+
+Tue Dec 29 14:41:47 CET 1998 Werner Koch
+
+ * random.c: add unistd.h for getpid().
+ (RAND_MAX): Fallback value for Sun.
+
+Wed Dec 23 17:12:24 CET 1998 Werner Koch
+
+ * md.c (md_copy): Reset debug.
+
+Mon Dec 14 21:18:49 CET 1998 Werner Koch
+
+ * random.c (read_random_source): Changed the interface to the
+ random gathering function.
+ (gather_faked): Use new interface.
+ * dynload.c (dynload_getfnc_fast_random_poll): Ditto.
+ (dynload_getfnc_gather_random): Ditto.
+ * rndlinux.c (gather_random): Ditto.
+ * rndunix.c (gather_random): Ditto.
+
+Sat Dec 12 18:40:32 CET 1998 Werner Koch
+
+ * dynload.c (SYMBOL_VERSION): New to cope with systems which need
+ underscores.
+
+ * rndunix.c: Rewrote large parts
+
+Thu Dec 10 20:15:36 CET 1998 Werner Koch
+
+ * dynload.c (load_extension): increased needed verbosity level.
+
+ * random.c (fast_random_poll): Fallback to a default fast random
+ poll function.
+ (read_random_source): Always use the faked entropy gatherer if no
+ gather module is available.
+ * rndlinux.c (fast_poll): Removed.
+ * rndunix.c (fast_poll): Removed.
+
+Wed Nov 25 12:33:41 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-*.c: Removed.
+ * rndlinux.c : New.
+ * rndunix.c : New.
+ * random.c : Restructured the interface to the gather modules.
+ (initialize): Call constructor functions
+ (read_random_source): Moved to here.
+ * dynload.c (dynload_getfnc_gather_random): New.
+ (dynload_getfnc_fast_random_poll): New.
+ (register_internal_cipher_extension): New.
+ (register_cipher_extension): Support of internal modules.
+
+Sun Nov 8 17:44:36 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c (read_random_source): Removed the assert.
+
+Mon Oct 19 18:34:30 1998 me,,, (wk@tobold)
+
+ * pubkey.c: Hack to allow us to give some info about RSA keys back.
+
+Thu Oct 15 11:47:57 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dynload.c: Support for DLD
+
+Wed Oct 14 12:13:07 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c: Now uses names from configure for /dev/random.
+
+1998-10-10 SL Baur
+
+ * Makefile.am: fix sed -O substitutions to catch -O6, etc.
+
+Tue Oct 6 10:06:32 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-unix.c (HAVE_GETTIMEOFDAY): Fixed (was ..GETTIMEOFTIME :-)
+ * rand-dummy.c (HAVE_GETTIMEOFDAY): Ditto.
+
+Mon Sep 28 13:23:09 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_digest): New.
+ (md_reset): New.
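+
+ The Sun Jan 17 1999 entry swaps the system memcmp for a private one
+ because of a buggy SunOS implementation. A portable byte-wise
+ replacement in the spirit of the working_memcmp named elsewhere in
+ this log; the body here is a generic sketch, not the original:
+
+    #include <stddef.h>
+
+    static int
+    working_memcmp (const void *a, const void *b, size_t n)
+    {
+      const unsigned char *pa = a, *pb = b;
+
+      /* Compare byte by byte; return the sign of the first
+         difference, 0 if the regions are equal.  */
+      for (; n; n--, pa++, pb++)
+        if (*pa != *pb)
+          return *pa < *pb ? -1 : 1;
+      return 0;
+    }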
+
+Wed Sep 23 12:27:02 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c (TIGER_CONTEXT): moved "buf", so that it is 64 bit aligned.
+
+Mon Sep 21 06:22:53 1998 Werner Koch (wk@(none))
+
+ * des.c: Some patches from Michael.
+
+Thu Sep 17 19:00:06 1998 Werner Koch (wk@(none))
+
+ * des.c : New file from Michael Roth
+
+Mon Sep 14 11:10:55 1998 Werner Koch (wk@(none))
+
+ * blowfish.c (bf_setkey): Niklas Hernaeus patch to detect weak keys.
+
+Mon Sep 14 09:19:25 1998 Werner Koch (wk@(none))
+
+ * dynload.c (RTLD_NOW): Now defined to 1 if it is undefined.
+
+Mon Sep 7 17:04:33 1998 Werner Koch (wk@(none))
+
+ * Makefile.am: Fixes to allow a different build directory
+
+Thu Aug 6 17:25:38 1998 Werner Koch,mobil,,, (wk@tobold)
+
+ * random.c (get_random_byte): Removed and changed all callers
+ to use get_random_bits()
+
+Mon Jul 27 10:30:22 1998 Werner Koch (wk@(none))
+
+ * cipher.c : Support for other blocksizes
+ (cipher_get_blocksize): New.
+ * twofish.c: New.
+ * Makefile.am: Add twofish module.
+
+Mon Jul 13 21:30:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (read_pool): Simple alloc if secure_alloc is not set.
+ (get_random_bits): Ditto.
+
+Thu Jul 9 13:01:14 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dynload.c (load_extension): Function now bails out if
+ the program is run setuid.
+
+Wed Jul 8 18:58:23 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (rmd160_hash_buffer): New.
+
+Thu Jul 2 10:50:30 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c (cipher_open): algos >=100 use standard CFB
+
+Thu Jun 25 11:18:25 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * Makefile.am: Support for extensions
+
+Thu Jun 18 12:09:38 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (mix_pool): simpler handling for level 0
+
+Mon Jun 15 14:40:48 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c: Removed from dist, will reappear as dynload module
+
+Sat Jun 13 14:16:57 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * pubkey.c: Major changes to allow extensions. Changed the interface
+ of all public key ciphers and added the ability to load extensions
+ on demand.
+
+ * misc.c: Removed.
+
+Wed Jun 10 07:52:08 1998 Werner Koch,mobil,,, (wk@tobold)
+
+ * dynload.c: New.
+ * cipher.c: Major changes to allow extensions.
+
+Mon Jun 8 22:43:00 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: Major internal changes to support extensions.
+ * blowfish.c (blowfish_get_info): New and made all internal
+ functions static, changed header.
+ * cast5.c (cast5_get_info): Likewise.
+
+Mon Jun 8 12:27:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c (transform): Fix for big endian
+
+ * cipher.c (do_cfb_decrypt): Big endian fix.
+
+Fri May 22 07:30:39 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_get_oid): Add a new one for TIGER.
+
+Thu May 21 13:24:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: Add support for a dummy cipher
+
+Thu May 14 15:40:36 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (transform): fixed sigbus - I should better
+ add Christian von Roques's new implementation of rmd160_write.
+
+Fri May 8 18:07:44 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rand-internal.h, rand-unix.c, rand-w32.c, rand_dummy.c: New
+ * random.c: Moved system specific functions to rand-****.c
+
+Fri May 8 14:01:17 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (fast_random_poll): add call to gethrtime.
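+
+ Standard CFB mode comes up repeatedly above (cipher_open uses it for
+ algos >= 100, and do_cfb_decrypt got a big-endian fix). As a rough
+ sketch of full-block CFB encryption, again with invented names rather
+ than libgcrypt's real interface:
+
+    #include <stddef.h>
+
+    typedef void (*block_encrypt_t) (void *ctx, unsigned char *out,
+                                     const unsigned char *in);
+
+    static void
+    cfb_encrypt (void *ctx, block_encrypt_t encrypt_block,
+                 unsigned char *iv, size_t blocksize,
+                 unsigned char *out, const unsigned char *in,
+                 size_t nblocks)
+    {
+      size_t n, i;
+
+      for (n = 0; n < nblocks; n++)
+        {
+          /* Encrypt the IV to get a keystream block, XOR with the
+             plaintext; the ciphertext becomes the next IV.  */
+          encrypt_block (ctx, iv, iv);
+          for (i = 0; i < blocksize; i++)
+            iv[i] = out[i] = in[i] ^ iv[i];
+          in += blocksize;
+          out += blocksize;
+        }
+    }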
+
+Tue May 5 21:28:55 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * elgamal.c (elg_generate): choosing x was not correct, could
+ yield 6 bytes which are not from the random pool, tsss, tsss..
+
+Tue May 5 14:09:06 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * primegen.c (generate_elg_prime): Add arg mode, changed all
+ callers and implemented mode 1.
+
+Mon Apr 27 14:41:58 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c (cipher_get_keylen): New.
+
+Sun Apr 26 14:44:52 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * tiger.c, tiger.h: New.
+
+Wed Apr 8 14:57:11 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * misc.c (check_pubkey_algo2): New.
+
+Tue Apr 7 18:46:49 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cipher.c: New
+ * misc.c (check_cipher_algo): Moved to cipher.c
+ * cast5.c: Moved many functions to cipher.c
+ * blowfish.c: Likewise.
+
+Sat Apr 4 19:52:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * cast5.c: Implemented and tested.
+
+Wed Apr 1 16:38:27 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * elgamal.c (elg_generate): Faster generation of x in some cases.
+
+Thu Mar 19 13:54:48 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * blowfish.c (blowfish_decode_cfb): changed XOR operation
+ (blowfish_encode_cfb): Ditto.
+
+Thu Mar 12 14:04:05 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * sha1.c (transform): Rewrote
+
+ * blowfish.c (encrypt): Unrolled for rounds == 16
+ (decrypt): Ditto.
+
+Tue Mar 10 16:32:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (transform): Unrolled the loop.
+
+Tue Mar 10 13:05:14 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (read_pool): Add pool_balance stuff.
+ (get_random_bits): New.
+
+ * elgamal.c (elg_generate): Now uses get_random_bits to generate x.
+
+Tue Mar 10 11:33:51 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_digest_length): New.
+
+Tue Mar 10 11:27:41 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dsa.c (dsa_verify): Works.
+
+Mon Mar 9 12:59:08 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * dsa.c, dsa.h: Removed some unused code.
+
+Wed Mar 4 10:39:22 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_open): Add call to fast_random_poll.
+ * blowfish.c (blowfish_setkey): Ditto.
+
+Tue Mar 3 13:32:54 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * rmd160.c (rmd160_mixblock): New.
+ * random.c: Restructured to start with a new RNG implementation.
+ * random.h: New.
+
+Mon Mar 2 19:21:46 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * gost.c, gost.h: Removed because they only contained trash.
+
+Sun Mar 1 16:42:29 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * random.c (fill_buffer): removed error message if n == -1.
+
+Fri Feb 27 16:39:34 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c (md_enable): No init if called twice.
+
+Thu Feb 26 07:57:02 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * primegen.c (generate_elg_prime): Changed the progress printing.
+ (gen_prime): Ditto.
+
+Tue Feb 24 12:28:42 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md5.c, md5.h : Replaced by a modified version of md5.c from
+ GNU textutils 1.22.
+
+Wed Feb 18 14:08:30 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * md.c, md.h : New debugging support
+
+Mon Feb 16 10:08:47 1998 Werner Koch (wk@isil.d.shuttle.de)
+
+ * misc.c (cipher_algo_to_string): New.
+ (pubkey_algo_to_string): New.
+ (digest_algo_to_string): New.
+
+
+ Copyright 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
+ 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
+ + This file is free software; as a special exception the author gives + unlimited permission to copy and/or distribute it, with or without + modifications, as long as this notice is preserved. + + This file is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY, to the extent permitted by law; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + +Local Variables: +buffer-read-only: t +End: diff --git a/comm/third_party/libgcrypt/cipher/Makefile.am b/comm/third_party/libgcrypt/cipher/Makefile.am new file mode 100644 index 0000000000..d644005634 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/Makefile.am @@ -0,0 +1,258 @@ +# Makefile for cipher modules +# Copyright (C) 1998, 1999, 2000, 2001, 2002, +# 2003, 2009 Free Software Foundation, Inc. +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, see . + +# Process this file with automake to produce Makefile.in + +# Need to include ../src in addition to top_srcdir because gcrypt.h is +# a built header. +AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi +AM_CFLAGS = $(GPG_ERROR_CFLAGS) + +AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) + +EXTRA_DIST = gost-s-box.c + +CLEANFILES = gost-s-box +DISTCLEANFILES = gost-sb.h + +noinst_LTLIBRARIES = libcipher.la + +GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ + @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ + +libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) +libcipher_la_LIBADD = $(GCRYPT_MODULES) + +libcipher_la_SOURCES = \ + cipher.c cipher-internal.h \ + cipher-cbc.c \ + cipher-cfb.c \ + cipher-ofb.c \ + cipher-ctr.c \ + cipher-aeswrap.c \ + cipher-ccm.c \ + cipher-cmac.c \ + cipher-gcm.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \ + cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ + cipher-poly1305.c \ + cipher-ocb.c \ + cipher-xts.c \ + cipher-eax.c \ + cipher-selftest.c cipher-selftest.h \ + pubkey.c pubkey-internal.h pubkey-util.c \ + md.c \ + mac.c mac-internal.h \ + mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ + poly1305.c poly1305-internal.h \ + poly1305-s390x.S \ + kdf.c kdf-internal.h \ + bithelp.h \ + bufhelp.h \ + primegen.c \ + hash-common.c hash-common.h \ + dsa-common.c rsa-common.c \ + sha1.h + +EXTRA_libcipher_la_SOURCES = \ + asm-common-aarch64.h \ + asm-common-amd64.h \ + asm-common-s390x.h \ + asm-inline-s390x.h \ + asm-poly1305-aarch64.h \ + asm-poly1305-amd64.h \ + asm-poly1305-s390x.h \ + arcfour.c arcfour-amd64.S \ + blowfish.c blowfish-amd64.S blowfish-arm.S \ + cast5.c cast5-amd64.S cast5-arm.S \ + chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \ + chacha20-armv7-neon.S chacha20-aarch64.S \ + chacha20-ppc.c chacha20-s390x.S \ + crc.c crc-intel-pclmul.c crc-armv8-ce.c \ + crc-armv8-aarch64-ce.S \ + crc-ppc.c \ + des.c des-amd64.S \ + dsa.c \ + elgamal.c \ + ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ + ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \ + 
idea.c \
+ gost28147.c gost.h \
+ gostr3411-94.c \
+ md4.c \
+ md5.c \
+ rijndael.c rijndael-internal.h rijndael-tables.h \
+ rijndael-aesni.c rijndael-padlock.c \
+ rijndael-amd64.S rijndael-arm.S \
+ rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \
+ rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \
+ rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \
+ rijndael-ppc.c rijndael-ppc9le.c \
+ rijndael-ppc-common.h rijndael-ppc-functions.h \
+ rijndael-s390x.c \
+ rmd160.c \
+ rsa.c \
+ salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \
+ scrypt.c \
+ seed.c \
+ serpent.c serpent-sse2-amd64.S \
+ sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S \
+ serpent-avx2-amd64.S serpent-armv7-neon.S \
+ sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \
+ sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \
+ sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \
+ sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \
+ sha256-avx2-bmi2-amd64.S \
+ sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \
+ sha256-intel-shaext.c sha256-ppc.c \
+ sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \
+ sha512-avx2-bmi2-amd64.S \
+ sha512-armv7-neon.S sha512-arm.S \
+ sha512-ppc.c sha512-ssse3-i386.c \
+ sm3.c \
+ keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \
+ stribog.c \
+ tiger.c \
+ whirlpool.c whirlpool-sse2-amd64.S \
+ twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \
+ twofish-avx2-amd64.S \
+ rfc2268.c \
+ camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \
+ camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \
+ blake2.c \
+ blake2b-amd64-avx2.S blake2s-amd64-avx.S
+
+gost28147.lo: gost-sb.h
+gost-sb.h: gost-s-box
+ ./gost-s-box $@
+
+gost-s-box: gost-s-box.c
+ $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) \
+ $(CPPFLAGS_FOR_BUILD) -o $@ $(srcdir)/gost-s-box.c
+
+
+if ENABLE_O_FLAG_MUNGING
+o_flag_munging = sed -e 's/-O\([2-9sg][2-9sg]*\)/-O1/' -e 's/-Ofast/-O1/g'
+else
+o_flag_munging = cat
+endif
+
+
+# We need to lower the optimization for this module.
+tiger.o: $(srcdir)/tiger.c Makefile
+ `echo $(COMPILE) -c $< | $(o_flag_munging) `
+
+tiger.lo: $(srcdir)/tiger.c Makefile
+ `echo $(LTCOMPILE) -c $< | $(o_flag_munging) `
+
+
+# We need to disable instrumentation for these modules as they use cc as
+# thin assembly front-end and do not tolerate in-between function calls
+# inserted by compiler as those functions may clobber the XMM registers.
+if ENABLE_INSTRUMENTATION_MUNGING +instrumentation_munging = sed \ + -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ + -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ + -e 's/-fcoverage[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' +else +instrumentation_munging = cat +endif + +rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +sha256-ssse3-i386.o: $(srcdir)/sha256-ssse3-i386.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +sha256-ssse3-i386.lo: $(srcdir)/sha256-ssse3-i386.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +if ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS +ppc_vcrypto_cflags = -maltivec -mvsx -mcrypto +else +ppc_vcrypto_cflags = +endif + +rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +sha256-ppc.lo: $(srcdir)/sha256-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +chacha20-ppc.o: $(srcdir)/chacha20-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +chacha20-ppc.lo: $(srcdir)/chacha20-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +crc-ppc.o: $(srcdir)/crc-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + 
+crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` diff --git a/comm/third_party/libgcrypt/cipher/Makefile.in b/comm/third_party/libgcrypt/cipher/Makefile.in new file mode 100644 index 0000000000..ceba51b45a --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/Makefile.in @@ -0,0 +1,1445 @@ +# Makefile.in generated by automake 1.16.1 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2018 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# Makefile for cipher modules +# Copyright (C) 1998, 1999, 2000, 2001, 2002, +# 2003, 2009 Free Software Foundation, Inc. +# +# This file is part of Libgcrypt. +# +# Libgcrypt is free software; you can redistribute it and/or modify +# it under the terms of the GNU Lesser General Public License as +# published by the Free Software Foundation; either version 2.1 of +# the License, or (at your option) any later version. +# +# Libgcrypt is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU Lesser General Public License for more details. +# +# You should have received a copy of the GNU Lesser General Public +# License along with this program; if not, see . + +# Process this file with automake to produce Makefile.in + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +subdir = cipher +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cc_for_build.m4 \ + $(top_srcdir)/m4/gpg-error.m4 $(top_srcdir)/m4/libtool.m4 \ + $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \ + $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \ + $(top_srcdir)/m4/noexecstack.m4 $(top_srcdir)/m4/socklen.m4 \ + $(top_srcdir)/acinclude.m4 $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +am__DEPENDENCIES_1 = +am_libcipher_la_OBJECTS = cipher.lo cipher-cbc.lo cipher-cfb.lo \ + cipher-ofb.lo cipher-ctr.lo cipher-aeswrap.lo cipher-ccm.lo \ + cipher-cmac.lo cipher-gcm.lo cipher-gcm-intel-pclmul.lo \ + cipher-gcm-armv7-neon.lo cipher-gcm-armv8-aarch32-ce.lo \ + cipher-gcm-armv8-aarch64-ce.lo cipher-poly1305.lo \ + cipher-ocb.lo cipher-xts.lo cipher-eax.lo cipher-selftest.lo \ + pubkey.lo pubkey-util.lo md.lo mac.lo mac-hmac.lo mac-cmac.lo \ + mac-gmac.lo mac-poly1305.lo poly1305.lo poly1305-s390x.lo \ + kdf.lo primegen.lo hash-common.lo dsa-common.lo rsa-common.lo +libcipher_la_OBJECTS = $(am_libcipher_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) 
+am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir) +depcomp = $(SHELL) $(top_srcdir)/build-aux/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/arcfour-amd64.Plo \ + ./$(DEPDIR)/arcfour.Plo ./$(DEPDIR)/blake2.Plo \ + ./$(DEPDIR)/blake2b-amd64-avx2.Plo \ + ./$(DEPDIR)/blake2s-amd64-avx.Plo \ + ./$(DEPDIR)/blowfish-amd64.Plo ./$(DEPDIR)/blowfish-arm.Plo \ + ./$(DEPDIR)/blowfish.Plo ./$(DEPDIR)/camellia-aarch64.Plo \ + ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo \ + ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo \ + ./$(DEPDIR)/camellia-arm.Plo ./$(DEPDIR)/camellia-glue.Plo \ + ./$(DEPDIR)/camellia.Plo ./$(DEPDIR)/cast5-amd64.Plo \ + ./$(DEPDIR)/cast5-arm.Plo ./$(DEPDIR)/cast5.Plo \ + ./$(DEPDIR)/chacha20-aarch64.Plo \ + ./$(DEPDIR)/chacha20-amd64-avx2.Plo \ + ./$(DEPDIR)/chacha20-amd64-ssse3.Plo \ + ./$(DEPDIR)/chacha20-armv7-neon.Plo \ + ./$(DEPDIR)/chacha20-ppc.Plo ./$(DEPDIR)/chacha20-s390x.Plo \ + ./$(DEPDIR)/chacha20.Plo ./$(DEPDIR)/cipher-aeswrap.Plo \ + ./$(DEPDIR)/cipher-cbc.Plo ./$(DEPDIR)/cipher-ccm.Plo \ + ./$(DEPDIR)/cipher-cfb.Plo ./$(DEPDIR)/cipher-cmac.Plo \ + ./$(DEPDIR)/cipher-ctr.Plo ./$(DEPDIR)/cipher-eax.Plo \ + ./$(DEPDIR)/cipher-gcm-armv7-neon.Plo \ + ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo \ + ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo \ + ./$(DEPDIR)/cipher-gcm.Plo ./$(DEPDIR)/cipher-ocb.Plo \ + ./$(DEPDIR)/cipher-ofb.Plo ./$(DEPDIR)/cipher-poly1305.Plo \ + ./$(DEPDIR)/cipher-selftest.Plo ./$(DEPDIR)/cipher-xts.Plo \ + ./$(DEPDIR)/cipher.Plo ./$(DEPDIR)/crc-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/crc-armv8-ce.Plo ./$(DEPDIR)/crc-intel-pclmul.Plo \ + ./$(DEPDIR)/crc-ppc.Plo ./$(DEPDIR)/crc.Plo \ + ./$(DEPDIR)/des-amd64.Plo ./$(DEPDIR)/des.Plo \ + ./$(DEPDIR)/dsa-common.Plo ./$(DEPDIR)/dsa.Plo \ + ./$(DEPDIR)/ecc-curves.Plo ./$(DEPDIR)/ecc-ecdh.Plo \ + ./$(DEPDIR)/ecc-ecdsa.Plo ./$(DEPDIR)/ecc-eddsa.Plo \ + ./$(DEPDIR)/ecc-gost.Plo ./$(DEPDIR)/ecc-misc.Plo \ + ./$(DEPDIR)/ecc-sm2.Plo ./$(DEPDIR)/ecc.Plo \ + ./$(DEPDIR)/elgamal.Plo ./$(DEPDIR)/gost28147.Plo \ + ./$(DEPDIR)/gostr3411-94.Plo ./$(DEPDIR)/hash-common.Plo \ + ./$(DEPDIR)/idea.Plo ./$(DEPDIR)/kdf.Plo \ + ./$(DEPDIR)/keccak-armv7-neon.Plo ./$(DEPDIR)/keccak.Plo \ + ./$(DEPDIR)/mac-cmac.Plo ./$(DEPDIR)/mac-gmac.Plo \ + ./$(DEPDIR)/mac-hmac.Plo ./$(DEPDIR)/mac-poly1305.Plo \ + ./$(DEPDIR)/mac.Plo ./$(DEPDIR)/md.Plo ./$(DEPDIR)/md4.Plo \ + ./$(DEPDIR)/md5.Plo ./$(DEPDIR)/poly1305-s390x.Plo \ + ./$(DEPDIR)/poly1305.Plo ./$(DEPDIR)/primegen.Plo \ + ./$(DEPDIR)/pubkey-util.Plo ./$(DEPDIR)/pubkey.Plo \ + ./$(DEPDIR)/rfc2268.Plo ./$(DEPDIR)/rijndael-aarch64.Plo \ + ./$(DEPDIR)/rijndael-aesni.Plo ./$(DEPDIR)/rijndael-amd64.Plo \ + ./$(DEPDIR)/rijndael-arm.Plo \ + ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo \ + ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/rijndael-armv8-ce.Plo \ + ./$(DEPDIR)/rijndael-padlock.Plo ./$(DEPDIR)/rijndael-ppc.Plo \ + ./$(DEPDIR)/rijndael-ppc9le.Plo ./$(DEPDIR)/rijndael-s390x.Plo \ + ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo \ + ./$(DEPDIR)/rijndael-ssse3-amd64.Plo ./$(DEPDIR)/rijndael.Plo \ + ./$(DEPDIR)/rmd160.Plo ./$(DEPDIR)/rsa-common.Plo \ + ./$(DEPDIR)/rsa.Plo ./$(DEPDIR)/salsa20-amd64.Plo \ + ./$(DEPDIR)/salsa20-armv7-neon.Plo ./$(DEPDIR)/salsa20.Plo \ + ./$(DEPDIR)/scrypt.Plo ./$(DEPDIR)/seed.Plo \ + ./$(DEPDIR)/serpent-armv7-neon.Plo \ + ./$(DEPDIR)/serpent-avx2-amd64.Plo \ + ./$(DEPDIR)/serpent-sse2-amd64.Plo ./$(DEPDIR)/serpent.Plo \ + ./$(DEPDIR)/sha1-armv7-neon.Plo \ + 
./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo \ + ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/sha1-avx-amd64.Plo \ + ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo \ + ./$(DEPDIR)/sha1-avx2-bmi2-amd64.Plo \ + ./$(DEPDIR)/sha1-intel-shaext.Plo \ + ./$(DEPDIR)/sha1-ssse3-amd64.Plo ./$(DEPDIR)/sha1.Plo \ + ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo \ + ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo \ + ./$(DEPDIR)/sha256-avx-amd64.Plo \ + ./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo \ + ./$(DEPDIR)/sha256-intel-shaext.Plo ./$(DEPDIR)/sha256-ppc.Plo \ + ./$(DEPDIR)/sha256-ssse3-amd64.Plo ./$(DEPDIR)/sha256.Plo \ + ./$(DEPDIR)/sha512-arm.Plo ./$(DEPDIR)/sha512-armv7-neon.Plo \ + ./$(DEPDIR)/sha512-avx-amd64.Plo \ + ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo \ + ./$(DEPDIR)/sha512-ppc.Plo ./$(DEPDIR)/sha512-ssse3-amd64.Plo \ + ./$(DEPDIR)/sha512-ssse3-i386.Plo ./$(DEPDIR)/sha512.Plo \ + ./$(DEPDIR)/sm3.Plo ./$(DEPDIR)/sm4-aesni-avx-amd64.Plo \ + ./$(DEPDIR)/sm4-aesni-avx2-amd64.Plo ./$(DEPDIR)/sm4.Plo \ + ./$(DEPDIR)/stribog.Plo ./$(DEPDIR)/tiger.Plo \ + ./$(DEPDIR)/twofish-aarch64.Plo ./$(DEPDIR)/twofish-amd64.Plo \ + ./$(DEPDIR)/twofish-arm.Plo ./$(DEPDIR)/twofish-avx2-amd64.Plo \ + ./$(DEPDIR)/twofish.Plo ./$(DEPDIR)/whirlpool-sse2-amd64.Plo \ + ./$(DEPDIR)/whirlpool.Plo +am__mv = mv -f +CPPASCOMPILE = $(CCAS) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) \ + $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CCASFLAGS) $(CCASFLAGS) +LTCPPASCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CCAS) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CCASFLAGS) $(CCASFLAGS) +AM_V_CPPAS = $(am__v_CPPAS_@AM_V@) +am__v_CPPAS_ = $(am__v_CPPAS_@AM_DEFAULT_V@) +am__v_CPPAS_0 = @echo " CPPAS " $@; +am__v_CPPAS_1 = +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libcipher_la_SOURCES) $(EXTRA_libcipher_la_SOURCES) +DIST_SOURCES = $(libcipher_la_SOURCES) $(EXTRA_libcipher_la_SOURCES) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +ETAGS = etags +CTAGS = ctags +am__DIST_COMMON = $(srcdir)/Makefile.in \ + $(top_srcdir)/build-aux/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AS = @AS@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BUILD_FILEVERSION = @BUILD_FILEVERSION@ +BUILD_REVISION = @BUILD_REVISION@ +BUILD_TIMESTAMP = @BUILD_TIMESTAMP@ +BUILD_VERSION = @BUILD_VERSION@ +CC = @CC@ +CCAS = @CCAS@ +CCASDEPMODE = @CCASDEPMODE@ +CCASFLAGS = @CCASFLAGS@ +CCDEPMODE = @CCDEPMODE@ +CC_FOR_BUILD = @CC_FOR_BUILD@ +CFLAGS = @CFLAGS@ +CPP = @CPP@ +CPPFLAGS = @CPPFLAGS@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DL_LIBS = @DL_LIBS@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +EXEEXT = @EXEEXT@ +EXEEXT_FOR_BUILD = @EXEEXT_FOR_BUILD@ +FALLBACK_SOCKLEN_T = @FALLBACK_SOCKLEN_T@ +FGREP = @FGREP@ +GCRYPT_CIPHERS = @GCRYPT_CIPHERS@ +GCRYPT_DIGESTS = @GCRYPT_DIGESTS@ +GCRYPT_HWF_MODULES = @GCRYPT_HWF_MODULES@ +GCRYPT_KDFS = @GCRYPT_KDFS@ +GCRYPT_PUBKEY_CIPHERS = @GCRYPT_PUBKEY_CIPHERS@ +GCRYPT_RANDOM = @GCRYPT_RANDOM@ +GPGRT_CONFIG = @GPGRT_CONFIG@ +GPG_ERROR_CFLAGS = @GPG_ERROR_CFLAGS@ +GPG_ERROR_CONFIG = @GPG_ERROR_CONFIG@ +GPG_ERROR_LIBS = @GPG_ERROR_LIBS@ +GPG_ERROR_MT_CFLAGS = @GPG_ERROR_MT_CFLAGS@ +GPG_ERROR_MT_LIBS = @GPG_ERROR_MT_LIBS@ +GREP = @GREP@ +INSERT_SYS_SELECT_H = @INSERT_SYS_SELECT_H@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +LD = @LD@ +LDADD_FOR_TESTS_KLUDGE = @LDADD_FOR_TESTS_KLUDGE@ +LDFLAGS = @LDFLAGS@ +LIBGCRYPT_CIPHERS = @LIBGCRYPT_CIPHERS@ +LIBGCRYPT_CONFIG_API_VERSION = @LIBGCRYPT_CONFIG_API_VERSION@ +LIBGCRYPT_CONFIG_CFLAGS = @LIBGCRYPT_CONFIG_CFLAGS@ +LIBGCRYPT_CONFIG_HOST = @LIBGCRYPT_CONFIG_HOST@ +LIBGCRYPT_CONFIG_LIBS = @LIBGCRYPT_CONFIG_LIBS@ +LIBGCRYPT_DIGESTS = @LIBGCRYPT_DIGESTS@ +LIBGCRYPT_LT_AGE = @LIBGCRYPT_LT_AGE@ +LIBGCRYPT_LT_CURRENT = @LIBGCRYPT_LT_CURRENT@ +LIBGCRYPT_LT_REVISION = @LIBGCRYPT_LT_REVISION@ +LIBGCRYPT_PUBKEY_CIPHERS = @LIBGCRYPT_PUBKEY_CIPHERS@ +LIBGCRYPT_THREAD_MODULES = @LIBGCRYPT_THREAD_MODULES@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MKDIR_P = @MKDIR_P@ +MPI_SFLAGS = @MPI_SFLAGS@ +NM = @NM@ +NMEDIT = @NMEDIT@ +NOEXECSTACK_FLAGS = @NOEXECSTACK_FLAGS@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PTH_CFLAGS = @PTH_CFLAGS@ +PTH_CONFIG = @PTH_CONFIG@ +PTH_LIBS = @PTH_LIBS@ +RANLIB = @RANLIB@ +RC = @RC@ +RUN_LARGE_DATA_TESTS = @RUN_LARGE_DATA_TESTS@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +SYSROOT = @SYSROOT@ +VERSION = @VERSION@ +VERSION_NUMBER = @VERSION_NUMBER@ +abs_builddir = @abs_builddir@ +abs_srcdir = 
@abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +emacs_local_vars_begin = @emacs_local_vars_begin@ +emacs_local_vars_end = @emacs_local_vars_end@ +emacs_local_vars_read_only = @emacs_local_vars_read_only@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ + +# Need to include ../src in addition to top_srcdir because gcrypt.h is +# a built header. +AM_CPPFLAGS = -I../src -I$(top_srcdir)/src -I../mpi -I$(top_srcdir)/mpi +AM_CFLAGS = $(GPG_ERROR_CFLAGS) +AM_CCASFLAGS = $(NOEXECSTACK_FLAGS) +EXTRA_DIST = gost-s-box.c +CLEANFILES = gost-s-box +DISTCLEANFILES = gost-sb.h +noinst_LTLIBRARIES = libcipher.la +GCRYPT_MODULES = @GCRYPT_CIPHERS@ @GCRYPT_PUBKEY_CIPHERS@ \ + @GCRYPT_DIGESTS@ @GCRYPT_KDFS@ + +libcipher_la_DEPENDENCIES = $(GCRYPT_MODULES) +libcipher_la_LIBADD = $(GCRYPT_MODULES) +libcipher_la_SOURCES = \ + cipher.c cipher-internal.h \ + cipher-cbc.c \ + cipher-cfb.c \ + cipher-ofb.c \ + cipher-ctr.c \ + cipher-aeswrap.c \ + cipher-ccm.c \ + cipher-cmac.c \ + cipher-gcm.c cipher-gcm-intel-pclmul.c cipher-gcm-armv7-neon.S \ + cipher-gcm-armv8-aarch32-ce.S cipher-gcm-armv8-aarch64-ce.S \ + cipher-poly1305.c \ + cipher-ocb.c \ + cipher-xts.c \ + cipher-eax.c \ + cipher-selftest.c cipher-selftest.h \ + pubkey.c pubkey-internal.h pubkey-util.c \ + md.c \ + mac.c mac-internal.h \ + mac-hmac.c mac-cmac.c mac-gmac.c mac-poly1305.c \ + poly1305.c poly1305-internal.h \ + poly1305-s390x.S \ + kdf.c kdf-internal.h \ + bithelp.h \ + bufhelp.h \ + primegen.c \ + hash-common.c hash-common.h \ + dsa-common.c rsa-common.c \ + sha1.h + +EXTRA_libcipher_la_SOURCES = \ + asm-common-aarch64.h \ + asm-common-amd64.h \ + asm-common-s390x.h \ + asm-inline-s390x.h \ + asm-poly1305-aarch64.h \ + asm-poly1305-amd64.h \ + asm-poly1305-s390x.h \ + arcfour.c arcfour-amd64.S \ + blowfish.c blowfish-amd64.S blowfish-arm.S \ + cast5.c cast5-amd64.S cast5-arm.S \ + chacha20.c chacha20-amd64-ssse3.S chacha20-amd64-avx2.S \ + chacha20-armv7-neon.S chacha20-aarch64.S \ + chacha20-ppc.c chacha20-s390x.S \ + crc.c crc-intel-pclmul.c crc-armv8-ce.c \ + crc-armv8-aarch64-ce.S \ + crc-ppc.c \ + des.c des-amd64.S \ + dsa.c \ + elgamal.c \ + ecc.c ecc-curves.c ecc-misc.c ecc-common.h \ + ecc-ecdh.c ecc-ecdsa.c ecc-eddsa.c ecc-gost.c ecc-sm2.c \ + idea.c \ + gost28147.c gost.h \ + gostr3411-94.c \ + md4.c \ + md5.c \ 
+ rijndael.c rijndael-internal.h rijndael-tables.h \ + rijndael-aesni.c rijndael-padlock.c \ + rijndael-amd64.S rijndael-arm.S \ + rijndael-ssse3-amd64.c rijndael-ssse3-amd64-asm.S \ + rijndael-armv8-ce.c rijndael-armv8-aarch32-ce.S \ + rijndael-armv8-aarch64-ce.S rijndael-aarch64.S \ + rijndael-ppc.c rijndael-ppc9le.c \ + rijndael-ppc-common.h rijndael-ppc-functions.h \ + rijndael-s390x.c \ + rmd160.c \ + rsa.c \ + salsa20.c salsa20-amd64.S salsa20-armv7-neon.S \ + scrypt.c \ + seed.c \ + serpent.c serpent-sse2-amd64.S \ + sm4.c sm4-aesni-avx-amd64.S sm4-aesni-avx2-amd64.S \ + serpent-avx2-amd64.S serpent-armv7-neon.S \ + sha1.c sha1-ssse3-amd64.S sha1-avx-amd64.S sha1-avx-bmi2-amd64.S \ + sha1-avx2-bmi2-amd64.S sha1-armv7-neon.S sha1-armv8-aarch32-ce.S \ + sha1-armv8-aarch64-ce.S sha1-intel-shaext.c \ + sha256.c sha256-ssse3-amd64.S sha256-avx-amd64.S \ + sha256-avx2-bmi2-amd64.S \ + sha256-armv8-aarch32-ce.S sha256-armv8-aarch64-ce.S \ + sha256-intel-shaext.c sha256-ppc.c \ + sha512.c sha512-ssse3-amd64.S sha512-avx-amd64.S \ + sha512-avx2-bmi2-amd64.S \ + sha512-armv7-neon.S sha512-arm.S \ + sha512-ppc.c sha512-ssse3-i386.c \ + sm3.c \ + keccak.c keccak_permute_32.h keccak_permute_64.h keccak-armv7-neon.S \ + stribog.c \ + tiger.c \ + whirlpool.c whirlpool-sse2-amd64.S \ + twofish.c twofish-amd64.S twofish-arm.S twofish-aarch64.S \ + twofish-avx2-amd64.S \ + rfc2268.c \ + camellia.c camellia.h camellia-glue.c camellia-aesni-avx-amd64.S \ + camellia-aesni-avx2-amd64.S camellia-arm.S camellia-aarch64.S \ + blake2.c \ + blake2b-amd64-avx2.S blake2s-amd64-avx.S + +@ENABLE_O_FLAG_MUNGING_FALSE@o_flag_munging = cat +@ENABLE_O_FLAG_MUNGING_TRUE@o_flag_munging = sed -e 's/-O\([2-9sg][2-9sg]*\)/-O1/' -e 's/-Ofast/-O1/g' +@ENABLE_INSTRUMENTATION_MUNGING_FALSE@instrumentation_munging = cat + +# We need to disable instrumentation for these modules as they use cc as +# thin assembly front-end and do not tolerate in-between function calls +# inserted by compiler as those functions may clobber the XMM registers. +@ENABLE_INSTRUMENTATION_MUNGING_TRUE@instrumentation_munging = sed \ +@ENABLE_INSTRUMENTATION_MUNGING_TRUE@ -e 's/-fsanitize[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ +@ENABLE_INSTRUMENTATION_MUNGING_TRUE@ -e 's/-fprofile[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' \ +@ENABLE_INSTRUMENTATION_MUNGING_TRUE@ -e 's/-fcoverage[=,\-][=,a-z,A-Z,0-9,\,,\-]*//g' + +@ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_FALSE@ppc_vcrypto_cflags = +@ENABLE_PPC_VCRYPTO_EXTRA_CFLAGS_TRUE@ppc_vcrypto_cflags = -maltivec -mvsx -mcrypto +all: all-am + +.SUFFIXES: +.SUFFIXES: .S .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu cipher/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu cipher/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' 
in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libcipher.la: $(libcipher_la_OBJECTS) $(libcipher_la_DEPENDENCIES) $(EXTRA_libcipher_la_DEPENDENCIES) + $(AM_V_CCLD)$(LINK) $(libcipher_la_OBJECTS) $(libcipher_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arcfour-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/arcfour.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blake2.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blake2b-amd64-avx2.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blake2s-amd64-avx.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/blowfish.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-aarch64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-aesni-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia-glue.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/camellia.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cast5.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-aarch64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-amd64-avx2.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-amd64-ssse3.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-ppc.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20-s390x.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/chacha20.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-aeswrap.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cbc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ccm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cfb.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-cmac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ctr.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-eax.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-gcm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ocb.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-ofb.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-poly1305.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-selftest.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher-xts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cipher.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-armv8-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-intel-pclmul.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc-ppc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/crc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/des-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/des.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dsa-common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dsa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-curves.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-ecdh.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-ecdsa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-eddsa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-gost.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-misc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc-sm2.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ecc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/elgamal.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gost28147.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/gostr3411-94.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/hash-common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/idea.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/kdf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keccak-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/keccak.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-cmac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-gmac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-hmac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac-poly1305.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/mac.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md4.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/md5.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poly1305-s390x.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/poly1305.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/primegen.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pubkey-util.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pubkey.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rfc2268.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-aarch64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-aesni.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-armv8-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-padlock.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ppc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ppc9le.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-s390x.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael-ssse3-amd64.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rijndael.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rmd160.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rsa-common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rsa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/salsa20.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scrypt.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/seed.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent-sse2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/serpent.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-avx2-bmi2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-intel-shaext.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1-ssse3-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha1.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-intel-shaext.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-ppc.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256-ssse3-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha256.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-armv7-neon.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-ppc.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-ssse3-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512-ssse3-i386.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sha512.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sm3.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sm4-aesni-avx-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sm4-aesni-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sm4.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stribog.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/tiger.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-aarch64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-arm.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish-avx2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/twofish.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/whirlpool-sse2-amd64.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/whirlpool.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.S.o: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ $< + +.S.obj: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(CPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(CPPASCOMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.S.lo: +@am__fastdepCCAS_TRUE@ $(AM_V_CPPAS)$(LTCPPASCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCCAS_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCCAS_FALSE@ DEPDIR=$(DEPDIR) $(CCASDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCCAS_FALSE@ $(AM_V_CPPAS@am__nodep@)$(LTCPPASCOMPILE) -c -o $@ $< + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ 
$(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + -test -z "$(DISTCLEANFILES)" || rm -f $(DISTCLEANFILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." +clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/arcfour-amd64.Plo + -rm -f ./$(DEPDIR)/arcfour.Plo + -rm -f ./$(DEPDIR)/blake2.Plo + -rm -f ./$(DEPDIR)/blake2b-amd64-avx2.Plo + -rm -f ./$(DEPDIR)/blake2s-amd64-avx.Plo + -rm -f ./$(DEPDIR)/blowfish-amd64.Plo + -rm -f ./$(DEPDIR)/blowfish-arm.Plo + -rm -f ./$(DEPDIR)/blowfish.Plo + -rm -f ./$(DEPDIR)/camellia-aarch64.Plo + -rm -f ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo + -rm -f ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/camellia-arm.Plo + -rm -f ./$(DEPDIR)/camellia-glue.Plo + -rm -f ./$(DEPDIR)/camellia.Plo + -rm -f ./$(DEPDIR)/cast5-amd64.Plo + -rm -f ./$(DEPDIR)/cast5-arm.Plo + -rm -f ./$(DEPDIR)/cast5.Plo + -rm -f ./$(DEPDIR)/chacha20-aarch64.Plo + -rm -f ./$(DEPDIR)/chacha20-amd64-avx2.Plo + -rm -f ./$(DEPDIR)/chacha20-amd64-ssse3.Plo + -rm -f ./$(DEPDIR)/chacha20-armv7-neon.Plo + -rm -f ./$(DEPDIR)/chacha20-ppc.Plo + -rm -f ./$(DEPDIR)/chacha20-s390x.Plo + -rm -f ./$(DEPDIR)/chacha20.Plo + -rm -f ./$(DEPDIR)/cipher-aeswrap.Plo + -rm -f ./$(DEPDIR)/cipher-cbc.Plo + -rm -f ./$(DEPDIR)/cipher-ccm.Plo + -rm -f ./$(DEPDIR)/cipher-cfb.Plo + -rm -f ./$(DEPDIR)/cipher-cmac.Plo + -rm -f ./$(DEPDIR)/cipher-ctr.Plo + -rm -f ./$(DEPDIR)/cipher-eax.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv7-neon.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo + -rm -f ./$(DEPDIR)/cipher-gcm.Plo + -rm -f ./$(DEPDIR)/cipher-ocb.Plo + -rm -f ./$(DEPDIR)/cipher-ofb.Plo + -rm -f ./$(DEPDIR)/cipher-poly1305.Plo + -rm -f ./$(DEPDIR)/cipher-selftest.Plo + -rm -f ./$(DEPDIR)/cipher-xts.Plo + -rm -f ./$(DEPDIR)/cipher.Plo + -rm -f ./$(DEPDIR)/crc-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/crc-armv8-ce.Plo + -rm -f ./$(DEPDIR)/crc-intel-pclmul.Plo + -rm -f ./$(DEPDIR)/crc-ppc.Plo + -rm -f ./$(DEPDIR)/crc.Plo + -rm -f ./$(DEPDIR)/des-amd64.Plo + -rm -f ./$(DEPDIR)/des.Plo + -rm -f ./$(DEPDIR)/dsa-common.Plo + -rm -f ./$(DEPDIR)/dsa.Plo + -rm -f 
./$(DEPDIR)/ecc-curves.Plo + -rm -f ./$(DEPDIR)/ecc-ecdh.Plo + -rm -f ./$(DEPDIR)/ecc-ecdsa.Plo + -rm -f ./$(DEPDIR)/ecc-eddsa.Plo + -rm -f ./$(DEPDIR)/ecc-gost.Plo + -rm -f ./$(DEPDIR)/ecc-misc.Plo + -rm -f ./$(DEPDIR)/ecc-sm2.Plo + -rm -f ./$(DEPDIR)/ecc.Plo + -rm -f ./$(DEPDIR)/elgamal.Plo + -rm -f ./$(DEPDIR)/gost28147.Plo + -rm -f ./$(DEPDIR)/gostr3411-94.Plo + -rm -f ./$(DEPDIR)/hash-common.Plo + -rm -f ./$(DEPDIR)/idea.Plo + -rm -f ./$(DEPDIR)/kdf.Plo + -rm -f ./$(DEPDIR)/keccak-armv7-neon.Plo + -rm -f ./$(DEPDIR)/keccak.Plo + -rm -f ./$(DEPDIR)/mac-cmac.Plo + -rm -f ./$(DEPDIR)/mac-gmac.Plo + -rm -f ./$(DEPDIR)/mac-hmac.Plo + -rm -f ./$(DEPDIR)/mac-poly1305.Plo + -rm -f ./$(DEPDIR)/mac.Plo + -rm -f ./$(DEPDIR)/md.Plo + -rm -f ./$(DEPDIR)/md4.Plo + -rm -f ./$(DEPDIR)/md5.Plo + -rm -f ./$(DEPDIR)/poly1305-s390x.Plo + -rm -f ./$(DEPDIR)/poly1305.Plo + -rm -f ./$(DEPDIR)/primegen.Plo + -rm -f ./$(DEPDIR)/pubkey-util.Plo + -rm -f ./$(DEPDIR)/pubkey.Plo + -rm -f ./$(DEPDIR)/rfc2268.Plo + -rm -f ./$(DEPDIR)/rijndael-aarch64.Plo + -rm -f ./$(DEPDIR)/rijndael-aesni.Plo + -rm -f ./$(DEPDIR)/rijndael-amd64.Plo + -rm -f ./$(DEPDIR)/rijndael-arm.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-padlock.Plo + -rm -f ./$(DEPDIR)/rijndael-ppc.Plo + -rm -f ./$(DEPDIR)/rijndael-ppc9le.Plo + -rm -f ./$(DEPDIR)/rijndael-s390x.Plo + -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo + -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/rijndael.Plo + -rm -f ./$(DEPDIR)/rmd160.Plo + -rm -f ./$(DEPDIR)/rsa-common.Plo + -rm -f ./$(DEPDIR)/rsa.Plo + -rm -f ./$(DEPDIR)/salsa20-amd64.Plo + -rm -f ./$(DEPDIR)/salsa20-armv7-neon.Plo + -rm -f ./$(DEPDIR)/salsa20.Plo + -rm -f ./$(DEPDIR)/scrypt.Plo + -rm -f ./$(DEPDIR)/seed.Plo + -rm -f ./$(DEPDIR)/serpent-armv7-neon.Plo + -rm -f ./$(DEPDIR)/serpent-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/serpent-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/serpent.Plo + -rm -f ./$(DEPDIR)/sha1-armv7-neon.Plo + -rm -f ./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/sha1-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-intel-shaext.Plo + -rm -f ./$(DEPDIR)/sha1-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha1.Plo + -rm -f ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/sha256-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha256-intel-shaext.Plo + -rm -f ./$(DEPDIR)/sha256-ppc.Plo + -rm -f ./$(DEPDIR)/sha256-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha256.Plo + -rm -f ./$(DEPDIR)/sha512-arm.Plo + -rm -f ./$(DEPDIR)/sha512-armv7-neon.Plo + -rm -f ./$(DEPDIR)/sha512-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-ppc.Plo + -rm -f ./$(DEPDIR)/sha512-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-ssse3-i386.Plo + -rm -f ./$(DEPDIR)/sha512.Plo + -rm -f ./$(DEPDIR)/sm3.Plo + -rm -f ./$(DEPDIR)/sm4-aesni-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sm4-aesni-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/sm4.Plo + -rm -f ./$(DEPDIR)/stribog.Plo + -rm -f ./$(DEPDIR)/tiger.Plo + -rm -f ./$(DEPDIR)/twofish-aarch64.Plo + -rm -f ./$(DEPDIR)/twofish-amd64.Plo + -rm -f ./$(DEPDIR)/twofish-arm.Plo + -rm -f ./$(DEPDIR)/twofish-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/twofish.Plo + -rm -f ./$(DEPDIR)/whirlpool-sse2-amd64.Plo + -rm -f 
./$(DEPDIR)/whirlpool.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/arcfour-amd64.Plo + -rm -f ./$(DEPDIR)/arcfour.Plo + -rm -f ./$(DEPDIR)/blake2.Plo + -rm -f ./$(DEPDIR)/blake2b-amd64-avx2.Plo + -rm -f ./$(DEPDIR)/blake2s-amd64-avx.Plo + -rm -f ./$(DEPDIR)/blowfish-amd64.Plo + -rm -f ./$(DEPDIR)/blowfish-arm.Plo + -rm -f ./$(DEPDIR)/blowfish.Plo + -rm -f ./$(DEPDIR)/camellia-aarch64.Plo + -rm -f ./$(DEPDIR)/camellia-aesni-avx-amd64.Plo + -rm -f ./$(DEPDIR)/camellia-aesni-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/camellia-arm.Plo + -rm -f ./$(DEPDIR)/camellia-glue.Plo + -rm -f ./$(DEPDIR)/camellia.Plo + -rm -f ./$(DEPDIR)/cast5-amd64.Plo + -rm -f ./$(DEPDIR)/cast5-arm.Plo + -rm -f ./$(DEPDIR)/cast5.Plo + -rm -f ./$(DEPDIR)/chacha20-aarch64.Plo + -rm -f ./$(DEPDIR)/chacha20-amd64-avx2.Plo + -rm -f ./$(DEPDIR)/chacha20-amd64-ssse3.Plo + -rm -f ./$(DEPDIR)/chacha20-armv7-neon.Plo + -rm -f ./$(DEPDIR)/chacha20-ppc.Plo + -rm -f ./$(DEPDIR)/chacha20-s390x.Plo + -rm -f ./$(DEPDIR)/chacha20.Plo + -rm -f ./$(DEPDIR)/cipher-aeswrap.Plo + -rm -f ./$(DEPDIR)/cipher-cbc.Plo + -rm -f ./$(DEPDIR)/cipher-ccm.Plo + -rm -f ./$(DEPDIR)/cipher-cfb.Plo + -rm -f ./$(DEPDIR)/cipher-cmac.Plo + -rm -f ./$(DEPDIR)/cipher-ctr.Plo + -rm -f ./$(DEPDIR)/cipher-eax.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv7-neon.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/cipher-gcm-intel-pclmul.Plo + -rm -f ./$(DEPDIR)/cipher-gcm.Plo + -rm -f ./$(DEPDIR)/cipher-ocb.Plo + -rm -f ./$(DEPDIR)/cipher-ofb.Plo + -rm -f ./$(DEPDIR)/cipher-poly1305.Plo + -rm -f ./$(DEPDIR)/cipher-selftest.Plo + -rm -f ./$(DEPDIR)/cipher-xts.Plo + -rm -f ./$(DEPDIR)/cipher.Plo + -rm -f ./$(DEPDIR)/crc-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/crc-armv8-ce.Plo + -rm -f ./$(DEPDIR)/crc-intel-pclmul.Plo + -rm -f ./$(DEPDIR)/crc-ppc.Plo + -rm -f ./$(DEPDIR)/crc.Plo + -rm -f ./$(DEPDIR)/des-amd64.Plo + -rm -f ./$(DEPDIR)/des.Plo + -rm -f ./$(DEPDIR)/dsa-common.Plo + -rm -f ./$(DEPDIR)/dsa.Plo + -rm -f ./$(DEPDIR)/ecc-curves.Plo + -rm -f ./$(DEPDIR)/ecc-ecdh.Plo + -rm -f ./$(DEPDIR)/ecc-ecdsa.Plo + -rm -f ./$(DEPDIR)/ecc-eddsa.Plo + -rm -f ./$(DEPDIR)/ecc-gost.Plo + -rm -f ./$(DEPDIR)/ecc-misc.Plo + -rm -f ./$(DEPDIR)/ecc-sm2.Plo + -rm -f ./$(DEPDIR)/ecc.Plo + -rm -f ./$(DEPDIR)/elgamal.Plo + -rm -f ./$(DEPDIR)/gost28147.Plo + -rm -f ./$(DEPDIR)/gostr3411-94.Plo + -rm -f ./$(DEPDIR)/hash-common.Plo + -rm -f ./$(DEPDIR)/idea.Plo + -rm -f ./$(DEPDIR)/kdf.Plo + -rm -f ./$(DEPDIR)/keccak-armv7-neon.Plo + -rm -f ./$(DEPDIR)/keccak.Plo + -rm -f ./$(DEPDIR)/mac-cmac.Plo + -rm -f ./$(DEPDIR)/mac-gmac.Plo + -rm -f ./$(DEPDIR)/mac-hmac.Plo + -rm -f ./$(DEPDIR)/mac-poly1305.Plo + -rm -f ./$(DEPDIR)/mac.Plo + -rm -f ./$(DEPDIR)/md.Plo + -rm -f ./$(DEPDIR)/md4.Plo + -rm -f ./$(DEPDIR)/md5.Plo + -rm -f ./$(DEPDIR)/poly1305-s390x.Plo + -rm -f ./$(DEPDIR)/poly1305.Plo + -rm -f ./$(DEPDIR)/primegen.Plo + -rm -f ./$(DEPDIR)/pubkey-util.Plo + -rm -f ./$(DEPDIR)/pubkey.Plo + -rm -f 
./$(DEPDIR)/rfc2268.Plo + -rm -f ./$(DEPDIR)/rijndael-aarch64.Plo + -rm -f ./$(DEPDIR)/rijndael-aesni.Plo + -rm -f ./$(DEPDIR)/rijndael-amd64.Plo + -rm -f ./$(DEPDIR)/rijndael-arm.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-armv8-ce.Plo + -rm -f ./$(DEPDIR)/rijndael-padlock.Plo + -rm -f ./$(DEPDIR)/rijndael-ppc.Plo + -rm -f ./$(DEPDIR)/rijndael-ppc9le.Plo + -rm -f ./$(DEPDIR)/rijndael-s390x.Plo + -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64-asm.Plo + -rm -f ./$(DEPDIR)/rijndael-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/rijndael.Plo + -rm -f ./$(DEPDIR)/rmd160.Plo + -rm -f ./$(DEPDIR)/rsa-common.Plo + -rm -f ./$(DEPDIR)/rsa.Plo + -rm -f ./$(DEPDIR)/salsa20-amd64.Plo + -rm -f ./$(DEPDIR)/salsa20-armv7-neon.Plo + -rm -f ./$(DEPDIR)/salsa20.Plo + -rm -f ./$(DEPDIR)/scrypt.Plo + -rm -f ./$(DEPDIR)/seed.Plo + -rm -f ./$(DEPDIR)/serpent-armv7-neon.Plo + -rm -f ./$(DEPDIR)/serpent-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/serpent-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/serpent.Plo + -rm -f ./$(DEPDIR)/sha1-armv7-neon.Plo + -rm -f ./$(DEPDIR)/sha1-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/sha1-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/sha1-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-avx-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha1-intel-shaext.Plo + -rm -f ./$(DEPDIR)/sha1-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha1.Plo + -rm -f ./$(DEPDIR)/sha256-armv8-aarch32-ce.Plo + -rm -f ./$(DEPDIR)/sha256-armv8-aarch64-ce.Plo + -rm -f ./$(DEPDIR)/sha256-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha256-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha256-intel-shaext.Plo + -rm -f ./$(DEPDIR)/sha256-ppc.Plo + -rm -f ./$(DEPDIR)/sha256-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha256.Plo + -rm -f ./$(DEPDIR)/sha512-arm.Plo + -rm -f ./$(DEPDIR)/sha512-armv7-neon.Plo + -rm -f ./$(DEPDIR)/sha512-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-avx2-bmi2-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-ppc.Plo + -rm -f ./$(DEPDIR)/sha512-ssse3-amd64.Plo + -rm -f ./$(DEPDIR)/sha512-ssse3-i386.Plo + -rm -f ./$(DEPDIR)/sha512.Plo + -rm -f ./$(DEPDIR)/sm3.Plo + -rm -f ./$(DEPDIR)/sm4-aesni-avx-amd64.Plo + -rm -f ./$(DEPDIR)/sm4-aesni-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/sm4.Plo + -rm -f ./$(DEPDIR)/stribog.Plo + -rm -f ./$(DEPDIR)/tiger.Plo + -rm -f ./$(DEPDIR)/twofish-aarch64.Plo + -rm -f ./$(DEPDIR)/twofish-amd64.Plo + -rm -f ./$(DEPDIR)/twofish-arm.Plo + -rm -f ./$(DEPDIR)/twofish-avx2-amd64.Plo + -rm -f ./$(DEPDIR)/twofish.Plo + -rm -f ./$(DEPDIR)/whirlpool-sse2-amd64.Plo + -rm -f ./$(DEPDIR)/whirlpool.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean 
mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +gost28147.lo: gost-sb.h +gost-sb.h: gost-s-box + ./gost-s-box $@ + +gost-s-box: gost-s-box.c + $(CC_FOR_BUILD) $(CFLAGS_FOR_BUILD) $(LDFLAGS_FOR_BUILD) \ + $(CPPFLAGS_FOR_BUILD)-o $@ $(srcdir)/gost-s-box.c + +# We need to lower the optimization for this module. +tiger.o: $(srcdir)/tiger.c Makefile + `echo $(COMPILE) -c $< | $(o_flag_munging) ` + +tiger.lo: $(srcdir)/tiger.c Makefile + `echo $(LTCOMPILE) -c $< | $(o_flag_munging) ` + +rijndael-aesni.o: $(srcdir)/rijndael-aesni.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-aesni.lo: $(srcdir)/rijndael-aesni.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-ssse3-amd64.o: $(srcdir)/rijndael-ssse3-amd64.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-ssse3-amd64.lo: $(srcdir)/rijndael-ssse3-amd64.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +cipher-gcm-intel-pclmul.o: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +cipher-gcm-intel-pclmul.lo: $(srcdir)/cipher-gcm-intel-pclmul.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +sha1-intel-shaext.o: $(srcdir)/sha1-intel-shaext.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +sha1-intel-shaext.lo: $(srcdir)/sha1-intel-shaext.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +sha256-intel-shaext.o: $(srcdir)/sha256-intel-shaext.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +sha256-intel-shaext.lo: $(srcdir)/sha256-intel-shaext.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +sha256-ssse3-i386.o: $(srcdir)/sha256-ssse3-i386.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +sha256-ssse3-i386.lo: $(srcdir)/sha256-ssse3-i386.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +crc-intel-pclmul.o: $(srcdir)/crc-intel-pclmul.c Makefile + `echo $(COMPILE) -c $< | $(instrumentation_munging) ` + +crc-intel-pclmul.lo: $(srcdir)/crc-intel-pclmul.c Makefile + `echo $(LTCOMPILE) -c $< | $(instrumentation_munging) ` + +rijndael-ppc.o: $(srcdir)/rijndael-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +rijndael-ppc.lo: $(srcdir)/rijndael-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +rijndael-ppc9le.o: $(srcdir)/rijndael-ppc9le.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +rijndael-ppc9le.lo: $(srcdir)/rijndael-ppc9le.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +sha256-ppc.o: $(srcdir)/sha256-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +sha256-ppc.lo: $(srcdir)/sha256-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +sha512-ppc.o: $(srcdir)/sha512-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +sha512-ppc.lo: $(srcdir)/sha512-ppc.c Makefile + `echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +chacha20-ppc.o: $(srcdir)/chacha20-ppc.c Makefile + `echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) ` + +chacha20-ppc.lo: $(srcdir)/chacha20-ppc.c Makefile + `echo 
$(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+crc-ppc.o: $(srcdir)/crc-ppc.c Makefile
+	`echo $(COMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+crc-ppc.lo: $(srcdir)/crc-ppc.c Makefile
+	`echo $(LTCOMPILE) $(ppc_vcrypto_cflags) -c $< | $(instrumentation_munging) `
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/comm/third_party/libgcrypt/cipher/arcfour-amd64.S b/comm/third_party/libgcrypt/cipher/arcfour-amd64.S
new file mode 100644
index 0000000000..221dfeff77
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/arcfour-amd64.S
@@ -0,0 +1,108 @@
+/*
+** RC4 implementation optimized for AMD64.
+**
+** Author: Marc Bevand <bevand_m (at) epita.fr>
+** Licence: I hereby disclaim the copyright on this code and place it
+** in the public domain.
+**
+** The throughput achieved by this code is about 320 MBytes/sec, on
+** a 1.8 GHz AMD Opteron (rev C0) processor.
+**
+** 2013/12/20 <jussi.kivilinna@iki.fi>:
+** - Integrated to libgcrypt
+** - 4.18 cycles/byte on Intel i5-4570
+*/
+
+#ifdef __x86_64__
+#include <config.h>
+#if defined(USE_ARCFOUR) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+.align 16
+.globl _gcry_arcfour_amd64
+ELF(.type _gcry_arcfour_amd64,@function)
+_gcry_arcfour_amd64:
+	CFI_STARTPROC()
+	ENTER_SYSV_FUNC_PARAMS_0_4
+	push %rbp
+	CFI_PUSH(%rbp)
+	push %rbx
+	CFI_PUSH(%rbx)
+	mov %rdi, %rbp			# key = ARG(key)
+	mov %rsi, %rbx			# rbx = ARG(len)
+	mov %rdx, %rsi			# in = ARG(in)
+	mov %rcx, %rdi			# out = ARG(out)
+	mov (4*256)(%rbp), %ecx		# x = key->x
+	mov (4*256+4)(%rbp),%edx	# y = key->y
+	inc %rcx			# x++
+	and $255, %rcx			# x &= 0xff
+	lea -8(%rbx,%rsi), %rbx		# rbx = in+len-8
+	mov %rbx, %r9			# tmp = in+len-8
+	mov (%rbp,%rcx,4), %eax		# tx = d[x]
+	cmp %rsi, %rbx			# cmp in with in+len-8
+	jl .Lend			# jump if (in+len-8 < in)
+
+.Lstart:
+	add $8, %rsi			# increment in
+	add $8, %rdi			# increment out
+
+	# generate the next 8 bytes of the rc4 stream into %r8
+	mov $8, %r11			# byte counter
+1:	add %al, %dl			# y += tx
+	mov (%rbp,%rdx,4), %ebx		# ty = d[y]
+	mov %ebx, (%rbp,%rcx,4)		# d[x] = ty
+	add %al, %bl			# val = ty + tx
+	mov %eax, (%rbp,%rdx,4)		# d[y] = tx
+	inc %cl				# x++ (NEXT ROUND)
+	mov (%rbp,%rcx,4), %eax		# tx = d[x] (NEXT ROUND)
+	shl $8, %r8
+	movb (%rbp,%rbx,4), %r8b	# val = d[val]
+	dec %r11b
+	jnz 1b
+
+	# xor 8 bytes
+	bswap %r8
+	xor -8(%rsi), %r8
+	cmp %r9, %rsi			# cmp in+len-8 with in
+	mov %r8, -8(%rdi)
+	jle .Lstart			# jump if (in <= in+len-8)
+
+.Lend:
+	add $8, %r9			# tmp = in+len
+
+	# handle the last bytes, one by one
+1:	cmp %rsi, %r9			# cmp in with in+len
+	jle .Lfinished			# jump if (in+len <= in)
+	add %al, %dl			# y += tx
+	mov (%rbp,%rdx,4), %ebx		# ty = d[y]
+	mov %ebx, (%rbp,%rcx,4)		# d[x] = ty
+	add %al, %bl			# val = ty + tx
+	mov %eax, (%rbp,%rdx,4)		# d[y] = tx
+	inc %cl				# x++ (NEXT ROUND)
+	mov (%rbp,%rcx,4), %eax		# tx = d[x] (NEXT ROUND)
+	movb (%rbp,%rbx,4), %r8b	# val = d[val]
+	xor (%rsi), %r8b		# xor 1 byte
+	movb %r8b, (%rdi)
+	inc %rsi			# in++
+	inc %rdi			# out++
+	jmp 1b
+
+.Lfinished:
+	dec %rcx			# x--
+	movb %cl, (4*256)(%rbp)		# key->x = x
+	movb %dl, (4*256+4)(%rbp)	# key->y = y
+	pop %rbx
+	CFI_POP(%rbx)
+	pop %rbp
+	CFI_POP(%rbp)
+	EXIT_SYSV_FUNC
+	ret
+	CFI_ENDPROC()
+.L__gcry_arcfour_amd64_end:
+ELF(.size _gcry_arcfour_amd64,.L__gcry_arcfour_amd64_end-_gcry_arcfour_amd64)
+
+#endif
+#endif
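The byte-at-a-time tail loop above, and the C fallback in arcfour.c that follows, both implement the classic RC4 key schedule and PRGA; the assembly earns its speed by additionally batching eight keystream bytes into %r8 and XORing them as one quadword. For readers following the tx/ty/val comments, here is a minimal standalone sketch (not part of libgcrypt or this patch) of the same key schedule and per-byte update step, checked against the "State/Commerce Department" test vector used by the selftest in arcfour.c below:

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* RC4 key schedule (KSA), mirroring do_arcfour_setkey() below:
   initialize the S-box to the identity, then scramble it with the
   key repeated to 256 bytes. */
static void
rc4_setkey (uint8_t sbox[256], const uint8_t *key, size_t keylen)
{
  size_t i;
  uint8_t j = 0;              /* wraps mod 256 automatically */
  for (i = 0; i < 256; i++)
    sbox[i] = (uint8_t)i;
  for (i = 0; i < 256; i++)
    {
      uint8_t t;
      j += sbox[i] + key[i % keylen];
      t = sbox[i]; sbox[i] = sbox[j]; sbox[j] = t;
    }
}

/* One PRGA step per byte: the same tx/ty/val dance as the assembly's
   inner loop (there, each S-box entry is widened to 32 bits). */
static void
rc4_crypt (uint8_t sbox[256], uint8_t *x, uint8_t *y,
           uint8_t *out, const uint8_t *in, size_t len)
{
  uint8_t i = *x, j = *y;
  while (len--)
    {
      uint8_t tx, ty;
      i++;                    /* x++                   */
      tx = sbox[i];           /* tx = d[x]             */
      j += tx;                /* y += tx               */
      ty = sbox[j];           /* ty = d[y]             */
      sbox[i] = ty;           /* d[x] = ty             */
      sbox[j] = tx;           /* d[y] = tx             */
      *out++ = sbox[(uint8_t)(tx + ty)] ^ *in++;  /* d[val] ^ input */
    }
  *x = i; *y = j;
}

int
main (void)
{
  /* Test vector from selftest() in arcfour.c below. */
  static const uint8_t key[5]    = { 0x61, 0x8A, 0x63, 0xD2, 0xFB };
  static const uint8_t plain[5]  = { 0xDC, 0xEE, 0x4C, 0xF9, 0x2C };
  static const uint8_t cipher[5] = { 0xF1, 0x38, 0x29, 0xC9, 0xDE };
  uint8_t sbox[256], x = 0, y = 0, out[5];

  rc4_setkey (sbox, key, sizeof key);
  rc4_crypt (sbox, &x, &y, out, plain, sizeof plain);
  printf ("%s\n", memcmp (out, cipher, sizeof out) == 0 ? "OK" : "FAIL");
  return 0;
}
```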
b/comm/third_party/libgcrypt/cipher/arcfour.c
new file mode 100644
index 0000000000..353de00bd7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/arcfour.c
@@ -0,0 +1,216 @@
+/* arcfour.c - The arcfour stream cipher
+ * Copyright (C) 2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * For a description of the algorithm, see:
+ *   Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
+ *   ISBN 0-471-11709-9. Pages 397 ff.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "cipher-internal.h"
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+static const char *selftest(void);
+
+#ifdef USE_AMD64_ASM
+
+typedef struct {
+  u32 sbox[256];
+  u32 idx_i, idx_j;
+} ARCFOUR_context;
+
+void _gcry_arcfour_amd64(void *key, size_t len, const byte *indata,
+                         byte *outdata);
+
+static void
+encrypt_stream (void *context,
+                byte *outbuf, const byte *inbuf, size_t length)
+{
+  _gcry_arcfour_amd64 (context, length, inbuf, outbuf );
+}
+
+#else /*!USE_AMD64_ASM*/
+
+typedef struct {
+  byte sbox[256];
+  int idx_i, idx_j;
+} ARCFOUR_context;
+
+static void
+do_encrypt_stream( ARCFOUR_context *ctx,
+                   byte *outbuf, const byte *inbuf, size_t length )
+{
+#ifndef __i386__
+  register unsigned int i = ctx->idx_i;
+  register byte j = ctx->idx_j;
+  register byte *sbox = ctx->sbox;
+  register byte t, u;
+
+  while ( length-- )
+    {
+      i++;
+      t = sbox[(byte)i];
+      j += t;
+      u = sbox[j];
+      sbox[(byte)i] = u;
+      u += t;
+      sbox[j] = t;
+      *outbuf++ = sbox[u] ^ *inbuf++;
+    }
+
+  ctx->idx_i = (byte)i;
+  ctx->idx_j = (byte)j;
+#else /*__i386__*/
+  /* Old implementation of arcfour is faster on i386 than the version above.
+   * This is because version above increases register pressure which on i386
+   * would push some of the variables to memory/stack.  Therefore keep this
+   * version for i386 to avoid regressing performance.  */
+  register int i = ctx->idx_i;
+  register int j = ctx->idx_j;
+  register byte *sbox = ctx->sbox;
+  register int t;
+
+  while ( length-- )
+    {
+      i++;
+      i = i & 255; /* The and-op seems to be faster than the mod-op.
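
For orientation: the ARCFOUR spec registered at the bottom of this file is normally driven through libgcrypt's generic cipher API rather than called directly. A minimal usage sketch; error handling is elided, and key, in, out and len are assumed to be provided by the caller:

#include <gcrypt.h>

// RC4 keystream encryption through the generic API.  ARCFOUR/RC4 is
// cryptographically broken and retained only for legacy interoperability.
gcry_cipher_hd_t hd;
gcry_cipher_open (&hd, GCRY_CIPHER_ARCFOUR, GCRY_CIPHER_MODE_STREAM, 0);
gcry_cipher_setkey (hd, key, 16);             // at least 40 bits, enforced below
gcry_cipher_encrypt (hd, out, len, in, len);  // decryption is the same operation
gcry_cipher_close (hd);
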
*/ + j += sbox[i]; + j &= 255; + t = sbox[i]; sbox[i] = sbox[j]; sbox[j] = t; + *outbuf++ = *inbuf++ ^ sbox[(sbox[i] + sbox[j]) & 255]; + } + + ctx->idx_i = i; + ctx->idx_j = j; +#endif +} + +static void +encrypt_stream (void *context, + byte *outbuf, const byte *inbuf, size_t length) +{ + ARCFOUR_context *ctx = (ARCFOUR_context *) context; + do_encrypt_stream (ctx, outbuf, inbuf, length ); + _gcry_burn_stack (64); +} + +#endif /*!USE_AMD64_ASM*/ + + +static gcry_err_code_t +do_arcfour_setkey (void *context, const byte *key, unsigned int keylen) +{ + static int initialized; + static const char* selftest_failed; + int i, j; + byte karr[256]; + ARCFOUR_context *ctx = (ARCFOUR_context *) context; + + if (!initialized ) + { + initialized = 1; + selftest_failed = selftest(); + if( selftest_failed ) + log_error ("ARCFOUR selftest failed (%s)\n", selftest_failed ); + } + if( selftest_failed ) + return GPG_ERR_SELFTEST_FAILED; + + if( keylen < 40/8 ) /* we want at least 40 bits */ + return GPG_ERR_INV_KEYLEN; + + ctx->idx_i = ctx->idx_j = 0; + for (i=0; i < 256; i++ ) + ctx->sbox[i] = i; + for (i=j=0; i < 256; i++,j++ ) + { + if (j >= keylen) + j = 0; + karr[i] = key[j]; + } + for (i=j=0; i < 256; i++ ) + { + int t; + j = (j + ctx->sbox[i] + karr[i]) & 255; + t = ctx->sbox[i]; + ctx->sbox[i] = ctx->sbox[j]; + ctx->sbox[j] = t; + } + wipememory( karr, sizeof(karr) ); + + return GPG_ERR_NO_ERROR; +} + +static gcry_err_code_t +arcfour_setkey ( void *context, const byte *key, unsigned int keylen, + cipher_bulk_ops_t *bulk_ops ) +{ + ARCFOUR_context *ctx = (ARCFOUR_context *) context; + gcry_err_code_t rc = do_arcfour_setkey (ctx, key, keylen ); + (void)bulk_ops; + return rc; +} + + +static const char* +selftest(void) +{ + ARCFOUR_context ctx; + byte scratch[16]; + + /* Test vector from Cryptlib labeled there: "from the + State/Commerce Department". */ + static const byte key_1[] = + { 0x61, 0x8A, 0x63, 0xD2, 0xFB }; + static const byte plaintext_1[] = + { 0xDC, 0xEE, 0x4C, 0xF9, 0x2C }; + static const byte ciphertext_1[] = + { 0xF1, 0x38, 0x29, 0xC9, 0xDE }; + + arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL); + encrypt_stream( &ctx, scratch, plaintext_1, sizeof(plaintext_1)); + if ( memcmp (scratch, ciphertext_1, sizeof (ciphertext_1))) + return "Arcfour encryption test 1 failed."; + arcfour_setkey( &ctx, key_1, sizeof(key_1), NULL); + encrypt_stream(&ctx, scratch, scratch, sizeof(plaintext_1)); /* decrypt */ + if ( memcmp (scratch, plaintext_1, sizeof (plaintext_1))) + return "Arcfour decryption test 1 failed."; + return NULL; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_arcfour = + { + GCRY_CIPHER_ARCFOUR, {0, 0}, + "ARCFOUR", NULL, NULL, 1, 128, sizeof (ARCFOUR_context), + arcfour_setkey, NULL, NULL, encrypt_stream, encrypt_stream, + }; diff --git a/comm/third_party/libgcrypt/cipher/asm-common-aarch64.h b/comm/third_party/libgcrypt/cipher/asm-common-aarch64.h new file mode 100644 index 0000000000..cf0afe1f87 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/asm-common-aarch64.h @@ -0,0 +1,104 @@ +/* asm-common-aarch64.h - Common macros for AArch64 assembly + * + * Copyright (C) 2018 Martin Storsjö + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_AARCH64_H +#define GCRY_ASM_COMMON_AARCH64_H + +#include + +#ifdef HAVE_GCC_ASM_ELF_DIRECTIVES +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#ifdef __APPLE__ +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name@GOTPAGE ; \ + add reg, reg, name@GOTPAGEOFF ; +#elif defined(_WIN32) +#define GET_DATA_POINTER(reg, name) \ + adrp reg, name ; \ + add reg, reg, #:lo12:name ; +#else +#define GET_DATA_POINTER(reg, name) \ + adrp reg, :got:name ; \ + ldr reg, [reg, #:got_lo12:name] ; +#endif + +#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES +/* CFI directives to emit DWARF stack unwinding information. */ +# define CFI_STARTPROC() .cfi_startproc +# define CFI_ENDPROC() .cfi_endproc +# define CFI_REMEMBER_STATE() .cfi_remember_state +# define CFI_RESTORE_STATE() .cfi_restore_state +# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off +# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off +# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg +# define CFI_REGISTER(ro,rn) .cfi_register ro, rn +# define CFI_RESTORE(reg) .cfi_restore reg + +/* CFA expressions are used for pointing CFA and registers to + * SP relative offsets. */ +# define DW_REGNO_SP 31 + +/* Fixed length encoding used for integers for now. */ +# define DW_SLEB128_7BIT(value) \ + 0x00|((value) & 0x7f) +# define DW_SLEB128_28BIT(value) \ + 0x80|((value)&0x7f), \ + 0x80|(((value)>>7)&0x7f), \ + 0x80|(((value)>>14)&0x7f), \ + 0x00|(((value)>>21)&0x7f) + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \ + .cfi_escape \ + 0x0f, /* DW_CFA_def_cfa_expression */ \ + DW_SLEB128_7BIT(11), /* length */ \ + 0x8f, /* DW_OP_breg31, rsp + constant */ \ + DW_SLEB128_28BIT(rsp_offs), \ + 0x06, /* DW_OP_deref */ \ + 0x23, /* DW_OP_plus_constu */ \ + DW_SLEB128_28BIT((cfa_depth)+8) + +# define CFI_REG_ON_STACK(regno,rsp_offs) \ + .cfi_escape \ + 0x10, /* DW_CFA_expression */ \ + DW_SLEB128_7BIT(regno), \ + DW_SLEB128_7BIT(5), /* length */ \ + 0x8f, /* DW_OP_breg31, rsp + constant */ \ + DW_SLEB128_28BIT(rsp_offs) + +#else +# define CFI_STARTPROC() +# define CFI_ENDPROC() +# define CFI_REMEMBER_STATE() +# define CFI_RESTORE_STATE() +# define CFI_ADJUST_CFA_OFFSET(off) +# define CFI_REL_OFFSET(reg,off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_REGISTER(ro,rn) +# define CFI_RESTORE(reg) + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) +# define CFI_REG_ON_STACK(reg,rsp_offs) +#endif + +#endif /* GCRY_ASM_COMMON_AARCH64_H */ diff --git a/comm/third_party/libgcrypt/cipher/asm-common-amd64.h b/comm/third_party/libgcrypt/cipher/asm-common-amd64.h new file mode 100644 index 0000000000..9d4a028a04 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/asm-common-amd64.h @@ -0,0 +1,189 @@ +/* asm-common-amd64.h - Common macros for AMD64 assembly + * + * Copyright (C) 2018 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_AMD64_H +#define GCRY_ASM_COMMON_AMD64_H + +#include + +#ifdef HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#ifdef __PIC__ +# define rRIP (%rip) +#else +# define rRIP +#endif + +#ifdef __PIC__ +# define RIP %rip +#else +# define RIP +#endif + +#ifdef __PIC__ +# define ADD_RIP +rip +#else +# define ADD_RIP +#endif + +#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) || !defined(__PIC__) +# define GET_EXTERN_POINTER(name, reg) movabsq $name, reg +#else +# ifdef __code_model_large__ +# define GET_EXTERN_POINTER(name, reg) \ + pushq %r15; \ + pushq %r14; \ + 1: leaq 1b(%rip), reg; \ + movabsq $_GLOBAL_OFFSET_TABLE_-1b, %r14; \ + movabsq $name@GOT, %r15; \ + addq %r14, reg; \ + popq %r14; \ + movq (reg, %r15), reg; \ + popq %r15; +# else +# define GET_EXTERN_POINTER(name, reg) movq name@GOTPCREL(%rip), reg +# endif +#endif + +#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES +/* CFI directives to emit DWARF stack unwinding information. */ +# define CFI_STARTPROC() .cfi_startproc +# define CFI_ENDPROC() .cfi_endproc +# define CFI_REMEMBER_STATE() .cfi_remember_state +# define CFI_RESTORE_STATE() .cfi_restore_state +# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off +# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off +# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg +# define CFI_REGISTER(ro,rn) .cfi_register ro, rn +# define CFI_RESTORE(reg) .cfi_restore reg + +# define CFI_PUSH(reg) \ + CFI_ADJUST_CFA_OFFSET(8); CFI_REL_OFFSET(reg, 0) +# define CFI_POP(reg) \ + CFI_ADJUST_CFA_OFFSET(-8); CFI_RESTORE(reg) +# define CFI_POP_TMP_REG() \ + CFI_ADJUST_CFA_OFFSET(-8); +# define CFI_LEAVE() \ + CFI_ADJUST_CFA_OFFSET(-8); CFI_DEF_CFA_REGISTER(%rsp) + +/* CFA expressions are used for pointing CFA and registers to + * %rsp relative offsets. */ +# define DW_REGNO_rax 0 +# define DW_REGNO_rdx 1 +# define DW_REGNO_rcx 2 +# define DW_REGNO_rbx 3 +# define DW_REGNO_rsi 4 +# define DW_REGNO_rdi 5 +# define DW_REGNO_rbp 6 +# define DW_REGNO_rsp 7 +# define DW_REGNO_r8 8 +# define DW_REGNO_r9 9 +# define DW_REGNO_r10 10 +# define DW_REGNO_r11 11 +# define DW_REGNO_r12 12 +# define DW_REGNO_r13 13 +# define DW_REGNO_r14 14 +# define DW_REGNO_r15 15 + +# define DW_REGNO(reg) DW_REGNO_ ## reg + +/* Fixed length encoding used for integers for now. 
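
The DW_SLEB128_* helpers that follow emit LEB128 integers at a fixed byte width, so the .cfi_escape sequences below can declare constant length fields. A standalone C rendering of the 28-bit form; the function name is illustrative, not part of this header:

#include <stdint.h>

// Fixed 4-byte little-endian base-128 encoding: 7 payload bits per byte,
// continuation bit 0x80 on every byte except the last.
static void dw_leb128_28bit (uint32_t value, uint8_t out[4])
{
  out[0] = 0x80 | (value & 0x7f);
  out[1] = 0x80 | ((value >> 7) & 0x7f);
  out[2] = 0x80 | ((value >> 14) & 0x7f);
  out[3] = (value >> 21) & 0x7f;  // last byte: continuation bit clear
}

// Example: 300 (0x12c) encodes as 0xac 0x82 0x80 0x00.
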
*/ +# define DW_SLEB128_7BIT(value) \ + 0x00|((value) & 0x7f) +# define DW_SLEB128_28BIT(value) \ + 0x80|((value)&0x7f), \ + 0x80|(((value)>>7)&0x7f), \ + 0x80|(((value)>>14)&0x7f), \ + 0x00|(((value)>>21)&0x7f) + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \ + .cfi_escape \ + 0x0f, /* DW_CFA_def_cfa_expression */ \ + DW_SLEB128_7BIT(11), /* length */ \ + 0x77, /* DW_OP_breg7, rsp + constant */ \ + DW_SLEB128_28BIT(rsp_offs), \ + 0x06, /* DW_OP_deref */ \ + 0x23, /* DW_OP_plus_constu */ \ + DW_SLEB128_28BIT((cfa_depth)+8) + +# define CFI_REG_ON_STACK(reg,rsp_offs) \ + .cfi_escape \ + 0x10, /* DW_CFA_expression */ \ + DW_SLEB128_7BIT(DW_REGNO(reg)), \ + DW_SLEB128_7BIT(5), /* length */ \ + 0x77, /* DW_OP_breg7, rsp + constant */ \ + DW_SLEB128_28BIT(rsp_offs) + +#else +# define CFI_STARTPROC() +# define CFI_ENDPROC() +# define CFI_REMEMBER_STATE() +# define CFI_RESTORE_STATE() +# define CFI_ADJUST_CFA_OFFSET(off) +# define CFI_REL_OFFSET(reg,off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_REGISTER(ro,rn) +# define CFI_RESTORE(reg) + +# define CFI_PUSH(reg) +# define CFI_POP(reg) +# define CFI_POP_TMP_REG() +# define CFI_LEAVE() + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) +# define CFI_REG_ON_STACK(reg,rsp_offs) +#endif + +#ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS +# define ENTER_SYSV_FUNC_PARAMS_0_4 \ + pushq %rdi; \ + CFI_PUSH(%rdi); \ + pushq %rsi; \ + CFI_PUSH(%rsi); \ + movq %rcx, %rdi; \ + movq %rdx, %rsi; \ + movq %r8, %rdx; \ + movq %r9, %rcx; \ + +# define ENTER_SYSV_FUNC_PARAMS_5 \ + ENTER_SYSV_FUNC_PARAMS_0_4; \ + movq 0x38(%rsp), %r8; + +# define ENTER_SYSV_FUNC_PARAMS_6 \ + ENTER_SYSV_FUNC_PARAMS_5; \ + movq 0x40(%rsp), %r9; + +# define EXIT_SYSV_FUNC \ + popq %rsi; \ + CFI_POP(%rsi); \ + popq %rdi; \ + CFI_POP(%rdi); +#else +# define ENTER_SYSV_FUNC_PARAMS_0_4 +# define ENTER_SYSV_FUNC_PARAMS_5 +# define ENTER_SYSV_FUNC_PARAMS_6 +# define EXIT_SYSV_FUNC +#endif + +#endif /* GCRY_ASM_COMMON_AMD64_H */ diff --git a/comm/third_party/libgcrypt/cipher/asm-common-s390x.h b/comm/third_party/libgcrypt/cipher/asm-common-s390x.h new file mode 100644 index 0000000000..b3a996cd6e --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/asm-common-s390x.h @@ -0,0 +1,90 @@ +/* asm-common-s390x.h - Common macros for zSeries assembly + * + * Copyright (C) 2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_COMMON_S390X_H +#define GCRY_ASM_COMMON_S390X_H + +#include + +#ifdef HAVE_GCC_ASM_ELF_DIRECTIVES +# define ELF(...) __VA_ARGS__ +#else +# define ELF(...) /*_*/ +#endif + +#ifdef HAVE_GCC_ASM_CFI_DIRECTIVES +/* CFI directives to emit DWARF stack unwinding information. 
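
A note on the 0x38 and 0x40 offsets in ENTER_SYSV_FUNC_PARAMS_5 and _6 above: they follow from the Win64 calling convention once the two extra pushes are taken into account. At entry the fifth argument sits at 0x28(%rsp), above the return address and the caller-allocated 32-byte shadow space; pushing %rdi and %rsi adds another 16 bytes:

    0x00(%rsp)   saved %rsi
    0x08(%rsp)   saved %rdi
    0x10(%rsp)   return address
    0x18-0x37    32-byte shadow space allocated by the caller
    0x38(%rsp)   5th argument, moved to %r8
    0x40(%rsp)   6th argument, moved to %r9
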
*/ +# define CFI_STARTPROC() .cfi_startproc +# define CFI_ENDPROC() .cfi_endproc +# define CFI_REMEMBER_STATE() .cfi_remember_state +# define CFI_RESTORE_STATE() .cfi_restore_state +# define CFI_ADJUST_CFA_OFFSET(off) .cfi_adjust_cfa_offset off +# define CFI_REL_OFFSET(reg,off) .cfi_rel_offset reg, off +# define CFI_DEF_CFA_REGISTER(reg) .cfi_def_cfa_register reg +# define CFI_REGISTER(ro,rn) .cfi_register ro, rn +# define CFI_RESTORE(reg) .cfi_restore reg + +/* CFA expressions are used for pointing CFA and registers to + * SP relative offsets. */ +# define DW_REGNO_SP 15 + +/* Fixed length encoding used for integers for now. */ +# define DW_SLEB128_7BIT(value) \ + 0x00|((value) & 0x7f) +# define DW_SLEB128_28BIT(value) \ + 0x80|((value)&0x7f), \ + 0x80|(((value)>>7)&0x7f), \ + 0x80|(((value)>>14)&0x7f), \ + 0x00|(((value)>>21)&0x7f) + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) \ + .cfi_escape \ + 0x0f, /* DW_CFA_def_cfa_expression */ \ + DW_SLEB128_7BIT(11), /* length */ \ + 0x7f, /* DW_OP_breg15, rsp + constant */ \ + DW_SLEB128_28BIT(rsp_offs), \ + 0x06, /* DW_OP_deref */ \ + 0x23, /* DW_OP_plus_constu */ \ + DW_SLEB128_28BIT((cfa_depth)+160) + +# define CFI_REG_ON_STACK(regno,rsp_offs) \ + .cfi_escape \ + 0x10, /* DW_CFA_expression */ \ + DW_SLEB128_7BIT(regno), \ + DW_SLEB128_7BIT(5), /* length */ \ + 0x7f, /* DW_OP_breg15, rsp + constant */ \ + DW_SLEB128_28BIT(rsp_offs) + +#else +# define CFI_STARTPROC() +# define CFI_ENDPROC() +# define CFI_REMEMBER_STATE() +# define CFI_RESTORE_STATE() +# define CFI_ADJUST_CFA_OFFSET(off) +# define CFI_REL_OFFSET(reg,off) +# define CFI_DEF_CFA_REGISTER(reg) +# define CFI_REGISTER(ro,rn) +# define CFI_RESTORE(reg) + +# define CFI_CFA_ON_STACK(rsp_offs,cfa_depth) +# define CFI_REG_ON_STACK(reg,rsp_offs) +#endif + +#endif /* GCRY_ASM_COMMON_AMD64_H */ diff --git a/comm/third_party/libgcrypt/cipher/asm-inline-s390x.h b/comm/third_party/libgcrypt/cipher/asm-inline-s390x.h new file mode 100644 index 0000000000..bacb45fe2e --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/asm-inline-s390x.h @@ -0,0 +1,157 @@ +/* asm-inline-s390x.h - Common macros for zSeries inline assembly + * + * Copyright (C) 2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
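
One s390x-specific detail above: CFI_CFA_ON_STACK biases the frame depth by 160 (CFA = *(sp + rsp_offs) + cfa_depth + 160), where the AArch64 and AMD64 variants add only 8. This appears to match the 160-byte register save area that the s390x ELF ABI reserves in every stack frame.
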
+ */ + +#ifndef GCRY_ASM_INLINE_S390X_H +#define GCRY_ASM_INLINE_S390X_H + +#include + +#define ALWAYS_INLINE inline __attribute__((always_inline)) + +typedef unsigned int u128_t __attribute__ ((mode (TI))); + +enum kmxx_functions_e +{ + KM_FUNCTION_AES_128 = 18, + KM_FUNCTION_AES_192 = 19, + KM_FUNCTION_AES_256 = 20, + KM_FUNCTION_XTS_AES_128 = 50, + KM_FUNCTION_XTS_AES_256 = 52, + + KMID_FUNCTION_SHA1 = 1, + KMID_FUNCTION_SHA256 = 2, + KMID_FUNCTION_SHA512 = 3, + KMID_FUNCTION_SHA3_224 = 32, + KMID_FUNCTION_SHA3_256 = 33, + KMID_FUNCTION_SHA3_384 = 34, + KMID_FUNCTION_SHA3_512 = 35, + KMID_FUNCTION_SHAKE128 = 36, + KMID_FUNCTION_SHAKE256 = 37, + KMID_FUNCTION_GHASH = 65, +}; + +enum kmxx_function_flags_e +{ + KM_ENCRYPT = 0 << 7, + KM_DECRYPT = 1 << 7, + + KMF_LCFB_16 = 16 << 24, + + KMA_LPC = 1 << 8, + KMA_LAAD = 1 << 9, + KMA_HS = 1 << 10, + + KLMD_PADDING_STATE = 1 << 8, +}; + +static ALWAYS_INLINE u128_t km_function_to_mask(enum kmxx_functions_e func) +{ + return (u128_t)1 << (127 - func); +} + +static inline u128_t kimd_query(void) +{ + static u128_t function_codes = 0; + static int initialized = 0; + register unsigned long reg0 asm("0") = 0; + register void *reg1 asm("1") = &function_codes; + u128_t r1; + + if (initialized) + return function_codes; + + asm volatile ("0: .insn rre,0xb93e << 16, 0, %[r1]\n\t" + " brc 1,0b\n\t" + : [r1] "=a" (r1) + : [reg0] "r" (reg0), [reg1] "r" (reg1) + : "cc", "memory"); + + initialized = 1; + return function_codes; +} + +static inline u128_t klmd_query(void) +{ + static u128_t function_codes = 0; + static int initialized = 0; + register unsigned long reg0 asm("0") = 0; + register void *reg1 asm("1") = &function_codes; + u128_t r1; + + if (initialized) + return function_codes; + + asm volatile ("0: .insn rre,0xb93f << 16, 0, %[r1]\n\t" + " brc 1,0b\n\t" + : [r1] "=a" (r1) + : [reg0] "r" (reg0), [reg1] "r" (reg1) + : "cc", "memory"); + + initialized = 1; + return function_codes; +} + +static ALWAYS_INLINE void +kimd_execute(unsigned int func, void *param_block, const void *src, + size_t src_len) +{ + register unsigned long reg0 asm("0") = func; + register byte *reg1 asm("1") = param_block; + u128_t r1 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len; + + asm volatile ("0: .insn rre,0xb93e << 16, 0, %[r1]\n\t" + " brc 1,0b\n\t" + : [r1] "+a" (r1) + : [func] "r" (reg0), [param_ptr] "r" (reg1) + : "cc", "memory"); +} + +static ALWAYS_INLINE void +klmd_execute(unsigned int func, void *param_block, const void *src, + size_t src_len) +{ + register unsigned long reg0 asm("0") = func; + register byte *reg1 asm("1") = param_block; + u128_t r1 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len; + + asm volatile ("0: .insn rre,0xb93f << 16, 0, %[r1]\n\t" + " brc 1,0b\n\t" + : [func] "+r" (reg0), [r1] "+a" (r1) + : [param_ptr] "r" (reg1) + : "cc", "memory"); +} + +static ALWAYS_INLINE void +klmd_shake_execute(unsigned int func, void *param_block, void *dst, + size_t dst_len, const void *src, size_t src_len) +{ + register unsigned long reg0 asm("0") = func; + register byte *reg1 asm("1") = param_block; + u128_t r1 = ((u128_t)(uintptr_t)dst << 64) | (u64)dst_len; + u128_t r2 = ((u128_t)(uintptr_t)src << 64) | (u64)src_len; + + asm volatile ("0: .insn rre,0xb93f << 16, %[r1], %[r2]\n\t" + " brc 1,0b\n\t" + : [func] "+r" (reg0), [r1] "+a" (r1), [r2] "+a" (r2) + : [param_ptr] "r" (reg1) + : "cc", "memory"); +} + +#endif /* GCRY_ASM_INLINE_S390X_H */ diff --git a/comm/third_party/libgcrypt/cipher/asm-poly1305-aarch64.h 
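
The query and execute wrappers above follow the CPACF convention: a query call (function code 0) fills a 128-bit mask of supported function codes, with bit (127 - code) set when a code is available. A small sketch of how a caller could probe the mask before dispatching; the helper name is illustrative, not part of the header:

// Assumes the kimd_query() and km_function_to_mask() helpers above.
static int have_kimd_sha256 (void)
{
  u128_t codes = kimd_query ();
  return (codes & km_function_to_mask (KMID_FUNCTION_SHA256)) != 0;
}
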
b/comm/third_party/libgcrypt/cipher/asm-poly1305-aarch64.h new file mode 100644 index 0000000000..9009270956 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/asm-poly1305-aarch64.h @@ -0,0 +1,245 @@ +/* asm-common-aarch64.h - Poly1305 macros for ARMv8/AArch64 assembly + * + * Copyright (C) 2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ASM_POLY1305_AARCH64_H +#define GCRY_ASM_POLY1305_AARCH64_H + +#include "asm-common-aarch64.h" + +#ifdef __AARCH64EL__ + #define le_to_host(reg) /*_*/ +#else + #define le_to_host(reg) rev reg, reg; +#endif + +/********************************************************************** + poly1305 for stitched chacha20-poly1305 Aarch64 implementations + **********************************************************************/ + +#define POLY_RSTATE x8 +#define POLY_RSRC x9 + +#define POLY_R_H0 x10 +#define POLY_R_H1 x11 +#define POLY_R_H2 x12 +#define POLY_R_H2d w12 +#define POLY_R_R0 x13 +#define POLY_R_R1 x14 +#define POLY_R_R1_MUL5 x15 +#define POLY_R_X0_HI x16 +#define POLY_R_X0_LO x17 +#define POLY_R_X1_HI x19 +#define POLY_R_X1_LO x20 +#define POLY_R_ONE x21 +#define POLY_R_ONEd w21 + +#define POLY_TMP0 x22 +#define POLY_TMP1 x23 +#define POLY_TMP2 x24 +#define POLY_TMP3 x25 + +#define POLY_CHACHA_ROUND x26 + +#define POLY_S_R0 (4 * 4 + 0 * 8) +#define POLY_S_R1 (4 * 4 + 1 * 8) +#define POLY_S_H0 (4 * 4 + 2 * 8 + 0 * 8) +#define POLY_S_H1 (4 * 4 + 2 * 8 + 1 * 8) +#define POLY_S_H2d (4 * 4 + 2 * 8 + 2 * 8) + +#define POLY1305_PUSH_REGS() \ + stp x19, x20, [sp, #-16]!; \ + CFI_ADJUST_CFA_OFFSET(16); \ + CFI_REG_ON_STACK(19, 0); \ + CFI_REG_ON_STACK(20, 8); \ + stp x21, x22, [sp, #-16]!; \ + CFI_ADJUST_CFA_OFFSET(16); \ + CFI_REG_ON_STACK(21, 0); \ + CFI_REG_ON_STACK(22, 8); \ + stp x23, x24, [sp, #-16]!; \ + CFI_ADJUST_CFA_OFFSET(16); \ + CFI_REG_ON_STACK(23, 0); \ + CFI_REG_ON_STACK(24, 8); \ + stp x25, x26, [sp, #-16]!; \ + CFI_ADJUST_CFA_OFFSET(16); \ + CFI_REG_ON_STACK(25, 0); \ + CFI_REG_ON_STACK(26, 8); + +#define POLY1305_POP_REGS() \ + ldp x25, x26, [sp], #16; \ + CFI_ADJUST_CFA_OFFSET(-16); \ + CFI_RESTORE(x25); \ + CFI_RESTORE(x26); \ + ldp x23, x24, [sp], #16; \ + CFI_ADJUST_CFA_OFFSET(-16); \ + CFI_RESTORE(x23); \ + CFI_RESTORE(x24); \ + ldp x21, x22, [sp], #16; \ + CFI_ADJUST_CFA_OFFSET(-16); \ + CFI_RESTORE(x21); \ + CFI_RESTORE(x22); \ + ldp x19, x20, [sp], #16; \ + CFI_ADJUST_CFA_OFFSET(-16); \ + CFI_RESTORE(x19); \ + CFI_RESTORE(x20); + +#define POLY1305_LOAD_STATE() \ + ldr POLY_R_R1, [POLY_RSTATE, #(POLY_S_R1)]; \ + ldr POLY_R_H0, [POLY_RSTATE, #(POLY_S_H0)]; \ + ldr POLY_R_H1, [POLY_RSTATE, #(POLY_S_H1)]; \ + ldr POLY_R_H2d, [POLY_RSTATE, #(POLY_S_H2d)]; \ + ldr POLY_R_R0, [POLY_RSTATE, #(POLY_S_R0)]; \ + add POLY_R_R1_MUL5, POLY_R_R1, POLY_R_R1, lsr #2; \ + mov POLY_R_ONE, #1; + +#define POLY1305_STORE_STATE() \ + str POLY_R_H0, [POLY_RSTATE, #(POLY_S_H0)]; \ + str 
POLY_R_H1, [POLY_RSTATE, #(POLY_S_H1)]; \ + str POLY_R_H2d, [POLY_RSTATE, #(POLY_S_H2d)]; + +#define POLY1305_BLOCK_PART1(src_offset) \ + /* a = h + m */ \ + ldr POLY_TMP0, [POLY_RSRC, #((src_offset) + 0 * 8)]; +#define POLY1305_BLOCK_PART2(src_offset) \ + ldr POLY_TMP1, [POLY_RSRC, #((src_offset) + 1 * 8)]; +#define POLY1305_BLOCK_PART3() \ + le_to_host(POLY_TMP0); +#define POLY1305_BLOCK_PART4() \ + le_to_host(POLY_TMP1); +#define POLY1305_BLOCK_PART5() \ + adds POLY_R_H0, POLY_R_H0, POLY_TMP0; +#define POLY1305_BLOCK_PART6() \ + adcs POLY_R_H1, POLY_R_H1, POLY_TMP1; +#define POLY1305_BLOCK_PART7() \ + adc POLY_R_H2d, POLY_R_H2d, POLY_R_ONEd; + +#define POLY1305_BLOCK_PART8() \ + /* h = a * r (partial mod 2^130-5): */ \ + mul POLY_R_X1_LO, POLY_R_H0, POLY_R_R1; /* lo: h0 * r1 */ +#define POLY1305_BLOCK_PART9() \ + mul POLY_TMP0, POLY_R_H1, POLY_R_R0; /* lo: h1 * r0 */ +#define POLY1305_BLOCK_PART10() \ + mul POLY_R_X0_LO, POLY_R_H0, POLY_R_R0; /* lo: h0 * r0 */ +#define POLY1305_BLOCK_PART11() \ + umulh POLY_R_X1_HI, POLY_R_H0, POLY_R_R1; /* hi: h0 * r1 */ +#define POLY1305_BLOCK_PART12() \ + adds POLY_R_X1_LO, POLY_R_X1_LO, POLY_TMP0; +#define POLY1305_BLOCK_PART13() \ + umulh POLY_TMP1, POLY_R_H1, POLY_R_R0; /* hi: h1 * r0 */ +#define POLY1305_BLOCK_PART14() \ + mul POLY_TMP2, POLY_R_H1, POLY_R_R1_MUL5; /* lo: h1 * r1 mod 2^130-5 */ +#define POLY1305_BLOCK_PART15() \ + umulh POLY_R_X0_HI, POLY_R_H0, POLY_R_R0; /* hi: h0 * r0 */ +#define POLY1305_BLOCK_PART16() \ + adc POLY_R_X1_HI, POLY_R_X1_HI, POLY_TMP1; +#define POLY1305_BLOCK_PART17() \ + umulh POLY_TMP3, POLY_R_H1, POLY_R_R1_MUL5; /* hi: h1 * r1 mod 2^130-5 */ +#define POLY1305_BLOCK_PART18() \ + adds POLY_R_X0_LO, POLY_R_X0_LO, POLY_TMP2; +#define POLY1305_BLOCK_PART19() \ + mul POLY_R_H1, POLY_R_H2, POLY_R_R1_MUL5; /* h2 * r1 mod 2^130-5 */ +#define POLY1305_BLOCK_PART20() \ + adc POLY_R_X0_HI, POLY_R_X0_HI, POLY_TMP3; +#define POLY1305_BLOCK_PART21() \ + mul POLY_R_H2, POLY_R_H2, POLY_R_R0; /* h2 * r0 */ +#define POLY1305_BLOCK_PART22() \ + adds POLY_R_H1, POLY_R_H1, POLY_R_X1_LO; +#define POLY1305_BLOCK_PART23() \ + adc POLY_R_H0, POLY_R_H2, POLY_R_X1_HI; + +#define POLY1305_BLOCK_PART24() \ + /* carry propagation */ \ + and POLY_R_H2, POLY_R_H0, #3; +#define POLY1305_BLOCK_PART25() \ + lsr POLY_R_H0, POLY_R_H0, #2; +#define POLY1305_BLOCK_PART26() \ + add POLY_R_H0, POLY_R_H0, POLY_R_H0, lsl #2; +#define POLY1305_BLOCK_PART27() \ + adds POLY_R_H0, POLY_R_H0, POLY_R_X0_LO; +#define POLY1305_BLOCK_PART28() \ + adcs POLY_R_H1, POLY_R_H1, POLY_R_X0_HI; +#define POLY1305_BLOCK_PART29() \ + adc POLY_R_H2d, POLY_R_H2d, wzr; + +//#define TESTING_POLY1305_ASM +#ifdef TESTING_POLY1305_ASM +/* for testing only. 
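
The 29 PART macros above spell out one Poly1305 block over two 64-bit key limbs. The step to follow is the reduction: 2^130 = 5 (mod 2^130 - 5), so every r1 cross term is folded down through POLY_R_R1_MUL5 = r1 + (r1 >> 2), which equals (5 * r1) >> 2 exactly because clamping zeroes the low two bits of r1, and the final and-3 / shift-2 / times-5 sequence folds bits 130 and up back into the low limbs. The same flow in portable C, assuming a compiler with unsigned __int128; a sketch for illustration, not the library's actual code path:

#include <stdint.h>

typedef unsigned __int128 u128;

// One Poly1305 block: h = ((h + m + hibit*2^128) * r) mod 2^130 - 5,
// partially reduced so that h[2] stays small.  r0/r1 are the clamped key.
static void poly1305_block (uint64_t h[3], uint64_t r0, uint64_t r1,
                            uint64_t m0, uint64_t m1, uint64_t hibit)
{
  uint64_t s1 = r1 + (r1 >> 2);   // == (5*r1) >> 2, exact after clamping
  u128 a, x0, x1;
  uint64_t h0, h1, h2, u2;

  // a = h + m  (two 64-bit adds with carry, plus the 2^128 pad bit)
  a = (u128)h[0] + m0;
  h0 = (uint64_t)a;
  a = (u128)h[1] + m1 + (uint64_t)(a >> 64);
  h1 = (uint64_t)a;
  h2 = h[2] + hibit + (uint64_t)(a >> 64);

  // h = a * r, collected by column weight; the 2^128 and 2^192 terms
  // are pre-folded through s1
  x0 = (u128)h0 * r0 + (u128)h1 * s1;                  // 2^0 column
  x1 = (u128)h0 * r1 + (u128)h1 * r0 + (u128)h2 * s1;  // 2^64 column
  u2 = (uint64_t)(x1 >> 64) + h2 * r0;                 // 2^128 column

  // carry propagation: 2^130 == 5, so fold (u2 >> 2) back in times 5
  h[2] = u2 & 3;
  x0 += (u128)(u2 >> 2) * 5;
  h[0] = (uint64_t)x0;
  x1 = (u128)(uint64_t)x1 + (uint64_t)(x0 >> 64);
  h[1] = (uint64_t)x1;
  h[2] += (uint64_t)(x1 >> 64);
}

Here x1 plays the role of POLY_R_X1_HI:POLY_R_X1_LO (the 2^64 column) and x0 of POLY_R_X0_HI:POLY_R_X0_LO, mirroring PART8 through PART29.
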
*/ +.align 3 +.globl _gcry_poly1305_aarch64_blocks1 +ELF(.type _gcry_poly1305_aarch64_blocks1,%function;) +_gcry_poly1305_aarch64_blocks1: + /* input: + * x0: poly1305-state + * x1: src + * x2: nblks + */ + CFI_STARTPROC() + POLY1305_PUSH_REGS(); + + mov POLY_RSTATE, x0; + mov POLY_RSRC, x1; + + POLY1305_LOAD_STATE(); + +.L_gcry_poly1305_aarch64_loop1: + POLY1305_BLOCK_PART1(0 * 16); + POLY1305_BLOCK_PART2(0 * 16); + add POLY_RSRC, POLY_RSRC, #16; + POLY1305_BLOCK_PART3(); + POLY1305_BLOCK_PART4(); + POLY1305_BLOCK_PART5(); + POLY1305_BLOCK_PART6(); + POLY1305_BLOCK_PART7(); + POLY1305_BLOCK_PART8(); + POLY1305_BLOCK_PART9(); + POLY1305_BLOCK_PART10(); + POLY1305_BLOCK_PART11(); + POLY1305_BLOCK_PART12(); + POLY1305_BLOCK_PART13(); + POLY1305_BLOCK_PART14(); + POLY1305_BLOCK_PART15(); + POLY1305_BLOCK_PART16(); + POLY1305_BLOCK_PART17(); + POLY1305_BLOCK_PART18(); + POLY1305_BLOCK_PART19(); + POLY1305_BLOCK_PART20(); + POLY1305_BLOCK_PART21(); + POLY1305_BLOCK_PART22(); + POLY1305_BLOCK_PART23(); + POLY1305_BLOCK_PART24(); + POLY1305_BLOCK_PART25(); + POLY1305_BLOCK_PART26(); + POLY1305_BLOCK_PART27(); + POLY1305_BLOCK_PART28(); + POLY1305_BLOCK_PART29(); + + subs x2, x2, #1; + b.ne .L_gcry_poly1305_aarch64_loop1; + + POLY1305_STORE_STATE(); + + mov x0, #0; + + POLY1305_POP_REGS(); + ret; + CFI_ENDPROC() +ELF(.size _gcry_poly1305_aarch64_blocks1, .-_gcry_poly1305_aarch64_blocks1;) +#endif + +#endif /* GCRY_ASM_POLY1305_AARCH64_H */ diff --git a/comm/third_party/libgcrypt/cipher/asm-poly1305-amd64.h b/comm/third_party/libgcrypt/cipher/asm-poly1305-amd64.h new file mode 100644 index 0000000000..3f99ea3e16 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/asm-poly1305-amd64.h @@ -0,0 +1,171 @@ +/* asm-common-amd64.h - Poly1305 macros for AMD64 assembly + * + * Copyright (C) 2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#ifndef GCRY_ASM_POLY1305_AMD64_H +#define GCRY_ASM_POLY1305_AMD64_H + +#include "asm-common-amd64.h" + +/********************************************************************** + poly1305 for stitched chacha20-poly1305 AMD64 implementations + **********************************************************************/ + +#define POLY_RSTATE %r8 +#define POLY_RSRC %r9 + +#define POLY_R_H0 %rbx +#define POLY_R_H1 %rcx +#define POLY_R_H2 %r10 +#define POLY_R_H2d %r10d +#define POLY_R_R0 %r11 +#define POLY_R_R1_MUL5 %r12 +#define POLY_R_X0_HI %r13 +#define POLY_R_X0_LO %r14 +#define POLY_R_X1_HI %r15 +#define POLY_R_X1_LO %rsi + +#define POLY_S_R0 (4 * 4 + 0 * 8)(POLY_RSTATE) +#define POLY_S_R1 (4 * 4 + 1 * 8)(POLY_RSTATE) +#define POLY_S_H0 (4 * 4 + 2 * 8 + 0 * 8)(POLY_RSTATE) +#define POLY_S_H1 (4 * 4 + 2 * 8 + 1 * 8)(POLY_RSTATE) +#define POLY_S_H2d (4 * 4 + 2 * 8 + 2 * 8)(POLY_RSTATE) + +#define POLY1305_LOAD_STATE() \ + movq POLY_S_H0, POLY_R_H0; \ + movq POLY_S_H1, POLY_R_H1; \ + movl POLY_S_H2d, POLY_R_H2d; \ + movq POLY_S_R0, POLY_R_R0; \ + movq POLY_S_R1, POLY_R_R1_MUL5; \ + shrq $2, POLY_R_R1_MUL5; \ + addq POLY_S_R1, POLY_R_R1_MUL5; + +#define POLY1305_STORE_STATE() \ + movq POLY_R_H0, POLY_S_H0; \ + movq POLY_R_H1, POLY_S_H1; \ + movl POLY_R_H2d, POLY_S_H2d; + +/* a = h + m */ +#define POLY1305_BLOCK_PART1(src_offset) \ + addq ((src_offset) + 0 * 8)(POLY_RSRC), POLY_R_H0; \ + adcq ((src_offset) + 1 * 8)(POLY_RSRC), POLY_R_H1; \ + adcl $1, POLY_R_H2d; \ + \ + /* h = a * r (partial mod 2^130-5): */ \ + \ + /* h0 * r1 */ \ + movq POLY_R_H0, %rax; \ + mulq POLY_S_R1; \ + movq %rax, POLY_R_X1_LO; \ + movq %rdx, POLY_R_X1_HI; + +#define POLY1305_BLOCK_PART2() \ + \ + /* h0 * r0 */ \ + movq POLY_R_H0, %rax; \ + mulq POLY_R_R0; \ + movq %rax, POLY_R_X0_LO; \ + movq %rdx, POLY_R_X0_HI; + +#define POLY1305_BLOCK_PART3() \ + \ + /* h1 * r0 */ \ + movq POLY_R_H1, %rax; \ + mulq POLY_R_R0; \ + addq %rax, POLY_R_X1_LO; \ + adcq %rdx, POLY_R_X1_HI; \ + \ + /* h1 * r1 mod 2^130-5 */ \ + movq POLY_R_R1_MUL5, %rax; \ + mulq POLY_R_H1; + +#define POLY1305_BLOCK_PART4() \ + movq POLY_R_H2, POLY_R_H1; \ + imulq POLY_R_R1_MUL5, POLY_R_H1; /* h2 * r1 mod 2^130-5 */ \ + addq %rax, POLY_R_X0_LO; \ + adcq %rdx, POLY_R_X0_HI; \ + imulq POLY_R_R0, POLY_R_H2; /* h2 * r0 */ \ + addq POLY_R_X1_LO, POLY_R_H1; \ + adcq POLY_R_X1_HI, POLY_R_H2; + +#define POLY1305_BLOCK_PART5() \ + \ + /* carry propagation */ \ + movq POLY_R_H2, POLY_R_H0; \ + andl $3, POLY_R_H2d; \ + shrq $2, POLY_R_H0; \ + leaq (POLY_R_H0, POLY_R_H0, 4), POLY_R_H0; \ + addq POLY_R_X0_LO, POLY_R_H0; \ + adcq POLY_R_X0_HI, POLY_R_H1; \ + adcl $0, POLY_R_H2d; + +#ifdef TESTING_POLY1305_ASM +/* for testing only, mixed C/asm poly1305.c is marginally faster (~2%). 
*/ +.align 8 +.globl _gcry_poly1305_amd64_ssse3_blocks1 +ELF(.type _gcry_poly1305_amd64_ssse3_blocks1,@function;) + +_gcry_poly1305_amd64_ssse3_blocks1: + /* input: + * %rdi: poly1305-state + * %rsi: src + * %rdx: nblks + */ + pushq %rbp; + movq %rsp, %rbp; + + subq $(10 * 8), %rsp; + movq %rbx, (1 * 8)(%rsp); + movq %r12, (2 * 8)(%rsp); + movq %r13, (3 * 8)(%rsp); + movq %r14, (4 * 8)(%rsp); + movq %r15, (5 * 8)(%rsp); + + movq %rdx, (8 * 8)(%rsp); # NBLKS + + movq %rdi, POLY_RSTATE; + movq %rsi, POLY_RSRC; + + POLY1305_LOAD_STATE(); + +.L_poly1: + POLY1305_BLOCK_PART1(0 * 16); + POLY1305_BLOCK_PART2(); + POLY1305_BLOCK_PART3(); + POLY1305_BLOCK_PART4(); + POLY1305_BLOCK_PART5(); + + subq $1, (8 * 8)(%rsp); # NBLKS + leaq (16)(POLY_RSRC), POLY_RSRC; + jnz .L_poly1; + + POLY1305_STORE_STATE(); + + movq (1 * 8)(%rsp), %rbx; + movq (2 * 8)(%rsp), %r12; + movq (3 * 8)(%rsp), %r13; + movq (4 * 8)(%rsp), %r14; + movq (5 * 8)(%rsp), %r15; + + xorl %eax, %eax; + leave + ret; +#endif + +#endif /* GCRY_ASM_POLY1305_AMD64_H */ diff --git a/comm/third_party/libgcrypt/cipher/asm-poly1305-s390x.h b/comm/third_party/libgcrypt/cipher/asm-poly1305-s390x.h new file mode 100644 index 0000000000..113ab94913 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/asm-poly1305-s390x.h @@ -0,0 +1,140 @@ +/* asm-common-amd64.h - Poly1305 macros for zSeries assembly + * + * Copyright (C) 2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#ifndef GCRY_ASM_POLY1305_S390X_H +#define GCRY_ASM_POLY1305_S390X_H + +#include "asm-common-s390x.h" + +/********************************************************************** + poly1305 for stitched chacha20-poly1305 + **********************************************************************/ + +#define POLY_RSTATE %r1 +#define POLY_RSRC %r14 + +#define POLY_R_H0_TMP_HI %r6 // even- +#define POLY_R_H0 %r7 // odd pair +#define POLY_R_H1_TMP_HI %r8 // even- +#define POLY_R_H1 %r9 // odd pair +#define POLY_R_H2 %r10 +#define POLY_R_R0 %r11 +#define POLY_R_R1 %r12 +#define POLY_R_R1_MUL5 %r13 +#define POLY_R_X0_HI %r2 // even- +#define POLY_R_X0_LO %r3 // odd pair +#define POLY_R_X1_HI %r4 // even- +#define POLY_R_X1_LO %r5 // odd pair + +#define POLY_S_R0 (4 * 4 + 0 * 8)(POLY_RSTATE) +#define POLY_S_R1 (4 * 4 + 1 * 8)(POLY_RSTATE) +#define POLY_S_H0 (4 * 4 + 2 * 8 + 0 * 8)(POLY_RSTATE) +#define POLY_S_H1 (4 * 4 + 2 * 8 + 1 * 8)(POLY_RSTATE) +#define POLY_S_H2d (4 * 4 + 2 * 8 + 2 * 8)(POLY_RSTATE) + +#define INC_POLY1305_SRC(a) \ + aghi POLY_RSRC, (a); + +#define POLY1305_LOAD_STATE() \ + lg POLY_R_H0, POLY_S_H0; \ + lg POLY_R_H1, POLY_S_H1; \ + llgf POLY_R_H2, POLY_S_H2d; \ + rllg POLY_R_H0, POLY_R_H0, 32; \ + rllg POLY_R_H1, POLY_R_H1, 32; \ + lg POLY_R_R0, POLY_S_R0; \ + lg POLY_R_R1, POLY_S_R1; \ + rllg POLY_R_R0, POLY_R_R0, 32; \ + rllg POLY_R_R1, POLY_R_R1, 32; \ + srlg POLY_R_R1_MUL5, POLY_R_R1, 2; \ + algr POLY_R_R1_MUL5, POLY_R_R1; + +#define POLY1305_STORE_STATE() \ + rllg POLY_R_H0, POLY_R_H0, 32; \ + rllg POLY_R_H1, POLY_R_H1, 32; \ + stg POLY_R_H0, POLY_S_H0; \ + stg POLY_R_H1, POLY_S_H1; \ + st POLY_R_H2, POLY_S_H2d; + +/* a = h + m */ +#define POLY1305_BLOCK_PART1_HB(src_offset, high_pad) \ + lrvg POLY_R_X0_HI, ((src_offset) + 1 * 8)(POLY_RSRC); \ + lrvg POLY_R_X0_LO, ((src_offset) + 0 * 8)(POLY_RSRC); \ + lghi POLY_R_H1_TMP_HI, (high_pad); + +#define POLY1305_BLOCK_PART1(src_offset) \ + POLY1305_BLOCK_PART1_HB(src_offset, 1); + +#define POLY1305_BLOCK_PART2() \ + algr POLY_R_H0, POLY_R_X0_LO; \ + alcgr POLY_R_H1, POLY_R_X0_HI; \ + alcgr POLY_R_H2, POLY_R_H1_TMP_HI; \ + lgr POLY_R_X1_LO, POLY_R_H0; \ + lgr POLY_R_X0_LO, POLY_R_H0; + +#define POLY1305_BLOCK_PART3() \ + /* h = a * r (partial mod 2^130-5): */ \ + \ + /* h0 * r1 */ \ + mlgr POLY_R_X1_HI, POLY_R_R1; \ + \ + /* h1 * r0 */ \ + lgr POLY_R_H0, POLY_R_H1; \ + mlgr POLY_R_H0_TMP_HI, POLY_R_R0; \ + \ + /* h1 * r1 mod 2^130-5 */ \ + mlgr POLY_R_H1_TMP_HI, POLY_R_R1_MUL5; + +#define POLY1305_BLOCK_PART4() \ + \ + /* h0 * r0 */ \ + mlgr POLY_R_X0_HI, POLY_R_R0; \ + \ + algr POLY_R_X1_LO, POLY_R_H0; \ + alcgr POLY_R_X1_HI, POLY_R_H0_TMP_HI; \ + \ + lgr POLY_R_H0_TMP_HI, POLY_R_H2; \ + msgr POLY_R_H0_TMP_HI, POLY_R_R1_MUL5; /* h2 * r1 mod 2^130-5 */ \ + msgr POLY_R_H2, POLY_R_R0; /* h2 * r0 */ + +#define POLY1305_BLOCK_PART5() \ + \ + algr POLY_R_X0_LO, POLY_R_H1; \ + alcgr POLY_R_X0_HI, POLY_R_H1_TMP_HI; + +#define POLY1305_BLOCK_PART6() \ + \ + algrk POLY_R_H1, POLY_R_H0_TMP_HI, POLY_R_X1_LO; \ + alcgr POLY_R_H2, POLY_R_X1_HI; + +#define POLY1305_BLOCK_PART7() \ + \ + /* carry propagation */ \ + srlg POLY_R_H0, POLY_R_H2, 2; \ + risbgn POLY_R_X1_LO, POLY_R_H2, 0, 0x80 | 61, 0; \ + lghi POLY_R_H1_TMP_HI, 0; \ + agr POLY_R_H0, POLY_R_X1_LO; \ + risbgn POLY_R_H2, POLY_R_H2, 62, 0x80 | 63, 0; + +#define POLY1305_BLOCK_PART8() \ + algr POLY_R_H0, POLY_R_X0_LO; \ + alcgr POLY_R_H1, POLY_R_X0_HI; \ + alcgr POLY_R_H2, POLY_R_H1_TMP_HI; + +#endif /* GCRY_ASM_POLY1305_AMD64_H */ diff --git 
a/comm/third_party/libgcrypt/cipher/bithelp.h b/comm/third_party/libgcrypt/cipher/bithelp.h new file mode 100644 index 0000000000..7793ce7ca3 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/bithelp.h @@ -0,0 +1,123 @@ +/* bithelp.h - Some bit manipulation helpers + * Copyright (C) 1999, 2002 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ +#ifndef GCRYPT_BITHELP_H +#define GCRYPT_BITHELP_H + +#include "types.h" + + +/**************** + * Rotate the 32 bit unsigned integer X by N bits left/right + */ +static inline u32 rol(u32 x, int n) +{ + return ( (x << (n&(32-1))) | (x >> ((32-n)&(32-1))) ); +} + +static inline u32 ror(u32 x, int n) +{ + return ( (x >> (n&(32-1))) | (x << ((32-n)&(32-1))) ); +} + +static inline u64 rol64(u64 x, int n) +{ + return ( (x << (n&(64-1))) | (x >> ((64-n)&(64-1))) ); +} + +/* Byte swap for 32-bit and 64-bit integers. If available, use compiler + provided helpers. */ +#ifdef HAVE_BUILTIN_BSWAP32 +# define _gcry_bswap32 __builtin_bswap32 +#else +static inline u32 +_gcry_bswap32(u32 x) +{ + return ((rol(x, 8) & 0x00ff00ffL) | (ror(x, 8) & 0xff00ff00L)); +} +#endif + +#ifdef HAVE_BUILTIN_BSWAP64 +# define _gcry_bswap64 __builtin_bswap64 +#else +static inline u64 +_gcry_bswap64(u64 x) +{ + return ((u64)_gcry_bswap32(x) << 32) | (_gcry_bswap32(x >> 32)); +} +#endif + +/* Endian dependent byte swap operations. */ +#ifdef WORDS_BIGENDIAN +# define le_bswap32(x) _gcry_bswap32(x) +# define be_bswap32(x) ((u32)(x)) +# define le_bswap64(x) _gcry_bswap64(x) +# define be_bswap64(x) ((u64)(x)) +#else +# define le_bswap32(x) ((u32)(x)) +# define be_bswap32(x) _gcry_bswap32(x) +# define le_bswap64(x) ((u64)(x)) +# define be_bswap64(x) _gcry_bswap64(x) +#endif + + +/* Count trailing zero bits in an unsigend int. We return an int + because that is what gcc's builtin does. Returns the number of + bits in X if X is 0. */ +static inline int +_gcry_ctz (unsigned int x) +{ +#if defined (HAVE_BUILTIN_CTZ) + return x ? __builtin_ctz (x) : 8 * sizeof (x); +#else + /* See + * http://graphics.stanford.edu/~seander/bithacks.html#ZerosOnRightModLookup + */ + static const unsigned char mod37[] = + { + sizeof (unsigned int)*8, + 0, 1, 26, 2, 23, 27, 0, 3, 16, 24, 30, 28, 11, 0, 13, + 4, 7, 17, 0, 25, 22, 31, 15, 29, 10, 12, 6, 0, 21, 14, 9, + 5, 20, 8, 19, 18 + }; + return (int)mod37[(-x & x) % 37]; +#endif +} + + +/* Count trailing zero bits in an u64. We return an int because that + is what gcc's builtin does. Returns the number of bits in X if X + is 0. */ +static inline int +_gcry_ctz64(u64 x) +{ +#if defined (HAVE_BUILTIN_CTZL) && SIZEOF_UNSIGNED_LONG >= 8 + return x ? __builtin_ctzl (x) : 8 * sizeof (x); +#elif defined (HAVE_BUILTIN_CTZ) && SIZEOF_UNSIGNED_INT >= 8 +#warning hello + return x ? 
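/* A note on the table lookup in _gcry_ctz above: (-x & x) isolates the
 * lowest set bit of x, so the value reduced modulo 37 always comes from a
 * power of two (or from 0), and 2^0 .. 2^31 are pairwise distinct mod the
 * prime 37 because 2 has multiplicative order 36 there.  Worked example:
 * x = 20 (binary 10100) gives -x & x = 4, 4 % 37 = 4, and mod37[4] = 2 =
 * ctz(20); x = 0 selects slot 0, which holds sizeof(unsigned int) * 8. */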
__builtin_ctz (x) : 8 * sizeof (x);
+#else
+  if ((x & 0xffffffff))
+    return _gcry_ctz (x);
+  else
+    return 32 + _gcry_ctz (x >> 32);
+#endif
+}
+
+
+#endif /*GCRYPT_BITHELP_H*/
diff --git a/comm/third_party/libgcrypt/cipher/blake2.c b/comm/third_party/libgcrypt/cipher/blake2.c
new file mode 100644
index 0000000000..f2bf49e522
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blake2.c
@@ -0,0 +1,996 @@
+/* blake2.c - BLAKE2b and BLAKE2s hash functions (RFC 7693)
+ * Copyright (C) 2017 Jussi Kivilinna
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samuel Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/ref
+ * Copyright 2012, Samuel Neves
+ */
+
+#include <config.h>
+#include <string.h>
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+/* USE_AVX indicates whether to compile with Intel AVX code. */
+#undef USE_AVX
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX 1
+#endif
+
+/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */
+#undef USE_AVX2
+#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+    (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AVX2 1
+#endif
+
+/* AMD64 assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64.
*/ +#undef ASM_FUNC_ABI +#undef ASM_EXTRA_STACK +#if defined(USE_AVX2) && defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +# define ASM_EXTRA_STACK (10 * 16) +#else +# define ASM_FUNC_ABI +# define ASM_EXTRA_STACK 0 +#endif + +#define BLAKE2B_BLOCKBYTES 128 +#define BLAKE2B_OUTBYTES 64 +#define BLAKE2B_KEYBYTES 64 + +#define BLAKE2S_BLOCKBYTES 64 +#define BLAKE2S_OUTBYTES 32 +#define BLAKE2S_KEYBYTES 32 + +typedef struct +{ + u64 h[8]; + u64 t[2]; + u64 f[2]; +} BLAKE2B_STATE; + +struct blake2b_param_s +{ + byte digest_length; + byte key_length; + byte fanout; + byte depth; + byte leaf_length[4]; + byte node_offset[4]; + byte xof_length[4]; + byte node_depth; + byte inner_length; + byte reserved[14]; + byte salt[16]; + byte personal[16]; +}; + +typedef struct BLAKE2B_CONTEXT_S +{ + BLAKE2B_STATE state; + byte buf[BLAKE2B_BLOCKBYTES]; + size_t buflen; + size_t outlen; +#ifdef USE_AVX2 + unsigned int use_avx2:1; +#endif +} BLAKE2B_CONTEXT; + +typedef struct +{ + u32 h[8]; + u32 t[2]; + u32 f[2]; +} BLAKE2S_STATE; + +struct blake2s_param_s +{ + byte digest_length; + byte key_length; + byte fanout; + byte depth; + byte leaf_length[4]; + byte node_offset[4]; + byte xof_length[2]; + byte node_depth; + byte inner_length; + /* byte reserved[0]; */ + byte salt[8]; + byte personal[8]; +}; + +typedef struct BLAKE2S_CONTEXT_S +{ + BLAKE2S_STATE state; + byte buf[BLAKE2S_BLOCKBYTES]; + size_t buflen; + size_t outlen; +#ifdef USE_AVX + unsigned int use_avx:1; +#endif +} BLAKE2S_CONTEXT; + +typedef unsigned int (*blake2_transform_t)(void *S, const void *inblk, + size_t nblks); + + +static const u64 blake2b_IV[8] = +{ + U64_C(0x6a09e667f3bcc908), U64_C(0xbb67ae8584caa73b), + U64_C(0x3c6ef372fe94f82b), U64_C(0xa54ff53a5f1d36f1), + U64_C(0x510e527fade682d1), U64_C(0x9b05688c2b3e6c1f), + U64_C(0x1f83d9abfb41bd6b), U64_C(0x5be0cd19137e2179) +}; + +static const u32 blake2s_IV[8] = +{ + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL +}; + +static byte zero_block[BLAKE2B_BLOCKBYTES] = { 0, }; + + +static void blake2_write(void *S, const void *inbuf, size_t inlen, + byte *tmpbuf, size_t *tmpbuflen, size_t blkbytes, + blake2_transform_t transform_fn) +{ + const byte* in = inbuf; + unsigned int burn = 0; + + if (inlen > 0) + { + size_t left = *tmpbuflen; + size_t fill = blkbytes - left; + size_t nblks; + + if (inlen > fill) + { + if (fill > 0) + buf_cpy (tmpbuf + left, in, fill); /* Fill buffer */ + left = 0; + + burn = transform_fn (S, tmpbuf, 1); /* Increment counter + Compress */ + + in += fill; + inlen -= fill; + + nblks = inlen / blkbytes - !(inlen % blkbytes); + if (nblks) + { + burn = transform_fn(S, in, nblks); + in += blkbytes * nblks; + inlen -= blkbytes * nblks; + } + } + + gcry_assert (inlen > 0); + + buf_cpy (tmpbuf + left, in, inlen); + *tmpbuflen = left + inlen; + } + + if (burn) + _gcry_burn_stack (burn); + + return; +} + + +static inline void blake2b_set_lastblock(BLAKE2B_STATE *S) +{ + S->f[0] = U64_C(0xffffffffffffffff); +} + +static inline int blake2b_is_lastblock(const BLAKE2B_STATE *S) +{ + return S->f[0] != 0; +} + +static inline void blake2b_increment_counter(BLAKE2B_STATE *S, const int inc) +{ + S->t[0] += (u64)inc; + S->t[1] += (S->t[0] < (u64)inc) - (inc < 0); +} + +static inline u64 rotr64(u64 x, u64 n) +{ + return ((x >> (n & 63)) | (x << ((64 - n) & 63))); +} + +static unsigned int blake2b_transform_generic(BLAKE2B_STATE *S, + const void *inblks, + size_t nblks) +{ + static 
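/* Two reading notes.  On blake2_write above: nblks is computed as
 * inlen / blkbytes - !(inlen % blkbytes), so when the remaining input is
 * an exact multiple of the block size one full block is deliberately held
 * back in tmpbuf; BLAKE2 must flag and pad the final block during
 * finalization, which is only possible while it is still buffered (for
 * example, 128 remaining bytes with blkbytes == 128 give nblks = 1 - 1 = 0).
 * On blake2b_sigma below: it is the per-round message-word schedule from
 * RFC 7693; BLAKE2b runs 12 rounds, so rows 10 and 11 repeat rows 0 and 1. */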
const byte blake2b_sigma[12][16] = + { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 } + }; + const byte* in = inblks; + u64 m[16]; + u64 v[16]; + + while (nblks--) + { + /* Increment counter */ + blake2b_increment_counter (S, BLAKE2B_BLOCKBYTES); + + /* Compress */ + m[0] = buf_get_le64 (in + 0 * sizeof(m[0])); + m[1] = buf_get_le64 (in + 1 * sizeof(m[0])); + m[2] = buf_get_le64 (in + 2 * sizeof(m[0])); + m[3] = buf_get_le64 (in + 3 * sizeof(m[0])); + m[4] = buf_get_le64 (in + 4 * sizeof(m[0])); + m[5] = buf_get_le64 (in + 5 * sizeof(m[0])); + m[6] = buf_get_le64 (in + 6 * sizeof(m[0])); + m[7] = buf_get_le64 (in + 7 * sizeof(m[0])); + m[8] = buf_get_le64 (in + 8 * sizeof(m[0])); + m[9] = buf_get_le64 (in + 9 * sizeof(m[0])); + m[10] = buf_get_le64 (in + 10 * sizeof(m[0])); + m[11] = buf_get_le64 (in + 11 * sizeof(m[0])); + m[12] = buf_get_le64 (in + 12 * sizeof(m[0])); + m[13] = buf_get_le64 (in + 13 * sizeof(m[0])); + m[14] = buf_get_le64 (in + 14 * sizeof(m[0])); + m[15] = buf_get_le64 (in + 15 * sizeof(m[0])); + + v[ 0] = S->h[0]; + v[ 1] = S->h[1]; + v[ 2] = S->h[2]; + v[ 3] = S->h[3]; + v[ 4] = S->h[4]; + v[ 5] = S->h[5]; + v[ 6] = S->h[6]; + v[ 7] = S->h[7]; + v[ 8] = blake2b_IV[0]; + v[ 9] = blake2b_IV[1]; + v[10] = blake2b_IV[2]; + v[11] = blake2b_IV[3]; + v[12] = blake2b_IV[4] ^ S->t[0]; + v[13] = blake2b_IV[5] ^ S->t[1]; + v[14] = blake2b_IV[6] ^ S->f[0]; + v[15] = blake2b_IV[7] ^ S->f[1]; + +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2b_sigma[r][2*i+0]]; \ + d = rotr64(d ^ a, 32); \ + c = c + d; \ + b = rotr64(b ^ c, 24); \ + a = a + b + m[blake2b_sigma[r][2*i+1]]; \ + d = rotr64(d ^ a, 16); \ + c = c + d; \ + b = rotr64(b ^ c, 63); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + ROUND(10); + ROUND(11); + +#undef G +#undef ROUND + + S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8]; + S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8]; + S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8]; + S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8]; + S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8]; + S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8]; + S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8]; + S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8]; + + in += BLAKE2B_BLOCKBYTES; + } + + return sizeof(void *) * 4 + sizeof(u64) * 16 * 2; +} + +#ifdef USE_AVX2 +unsigned int _gcry_blake2b_transform_amd64_avx2(BLAKE2B_STATE *S, + const void *inblks, + size_t nblks) ASM_FUNC_ABI; +#endif + +static unsigned int blake2b_transform(void *ctx, const void *inblks, + size_t nblks) +{ + BLAKE2B_CONTEXT *c = ctx; + 
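/* Dispatch note: use_avx2 is fixed at init time from the HWF_INTEL_AVX2
 * hardware-feature flag, so each call picks the AVX2 or the generic
 * compression function without re-probing the CPU.  The returned burn
 * depth is then padded with ASM_EXTRA_STACK on Win64 builds, accounting
 * for the stack the assembly uses to save XMM6-XMM15 (see ASM_FUNC_ABI
 * above). */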
unsigned int nburn; + + if (0) + {} +#ifdef USE_AVX2 + if (c->use_avx2) + nburn = _gcry_blake2b_transform_amd64_avx2(&c->state, inblks, nblks); +#endif + else + nburn = blake2b_transform_generic(&c->state, inblks, nblks); + + if (nburn) + nburn += ASM_EXTRA_STACK; + + return nburn; +} + +static void blake2b_final(void *ctx) +{ + BLAKE2B_CONTEXT *c = ctx; + BLAKE2B_STATE *S = &c->state; + unsigned int burn; + size_t i; + + gcry_assert (sizeof(c->buf) >= c->outlen); + if (blake2b_is_lastblock(S)) + return; + + if (c->buflen < BLAKE2B_BLOCKBYTES) + memset (c->buf + c->buflen, 0, BLAKE2B_BLOCKBYTES - c->buflen); /* Padding */ + blake2b_set_lastblock (S); + blake2b_increment_counter (S, (int)c->buflen - BLAKE2B_BLOCKBYTES); + burn = blake2b_transform (ctx, c->buf, 1); + + /* Output full hash to buffer */ + for (i = 0; i < 8; ++i) + buf_put_le64 (c->buf + sizeof(S->h[i]) * i, S->h[i]); + + /* Zero out extra buffer bytes. */ + if (c->outlen < sizeof(c->buf)) + memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen); + + if (burn) + _gcry_burn_stack (burn); +} + +static byte *blake2b_read(void *ctx) +{ + BLAKE2B_CONTEXT *c = ctx; + return c->buf; +} + +static void blake2b_write(void *ctx, const void *inbuf, size_t inlen) +{ + BLAKE2B_CONTEXT *c = ctx; + BLAKE2B_STATE *S = &c->state; + blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2B_BLOCKBYTES, + blake2b_transform); +} + +static inline void blake2b_init_param(BLAKE2B_STATE *S, + const struct blake2b_param_s *P) +{ + const byte *p = (const byte *)P; + size_t i; + + /* init xors IV with input parameter block */ + + /* IV XOR ParamBlock */ + for (i = 0; i < 8; ++i) + S->h[i] = blake2b_IV[i] ^ buf_get_le64(p + sizeof(S->h[i]) * i); +} + +static inline gcry_err_code_t blake2b_init(BLAKE2B_CONTEXT *ctx, + const byte *key, size_t keylen) +{ + struct blake2b_param_s P[1] = { { 0, } }; + BLAKE2B_STATE *S = &ctx->state; + + if (!ctx->outlen || ctx->outlen > BLAKE2B_OUTBYTES) + return GPG_ERR_INV_ARG; + if (sizeof(P[0]) != sizeof(u64) * 8) + return GPG_ERR_INTERNAL; + if (keylen && (!key || keylen > BLAKE2B_KEYBYTES)) + return GPG_ERR_INV_KEYLEN; + + P->digest_length = ctx->outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + + blake2b_init_param (S, P); + wipememory (P, sizeof(P)); + + if (key) + { + blake2b_write (ctx, key, keylen); + blake2b_write (ctx, zero_block, BLAKE2B_BLOCKBYTES - keylen); + } + + return 0; +} + +static gcry_err_code_t blake2b_init_ctx(void *ctx, unsigned int flags, + const byte *key, size_t keylen, + unsigned int dbits) +{ + BLAKE2B_CONTEXT *c = ctx; + unsigned int features = _gcry_get_hw_features (); + + (void)features; + (void)flags; + + memset (c, 0, sizeof (*c)); + +#ifdef USE_AVX2 + c->use_avx2 = !!(features & HWF_INTEL_AVX2); +#endif + + c->outlen = dbits / 8; + c->buflen = 0; + return blake2b_init(c, key, keylen); +} + +static inline void blake2s_set_lastblock(BLAKE2S_STATE *S) +{ + S->f[0] = 0xFFFFFFFFUL; +} + +static inline int blake2s_is_lastblock(BLAKE2S_STATE *S) +{ + return S->f[0] != 0; +} + +static inline void blake2s_increment_counter(BLAKE2S_STATE *S, const int inc) +{ + S->t[0] += (u32)inc; + S->t[1] += (S->t[0] < (u32)inc) - (inc < 0); +} + +static unsigned int blake2s_transform_generic(BLAKE2S_STATE *S, + const void *inblks, + size_t nblks) +{ + static const byte blake2s_sigma[10][16] = + { + { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, + { 14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3 }, + { 11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4 }, + { 7, 9, 3, 1, 
13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8 }, + { 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13 }, + { 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9 }, + { 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11 }, + { 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10 }, + { 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5 }, + { 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0 }, + }; + unsigned int burn = 0; + const byte* in = inblks; + u32 m[16]; + u32 v[16]; + + while (nblks--) + { + /* Increment counter */ + blake2s_increment_counter (S, BLAKE2S_BLOCKBYTES); + + /* Compress */ + m[0] = buf_get_le32 (in + 0 * sizeof(m[0])); + m[1] = buf_get_le32 (in + 1 * sizeof(m[0])); + m[2] = buf_get_le32 (in + 2 * sizeof(m[0])); + m[3] = buf_get_le32 (in + 3 * sizeof(m[0])); + m[4] = buf_get_le32 (in + 4 * sizeof(m[0])); + m[5] = buf_get_le32 (in + 5 * sizeof(m[0])); + m[6] = buf_get_le32 (in + 6 * sizeof(m[0])); + m[7] = buf_get_le32 (in + 7 * sizeof(m[0])); + m[8] = buf_get_le32 (in + 8 * sizeof(m[0])); + m[9] = buf_get_le32 (in + 9 * sizeof(m[0])); + m[10] = buf_get_le32 (in + 10 * sizeof(m[0])); + m[11] = buf_get_le32 (in + 11 * sizeof(m[0])); + m[12] = buf_get_le32 (in + 12 * sizeof(m[0])); + m[13] = buf_get_le32 (in + 13 * sizeof(m[0])); + m[14] = buf_get_le32 (in + 14 * sizeof(m[0])); + m[15] = buf_get_le32 (in + 15 * sizeof(m[0])); + + v[ 0] = S->h[0]; + v[ 1] = S->h[1]; + v[ 2] = S->h[2]; + v[ 3] = S->h[3]; + v[ 4] = S->h[4]; + v[ 5] = S->h[5]; + v[ 6] = S->h[6]; + v[ 7] = S->h[7]; + v[ 8] = blake2s_IV[0]; + v[ 9] = blake2s_IV[1]; + v[10] = blake2s_IV[2]; + v[11] = blake2s_IV[3]; + v[12] = S->t[0] ^ blake2s_IV[4]; + v[13] = S->t[1] ^ blake2s_IV[5]; + v[14] = S->f[0] ^ blake2s_IV[6]; + v[15] = S->f[1] ^ blake2s_IV[7]; + +#define G(r,i,a,b,c,d) \ + do { \ + a = a + b + m[blake2s_sigma[r][2*i+0]]; \ + d = ror(d ^ a, 16); \ + c = c + d; \ + b = ror(b ^ c, 12); \ + a = a + b + m[blake2s_sigma[r][2*i+1]]; \ + d = ror(d ^ a, 8); \ + c = c + d; \ + b = ror(b ^ c, 7); \ + } while(0) + +#define ROUND(r) \ + do { \ + G(r,0,v[ 0],v[ 4],v[ 8],v[12]); \ + G(r,1,v[ 1],v[ 5],v[ 9],v[13]); \ + G(r,2,v[ 2],v[ 6],v[10],v[14]); \ + G(r,3,v[ 3],v[ 7],v[11],v[15]); \ + G(r,4,v[ 0],v[ 5],v[10],v[15]); \ + G(r,5,v[ 1],v[ 6],v[11],v[12]); \ + G(r,6,v[ 2],v[ 7],v[ 8],v[13]); \ + G(r,7,v[ 3],v[ 4],v[ 9],v[14]); \ + } while(0) + + ROUND(0); + ROUND(1); + ROUND(2); + ROUND(3); + ROUND(4); + ROUND(5); + ROUND(6); + ROUND(7); + ROUND(8); + ROUND(9); + +#undef G +#undef ROUND + + S->h[0] = S->h[0] ^ v[0] ^ v[0 + 8]; + S->h[1] = S->h[1] ^ v[1] ^ v[1 + 8]; + S->h[2] = S->h[2] ^ v[2] ^ v[2 + 8]; + S->h[3] = S->h[3] ^ v[3] ^ v[3 + 8]; + S->h[4] = S->h[4] ^ v[4] ^ v[4 + 8]; + S->h[5] = S->h[5] ^ v[5] ^ v[5 + 8]; + S->h[6] = S->h[6] ^ v[6] ^ v[6 + 8]; + S->h[7] = S->h[7] ^ v[7] ^ v[7 + 8]; + + in += BLAKE2S_BLOCKBYTES; + } + + return burn; +} + +#ifdef USE_AVX +unsigned int _gcry_blake2s_transform_amd64_avx(BLAKE2S_STATE *S, + const void *inblks, + size_t nblks) ASM_FUNC_ABI; +#endif + +static unsigned int blake2s_transform(void *ctx, const void *inblks, + size_t nblks) +{ + BLAKE2S_CONTEXT *c = ctx; + unsigned int nburn; + + if (0) + {} +#ifdef USE_AVX + if (c->use_avx) + nburn = _gcry_blake2s_transform_amd64_avx(&c->state, inblks, nblks); +#endif + else + nburn = blake2s_transform_generic(&c->state, inblks, nblks); + + if (nburn) + nburn += ASM_EXTRA_STACK; + + return nburn; +} + +static void blake2s_final(void *ctx) +{ + BLAKE2S_CONTEXT *c = ctx; + BLAKE2S_STATE *S = &c->state; + unsigned int burn; + 
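/* Note on the counter handling below: the buffered tail has not been
 * counted yet, so the code first applies the negative adjustment
 * blake2s_increment_counter (S, buflen - BLAKE2S_BLOCKBYTES); the
 * transform then unconditionally adds BLAKE2S_BLOCKBYTES, netting exactly
 * buflen.  The (inc < 0) term in blake2s_increment_counter records the
 * borrow: for a 10-byte message t[0] wraps to 2^32 - 54 and t[1] drops by
 * one, and the transform's +64 carries t back to exactly 10. */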
size_t i; + + gcry_assert (sizeof(c->buf) >= c->outlen); + if (blake2s_is_lastblock(S)) + return; + + if (c->buflen < BLAKE2S_BLOCKBYTES) + memset (c->buf + c->buflen, 0, BLAKE2S_BLOCKBYTES - c->buflen); /* Padding */ + blake2s_set_lastblock (S); + blake2s_increment_counter (S, (int)c->buflen - BLAKE2S_BLOCKBYTES); + burn = blake2s_transform (ctx, c->buf, 1); + + /* Output full hash to buffer */ + for (i = 0; i < 8; ++i) + buf_put_le32 (c->buf + sizeof(S->h[i]) * i, S->h[i]); + + /* Zero out extra buffer bytes. */ + if (c->outlen < sizeof(c->buf)) + memset (c->buf + c->outlen, 0, sizeof(c->buf) - c->outlen); + + if (burn) + _gcry_burn_stack (burn); +} + +static byte *blake2s_read(void *ctx) +{ + BLAKE2S_CONTEXT *c = ctx; + return c->buf; +} + +static void blake2s_write(void *ctx, const void *inbuf, size_t inlen) +{ + BLAKE2S_CONTEXT *c = ctx; + BLAKE2S_STATE *S = &c->state; + blake2_write(S, inbuf, inlen, c->buf, &c->buflen, BLAKE2S_BLOCKBYTES, + blake2s_transform); +} + +static inline void blake2s_init_param(BLAKE2S_STATE *S, + const struct blake2s_param_s *P) +{ + const byte *p = (const byte *)P; + size_t i; + + /* init2 xors IV with input parameter block */ + + /* IV XOR ParamBlock */ + for (i = 0; i < 8; ++i) + S->h[i] ^= blake2s_IV[i] ^ buf_get_le32(&p[i * 4]); +} + +static inline gcry_err_code_t blake2s_init(BLAKE2S_CONTEXT *ctx, + const byte *key, size_t keylen) +{ + struct blake2s_param_s P[1] = { { 0, } }; + BLAKE2S_STATE *S = &ctx->state; + + if (!ctx->outlen || ctx->outlen > BLAKE2S_OUTBYTES) + return GPG_ERR_INV_ARG; + if (sizeof(P[0]) != sizeof(u32) * 8) + return GPG_ERR_INTERNAL; + if (keylen && (!key || keylen > BLAKE2S_KEYBYTES)) + return GPG_ERR_INV_KEYLEN; + + P->digest_length = ctx->outlen; + P->key_length = keylen; + P->fanout = 1; + P->depth = 1; + + blake2s_init_param (S, P); + wipememory (P, sizeof(P)); + + if (key) + { + blake2s_write (ctx, key, keylen); + blake2s_write (ctx, zero_block, BLAKE2S_BLOCKBYTES - keylen); + } + + return 0; +} + +static gcry_err_code_t blake2s_init_ctx(void *ctx, unsigned int flags, + const byte *key, size_t keylen, + unsigned int dbits) +{ + BLAKE2S_CONTEXT *c = ctx; + unsigned int features = _gcry_get_hw_features (); + + (void)features; + (void)flags; + + memset (c, 0, sizeof (*c)); + +#ifdef USE_AVX + c->use_avx = !!(features & HWF_INTEL_AVX); +#endif + + c->outlen = dbits / 8; + c->buflen = 0; + return blake2s_init(c, key, keylen); +} + +/* Selftests from "RFC 7693, Appendix E. BLAKE2b and BLAKE2s Self-Test + * Module C Source". 
*/ +static void selftest_seq(byte *out, size_t len, u32 seed) +{ + size_t i; + u32 t, a, b; + + a = 0xDEAD4BAD * seed; + b = 1; + + for (i = 0; i < len; i++) + { + t = a + b; + a = b; + b = t; + out[i] = (t >> 24) & 0xFF; + } +} + +static gpg_err_code_t +selftests_blake2b (int algo, int extended, selftest_report_func_t report) +{ + static const byte blake2b_res[32] = + { + 0xC2, 0x3A, 0x78, 0x00, 0xD9, 0x81, 0x23, 0xBD, + 0x10, 0xF5, 0x06, 0xC6, 0x1E, 0x29, 0xDA, 0x56, + 0x03, 0xD7, 0x63, 0xB8, 0xBB, 0xAD, 0x2E, 0x73, + 0x7F, 0x5E, 0x76, 0x5A, 0x7B, 0xCC, 0xD4, 0x75 + }; + static const size_t b2b_md_len[4] = { 20, 32, 48, 64 }; + static const size_t b2b_in_len[6] = { 0, 3, 128, 129, 255, 1024 }; + size_t i, j, outlen, inlen; + byte in[1024], key[64]; + BLAKE2B_CONTEXT ctx; + BLAKE2B_CONTEXT ctx2; + const char *what; + const char *errtxt; + + (void)extended; + + what = "rfc7693 BLAKE2b selftest"; + + /* 256-bit hash for testing */ + if (blake2b_init_ctx(&ctx, 0, NULL, 0, 32 * 8)) + { + errtxt = "init failed"; + goto failed; + } + + for (i = 0; i < 4; i++) + { + outlen = b2b_md_len[i]; + for (j = 0; j < 6; j++) + { + inlen = b2b_in_len[j]; + + selftest_seq(in, inlen, inlen); /* unkeyed hash */ + blake2b_init_ctx(&ctx2, 0, NULL, 0, outlen * 8); + blake2b_write(&ctx2, in, inlen); + blake2b_final(&ctx2); + blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */ + + selftest_seq(key, outlen, outlen); /* keyed hash */ + blake2b_init_ctx(&ctx2, 0, key, outlen, outlen * 8); + blake2b_write(&ctx2, in, inlen); + blake2b_final(&ctx2); + blake2b_write(&ctx, ctx2.buf, outlen); /* hash the hash */ + } + } + + /* compute and compare the hash of hashes */ + blake2b_final(&ctx); + for (i = 0; i < 32; i++) + { + if (ctx.buf[i] != blake2b_res[i]) + { + errtxt = "digest mismatch"; + goto failed; + } + } + + return 0; + +failed: + if (report) + report ("digest", algo, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + +static gpg_err_code_t +selftests_blake2s (int algo, int extended, selftest_report_func_t report) +{ + static const byte blake2s_res[32] = + { + 0x6A, 0x41, 0x1F, 0x08, 0xCE, 0x25, 0xAD, 0xCD, + 0xFB, 0x02, 0xAB, 0xA6, 0x41, 0x45, 0x1C, 0xEC, + 0x53, 0xC5, 0x98, 0xB2, 0x4F, 0x4F, 0xC7, 0x87, + 0xFB, 0xDC, 0x88, 0x79, 0x7F, 0x4C, 0x1D, 0xFE + }; + static const size_t b2s_md_len[4] = { 16, 20, 28, 32 }; + static const size_t b2s_in_len[6] = { 0, 3, 64, 65, 255, 1024 }; + size_t i, j, outlen, inlen; + byte in[1024], key[32]; + BLAKE2S_CONTEXT ctx; + BLAKE2S_CONTEXT ctx2; + const char *what; + const char *errtxt; + + (void)extended; + + what = "rfc7693 BLAKE2s selftest"; + + /* 256-bit hash for testing */ + if (blake2s_init_ctx(&ctx, 0, NULL, 0, 32 * 8)) + { + errtxt = "init failed"; + goto failed; + } + + for (i = 0; i < 4; i++) + { + outlen = b2s_md_len[i]; + for (j = 0; j < 6; j++) + { + inlen = b2s_in_len[j]; + + selftest_seq(in, inlen, inlen); /* unkeyed hash */ + blake2s_init_ctx(&ctx2, 0, NULL, 0, outlen * 8); + blake2s_write(&ctx2, in, inlen); + blake2s_final(&ctx2); + blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */ + + selftest_seq(key, outlen, outlen); /* keyed hash */ + blake2s_init_ctx(&ctx2, 0, key, outlen, outlen * 8); + blake2s_write(&ctx2, in, inlen); + blake2s_final(&ctx2); + blake2s_write(&ctx, ctx2.buf, outlen); /* hash the hash */ + } + } + + /* compute and compare the hash of hashes */ + blake2s_final(&ctx); + for (i = 0; i < 32; i++) + { + if (ctx.buf[i] != blake2s_res[i]) + { + errtxt = "digest mismatch"; + goto failed; + } + } + + return 0; + +failed: + if 
(report) + report ("digest", algo, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +gcry_err_code_t _gcry_blake2_init_with_key(void *ctx, unsigned int flags, + const unsigned char *key, + size_t keylen, int algo) +{ + gcry_err_code_t rc; + switch (algo) + { + case GCRY_MD_BLAKE2B_512: + rc = blake2b_init_ctx (ctx, flags, key, keylen, 512); + break; + case GCRY_MD_BLAKE2B_384: + rc = blake2b_init_ctx (ctx, flags, key, keylen, 384); + break; + case GCRY_MD_BLAKE2B_256: + rc = blake2b_init_ctx (ctx, flags, key, keylen, 256); + break; + case GCRY_MD_BLAKE2B_160: + rc = blake2b_init_ctx (ctx, flags, key, keylen, 160); + break; + case GCRY_MD_BLAKE2S_256: + rc = blake2s_init_ctx (ctx, flags, key, keylen, 256); + break; + case GCRY_MD_BLAKE2S_224: + rc = blake2s_init_ctx (ctx, flags, key, keylen, 224); + break; + case GCRY_MD_BLAKE2S_160: + rc = blake2s_init_ctx (ctx, flags, key, keylen, 160); + break; + case GCRY_MD_BLAKE2S_128: + rc = blake2s_init_ctx (ctx, flags, key, keylen, 128); + break; + default: + rc = GPG_ERR_DIGEST_ALGO; + break; + } + + return rc; +} + + +#define DEFINE_BLAKE2_VARIANT(bs, BS, dbits, oid_branch) \ + static void blake2##bs##_##dbits##_init(void *ctx, unsigned int flags) \ + { \ + int err = blake2##bs##_init_ctx (ctx, flags, NULL, 0, dbits); \ + gcry_assert (err == 0); \ + } \ + static void \ + _gcry_blake2##bs##_##dbits##_hash_buffer(void *outbuf, \ + const void *buffer, size_t length) \ + { \ + BLAKE2##BS##_CONTEXT hd; \ + blake2##bs##_##dbits##_init (&hd, 0); \ + blake2##bs##_write (&hd, buffer, length); \ + blake2##bs##_final (&hd); \ + memcpy (outbuf, blake2##bs##_read (&hd), dbits / 8); \ + } \ + static void \ + _gcry_blake2##bs##_##dbits##_hash_buffers(void *outbuf, \ + const gcry_buffer_t *iov, int iovcnt) \ + { \ + BLAKE2##BS##_CONTEXT hd; \ + blake2##bs##_##dbits##_init (&hd, 0); \ + for (;iovcnt > 0; iov++, iovcnt--) \ + blake2##bs##_write (&hd, (const char*)iov[0].data + iov[0].off, \ + iov[0].len); \ + blake2##bs##_final (&hd); \ + memcpy (outbuf, blake2##bs##_read (&hd), dbits / 8); \ + } \ + static byte blake2##bs##_##dbits##_asn[] = { 0x30 }; \ + static gcry_md_oid_spec_t oid_spec_blake2##bs##_##dbits[] = \ + { \ + { " 1.3.6.1.4.1.1722.12.2." oid_branch }, \ + { NULL } \ + }; \ + gcry_md_spec_t _gcry_digest_spec_blake2##bs##_##dbits = \ + { \ + GCRY_MD_BLAKE2##BS##_##dbits, {0, 0}, \ + "BLAKE2" #BS "_" #dbits, blake2##bs##_##dbits##_asn, \ + DIM (blake2##bs##_##dbits##_asn), oid_spec_blake2##bs##_##dbits, \ + dbits / 8, blake2##bs##_##dbits##_init, blake2##bs##_write, \ + blake2##bs##_final, blake2##bs##_read, NULL, \ + _gcry_blake2##bs##_##dbits##_hash_buffer, \ + _gcry_blake2##bs##_##dbits##_hash_buffers, \ + sizeof (BLAKE2##BS##_CONTEXT), selftests_blake2##bs \ + }; + +DEFINE_BLAKE2_VARIANT(b, B, 512, "1.16") +DEFINE_BLAKE2_VARIANT(b, B, 384, "1.12") +DEFINE_BLAKE2_VARIANT(b, B, 256, "1.8") +DEFINE_BLAKE2_VARIANT(b, B, 160, "1.5") + +DEFINE_BLAKE2_VARIANT(s, S, 256, "2.8") +DEFINE_BLAKE2_VARIANT(s, S, 224, "2.7") +DEFINE_BLAKE2_VARIANT(s, S, 160, "2.5") +DEFINE_BLAKE2_VARIANT(s, S, 128, "2.4") diff --git a/comm/third_party/libgcrypt/cipher/blake2b-amd64-avx2.S b/comm/third_party/libgcrypt/cipher/blake2b-amd64-avx2.S new file mode 100644 index 0000000000..357e8a5167 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/blake2b-amd64-avx2.S @@ -0,0 +1,300 @@ +/* blake2b-amd64-avx2.S - AVX2 implementation of BLAKE2b + * + * Copyright (C) 2018 Jussi Kivilinna + * + * This file is part of Libgcrypt. 
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samuel Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/sse
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_AVX2) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* register macros */
+#define RSTATE  %rdi
+#define RINBLKS %rsi
+#define RNBLKS  %rdx
+#define RIV     %rcx
+
+/* state structure */
+#define STATE_H 0
+#define STATE_T (STATE_H + 8 * 8)
+#define STATE_F (STATE_T + 2 * 8)
+
+/* vector registers */
+#define ROW1  %ymm0
+#define ROW2  %ymm1
+#define ROW3  %ymm2
+#define ROW4  %ymm3
+#define TMP1  %ymm4
+#define TMP1x %xmm4
+#define R16   %ymm5
+#define R24   %ymm6
+
+#define MA1  %ymm8
+#define MA2  %ymm9
+#define MA3  %ymm10
+#define MA4  %ymm11
+#define MA1x %xmm8
+#define MA2x %xmm9
+#define MA3x %xmm10
+#define MA4x %xmm11
+
+#define MB1  %ymm12
+#define MB2  %ymm13
+#define MB3  %ymm14
+#define MB4  %ymm15
+#define MB1x %xmm12
+#define MB2x %xmm13
+#define MB3x %xmm14
+#define MB4x %xmm15
+
+/**********************************************************************
+  blake2b/AVX2
+ **********************************************************************/
+
+/* Gather one 64-bit message word per sigma index s0..s15 into the four
+ * message registers m1..m4. */
+#define GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   s0, s1, s2, s3, s4, s5, s6, s7, s8, \
+                   s9, s10, s11, s12, s13, s14, s15) \
+        vmovq (s0)*8(RINBLKS), m1x; \
+        vmovq (s4)*8(RINBLKS), TMP1x; \
+        vpinsrq $1, (s2)*8(RINBLKS), m1x, m1x; \
+        vpinsrq $1, (s6)*8(RINBLKS), TMP1x, TMP1x; \
+        vinserti128 $1, TMP1x, m1, m1; \
+        vmovq (s1)*8(RINBLKS), m2x; \
+        vmovq (s5)*8(RINBLKS), TMP1x; \
+        vpinsrq $1, (s3)*8(RINBLKS), m2x, m2x; \
+        vpinsrq $1, (s7)*8(RINBLKS), TMP1x, TMP1x; \
+        vinserti128 $1, TMP1x, m2, m2; \
+        vmovq (s8)*8(RINBLKS), m3x; \
+        vmovq (s12)*8(RINBLKS), TMP1x; \
+        vpinsrq $1, (s10)*8(RINBLKS), m3x, m3x; \
+        vpinsrq $1, (s14)*8(RINBLKS), TMP1x, TMP1x; \
+        vinserti128 $1, TMP1x, m3, m3; \
+        vmovq (s9)*8(RINBLKS), m4x; \
+        vmovq (s13)*8(RINBLKS), TMP1x; \
+        vpinsrq $1, (s11)*8(RINBLKS), m4x, m4x; \
+        vpinsrq $1, (s15)*8(RINBLKS), TMP1x, TMP1x; \
+        vinserti128 $1, TMP1x, m4, m4;
+
+#define LOAD_MSG_0(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+#define LOAD_MSG_1(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3)
+#define LOAD_MSG_2(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4)
+#define LOAD_MSG_3(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \
+        GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \
+                   7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8)
+#define LOAD_MSG_4(m1, m2, 
m3, m4, m1x, m2x, m3x, m4x) \ + GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \ + 9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13) +#define LOAD_MSG_5(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \ + GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \ + 2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9) +#define LOAD_MSG_6(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \ + GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \ + 12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11) +#define LOAD_MSG_7(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \ + GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \ + 13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10) +#define LOAD_MSG_8(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \ + GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \ + 6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5) +#define LOAD_MSG_9(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \ + GATHER_MSG(m1, m2, m3, m4, m1x, m2x, m3x, m4x, \ + 10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13 , 0) +#define LOAD_MSG_10(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \ + LOAD_MSG_0(m1, m2, m3, m4, m1x, m2x, m3x, m4x) +#define LOAD_MSG_11(m1, m2, m3, m4, m1x, m2x, m3x, m4x) \ + LOAD_MSG_1(m1, m2, m3, m4, m1x, m2x, m3x, m4x) + +#define LOAD_MSG(r, m1, m2, m3, m4) \ + LOAD_MSG_##r(m1, m2, m3, m4, m1##x, m2##x, m3##x, m4##x) + +#define ROR_32(in, out) vpshufd $0xb1, in, out; + +#define ROR_24(in, out) vpshufb R24, in, out; + +#define ROR_16(in, out) vpshufb R16, in, out; + +#define ROR_63(in, out) \ + vpsrlq $63, in, TMP1; \ + vpaddq in, in, out; \ + vpxor TMP1, out, out; + +#define G(r1, r2, r3, r4, m, ROR_A, ROR_B) \ + vpaddq m, r1, r1; \ + vpaddq r2, r1, r1; \ + vpxor r1, r4, r4; \ + ROR_A(r4, r4); \ + vpaddq r4, r3, r3; \ + vpxor r3, r2, r2; \ + ROR_B(r2, r2); + +#define G1(r1, r2, r3, r4, m) \ + G(r1, r2, r3, r4, m, ROR_32, ROR_24); + +#define G2(r1, r2, r3, r4, m) \ + G(r1, r2, r3, r4, m, ROR_16, ROR_63); + +#define MM_SHUFFLE(z,y,x,w) \ + (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) + +#define DIAGONALIZE(r1, r2, r3, r4) \ + vpermq $MM_SHUFFLE(0,3,2,1), r2, r2; \ + vpermq $MM_SHUFFLE(1,0,3,2), r3, r3; \ + vpermq $MM_SHUFFLE(2,1,0,3), r4, r4; + +#define UNDIAGONALIZE(r1, r2, r3, r4) \ + vpermq $MM_SHUFFLE(2,1,0,3), r2, r2; \ + vpermq $MM_SHUFFLE(1,0,3,2), r3, r3; \ + vpermq $MM_SHUFFLE(0,3,2,1), r4, r4; + +#define ROUND(r, m1, m2, m3, m4) \ + G1(ROW1, ROW2, ROW3, ROW4, m1); \ + G2(ROW1, ROW2, ROW3, ROW4, m2); \ + DIAGONALIZE(ROW1, ROW2, ROW3, ROW4); \ + G1(ROW1, ROW2, ROW3, ROW4, m3); \ + G2(ROW1, ROW2, ROW3, ROW4, m4); \ + UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4); + +blake2b_data: +.align 32 +.Liv: + .quad 0x6a09e667f3bcc908, 0xbb67ae8584caa73b + .quad 0x3c6ef372fe94f82b, 0xa54ff53a5f1d36f1 + .quad 0x510e527fade682d1, 0x9b05688c2b3e6c1f + .quad 0x1f83d9abfb41bd6b, 0x5be0cd19137e2179 +.Lshuf_ror16: + .byte 2, 3, 4, 5, 6, 7, 0, 1, 10, 11, 12, 13, 14, 15, 8, 9 +.Lshuf_ror24: + .byte 3, 4, 5, 6, 7, 0, 1, 2, 11, 12, 13, 14, 15, 8, 9, 10 + +.align 64 +.globl _gcry_blake2b_transform_amd64_avx2 +ELF(.type _gcry_blake2b_transform_amd64_avx2,@function;) + +_gcry_blake2b_transform_amd64_avx2: + /* input: + * %rdi: state + * %rsi: blks + * %rdx: num_blks + */ + CFI_STARTPROC(); + + vzeroupper; + + addq $128, (STATE_T + 0)(RSTATE); + adcq $0, (STATE_T + 8)(RSTATE); + + vbroadcasti128 .Lshuf_ror16 rRIP, R16; + vbroadcasti128 .Lshuf_ror24 rRIP, R24; + + vmovdqa .Liv+(0 * 8) rRIP, ROW3; + vmovdqa .Liv+(4 * 8) rRIP, ROW4; + + vmovdqu (STATE_H + 0 * 8)(RSTATE), ROW1; + vmovdqu (STATE_H + 4 * 8)(RSTATE), ROW2; + + vpxor (STATE_T)(RSTATE), ROW4, ROW4; + + LOAD_MSG(0, MA1, 
MA2, MA3, MA4);
+        LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+.Loop:
+        ROUND(0, MA1, MA2, MA3, MA4);
+        LOAD_MSG(2, MA1, MA2, MA3, MA4);
+        ROUND(1, MB1, MB2, MB3, MB4);
+        LOAD_MSG(3, MB1, MB2, MB3, MB4);
+        ROUND(2, MA1, MA2, MA3, MA4);
+        LOAD_MSG(4, MA1, MA2, MA3, MA4);
+        ROUND(3, MB1, MB2, MB3, MB4);
+        LOAD_MSG(5, MB1, MB2, MB3, MB4);
+        ROUND(4, MA1, MA2, MA3, MA4);
+        LOAD_MSG(6, MA1, MA2, MA3, MA4);
+        ROUND(5, MB1, MB2, MB3, MB4);
+        LOAD_MSG(7, MB1, MB2, MB3, MB4);
+        ROUND(6, MA1, MA2, MA3, MA4);
+        LOAD_MSG(8, MA1, MA2, MA3, MA4);
+        ROUND(7, MB1, MB2, MB3, MB4);
+        LOAD_MSG(9, MB1, MB2, MB3, MB4);
+        ROUND(8, MA1, MA2, MA3, MA4);
+        LOAD_MSG(10, MA1, MA2, MA3, MA4);
+        ROUND(9, MB1, MB2, MB3, MB4);
+        LOAD_MSG(11, MB1, MB2, MB3, MB4);
+        sub $1, RNBLKS;
+        jz .Loop_end;
+
+        lea 128(RINBLKS), RINBLKS;
+        addq $128, (STATE_T + 0)(RSTATE);
+        adcq $0, (STATE_T + 8)(RSTATE);
+
+        ROUND(10, MA1, MA2, MA3, MA4);
+        LOAD_MSG(0, MA1, MA2, MA3, MA4);
+        ROUND(11, MB1, MB2, MB3, MB4);
+        LOAD_MSG(1, MB1, MB2, MB3, MB4);
+
+        vpxor ROW3, ROW1, ROW1;
+        vpxor ROW4, ROW2, ROW2;
+
+        vmovdqa .Liv+(0 * 8) rRIP, ROW3;
+        vmovdqa .Liv+(4 * 8) rRIP, ROW4;
+
+        vpxor (STATE_H + 0 * 8)(RSTATE), ROW1, ROW1;
+        vpxor (STATE_H + 4 * 8)(RSTATE), ROW2, ROW2;
+
+        vmovdqu ROW1, (STATE_H + 0 * 8)(RSTATE);
+        vmovdqu ROW2, (STATE_H + 4 * 8)(RSTATE);
+
+        vpxor (STATE_T)(RSTATE), ROW4, ROW4;
+
+        jmp .Loop;
+
+.Loop_end:
+        ROUND(10, MA1, MA2, MA3, MA4);
+        ROUND(11, MB1, MB2, MB3, MB4);
+
+        vpxor ROW3, ROW1, ROW1;
+        vpxor ROW4, ROW2, ROW2;
+        vpxor (STATE_H + 0 * 8)(RSTATE), ROW1, ROW1;
+        vpxor (STATE_H + 4 * 8)(RSTATE), ROW2, ROW2;
+
+        vmovdqu ROW1, (STATE_H + 0 * 8)(RSTATE);
+        vmovdqu ROW2, (STATE_H + 4 * 8)(RSTATE);
+
+        xor %eax, %eax;
+        vzeroall;
+        ret;
+        CFI_ENDPROC();
+ELF(.size _gcry_blake2b_transform_amd64_avx2,
+    .-_gcry_blake2b_transform_amd64_avx2;)
+
+#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/blake2s-amd64-avx.S b/comm/third_party/libgcrypt/cipher/blake2s-amd64-avx.S
new file mode 100644
index 0000000000..5b93675871
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blake2s-amd64-avx.S
@@ -0,0 +1,278 @@
+/* blake2s-amd64-avx.S - AVX implementation of BLAKE2s
+ *
+ * Copyright (C) 2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 BLAKE2 reference implementation
+ * by Samuel Neves, at https://github.com/BLAKE2/BLAKE2/tree/master/sse
+ * Copyright 2012, Samuel Neves <sneves@dei.uc.pt>
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_AVX) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* register macros */
+#define RSTATE  %rdi
+#define RINBLKS %rsi
+#define RNBLKS  %rdx
+#define RIV     %rcx
+
+/* state structure */
+#define STATE_H 0
+#define STATE_T (STATE_H + 8 * 4)
+#define STATE_F (STATE_T + 2 * 4)
+
+/* vector registers */
+#define ROW1  %xmm0
+#define ROW2  %xmm1
+#define ROW3  %xmm2
+#define ROW4  %xmm3
+#define TMP1  %xmm4
+#define TMP1x %xmm4
+#define R16   %xmm5
+#define R8    %xmm6
+
+#define MA1 %xmm8
+#define MA2 %xmm9
+#define MA3 %xmm10
+#define MA4 %xmm11
+
+#define MB1 %xmm12
+#define MB2 %xmm13
+#define MB3 %xmm14
+#define MB4 %xmm15
+
+/**********************************************************************
+  blake2s/AVX
+ **********************************************************************/
+
+/* Gather one 32-bit message word per sigma index s0..s15 into the four
+ * message registers m1..m4. */
+#define GATHER_MSG(m1, m2, m3, m4, \
+                   s0, s1, s2, s3, s4, s5, s6, s7, s8, \
+                   s9, s10, s11, s12, s13, s14, s15) \
+        vmovd (s0)*4(RINBLKS), m1; \
+        vmovd (s1)*4(RINBLKS), m2; \
+        vmovd (s8)*4(RINBLKS), m3; \
+        vmovd (s9)*4(RINBLKS), m4; \
+        vpinsrd $1, (s2)*4(RINBLKS), m1, m1; \
+        vpinsrd $1, (s3)*4(RINBLKS), m2, m2; \
+        vpinsrd $1, (s10)*4(RINBLKS), m3, m3; \
+        vpinsrd $1, (s11)*4(RINBLKS), m4, m4; \
+        vpinsrd $2, (s4)*4(RINBLKS), m1, m1; \
+        vpinsrd $2, (s5)*4(RINBLKS), m2, m2; \
+        vpinsrd $2, (s12)*4(RINBLKS), m3, m3; \
+        vpinsrd $2, (s13)*4(RINBLKS), m4, m4; \
+        vpinsrd $3, (s6)*4(RINBLKS), m1, m1; \
+        vpinsrd $3, (s7)*4(RINBLKS), m2, m2; \
+        vpinsrd $3, (s14)*4(RINBLKS), m3, m3; \
+        vpinsrd $3, (s15)*4(RINBLKS), m4, m4;
+
+#define LOAD_MSG_0(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)
+#define LOAD_MSG_1(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   14, 10, 4, 8, 9, 15, 13, 6, 1, 12, 0, 2, 11, 7, 5, 3)
+#define LOAD_MSG_2(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   11, 8, 12, 0, 5, 2, 15, 13, 10, 14, 3, 6, 7, 1, 9, 4)
+#define LOAD_MSG_3(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   7, 9, 3, 1, 13, 12, 11, 14, 2, 6, 5, 10, 4, 0, 15, 8)
+#define LOAD_MSG_4(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   9, 0, 5, 7, 2, 4, 10, 15, 14, 1, 11, 12, 6, 8, 3, 13)
+#define LOAD_MSG_5(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   2, 12, 6, 10, 0, 11, 8, 3, 4, 13, 7, 5, 15, 14, 1, 9)
+#define LOAD_MSG_6(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   12, 5, 1, 15, 14, 13, 4, 10, 0, 7, 6, 3, 9, 2, 8, 11)
+#define LOAD_MSG_7(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   13, 11, 7, 14, 12, 1, 3, 9, 5, 0, 15, 4, 8, 6, 2, 10)
+#define LOAD_MSG_8(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   6, 15, 14, 9, 11, 3, 0, 8, 12, 2, 13, 7, 1, 4, 10, 5)
+#define LOAD_MSG_9(m1, m2, m3, m4) \
+        GATHER_MSG(m1, m2, m3, m4, \
+                   10, 2, 8, 4, 7, 6, 1, 5, 15, 11, 9, 14, 3, 12, 13, 0)
+
+#define LOAD_MSG(r, m1, m2, m3, m4) LOAD_MSG_##r(m1, m2, m3, m4)
+
+#define ROR_16(in, out) vpshufb R16, in, out;
+
+#define ROR_8(in, out)  vpshufb R8, in, out;
+
+#define ROR_12(in, out) \
+        vpsrld $12, in, TMP1; \
+        vpslld $(32 - 12), in, out; \
+        vpxor TMP1, out, out;
+
+#define ROR_7(in, out) \
+        vpsrld $7, in, TMP1; \
+        vpslld $(32 - 7), in, out; \
+        vpxor TMP1, out, out;
+
+#define G(r1, r2, r3, r4, m, ROR_A, 
ROR_B) \ + vpaddd m, r1, r1; \ + vpaddd r2, r1, r1; \ + vpxor r1, r4, r4; \ + ROR_A(r4, r4); \ + vpaddd r4, r3, r3; \ + vpxor r3, r2, r2; \ + ROR_B(r2, r2); + +#define G1(r1, r2, r3, r4, m) \ + G(r1, r2, r3, r4, m, ROR_16, ROR_12); + +#define G2(r1, r2, r3, r4, m) \ + G(r1, r2, r3, r4, m, ROR_8, ROR_7); + +#define MM_SHUFFLE(z,y,x,w) \ + (((z) << 6) | ((y) << 4) | ((x) << 2) | (w)) + +#define DIAGONALIZE(r1, r2, r3, r4) \ + vpshufd $MM_SHUFFLE(0,3,2,1), r2, r2; \ + vpshufd $MM_SHUFFLE(1,0,3,2), r3, r3; \ + vpshufd $MM_SHUFFLE(2,1,0,3), r4, r4; + +#define UNDIAGONALIZE(r1, r2, r3, r4) \ + vpshufd $MM_SHUFFLE(2,1,0,3), r2, r2; \ + vpshufd $MM_SHUFFLE(1,0,3,2), r3, r3; \ + vpshufd $MM_SHUFFLE(0,3,2,1), r4, r4; + +#define ROUND(r, m1, m2, m3, m4) \ + G1(ROW1, ROW2, ROW3, ROW4, m1); \ + G2(ROW1, ROW2, ROW3, ROW4, m2); \ + DIAGONALIZE(ROW1, ROW2, ROW3, ROW4); \ + G1(ROW1, ROW2, ROW3, ROW4, m3); \ + G2(ROW1, ROW2, ROW3, ROW4, m4); \ + UNDIAGONALIZE(ROW1, ROW2, ROW3, ROW4); + +blake2s_data: +.align 16 +.Liv: + .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A + .long 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19 +.Lshuf_ror16: + .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 +.Lshuf_ror8: + .byte 1,2,3,0,5,6,7,4,9,10,11,8,13,14,15,12 + +.align 64 +.globl _gcry_blake2s_transform_amd64_avx +ELF(.type _gcry_blake2s_transform_amd64_avx,@function;) + +_gcry_blake2s_transform_amd64_avx: + /* input: + * %rdi: state + * %rsi: blks + * %rdx: num_blks + */ + CFI_STARTPROC(); + + vzeroupper; + + addq $64, (STATE_T + 0)(RSTATE); + + vmovdqa .Lshuf_ror16 rRIP, R16; + vmovdqa .Lshuf_ror8 rRIP, R8; + + vmovdqa .Liv+(0 * 4) rRIP, ROW3; + vmovdqa .Liv+(4 * 4) rRIP, ROW4; + + vmovdqu (STATE_H + 0 * 4)(RSTATE), ROW1; + vmovdqu (STATE_H + 4 * 4)(RSTATE), ROW2; + + vpxor (STATE_T)(RSTATE), ROW4, ROW4; + + LOAD_MSG(0, MA1, MA2, MA3, MA4); + LOAD_MSG(1, MB1, MB2, MB3, MB4); + +.Loop: + ROUND(0, MA1, MA2, MA3, MA4); + LOAD_MSG(2, MA1, MA2, MA3, MA4); + ROUND(1, MB1, MB2, MB3, MB4); + LOAD_MSG(3, MB1, MB2, MB3, MB4); + ROUND(2, MA1, MA2, MA3, MA4); + LOAD_MSG(4, MA1, MA2, MA3, MA4); + ROUND(3, MB1, MB2, MB3, MB4); + LOAD_MSG(5, MB1, MB2, MB3, MB4); + ROUND(4, MA1, MA2, MA3, MA4); + LOAD_MSG(6, MA1, MA2, MA3, MA4); + ROUND(5, MB1, MB2, MB3, MB4); + LOAD_MSG(7, MB1, MB2, MB3, MB4); + ROUND(6, MA1, MA2, MA3, MA4); + LOAD_MSG(8, MA1, MA2, MA3, MA4); + ROUND(7, MB1, MB2, MB3, MB4); + LOAD_MSG(9, MB1, MB2, MB3, MB4); + sub $1, RNBLKS; + jz .Loop_end; + + lea 64(RINBLKS), RINBLKS; + addq $64, (STATE_T + 0)(RSTATE); + + ROUND(8, MA1, MA2, MA3, MA4); + LOAD_MSG(0, MA1, MA2, MA3, MA4); + ROUND(9, MB1, MB2, MB3, MB4); + LOAD_MSG(1, MB1, MB2, MB3, MB4); + + vpxor ROW3, ROW1, ROW1; + vpxor ROW4, ROW2, ROW2; + + vmovdqa .Liv+(0 * 4) rRIP, ROW3; + vmovdqa .Liv+(4 * 4) rRIP, ROW4; + + vpxor (STATE_H + 0 * 4)(RSTATE), ROW1, ROW1; + vpxor (STATE_H + 4 * 4)(RSTATE), ROW2, ROW2; + + vmovdqu ROW1, (STATE_H + 0 * 4)(RSTATE); + vmovdqu ROW2, (STATE_H + 4 * 4)(RSTATE); + + vpxor (STATE_T)(RSTATE), ROW4, ROW4; + + jmp .Loop; + +.Loop_end: + ROUND(8, MA1, MA2, MA3, MA4); + ROUND(9, MB1, MB2, MB3, MB4); + + vpxor ROW3, ROW1, ROW1; + vpxor ROW4, ROW2, ROW2; + vpxor (STATE_H + 0 * 4)(RSTATE), ROW1, ROW1; + vpxor (STATE_H + 4 * 4)(RSTATE), ROW2, ROW2; + + vmovdqu ROW1, (STATE_H + 0 * 4)(RSTATE); + vmovdqu ROW2, (STATE_H + 4 * 4)(RSTATE); + + xor %eax, %eax; + vzeroall; + ret; + CFI_ENDPROC(); +ELF(.size _gcry_blake2s_transform_amd64_avx, + .-_gcry_blake2s_transform_amd64_avx;) + +#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/ +#endif 
/*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/blowfish-amd64.S b/comm/third_party/libgcrypt/cipher/blowfish-amd64.S
new file mode 100644
index 0000000000..bdb361d7eb
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blowfish-amd64.S
@@ -0,0 +1,601 @@
+/* blowfish-amd64.S - AMD64 assembly implementation of Blowfish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(USE_BLOWFISH) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+#include "asm-common-amd64.h"
+
+.text
+
+/* structure of BLOWFISH_context: */
+#define s0 0
+#define s1 ((s0) + 256 * 4)
+#define s2 ((s1) + 256 * 4)
+#define s3 ((s2) + 256 * 4)
+#define p  ((s3) + 256 * 4)
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rcx
+#define RX3 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %ecx
+#define RX3d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %cl
+#define RX3bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %ch
+#define RX3bh %dh
+
+#define RT0 %rbp
+#define RT1 %rsi
+#define RT2 %r8
+#define RT3 %r9
+
+#define RT0d %ebp
+#define RT1d %esi
+#define RT2d %r8d
+#define RT3d %r9d
+
+#define RKEY %r10
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F() \
+        movzbl RX0bh, RT1d; \
+        movzbl RX0bl, RT3d; \
+        rorq $16, RX0; \
+        movzbl RX0bh, RT0d; \
+        movzbl RX0bl, RT2d; \
+        rorq $16, RX0; \
+        movl s0(CTX,RT0,4), RT0d; \
+        addl s1(CTX,RT2,4), RT0d; \
+        xorl s2(CTX,RT1,4), RT0d; \
+        addl s3(CTX,RT3,4), RT0d; \
+        xorq RT0, RX0;
+
+#define load_roundkey_enc(n) \
+        movq p+4*(n)(CTX), RX3;
+
+#define add_roundkey_enc() \
+        xorq RX3, RX0;
+
+#define round_enc(n) \
+        add_roundkey_enc(); \
+        load_roundkey_enc(n); \
+        \
+        F(); \
+        F();
+
+#define load_roundkey_dec(n) \
+        movq p+4*(n-1)(CTX), RX3; \
+        rorq $32, RX3;
+
+#define add_roundkey_dec() \
+        xorq RX3, RX0;
+
+#define round_dec(n) \
+        add_roundkey_dec(); \
+        load_roundkey_dec(n); \
+        \
+        F(); \
+        F();
+
+#define read_block() \
+        movq (RIO), RX0; \
+        rorq $32, RX0; \
+        bswapq RX0;
+
+#define write_block() \
+        bswapq RX0; \
+        movq RX0, (RIO);
+
+.align 8
+ELF(.type __blowfish_enc_blk1,@function;)
+
+__blowfish_enc_blk1:
+        /* input:
+         *      %rdi: ctx, CTX
+         *      RX0: input plaintext block
+         * output:
+         *      RX0: output ciphertext block
+         */
+        CFI_STARTPROC();
+        movq %rbp, %r11;
+        CFI_REGISTER(%rbp, %r11);
+
+        load_roundkey_enc(0);
+        round_enc(2);
+        round_enc(4);
+        round_enc(6);
+        round_enc(8);
+        round_enc(10);
+        round_enc(12);
+        round_enc(14);
+        round_enc(16);
+        add_roundkey_enc();
+
+        movq %r11, %rbp;
+        CFI_RESTORE(%rbp)
+
+        ret;
+        CFI_ENDPROC();
+ELF(.size 
__blowfish_enc_blk1,.-__blowfish_enc_blk1;) + +.align 8 +.globl _gcry_blowfish_amd64_do_encrypt +ELF(.type _gcry_blowfish_amd64_do_encrypt,@function;) + +_gcry_blowfish_amd64_do_encrypt: + /* input: + * %rdi: ctx, CTX + * %rsi: u32 *ret_xl + * %rdx: u32 *ret_xr + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + movl (%rdx), RX0d; + shlq $32, RX0; + movl (%rsi), RT3d; + movq %rdx, %r10; + orq RT3, RX0; + movq %rsi, RX2; + + call __blowfish_enc_blk1; + + movl RX0d, (%r10); + shrq $32, RX0; + movl RX0d, (RX2); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_blowfish_amd64_do_encrypt,.-_gcry_blowfish_amd64_do_encrypt;) + +.align 8 +.globl _gcry_blowfish_amd64_encrypt_block +ELF(.type _gcry_blowfish_amd64_encrypt_block,@function;) + +_gcry_blowfish_amd64_encrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + movq %rsi, %r10; + + movq %rdx, RIO; + read_block(); + + call __blowfish_enc_blk1; + + movq %r10, RIO; + write_block(); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_blowfish_amd64_encrypt_block,.-_gcry_blowfish_amd64_encrypt_block;) + +.align 8 +.globl _gcry_blowfish_amd64_decrypt_block +ELF(.type _gcry_blowfish_amd64_decrypt_block,@function;) + +_gcry_blowfish_amd64_decrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + movq %rbp, %r11; + CFI_REGISTER(%rbp, %r11); + + movq %rsi, %r10; + movq %rdx, RIO; + + read_block(); + + load_roundkey_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + round_dec(1); + add_roundkey_dec(); + + movq %r10, RIO; + write_block(); + + movq %r11, %rbp; + CFI_RESTORE(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_blowfish_amd64_decrypt_block,.-_gcry_blowfish_amd64_decrypt_block;) + +/********************************************************************** + 4-way blowfish, four blocks parallel + **********************************************************************/ +#define F4(x) \ + movzbl x ## bh, RT1d; \ + movzbl x ## bl, RT3d; \ + rorq $16, x; \ + movzbl x ## bh, RT0d; \ + movzbl x ## bl, RT2d; \ + rorq $16, x; \ + movl s0(CTX,RT0,4), RT0d; \ + addl s1(CTX,RT2,4), RT0d; \ + xorl s2(CTX,RT1,4), RT0d; \ + addl s3(CTX,RT3,4), RT0d; \ + xorq RT0, x; + +#define add_preloaded_roundkey4() \ + xorq RKEY, RX0; \ + xorq RKEY, RX1; \ + xorq RKEY, RX2; \ + xorq RKEY, RX3; + +#define preload_roundkey_enc(n) \ + movq p+4*(n)(CTX), RKEY; + +#define add_roundkey_enc4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_enc(n + 2); + +#define round_enc4(n) \ + add_roundkey_enc4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define preload_roundkey_dec(n) \ + movq p+4*((n)-1)(CTX), RKEY; \ + rorq $32, RKEY; + +#define add_roundkey_dec4(n) \ + add_preloaded_roundkey4(); \ + preload_roundkey_dec(n - 2); + +#define round_dec4(n) \ + add_roundkey_dec4(n); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); \ + \ + F4(RX0); \ + F4(RX1); \ + F4(RX2); \ + F4(RX3); + +#define inbswap_block4() \ + rorq $32, RX0; \ + bswapq RX0; \ + rorq $32, RX1; \ + bswapq RX1; \ + rorq $32, RX2; \ + bswapq RX2; \ + rorq $32, RX3; \ + bswapq RX3; + +#define inctrswap_block4() \ + rorq $32, RX0; \ + rorq $32, RX1; \ + rorq $32, RX2; \ + rorq $32, RX3; + +#define outbswap_block4() \ + bswapq RX0; \ + bswapq RX1; \ + bswapq RX2; \ + bswapq RX3; + 
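+
+/* The F()/F4() macros above implement the standard Blowfish F function;
+ * the two rorq $16 steps swap the 64-bit register halves so that F of
+ * one 32-bit half is XORed into the other. As a minimal C sketch of a
+ * single F evaluation (illustrative only, using the BLOWFISH_context
+ * s-box layout declared in blowfish.c; not part of the build):
+ *
+ *   static u32 F (const BLOWFISH_context *ctx, u32 x)
+ *   {
+ *     return ((ctx->s0[(x >> 24) & 0xff] + ctx->s1[(x >> 16) & 0xff])
+ *             ^ ctx->s2[(x >> 8) & 0xff]) + ctx->s3[x & 0xff];
+ *   }
+ *
+ * Additions wrap modulo 2^32. F4() interleaves four such evaluations
+ * across RX0..RX3 so S-box loads from independent blocks can overlap.
+ */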
+.align 8 +ELF(.type __blowfish_enc_blk4,@function;) + +__blowfish_enc_blk4: + /* input: + * %rdi: ctx, CTX + * RX0,RX1,RX2,RX3: four input inbswapped plaintext blocks + * output: + * RX0,RX1,RX2,RX3: four output ciphertext blocks + */ + CFI_STARTPROC(); + preload_roundkey_enc(0); + + round_enc4(0); + round_enc4(2); + round_enc4(4); + round_enc4(6); + round_enc4(8); + round_enc4(10); + round_enc4(12); + round_enc4(14); + add_preloaded_roundkey4(); + + outbswap_block4(); + + ret; + CFI_ENDPROC(); +ELF(.size __blowfish_enc_blk4,.-__blowfish_enc_blk4;) + +.align 8 +ELF(.type __blowfish_dec_blk4,@function;) + +__blowfish_dec_blk4: + /* input: + * %rdi: ctx, CTX + * RX0,RX1,RX2,RX3: four input ciphertext blocks + * output: + * RX0,RX1,RX2,RX3: four output plaintext blocks + */ + CFI_STARTPROC(); + preload_roundkey_dec(17); + + inbswap_block4(); + + round_dec4(17); + round_dec4(15); + round_dec4(13); + round_dec4(11); + round_dec4(9); + round_dec4(7); + round_dec4(5); + round_dec4(3); + add_preloaded_roundkey4(); + + outbswap_block4(); + + ret; + CFI_ENDPROC(); +ELF(.size __blowfish_dec_blk4,.-__blowfish_dec_blk4;) + +.align 8 +.globl _gcry_blowfish_amd64_ctr_enc +ELF(.type _gcry_blowfish_amd64_ctr_enc,@function;) +_gcry_blowfish_amd64_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (big endian, 64bit) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + + /* %r11-%r13 are not used by __blowfish_enc_blk4 */ + movq %rcx, %r13; /*iv*/ + movq %rdx, %r12; /*src*/ + movq %rsi, %r11; /*dst*/ + + /* load IV and byteswap */ + movq (%r13), RT0; + bswapq RT0; + movq RT0, RX0; + + /* construct IVs */ + leaq 1(RT0), RX1; + leaq 2(RT0), RX2; + leaq 3(RT0), RX3; + leaq 4(RT0), RT0; + bswapq RT0; + + inctrswap_block4(); + + /* store new IV */ + movq RT0, (%r13); + + call __blowfish_enc_blk4; + + /* XOR key-stream with plaintext */ + xorq 0 * 8(%r12), RX0; + xorq 1 * 8(%r12), RX1; + xorq 2 * 8(%r12), RX2; + xorq 3 * 8(%r12), RX3; + movq RX0, 0 * 8(%r11); + movq RX1, 1 * 8(%r11); + movq RX2, 2 * 8(%r11); + movq RX3, 3 * 8(%r11); + + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_blowfish_amd64_ctr_enc,.-_gcry_blowfish_amd64_ctr_enc;) + +.align 8 +.globl _gcry_blowfish_amd64_cbc_dec +ELF(.type _gcry_blowfish_amd64_cbc_dec,@function;) +_gcry_blowfish_amd64_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (64bit) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + + /* %r11-%r13 are not used by __blowfish_dec_blk4 */ + movq %rsi, %r11; /*dst*/ + movq %rdx, %r12; /*src*/ + movq %rcx, %r13; /*iv*/ + + /* load input */ + movq 0 * 8(%r12), RX0; + movq 1 * 8(%r12), RX1; + movq 2 * 8(%r12), RX2; + movq 3 * 8(%r12), RX3; + + call __blowfish_dec_blk4; + + movq 3 * 8(%r12), RT0; + xorq (%r13), RX0; + xorq 0 * 8(%r12), RX1; + xorq 1 * 8(%r12), RX2; + xorq 2 * 8(%r12), RX3; + movq RT0, (%r13); /* store new IV */ + + movq RX0, 0 * 8(%r11); + movq RX1, 1 * 8(%r11); + movq RX2, 2 * 8(%r11); + movq RX3, 3 * 8(%r11); + + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + 
CFI_POP(%rbp);
+
+        EXIT_SYSV_FUNC
+        ret;
+        CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_cbc_dec,.-_gcry_blowfish_amd64_cbc_dec;)
+
+.align 8
+.globl _gcry_blowfish_amd64_cfb_dec
+ELF(.type _gcry_blowfish_amd64_cfb_dec,@function;)
+_gcry_blowfish_amd64_cfb_dec:
+        /* input:
+         *      %rdi: ctx, CTX
+         *      %rsi: dst (4 blocks)
+         *      %rdx: src (4 blocks)
+         *      %rcx: iv (64bit)
+         */
+        CFI_STARTPROC();
+        ENTER_SYSV_FUNC_PARAMS_0_4
+
+        pushq %rbp;
+        CFI_PUSH(%rbp);
+        pushq %rbx;
+        CFI_PUSH(%rbx);
+        pushq %r12;
+        CFI_PUSH(%r12);
+        pushq %r13;
+        CFI_PUSH(%r13);
+
+        /* %r11-%r13 are not used by __blowfish_enc_blk4 */
+        movq %rcx, %r13; /*iv*/
+        movq %rdx, %r12; /*src*/
+        movq %rsi, %r11; /*dst*/
+
+        /* Load input */
+        movq (%r13), RX0;
+        movq 0 * 8(%r12), RX1;
+        movq 1 * 8(%r12), RX2;
+        movq 2 * 8(%r12), RX3;
+
+        inbswap_block4();
+
+        /* Update IV */
+        movq 3 * 8(%r12), RT0;
+        movq RT0, (%r13);
+
+        call __blowfish_enc_blk4;
+
+        xorq 0 * 8(%r12), RX0;
+        xorq 1 * 8(%r12), RX1;
+        xorq 2 * 8(%r12), RX2;
+        xorq 3 * 8(%r12), RX3;
+        movq RX0, 0 * 8(%r11);
+        movq RX1, 1 * 8(%r11);
+        movq RX2, 2 * 8(%r11);
+        movq RX3, 3 * 8(%r11);
+
+        popq %r13;
+        CFI_POP(%r13);
+        popq %r12;
+        CFI_POP(%r12);
+        popq %rbx;
+        CFI_POP(%rbx);
+        popq %rbp;
+        CFI_POP(%rbp);
+
+        EXIT_SYSV_FUNC
+        ret;
+        CFI_ENDPROC();
+ELF(.size _gcry_blowfish_amd64_cfb_dec,.-_gcry_blowfish_amd64_cfb_dec;)
+
+#endif /*defined(USE_BLOWFISH)*/
+#endif /*__x86_64*/
diff --git a/comm/third_party/libgcrypt/cipher/blowfish-arm.S b/comm/third_party/libgcrypt/cipher/blowfish-arm.S
new file mode 100644
index 0000000000..b30aa31f1d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blowfish-arm.S
@@ -0,0 +1,743 @@
+/* blowfish-arm.S - ARM assembly implementation of Blowfish cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+/* structure of crypto context */
+#define s0 0
+#define s1 (s0 + (1 * 256) * 4)
+#define s2 (s0 + (2 * 256) * 4)
+#define s3 (s0 + (3 * 256) * 4)
+#define p  (s3 + (1 * 256) * 4)
+
+/* register macros */
+#define CTXs0 %r0
+#define CTXs1 %r9
+#define CTXs2 %r8
+#define CTXs3 %r10
+#define RMASK %lr
+#define RKEYL %r2
+#define RKEYR %ip
+
+#define RL0 %r3
+#define RR0 %r4
+
+#define RL1 %r9
+#define RR1 %r10
+
+#define RT0 %r11
+#define RT1 %r7
+#define RT2 %r5
+#define RT3 %r6
+
+/* helper macros */
+#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \
+        ldrb rout, [rsrc, #((offs) + 0)]; \
+        ldrb rtmp, [rsrc, #((offs) + 1)]; \
+        orr rout, rout, rtmp, lsl #8; \
+        ldrb rtmp, [rsrc, #((offs) + 2)]; \
+        orr rout, rout, rtmp, lsl #16; \
+        ldrb rtmp, [rsrc, #((offs) + 3)]; \
+        orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \
+        mov rtmp0, rin, lsr #8; \
+        strb rin, [rdst, #((offs) + 0)]; \
+        mov rtmp1, rin, lsr #16; \
+        strb rtmp0, [rdst, #((offs) + 1)]; \
+        mov rtmp0, rin, lsr #24; \
+        strb rtmp1, [rdst, #((offs) + 2)]; \
+        strb rtmp0, [rdst, #((offs) + 3)];
+
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+        ldrb rout, [rsrc, #((offs) + 3)]; \
+        ldrb rtmp, [rsrc, #((offs) + 2)]; \
+        orr rout, rout, rtmp, lsl #8; \
+        ldrb rtmp, [rsrc, #((offs) + 1)]; \
+        orr rout, rout, rtmp, lsl #16; \
+        ldrb rtmp, [rsrc, #((offs) + 0)]; \
+        orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+        mov rtmp0, rin, lsr #8; \
+        strb rin, [rdst, #((offs) + 3)]; \
+        mov rtmp1, rin, lsr #16; \
+        strb rtmp0, [rdst, #((offs) + 2)]; \
+        mov rtmp0, rin, lsr #24; \
+        strb rtmp1, [rdst, #((offs) + 1)]; \
+        strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+        #define ldr_unaligned_host ldr_unaligned_le
+        #define str_unaligned_host str_unaligned_le
+
+        /* bswap on little-endian */
+#ifdef HAVE_ARM_ARCH_V6
+        #define host_to_be(reg, rtmp) \
+                rev reg, reg;
+        #define be_to_host(reg, rtmp) \
+                rev reg, reg;
+#else
+        #define host_to_be(reg, rtmp) \
+                eor rtmp, reg, reg, ror #16; \
+                mov rtmp, rtmp, lsr #8; \
+                bic rtmp, rtmp, #65280; \
+                eor reg, rtmp, reg, ror #8;
+        #define be_to_host(reg, rtmp) \
+                eor rtmp, reg, reg, ror #16; \
+                mov rtmp, rtmp, lsr #8; \
+                bic rtmp, rtmp, #65280; \
+                eor reg, rtmp, reg, ror #8;
+#endif
+#else
+        #define ldr_unaligned_host ldr_unaligned_be
+        #define str_unaligned_host str_unaligned_be
+
+        /* nop on big-endian */
+        #define host_to_be(reg, rtmp) /*_*/
+        #define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define host_to_host(x, y) /*_*/
+
+/***********************************************************************
+ * 1-way blowfish
+ ***********************************************************************/
+#define F(l, r) \
+        and RT0, RMASK, l, lsr#(24 - 2); \
+        and RT1, RMASK, l, lsr#(16 - 2); \
+        ldr RT0, [CTXs0, RT0]; \
+        and RT2, RMASK, l, lsr#(8 - 2); \
+        ldr RT1, [CTXs1, RT1]; \
+        and RT3, RMASK, l, lsl#2; \
+        ldr RT2, [CTXs2, RT2]; \
+        add RT0, RT1; \
+        ldr RT3, [CTXs3, RT3]; \
+        eor RT0, RT2; \
+        add RT0, RT3; \
+        eor r, RT0;
+
+#define load_roundkey_enc(n) \
+        ldr RKEYL, [CTXs2, #((p - s2) + (4 * (n) + 0))]; \
+        ldr RKEYR, [CTXs2, #((p - s2) + (4 * (n) + 4))];
+
+#define add_roundkey_enc() \
+        eor RL0, RKEYL; \
+        eor RR0, RKEYR;
+
+#define round_enc(n) \
+        add_roundkey_enc(); \
+        load_roundkey_enc(n); \
+        \
+        F(RL0, RR0); \
+        F(RR0, RL0);
+
+#define load_roundkey_dec(n) \
+        ldr RKEYL, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 4))]; \
+        ldr RKEYR, [CTXs2, #((p - s2) + (4 * ((n) - 1) + 0))];
+
+#define add_roundkey_dec() \
+        eor RL0, RKEYL; \
+        eor RR0, RKEYR;
+
+#define round_dec(n) \
+        add_roundkey_dec(); \
+        load_roundkey_dec(n); \
+        \
+        F(RL0, RR0); \
+        F(RR0, RL0);
+
+#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \
+        ldr l0, [rin, #((offs) + 0)]; \
+        ldr r0, [rin, #((offs) + 4)]; \
+        convert(l0, rtmp); \
+        convert(r0, rtmp);
+
+#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \
+        convert(l0, rtmp); \
+        convert(r0, rtmp); \
+        str l0, [rout, #((offs) + 0)]; \
+        str r0, [rout, #((offs) + 4)];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+        /* unaligned word reads allowed */
+        #define read_block(rin, offs, l0, r0, rtmp0) \
+                read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0)
+
+        #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \
+                write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0)
+
+        #define read_block_host(rin, offs, l0, r0, rtmp0) \
+                read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0)
+
+        #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \
+                write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0)
+#else
+        /* need to handle unaligned reads by byte reads */
+        #define read_block(rin, offs, l0, r0, rtmp0) \
+                tst rin, #3; \
+                beq 1f; \
+                        ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \
+                        ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \
+                        b 2f; \
+                1:;\
+                        read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \
+                2:;
+
+        #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \
+                tst rout, #3; \
+                beq 1f; \
+                        str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+                        str_unaligned_be(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+                        b 2f; \
+                1:;\
+                        write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \
+                2:;
+
+        #define read_block_host(rin, offs, l0, r0, rtmp0) \
+                tst rin, #3; \
+                beq 1f; \
+                        ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \
+                        ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \
+                        b 2f; \
+                1:;\
+                        read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \
+                2:;
+
+        #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \
+                tst rout, #3; \
+                beq 1f; \
+                        str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); \
+                        str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \
+                        b 2f; \
+                1:;\
+                        write_block_aligned(rout, offs, l0, r0, host_to_host, rtmp0); \
+                2:;
+#endif
+
+.align 3
+.type __blowfish_enc_blk1,%function;
+
+__blowfish_enc_blk1:
+        /* input:
+         *      preloaded: CTX
+         *      [RL0, RR0]: src
+         * output:
+         *      [RR0, RL0]: dst
+         */
+        push {%lr};
+
+        add CTXs1, CTXs0, #(s1 - s0);
+        add CTXs2, CTXs0, #(s2 - s0);
+        mov RMASK, #(0xff << 2); /* byte mask */
+        add CTXs3, CTXs1, #(s3 - s1);
+
+        load_roundkey_enc(0);
+        round_enc(2);
+        round_enc(4);
+        round_enc(6);
+        round_enc(8);
+        round_enc(10);
+        round_enc(12);
+        round_enc(14);
+        round_enc(16);
+        add_roundkey_enc();
+
+        pop {%pc};
+.size __blowfish_enc_blk1,.-__blowfish_enc_blk1;
+
+.align 8
+.globl _gcry_blowfish_arm_do_encrypt
+.type _gcry_blowfish_arm_do_encrypt,%function;
+
+_gcry_blowfish_arm_do_encrypt:
+        /* input:
+         *      %r0: ctx, CTX
+         *      %r1: u32 *ret_xl
+         *      %r2: u32 *ret_xr
+         */
+        push {%r2, %r4-%r11, %ip, %lr};
+
+        ldr RL0, [%r1];
+        ldr RR0, [%r2];
+
+        bl __blowfish_enc_blk1;
+
+        pop {%r2};
+        str RR0, [%r1];
+        str RL0, [%r2];
+
+        pop {%r4-%r11, %ip, %pc};
+.size _gcry_blowfish_arm_do_encrypt,.-_gcry_blowfish_arm_do_encrypt;
+
+.align 3
+.globl _gcry_blowfish_arm_encrypt_block
+.type _gcry_blowfish_arm_encrypt_block,%function;
+
+_gcry_blowfish_arm_encrypt_block:
+        /* 
input: + * %r0: ctx, CTX + * %r1: dst + * %r2: src + */ + push {%r4-%r11, %ip, %lr}; + + read_block(%r2, 0, RL0, RR0, RT0); + + bl __blowfish_enc_blk1; + + write_block(%r1, 0, RR0, RL0, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; +.size _gcry_blowfish_arm_encrypt_block,.-_gcry_blowfish_arm_encrypt_block; + +.align 3 +.globl _gcry_blowfish_arm_decrypt_block +.type _gcry_blowfish_arm_decrypt_block,%function; + +_gcry_blowfish_arm_decrypt_block: + /* input: + * %r0: ctx, CTX + * %r1: dst + * %r2: src + */ + push {%r4-%r11, %ip, %lr}; + + add CTXs1, CTXs0, #(s1 - s0); + add CTXs2, CTXs0, #(s2 - s0); + mov RMASK, #(0xff << 2); /* byte mask */ + add CTXs3, CTXs1, #(s3 - s1); + + read_block(%r2, 0, RL0, RR0, RT0); + + load_roundkey_dec(17); + round_dec(15); + round_dec(13); + round_dec(11); + round_dec(9); + round_dec(7); + round_dec(5); + round_dec(3); + round_dec(1); + add_roundkey_dec(); + + write_block(%r1, 0, RR0, RL0, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; +.size _gcry_blowfish_arm_decrypt_block,.-_gcry_blowfish_arm_decrypt_block; + +/*********************************************************************** + * 2-way blowfish + ***********************************************************************/ +#define F2(n, l0, r0, l1, r1, set_nextk, dec) \ + \ + and RT0, RMASK, l0, lsr#(24 - 2); \ + and RT1, RMASK, l0, lsr#(16 - 2); \ + and RT2, RMASK, l0, lsr#(8 - 2); \ + add RT1, #(s1 - s0); \ + \ + ldr RT0, [CTXs0, RT0]; \ + and RT3, RMASK, l0, lsl#2; \ + ldr RT1, [CTXs0, RT1]; \ + add RT3, #(s3 - s2); \ + ldr RT2, [CTXs2, RT2]; \ + add RT0, RT1; \ + ldr RT3, [CTXs2, RT3]; \ + \ + and RT1, RMASK, l1, lsr#(24 - 2); \ + eor RT0, RT2; \ + and RT2, RMASK, l1, lsr#(16 - 2); \ + add RT0, RT3; \ + add RT2, #(s1 - s0); \ + and RT3, RMASK, l1, lsr#(8 - 2); \ + eor r0, RT0; \ + \ + ldr RT1, [CTXs0, RT1]; \ + and RT0, RMASK, l1, lsl#2; \ + ldr RT2, [CTXs0, RT2]; \ + add RT0, #(s3 - s2); \ + ldr RT3, [CTXs2, RT3]; \ + add RT1, RT2; \ + ldr RT0, [CTXs2, RT0]; \ + \ + and RT2, RMASK, r0, lsr#(24 - 2); \ + eor RT1, RT3; \ + and RT3, RMASK, r0, lsr#(16 - 2); \ + add RT1, RT0; \ + add RT3, #(s1 - s0); \ + and RT0, RMASK, r0, lsr#(8 - 2); \ + eor r1, RT1; \ + \ + ldr RT2, [CTXs0, RT2]; \ + and RT1, RMASK, r0, lsl#2; \ + ldr RT3, [CTXs0, RT3]; \ + add RT1, #(s3 - s2); \ + ldr RT0, [CTXs2, RT0]; \ + add RT2, RT3; \ + ldr RT1, [CTXs2, RT1]; \ + \ + and RT3, RMASK, r1, lsr#(24 - 2); \ + eor RT2, RT0; \ + and RT0, RMASK, r1, lsr#(16 - 2); \ + add RT2, RT1; \ + add RT0, #(s1 - s0); \ + and RT1, RMASK, r1, lsr#(8 - 2); \ + eor l0, RT2; \ + \ + ldr RT3, [CTXs0, RT3]; \ + and RT2, RMASK, r1, lsl#2; \ + ldr RT0, [CTXs0, RT0]; \ + add RT2, #(s3 - s2); \ + ldr RT1, [CTXs2, RT1]; \ + eor l1, RKEYL; \ + ldr RT2, [CTXs2, RT2]; \ + \ + eor r0, RKEYR; \ + add RT3, RT0; \ + eor r1, RKEYR; \ + eor RT3, RT1; \ + eor l0, RKEYL; \ + add RT3, RT2; \ + set_nextk(RKEYL, (p - s2) + (4 * (n) + ((dec) * 4))); \ + eor l1, RT3; \ + set_nextk(RKEYR, (p - s2) + (4 * (n) + (!(dec) * 4))); + +#define load_n_add_roundkey_enc2(n) \ + load_roundkey_enc(n); \ + eor RL0, RKEYL; \ + eor RR0, RKEYR; \ + eor RL1, RKEYL; \ + eor RR1, RKEYR; \ + load_roundkey_enc((n) + 2); + +#define next_key(reg, offs) \ + ldr reg, [CTXs2, #(offs)]; + +#define dummy(x, y) /* do nothing */ + +#define round_enc2(n, load_next_key) \ + F2((n) + 2, RL0, RR0, RL1, RR1, load_next_key, 0); + +#define load_n_add_roundkey_dec2(n) \ + load_roundkey_dec(n); \ + eor RL0, RKEYL; \ + eor RR0, RKEYR; \ + eor RL1, RKEYL; \ + eor RR1, RKEYR; \ + load_roundkey_dec((n) - 2); + +#define 
round_dec2(n, load_next_key) \ + F2((n) - 3, RL0, RR0, RL1, RR1, load_next_key, 1); + +#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \ + ldr l0, [rin, #(0)]; \ + ldr r0, [rin, #(4)]; \ + convert(l0, rtmp); \ + ldr l1, [rin, #(8)]; \ + convert(r0, rtmp); \ + ldr r1, [rin, #(12)]; \ + convert(l1, rtmp); \ + convert(r1, rtmp); + +#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + convert(l1, rtmp); \ + str l0, [rout, #(0)]; \ + convert(r1, rtmp); \ + str r0, [rout, #(4)]; \ + str l1, [rout, #(8)]; \ + str r1, [rout, #(12)]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads allowed */ + #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0) + + #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0) + + #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0) + + #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0) +#else + /* need to handle unaligned reads by byte reads */ + #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_be(l0, rin, 0, rtmp0); \ + ldr_unaligned_be(r0, rin, 4, rtmp0); \ + ldr_unaligned_be(l1, rin, 8, rtmp0); \ + ldr_unaligned_be(r1, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \ + 2:; + + #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \ + str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \ + str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \ + str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \ + 2:; + + #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_host(l0, rin, 0, rtmp0); \ + ldr_unaligned_host(r0, rin, 4, rtmp0); \ + ldr_unaligned_host(l1, rin, 8, rtmp0); \ + ldr_unaligned_host(r1, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \ + 2:; + + #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \ + str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \ + str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \ + str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \ + 2:; +#endif + +.align 3 +.type _gcry_blowfish_arm_enc_blk2,%function; + +_gcry_blowfish_arm_enc_blk2: + /* input: + * preloaded: CTX + * [RL0, RR0], [RL1, RR1]: src + * output: + * [RR0, RL0], [RR1, RL1]: dst + */ + push {RT0,%lr}; + + add CTXs2, CTXs0, #(s2 - s0); + mov RMASK, #(0xff << 2); /* byte mask */ + + load_n_add_roundkey_enc2(0); + round_enc2(2, next_key); + round_enc2(4, next_key); + round_enc2(6, next_key); + round_enc2(8, next_key); + round_enc2(10, next_key); + round_enc2(12, next_key); + round_enc2(14, next_key); + round_enc2(16, dummy); + + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); + + pop {RT0,%pc}; +.size _gcry_blowfish_arm_enc_blk2,.-_gcry_blowfish_arm_enc_blk2; + +.align 3 +.globl _gcry_blowfish_arm_cfb_dec; +.type _gcry_blowfish_arm_cfb_dec,%function; + 
+_gcry_blowfish_arm_cfb_dec: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit) + */ + push {%r2, %r4-%r11, %ip, %lr}; + + mov %lr, %r3; + + /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ + ldm %r3, {RL0, RR0}; + host_to_be(RL0, RT0); + host_to_be(RR0, RT0); + read_block(%r2, 0, RL1, RR1, RT0); + + /* Update IV, load src[1] and save to iv[0] */ + read_block_host(%r2, 8, %r5, %r6, RT0); + stm %lr, {%r5, %r6}; + + bl _gcry_blowfish_arm_enc_blk2; + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r1: dst, %r0: %src */ + pop {%r0}; + + /* dst = src ^ result */ + read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr); + eor %r5, %r4; + eor %r6, %r3; + eor %r7, %r10; + eor %r8, %r9; + write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_blowfish_arm_cfb_dec,.-_gcry_blowfish_arm_cfb_dec; + +.align 3 +.globl _gcry_blowfish_arm_ctr_enc; +.type _gcry_blowfish_arm_ctr_enc,%function; + +_gcry_blowfish_arm_ctr_enc: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit, big-endian) + */ + push {%r2, %r4-%r11, %ip, %lr}; + + mov %lr, %r3; + + /* Load IV (big => host endian) */ + read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT0); + + /* Construct IVs */ + adds RR1, RR0, #1; /* +1 */ + adc RL1, RL0, #0; + adds %r6, RR1, #1; /* +2 */ + adc %r5, RL1, #0; + + /* Store new IV (host => big-endian) */ + write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT0); + + bl _gcry_blowfish_arm_enc_blk2; + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r1: dst, %r0: %src */ + pop {%r0}; + + /* XOR key-stream with plaintext */ + read_block2_host(%r0, %r5, %r6, %r7, %r8, %lr); + eor %r5, %r4; + eor %r6, %r3; + eor %r7, %r10; + eor %r8, %r9; + write_block2_host(%r1, %r5, %r6, %r7, %r8, %r9, %r10); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_blowfish_arm_ctr_enc,.-_gcry_blowfish_arm_ctr_enc; + +.align 3 +.type _gcry_blowfish_arm_dec_blk2,%function; + +_gcry_blowfish_arm_dec_blk2: + /* input: + * preloaded: CTX + * [RL0, RR0], [RL1, RR1]: src + * output: + * [RR0, RL0], [RR1, RL1]: dst + */ + add CTXs2, CTXs0, #(s2 - s0); + mov RMASK, #(0xff << 2); /* byte mask */ + + load_n_add_roundkey_dec2(17); + round_dec2(15, next_key); + round_dec2(13, next_key); + round_dec2(11, next_key); + round_dec2(9, next_key); + round_dec2(7, next_key); + round_dec2(5, next_key); + round_dec2(3, next_key); + round_dec2(1, dummy); + + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); + + b .Ldec_cbc_tail; +.ltorg +.size _gcry_blowfish_arm_dec_blk2,.-_gcry_blowfish_arm_dec_blk2; + +.align 3 +.globl _gcry_blowfish_arm_cbc_dec; +.type _gcry_blowfish_arm_cbc_dec,%function; + +_gcry_blowfish_arm_cbc_dec: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit) + */ + push {%r2-%r11, %ip, %lr}; + + read_block2(%r2, RL0, RR0, RL1, RR1, RT0); + + /* dec_blk2 is only used by cbc_dec, jump directly in/out instead + * of function call. */ + b _gcry_blowfish_arm_dec_blk2; +.Ldec_cbc_tail: + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r0: %src, %r1: dst, %r2: iv */ + pop {%r0, %r2}; + + /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */ + read_block_host(%r0, 0, %r7, %r8, %r5); + /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. 
*/
+	ldm %r2, {%r5, %r6};
+
+	/* out[1] ^= IV+1 */
+	eor %r10, %r7;
+	eor %r9, %r8;
+	/* out[0] ^= IV */
+	eor %r4, %r5;
+	eor %r3, %r6;
+
+	/* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */
+	read_block_host(%r0, 8, %r7, %r8, %r5);
+	/* store IV+2 to iv[0] (aligned). */
+	stm %r2, {%r7, %r8};
+
+	/* store result to dst[0-3]. Might be unaligned. */
+	write_block2_host(%r1, %r4, %r3, %r10, %r9, %r5, %r6);
+
+	pop {%r4-%r11, %ip, %pc};
+.ltorg
+.size _gcry_blowfish_arm_cbc_dec,.-_gcry_blowfish_arm_cbc_dec;
+
+#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/
+#endif /*__ARM_ARCH >= 6*/
diff --git a/comm/third_party/libgcrypt/cipher/blowfish.c b/comm/third_party/libgcrypt/cipher/blowfish.c
new file mode 100644
index 0000000000..7b001306c7
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/blowfish.c
@@ -0,0 +1,1142 @@
+/* blowfish.c - Blowfish encryption
+ * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * For a description of the algorithm, see:
+ *   Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996.
+ *   ISBN 0-471-11709-9. Pages 336 ff.
+ */
+
+/* Test values:
+ * key    "abcdefghijklmnopqrstuvwxyz";
+ * plain  "BLOWFISH"
+ * cipher 32 4E D0 FE F4 13 A2 03
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+#define BLOWFISH_BLOCKSIZE 8
+#define BLOWFISH_KEY_MIN_BITS 8
+#define BLOWFISH_KEY_MAX_BITS 576
+
+
+/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */
+#undef USE_AMD64_ASM
+#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+# define USE_AMD64_ASM 1
+#endif
+
+/* USE_ARM_ASM indicates whether to use ARM assembly code.
*/ +#undef USE_ARM_ASM +#if defined(__ARMEL__) +# if defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) +# define USE_ARM_ASM 1 +# endif +#endif + +typedef struct { + u32 s0[256]; + u32 s1[256]; + u32 s2[256]; + u32 s3[256]; + u32 p[16+2]; +} BLOWFISH_context; + +static gcry_err_code_t bf_setkey (void *c, const byte *key, unsigned keylen, + cipher_bulk_ops_t *bulk_ops); +static unsigned int encrypt_block (void *bc, byte *outbuf, const byte *inbuf); +static unsigned int decrypt_block (void *bc, byte *outbuf, const byte *inbuf); + + +/* precomputed S boxes */ +static const u32 ks0[256] = { + 0xD1310BA6,0x98DFB5AC,0x2FFD72DB,0xD01ADFB7,0xB8E1AFED,0x6A267E96, + 0xBA7C9045,0xF12C7F99,0x24A19947,0xB3916CF7,0x0801F2E2,0x858EFC16, + 0x636920D8,0x71574E69,0xA458FEA3,0xF4933D7E,0x0D95748F,0x728EB658, + 0x718BCD58,0x82154AEE,0x7B54A41D,0xC25A59B5,0x9C30D539,0x2AF26013, + 0xC5D1B023,0x286085F0,0xCA417918,0xB8DB38EF,0x8E79DCB0,0x603A180E, + 0x6C9E0E8B,0xB01E8A3E,0xD71577C1,0xBD314B27,0x78AF2FDA,0x55605C60, + 0xE65525F3,0xAA55AB94,0x57489862,0x63E81440,0x55CA396A,0x2AAB10B6, + 0xB4CC5C34,0x1141E8CE,0xA15486AF,0x7C72E993,0xB3EE1411,0x636FBC2A, + 0x2BA9C55D,0x741831F6,0xCE5C3E16,0x9B87931E,0xAFD6BA33,0x6C24CF5C, + 0x7A325381,0x28958677,0x3B8F4898,0x6B4BB9AF,0xC4BFE81B,0x66282193, + 0x61D809CC,0xFB21A991,0x487CAC60,0x5DEC8032,0xEF845D5D,0xE98575B1, + 0xDC262302,0xEB651B88,0x23893E81,0xD396ACC5,0x0F6D6FF3,0x83F44239, + 0x2E0B4482,0xA4842004,0x69C8F04A,0x9E1F9B5E,0x21C66842,0xF6E96C9A, + 0x670C9C61,0xABD388F0,0x6A51A0D2,0xD8542F68,0x960FA728,0xAB5133A3, + 0x6EEF0B6C,0x137A3BE4,0xBA3BF050,0x7EFB2A98,0xA1F1651D,0x39AF0176, + 0x66CA593E,0x82430E88,0x8CEE8619,0x456F9FB4,0x7D84A5C3,0x3B8B5EBE, + 0xE06F75D8,0x85C12073,0x401A449F,0x56C16AA6,0x4ED3AA62,0x363F7706, + 0x1BFEDF72,0x429B023D,0x37D0D724,0xD00A1248,0xDB0FEAD3,0x49F1C09B, + 0x075372C9,0x80991B7B,0x25D479D8,0xF6E8DEF7,0xE3FE501A,0xB6794C3B, + 0x976CE0BD,0x04C006BA,0xC1A94FB6,0x409F60C4,0x5E5C9EC2,0x196A2463, + 0x68FB6FAF,0x3E6C53B5,0x1339B2EB,0x3B52EC6F,0x6DFC511F,0x9B30952C, + 0xCC814544,0xAF5EBD09,0xBEE3D004,0xDE334AFD,0x660F2807,0x192E4BB3, + 0xC0CBA857,0x45C8740F,0xD20B5F39,0xB9D3FBDB,0x5579C0BD,0x1A60320A, + 0xD6A100C6,0x402C7279,0x679F25FE,0xFB1FA3CC,0x8EA5E9F8,0xDB3222F8, + 0x3C7516DF,0xFD616B15,0x2F501EC8,0xAD0552AB,0x323DB5FA,0xFD238760, + 0x53317B48,0x3E00DF82,0x9E5C57BB,0xCA6F8CA0,0x1A87562E,0xDF1769DB, + 0xD542A8F6,0x287EFFC3,0xAC6732C6,0x8C4F5573,0x695B27B0,0xBBCA58C8, + 0xE1FFA35D,0xB8F011A0,0x10FA3D98,0xFD2183B8,0x4AFCB56C,0x2DD1D35B, + 0x9A53E479,0xB6F84565,0xD28E49BC,0x4BFB9790,0xE1DDF2DA,0xA4CB7E33, + 0x62FB1341,0xCEE4C6E8,0xEF20CADA,0x36774C01,0xD07E9EFE,0x2BF11FB4, + 0x95DBDA4D,0xAE909198,0xEAAD8E71,0x6B93D5A0,0xD08ED1D0,0xAFC725E0, + 0x8E3C5B2F,0x8E7594B7,0x8FF6E2FB,0xF2122B64,0x8888B812,0x900DF01C, + 0x4FAD5EA0,0x688FC31C,0xD1CFF191,0xB3A8C1AD,0x2F2F2218,0xBE0E1777, + 0xEA752DFE,0x8B021FA1,0xE5A0CC0F,0xB56F74E8,0x18ACF3D6,0xCE89E299, + 0xB4A84FE0,0xFD13E0B7,0x7CC43B81,0xD2ADA8D9,0x165FA266,0x80957705, + 0x93CC7314,0x211A1477,0xE6AD2065,0x77B5FA86,0xC75442F5,0xFB9D35CF, + 0xEBCDAF0C,0x7B3E89A0,0xD6411BD3,0xAE1E7E49,0x00250E2D,0x2071B35E, + 0x226800BB,0x57B8E0AF,0x2464369B,0xF009B91E,0x5563911D,0x59DFA6AA, + 0x78C14389,0xD95A537F,0x207D5BA2,0x02E5B9C5,0x83260376,0x6295CFA9, + 0x11C81968,0x4E734A41,0xB3472DCA,0x7B14A94A,0x1B510052,0x9A532915, + 0xD60F573F,0xBC9BC6E4,0x2B60A476,0x81E67400,0x08BA6FB5,0x571BE91F, + 0xF296EC6B,0x2A0DD915,0xB6636521,0xE7B9F9B6,0xFF34052E,0xC5855664, + 0x53B02D5D,0xA99F8FA1,0x08BA4799,0x6E85076A }; + +static const 
u32 ks1[256] = { + 0x4B7A70E9,0xB5B32944,0xDB75092E,0xC4192623,0xAD6EA6B0,0x49A7DF7D, + 0x9CEE60B8,0x8FEDB266,0xECAA8C71,0x699A17FF,0x5664526C,0xC2B19EE1, + 0x193602A5,0x75094C29,0xA0591340,0xE4183A3E,0x3F54989A,0x5B429D65, + 0x6B8FE4D6,0x99F73FD6,0xA1D29C07,0xEFE830F5,0x4D2D38E6,0xF0255DC1, + 0x4CDD2086,0x8470EB26,0x6382E9C6,0x021ECC5E,0x09686B3F,0x3EBAEFC9, + 0x3C971814,0x6B6A70A1,0x687F3584,0x52A0E286,0xB79C5305,0xAA500737, + 0x3E07841C,0x7FDEAE5C,0x8E7D44EC,0x5716F2B8,0xB03ADA37,0xF0500C0D, + 0xF01C1F04,0x0200B3FF,0xAE0CF51A,0x3CB574B2,0x25837A58,0xDC0921BD, + 0xD19113F9,0x7CA92FF6,0x94324773,0x22F54701,0x3AE5E581,0x37C2DADC, + 0xC8B57634,0x9AF3DDA7,0xA9446146,0x0FD0030E,0xECC8C73E,0xA4751E41, + 0xE238CD99,0x3BEA0E2F,0x3280BBA1,0x183EB331,0x4E548B38,0x4F6DB908, + 0x6F420D03,0xF60A04BF,0x2CB81290,0x24977C79,0x5679B072,0xBCAF89AF, + 0xDE9A771F,0xD9930810,0xB38BAE12,0xDCCF3F2E,0x5512721F,0x2E6B7124, + 0x501ADDE6,0x9F84CD87,0x7A584718,0x7408DA17,0xBC9F9ABC,0xE94B7D8C, + 0xEC7AEC3A,0xDB851DFA,0x63094366,0xC464C3D2,0xEF1C1847,0x3215D908, + 0xDD433B37,0x24C2BA16,0x12A14D43,0x2A65C451,0x50940002,0x133AE4DD, + 0x71DFF89E,0x10314E55,0x81AC77D6,0x5F11199B,0x043556F1,0xD7A3C76B, + 0x3C11183B,0x5924A509,0xF28FE6ED,0x97F1FBFA,0x9EBABF2C,0x1E153C6E, + 0x86E34570,0xEAE96FB1,0x860E5E0A,0x5A3E2AB3,0x771FE71C,0x4E3D06FA, + 0x2965DCB9,0x99E71D0F,0x803E89D6,0x5266C825,0x2E4CC978,0x9C10B36A, + 0xC6150EBA,0x94E2EA78,0xA5FC3C53,0x1E0A2DF4,0xF2F74EA7,0x361D2B3D, + 0x1939260F,0x19C27960,0x5223A708,0xF71312B6,0xEBADFE6E,0xEAC31F66, + 0xE3BC4595,0xA67BC883,0xB17F37D1,0x018CFF28,0xC332DDEF,0xBE6C5AA5, + 0x65582185,0x68AB9802,0xEECEA50F,0xDB2F953B,0x2AEF7DAD,0x5B6E2F84, + 0x1521B628,0x29076170,0xECDD4775,0x619F1510,0x13CCA830,0xEB61BD96, + 0x0334FE1E,0xAA0363CF,0xB5735C90,0x4C70A239,0xD59E9E0B,0xCBAADE14, + 0xEECC86BC,0x60622CA7,0x9CAB5CAB,0xB2F3846E,0x648B1EAF,0x19BDF0CA, + 0xA02369B9,0x655ABB50,0x40685A32,0x3C2AB4B3,0x319EE9D5,0xC021B8F7, + 0x9B540B19,0x875FA099,0x95F7997E,0x623D7DA8,0xF837889A,0x97E32D77, + 0x11ED935F,0x16681281,0x0E358829,0xC7E61FD6,0x96DEDFA1,0x7858BA99, + 0x57F584A5,0x1B227263,0x9B83C3FF,0x1AC24696,0xCDB30AEB,0x532E3054, + 0x8FD948E4,0x6DBC3128,0x58EBF2EF,0x34C6FFEA,0xFE28ED61,0xEE7C3C73, + 0x5D4A14D9,0xE864B7E3,0x42105D14,0x203E13E0,0x45EEE2B6,0xA3AAABEA, + 0xDB6C4F15,0xFACB4FD0,0xC742F442,0xEF6ABBB5,0x654F3B1D,0x41CD2105, + 0xD81E799E,0x86854DC7,0xE44B476A,0x3D816250,0xCF62A1F2,0x5B8D2646, + 0xFC8883A0,0xC1C7B6A3,0x7F1524C3,0x69CB7492,0x47848A0B,0x5692B285, + 0x095BBF00,0xAD19489D,0x1462B174,0x23820E00,0x58428D2A,0x0C55F5EA, + 0x1DADF43E,0x233F7061,0x3372F092,0x8D937E41,0xD65FECF1,0x6C223BDB, + 0x7CDE3759,0xCBEE7460,0x4085F2A7,0xCE77326E,0xA6078084,0x19F8509E, + 0xE8EFD855,0x61D99735,0xA969A7AA,0xC50C06C2,0x5A04ABFC,0x800BCADC, + 0x9E447A2E,0xC3453484,0xFDD56705,0x0E1E9EC9,0xDB73DBD3,0x105588CD, + 0x675FDA79,0xE3674340,0xC5C43465,0x713E38D8,0x3D28F89E,0xF16DFF20, + 0x153E21E7,0x8FB03D4A,0xE6E39F2B,0xDB83ADF7 }; + +static const u32 ks2[256] = { + 0xE93D5A68,0x948140F7,0xF64C261C,0x94692934,0x411520F7,0x7602D4F7, + 0xBCF46B2E,0xD4A20068,0xD4082471,0x3320F46A,0x43B7D4B7,0x500061AF, + 0x1E39F62E,0x97244546,0x14214F74,0xBF8B8840,0x4D95FC1D,0x96B591AF, + 0x70F4DDD3,0x66A02F45,0xBFBC09EC,0x03BD9785,0x7FAC6DD0,0x31CB8504, + 0x96EB27B3,0x55FD3941,0xDA2547E6,0xABCA0A9A,0x28507825,0x530429F4, + 0x0A2C86DA,0xE9B66DFB,0x68DC1462,0xD7486900,0x680EC0A4,0x27A18DEE, + 0x4F3FFEA2,0xE887AD8C,0xB58CE006,0x7AF4D6B6,0xAACE1E7C,0xD3375FEC, + 0xCE78A399,0x406B2A42,0x20FE9E35,0xD9F385B9,0xEE39D7AB,0x3B124E8B, + 
0x1DC9FAF7,0x4B6D1856,0x26A36631,0xEAE397B2,0x3A6EFA74,0xDD5B4332, + 0x6841E7F7,0xCA7820FB,0xFB0AF54E,0xD8FEB397,0x454056AC,0xBA489527, + 0x55533A3A,0x20838D87,0xFE6BA9B7,0xD096954B,0x55A867BC,0xA1159A58, + 0xCCA92963,0x99E1DB33,0xA62A4A56,0x3F3125F9,0x5EF47E1C,0x9029317C, + 0xFDF8E802,0x04272F70,0x80BB155C,0x05282CE3,0x95C11548,0xE4C66D22, + 0x48C1133F,0xC70F86DC,0x07F9C9EE,0x41041F0F,0x404779A4,0x5D886E17, + 0x325F51EB,0xD59BC0D1,0xF2BCC18F,0x41113564,0x257B7834,0x602A9C60, + 0xDFF8E8A3,0x1F636C1B,0x0E12B4C2,0x02E1329E,0xAF664FD1,0xCAD18115, + 0x6B2395E0,0x333E92E1,0x3B240B62,0xEEBEB922,0x85B2A20E,0xE6BA0D99, + 0xDE720C8C,0x2DA2F728,0xD0127845,0x95B794FD,0x647D0862,0xE7CCF5F0, + 0x5449A36F,0x877D48FA,0xC39DFD27,0xF33E8D1E,0x0A476341,0x992EFF74, + 0x3A6F6EAB,0xF4F8FD37,0xA812DC60,0xA1EBDDF8,0x991BE14C,0xDB6E6B0D, + 0xC67B5510,0x6D672C37,0x2765D43B,0xDCD0E804,0xF1290DC7,0xCC00FFA3, + 0xB5390F92,0x690FED0B,0x667B9FFB,0xCEDB7D9C,0xA091CF0B,0xD9155EA3, + 0xBB132F88,0x515BAD24,0x7B9479BF,0x763BD6EB,0x37392EB3,0xCC115979, + 0x8026E297,0xF42E312D,0x6842ADA7,0xC66A2B3B,0x12754CCC,0x782EF11C, + 0x6A124237,0xB79251E7,0x06A1BBE6,0x4BFB6350,0x1A6B1018,0x11CAEDFA, + 0x3D25BDD8,0xE2E1C3C9,0x44421659,0x0A121386,0xD90CEC6E,0xD5ABEA2A, + 0x64AF674E,0xDA86A85F,0xBEBFE988,0x64E4C3FE,0x9DBC8057,0xF0F7C086, + 0x60787BF8,0x6003604D,0xD1FD8346,0xF6381FB0,0x7745AE04,0xD736FCCC, + 0x83426B33,0xF01EAB71,0xB0804187,0x3C005E5F,0x77A057BE,0xBDE8AE24, + 0x55464299,0xBF582E61,0x4E58F48F,0xF2DDFDA2,0xF474EF38,0x8789BDC2, + 0x5366F9C3,0xC8B38E74,0xB475F255,0x46FCD9B9,0x7AEB2661,0x8B1DDF84, + 0x846A0E79,0x915F95E2,0x466E598E,0x20B45770,0x8CD55591,0xC902DE4C, + 0xB90BACE1,0xBB8205D0,0x11A86248,0x7574A99E,0xB77F19B6,0xE0A9DC09, + 0x662D09A1,0xC4324633,0xE85A1F02,0x09F0BE8C,0x4A99A025,0x1D6EFE10, + 0x1AB93D1D,0x0BA5A4DF,0xA186F20F,0x2868F169,0xDCB7DA83,0x573906FE, + 0xA1E2CE9B,0x4FCD7F52,0x50115E01,0xA70683FA,0xA002B5C4,0x0DE6D027, + 0x9AF88C27,0x773F8641,0xC3604C06,0x61A806B5,0xF0177A28,0xC0F586E0, + 0x006058AA,0x30DC7D62,0x11E69ED7,0x2338EA63,0x53C2DD94,0xC2C21634, + 0xBBCBEE56,0x90BCB6DE,0xEBFC7DA1,0xCE591D76,0x6F05E409,0x4B7C0188, + 0x39720A3D,0x7C927C24,0x86E3725F,0x724D9DB9,0x1AC15BB4,0xD39EB8FC, + 0xED545578,0x08FCA5B5,0xD83D7CD3,0x4DAD0FC4,0x1E50EF5E,0xB161E6F8, + 0xA28514D9,0x6C51133C,0x6FD5C7E7,0x56E14EC4,0x362ABFCE,0xDDC6C837, + 0xD79A3234,0x92638212,0x670EFA8E,0x406000E0 }; + +static const u32 ks3[256] = { + 0x3A39CE37,0xD3FAF5CF,0xABC27737,0x5AC52D1B,0x5CB0679E,0x4FA33742, + 0xD3822740,0x99BC9BBE,0xD5118E9D,0xBF0F7315,0xD62D1C7E,0xC700C47B, + 0xB78C1B6B,0x21A19045,0xB26EB1BE,0x6A366EB4,0x5748AB2F,0xBC946E79, + 0xC6A376D2,0x6549C2C8,0x530FF8EE,0x468DDE7D,0xD5730A1D,0x4CD04DC6, + 0x2939BBDB,0xA9BA4650,0xAC9526E8,0xBE5EE304,0xA1FAD5F0,0x6A2D519A, + 0x63EF8CE2,0x9A86EE22,0xC089C2B8,0x43242EF6,0xA51E03AA,0x9CF2D0A4, + 0x83C061BA,0x9BE96A4D,0x8FE51550,0xBA645BD6,0x2826A2F9,0xA73A3AE1, + 0x4BA99586,0xEF5562E9,0xC72FEFD3,0xF752F7DA,0x3F046F69,0x77FA0A59, + 0x80E4A915,0x87B08601,0x9B09E6AD,0x3B3EE593,0xE990FD5A,0x9E34D797, + 0x2CF0B7D9,0x022B8B51,0x96D5AC3A,0x017DA67D,0xD1CF3ED6,0x7C7D2D28, + 0x1F9F25CF,0xADF2B89B,0x5AD6B472,0x5A88F54C,0xE029AC71,0xE019A5E6, + 0x47B0ACFD,0xED93FA9B,0xE8D3C48D,0x283B57CC,0xF8D56629,0x79132E28, + 0x785F0191,0xED756055,0xF7960E44,0xE3D35E8C,0x15056DD4,0x88F46DBA, + 0x03A16125,0x0564F0BD,0xC3EB9E15,0x3C9057A2,0x97271AEC,0xA93A072A, + 0x1B3F6D9B,0x1E6321F5,0xF59C66FB,0x26DCF319,0x7533D928,0xB155FDF5, + 0x03563482,0x8ABA3CBB,0x28517711,0xC20AD9F8,0xABCC5167,0xCCAD925F, + 
0x4DE81751,0x3830DC8E,0x379D5862,0x9320F991,0xEA7A90C2,0xFB3E7BCE, + 0x5121CE64,0x774FBE32,0xA8B6E37E,0xC3293D46,0x48DE5369,0x6413E680, + 0xA2AE0810,0xDD6DB224,0x69852DFD,0x09072166,0xB39A460A,0x6445C0DD, + 0x586CDECF,0x1C20C8AE,0x5BBEF7DD,0x1B588D40,0xCCD2017F,0x6BB4E3BB, + 0xDDA26A7E,0x3A59FF45,0x3E350A44,0xBCB4CDD5,0x72EACEA8,0xFA6484BB, + 0x8D6612AE,0xBF3C6F47,0xD29BE463,0x542F5D9E,0xAEC2771B,0xF64E6370, + 0x740E0D8D,0xE75B1357,0xF8721671,0xAF537D5D,0x4040CB08,0x4EB4E2CC, + 0x34D2466A,0x0115AF84,0xE1B00428,0x95983A1D,0x06B89FB4,0xCE6EA048, + 0x6F3F3B82,0x3520AB82,0x011A1D4B,0x277227F8,0x611560B1,0xE7933FDC, + 0xBB3A792B,0x344525BD,0xA08839E1,0x51CE794B,0x2F32C9B7,0xA01FBAC9, + 0xE01CC87E,0xBCC7D1F6,0xCF0111C3,0xA1E8AAC7,0x1A908749,0xD44FBD9A, + 0xD0DADECB,0xD50ADA38,0x0339C32A,0xC6913667,0x8DF9317C,0xE0B12B4F, + 0xF79E59B7,0x43F5BB3A,0xF2D519FF,0x27D9459C,0xBF97222C,0x15E6FC2A, + 0x0F91FC71,0x9B941525,0xFAE59361,0xCEB69CEB,0xC2A86459,0x12BAA8D1, + 0xB6C1075E,0xE3056A0C,0x10D25065,0xCB03A442,0xE0EC6E0E,0x1698DB3B, + 0x4C98A0BE,0x3278E964,0x9F1F9532,0xE0D392DF,0xD3A0342B,0x8971F21E, + 0x1B0A7441,0x4BA3348C,0xC5BE7120,0xC37632D8,0xDF359F8D,0x9B992F2E, + 0xE60B6F47,0x0FE3F11D,0xE54CDA54,0x1EDAD891,0xCE6279CF,0xCD3E7E6F, + 0x1618B166,0xFD2C1D05,0x848FD2C5,0xF6FB2299,0xF523F357,0xA6327623, + 0x93A83531,0x56CCCD02,0xACF08162,0x5A75EBB5,0x6E163697,0x88D273CC, + 0xDE966292,0x81B949D0,0x4C50901B,0x71C65614,0xE6C6C7BD,0x327A140A, + 0x45E1D006,0xC3F27B9A,0xC9AA53FD,0x62A80F00,0xBB25BFE2,0x35BDD2F6, + 0x71126905,0xB2040222,0xB6CBCF7C,0xCD769C2B,0x53113EC0,0x1640E3D3, + 0x38ABBD60,0x2547ADF0,0xBA38209C,0xF746CE76,0x77AFA1C5,0x20756060, + 0x85CBFE4E,0x8AE88DD8,0x7AAAF9B0,0x4CF9AA7E,0x1948C25C,0x02FB8A8C, + 0x01C36AE4,0xD6EBE1F9,0x90D4F869,0xA65CDEA0,0x3F09252D,0xC208E69F, + 0xB74E6132,0xCE77E25B,0x578FDFE3,0x3AC372E6 }; + +static const u32 ps[16+2] = { + 0x243F6A88,0x85A308D3,0x13198A2E,0x03707344,0xA4093822,0x299F31D0, + 0x082EFA98,0xEC4E6C89,0x452821E6,0x38D01377,0xBE5466CF,0x34E90C6C, + 0xC0AC29B7,0xC97C50DD,0x3F84D5B5,0xB5470917,0x9216D5D9,0x8979FB1B }; + + +#ifdef USE_AMD64_ASM + +/* Assembly implementations of Blowfish. */ +extern void _gcry_blowfish_amd64_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, + u32 *ret_xr); + +extern void _gcry_blowfish_amd64_encrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +extern void _gcry_blowfish_amd64_decrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +/* These assembly implementations process four blocks in parallel. 
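+   cipher.c reaches these through the bulk_ops table that bf_setkey fills
+   in below.  A minimal sketch of one call, assuming four contiguous
+   8-byte blocks and purely illustrative buffer names:
+
+     byte out[4 * BLOWFISH_BLOCKSIZE], in[4 * BLOWFISH_BLOCKSIZE], ctr[8];
+     _gcry_blowfish_amd64_ctr_enc (ctx, out, in, ctr);
+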
*/ +extern void _gcry_blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +static void +do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ + _gcry_blowfish_amd64_do_encrypt (bc, ret_xl, ret_xr); +} + +static void +do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_blowfish_amd64_encrypt_block (context, outbuf, inbuf); +} + +static void +do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_blowfish_amd64_decrypt_block (context, outbuf, inbuf); +} + +static inline void +blowfish_amd64_ctr_enc(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *ctr) +{ + _gcry_blowfish_amd64_ctr_enc(ctx, out, in, ctr); +} + +static inline void +blowfish_amd64_cbc_dec(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *iv) +{ + _gcry_blowfish_amd64_cbc_dec(ctx, out, in, iv); +} + +static inline void +blowfish_amd64_cfb_dec(BLOWFISH_context *ctx, byte *out, const byte *in, + byte *iv) +{ + _gcry_blowfish_amd64_cfb_dec(ctx, out, in, iv); +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (2*8); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (2*8); +} + +#elif defined(USE_ARM_ASM) + +/* Assembly implementations of Blowfish. */ +extern void _gcry_blowfish_arm_do_encrypt(BLOWFISH_context *c, u32 *ret_xl, + u32 *ret_xr); + +extern void _gcry_blowfish_arm_encrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +extern void _gcry_blowfish_arm_decrypt_block(BLOWFISH_context *c, byte *out, + const byte *in); + +/* These assembly implementations process two blocks in parallel. 
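+   Any odd block left over after the two-block pairs falls through to the
+   generic C loop at the end of each bulk function below.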
*/ +extern void _gcry_blowfish_arm_ctr_enc(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_blowfish_arm_cbc_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_blowfish_arm_cfb_dec(BLOWFISH_context *ctx, byte *out, + const byte *in, byte *iv); + +static void +do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ + _gcry_blowfish_arm_do_encrypt (bc, ret_xl, ret_xr); +} + +static void +do_encrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_blowfish_arm_encrypt_block (context, outbuf, inbuf); +} + +static void +do_decrypt_block (BLOWFISH_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_blowfish_arm_decrypt_block (context, outbuf, inbuf); +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (10*4); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (10*4); +} + +#else /*USE_ARM_ASM*/ + + +#define F(x) ((( s0[(x)>>24] + s1[((x)>>16)&0xff]) \ + ^ s2[((x)>>8)&0xff]) + s3[(x)&0xff] ) +#define R(l,r,i) do { l ^= p[i]; r ^= F(l); } while(0) +#define R3(l,r,i) do { R(l##0,r##0,i);R(l##1,r##1,i);R(l##2,r##2,i);} while(0) + + +static void +do_encrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ + u32 xl, xr, *s0, *s1, *s2, *s3, *p; + + xl = *ret_xl; + xr = *ret_xr; + p = bc->p; + s0 = bc->s0; + s1 = bc->s1; + s2 = bc->s2; + s3 = bc->s3; + + R( xl, xr, 0); + R( xr, xl, 1); + R( xl, xr, 2); + R( xr, xl, 3); + R( xl, xr, 4); + R( xr, xl, 5); + R( xl, xr, 6); + R( xr, xl, 7); + R( xl, xr, 8); + R( xr, xl, 9); + R( xl, xr, 10); + R( xr, xl, 11); + R( xl, xr, 12); + R( xr, xl, 13); + R( xl, xr, 14); + R( xr, xl, 15); + + xl ^= p[16]; + xr ^= p[16+1]; + + *ret_xl = xr; + *ret_xr = xl; +} + + +static void +do_encrypt_3 ( BLOWFISH_context *bc, byte *dst, const byte *src ) +{ + u32 xl0, xr0, xl1, xr1, xl2, xr2, *s0, *s1, *s2, *s3, *p; + + xl0 = buf_get_be32(src + 0); + xr0 = buf_get_be32(src + 4); + xl1 = buf_get_be32(src + 8); + xr1 = buf_get_be32(src + 12); + xl2 = buf_get_be32(src + 16); + xr2 = buf_get_be32(src + 20); + p = bc->p; + s0 = bc->s0; + s1 = bc->s1; + s2 = bc->s2; + s3 = bc->s3; + + R3( xl, xr, 0); + R3( xr, xl, 1); + R3( xl, xr, 2); + R3( xr, xl, 3); + R3( xl, xr, 4); + R3( xr, xl, 5); + R3( xl, xr, 6); + R3( xr, xl, 7); + R3( xl, xr, 8); + R3( xr, xl, 9); + R3( xl, xr, 10); + R3( xr, xl, 11); + R3( xl, xr, 12); + R3( xr, xl, 13); + R3( xl, xr, 14); + R3( xr, xl, 15); + + xl0 ^= p[16]; + xr0 ^= p[16+1]; + xl1 ^= p[16]; + xr1 ^= p[16+1]; + xl2 ^= p[16]; + xr2 ^= p[16+1]; + + buf_put_be32(dst + 0, xr0); + buf_put_be32(dst + 4, xl0); + buf_put_be32(dst + 8, xr1); + buf_put_be32(dst + 12, xl1); + buf_put_be32(dst + 16, xr2); + buf_put_be32(dst + 20, xl2); +} + + +static void +decrypt ( BLOWFISH_context *bc, u32 *ret_xl, u32 *ret_xr ) +{ + u32 xl, xr, *s0, *s1, *s2, *s3, *p; + + xl = *ret_xl; + xr = *ret_xr; + p = bc->p; + s0 = bc->s0; + s1 = bc->s1; + s2 = bc->s2; + s3 = bc->s3; + + R( xl, xr, 17); + R( xr, xl, 16); + R( xl, xr, 15); + R( xr, xl, 14); + R( xl, xr, 13); + R( xr, xl, 12); + R( xl, xr, 11); + R( xr, xl, 10); + R( xl, xr, 9); + R( xr, xl, 8); + R( xl, xr, 7); + R( xr, xl, 6); + R( xl, xr, 5); + R( xr, xl, 4); + R( xl, xr, 3); + R( xr, xl, 
2); + + xl ^= p[1]; + xr ^= p[0]; + + *ret_xl = xr; + *ret_xr = xl; +} + + +static void +do_decrypt_3 ( BLOWFISH_context *bc, byte *dst, const byte *src ) +{ + u32 xl0, xr0, xl1, xr1, xl2, xr2, *s0, *s1, *s2, *s3, *p; + + xl0 = buf_get_be32(src + 0); + xr0 = buf_get_be32(src + 4); + xl1 = buf_get_be32(src + 8); + xr1 = buf_get_be32(src + 12); + xl2 = buf_get_be32(src + 16); + xr2 = buf_get_be32(src + 20); + p = bc->p; + s0 = bc->s0; + s1 = bc->s1; + s2 = bc->s2; + s3 = bc->s3; + + R3( xl, xr, 17); + R3( xr, xl, 16); + R3( xl, xr, 15); + R3( xr, xl, 14); + R3( xl, xr, 13); + R3( xr, xl, 12); + R3( xl, xr, 11); + R3( xr, xl, 10); + R3( xl, xr, 9); + R3( xr, xl, 8); + R3( xl, xr, 7); + R3( xr, xl, 6); + R3( xl, xr, 5); + R3( xr, xl, 4); + R3( xl, xr, 3); + R3( xr, xl, 2); + + xl0 ^= p[1]; + xr0 ^= p[0]; + xl1 ^= p[1]; + xr1 ^= p[0]; + xl2 ^= p[1]; + xr2 ^= p[0]; + + buf_put_be32(dst + 0, xr0); + buf_put_be32(dst + 4, xl0); + buf_put_be32(dst + 8, xr1); + buf_put_be32(dst + 12, xl1); + buf_put_be32(dst + 16, xr2); + buf_put_be32(dst + 20, xl2); +} + +#undef F +#undef R +#undef R3 + +static void +do_encrypt_block ( BLOWFISH_context *bc, byte *outbuf, const byte *inbuf ) +{ + u32 d1, d2; + + d1 = buf_get_be32(inbuf); + d2 = buf_get_be32(inbuf + 4); + do_encrypt( bc, &d1, &d2 ); + buf_put_be32(outbuf, d1); + buf_put_be32(outbuf + 4, d2); +} + +static unsigned int +encrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *bc = (BLOWFISH_context *) context; + do_encrypt_block (bc, outbuf, inbuf); + return /*burn_stack*/ (64); +} + + +static void +do_decrypt_block (BLOWFISH_context *bc, byte *outbuf, const byte *inbuf) +{ + u32 d1, d2; + + d1 = buf_get_be32(inbuf); + d2 = buf_get_be32(inbuf + 4); + decrypt( bc, &d1, &d2 ); + buf_put_be32(outbuf, d1); + buf_put_be32(outbuf + 4, d2); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + BLOWFISH_context *bc = (BLOWFISH_context *) context; + do_decrypt_block (bc, outbuf, inbuf); + return /*burn_stack*/ (64); +} + +#endif /*!USE_AMD64_ASM&&!USE_ARM_ASM*/ + + +/* Bulk encryption of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size BLOWFISH_BLOCKSIZE. */ +static void +_gcry_blowfish_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + BLOWFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[BLOWFISH_BLOCKSIZE * 3]; + int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 5 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + blowfish_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 4; + outbuf += 4 * BLOWFISH_BLOCKSIZE; + inbuf += 4 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_blowfish_arm_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 2; + outbuf += 2 * BLOWFISH_BLOCKSIZE; + inbuf += 2 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM) + for ( ;nblocks >= 3; nblocks -= 3) + { + /* Prepare the counter blocks. 
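+         cipher_block_add treats the 8-byte counter as a big-endian
+         integer and adds with carry, so e.g. ...00 fe steps to ...00 ff
+         and then to ...01 00.  Three consecutive counter values (ctr,
+         ctr+1, ctr+2) are laid out in tmpbuf and encrypted by a single
+         do_encrypt_3 call.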
*/ + cipher_block_cpy (tmpbuf + 0, ctr, BLOWFISH_BLOCKSIZE); + cipher_block_cpy (tmpbuf + 8, ctr, BLOWFISH_BLOCKSIZE); + cipher_block_cpy (tmpbuf + 16, ctr, BLOWFISH_BLOCKSIZE); + cipher_block_add (tmpbuf + 8, 1, BLOWFISH_BLOCKSIZE); + cipher_block_add (tmpbuf + 16, 2, BLOWFISH_BLOCKSIZE); + cipher_block_add (ctr, 3, BLOWFISH_BLOCKSIZE); + /* Encrypt the counter. */ + do_encrypt_3(ctx, tmpbuf, tmpbuf); + /* XOR the input with the encrypted counter and store in output. */ + buf_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE * 3); + outbuf += BLOWFISH_BLOCKSIZE * 3; + inbuf += BLOWFISH_BLOCKSIZE * 3; + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + do_encrypt_block(ctx, tmpbuf, ctr); + /* XOR the input with the encrypted counter and store in output. */ + cipher_block_xor(outbuf, tmpbuf, inbuf, BLOWFISH_BLOCKSIZE); + outbuf += BLOWFISH_BLOCKSIZE; + inbuf += BLOWFISH_BLOCKSIZE; + /* Increment the counter. */ + cipher_block_add (ctr, 1, BLOWFISH_BLOCKSIZE); + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CBC mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +static void +_gcry_blowfish_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + BLOWFISH_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char savebuf[BLOWFISH_BLOCKSIZE * 3]; + int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 5 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + blowfish_amd64_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 4; + outbuf += 4 * BLOWFISH_BLOCKSIZE; + inbuf += 4 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_blowfish_arm_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * BLOWFISH_BLOCKSIZE; + inbuf += 2 * BLOWFISH_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM) + for ( ;nblocks >= 3; nblocks -= 3) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + do_decrypt_3 (ctx, savebuf, inbuf); + + cipher_block_xor_1 (savebuf + 0, iv, BLOWFISH_BLOCKSIZE); + cipher_block_xor_1 (savebuf + 8, inbuf, BLOWFISH_BLOCKSIZE * 2); + cipher_block_cpy (iv, inbuf + 16, BLOWFISH_BLOCKSIZE); + buf_cpy (outbuf, savebuf, BLOWFISH_BLOCKSIZE * 3); + inbuf += BLOWFISH_BLOCKSIZE * 3; + outbuf += BLOWFISH_BLOCKSIZE * 3; + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + do_decrypt_block (ctx, savebuf, inbuf); + + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, BLOWFISH_BLOCKSIZE); + inbuf += BLOWFISH_BLOCKSIZE; + outbuf += BLOWFISH_BLOCKSIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. 
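+   CFB decryption runs the block cipher forward only:
+     out[i] = E(c[i-1]) ^ c[i], with c[-1] = iv,
+   which is why the code below calls do_encrypt_3/do_encrypt_block even
+   though it is decrypting.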
*/
+static void
+_gcry_blowfish_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+		       const void *inbuf_arg, size_t nblocks)
+{
+  BLOWFISH_context *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char tmpbuf[BLOWFISH_BLOCKSIZE * 3];
+  int burn_stack_depth = (64) + 4 * BLOWFISH_BLOCKSIZE;
+
+#ifdef USE_AMD64_ASM
+  {
+    if (nblocks >= 4)
+      burn_stack_depth += 5 * sizeof(void*);
+
+    /* Process data in 4 block chunks. */
+    while (nblocks >= 4)
+      {
+        blowfish_amd64_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 4;
+        outbuf += 4 * BLOWFISH_BLOCKSIZE;
+        inbuf += 4 * BLOWFISH_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#elif defined(USE_ARM_ASM)
+  {
+    /* Process data in 2 block chunks. */
+    while (nblocks >= 2)
+      {
+        _gcry_blowfish_arm_cfb_dec(ctx, outbuf, inbuf, iv);
+
+        nblocks -= 2;
+        outbuf += 2 * BLOWFISH_BLOCKSIZE;
+        inbuf += 2 * BLOWFISH_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM)
+  for ( ;nblocks >= 3; nblocks -= 3 )
+    {
+      cipher_block_cpy (tmpbuf + 0, iv, BLOWFISH_BLOCKSIZE);
+      cipher_block_cpy (tmpbuf + 8, inbuf + 0, BLOWFISH_BLOCKSIZE * 2);
+      cipher_block_cpy (iv, inbuf + 16, BLOWFISH_BLOCKSIZE);
+      do_encrypt_3 (ctx, tmpbuf, tmpbuf);
+      buf_xor (outbuf, inbuf, tmpbuf, BLOWFISH_BLOCKSIZE * 3);
+      outbuf += BLOWFISH_BLOCKSIZE * 3;
+      inbuf += BLOWFISH_BLOCKSIZE * 3;
+    }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      do_encrypt_block(ctx, iv, iv);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, BLOWFISH_BLOCKSIZE);
+      outbuf += BLOWFISH_BLOCKSIZE;
+      inbuf += BLOWFISH_BLOCKSIZE;
+    }
+
+  wipememory(tmpbuf, sizeof(tmpbuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Run the self-tests for BLOWFISH-CTR, tests IV increment of bulk CTR
+   encryption. Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 4+1;
+  const int blocksize = BLOWFISH_BLOCKSIZE;
+  const int context_size = sizeof(BLOWFISH_context);
+
+  return _gcry_selftest_helper_ctr("BLOWFISH", &bf_setkey,
+           &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for BLOWFISH-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 4+2;
+  const int blocksize = BLOWFISH_BLOCKSIZE;
+  const int context_size = sizeof(BLOWFISH_context);
+
+  return _gcry_selftest_helper_cbc("BLOWFISH", &bf_setkey,
+           &encrypt_block, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for BLOWFISH-CFB, tests bulk CFB decryption.
+   Returns NULL on success.
*/ +static const char * +selftest_cfb (void) +{ + const int nblocks = 4+2; + const int blocksize = BLOWFISH_BLOCKSIZE; + const int context_size = sizeof(BLOWFISH_context); + + return _gcry_selftest_helper_cfb("BLOWFISH", &bf_setkey, + &encrypt_block, nblocks, blocksize, context_size); +} + + +static const char* +selftest(void) +{ + BLOWFISH_context c; + cipher_bulk_ops_t bulk_ops; + byte plain[] = "BLOWFISH"; + byte buffer[8]; + static const byte plain3[] = + { 0xFE, 0xDC, 0xBA, 0x98, 0x76, 0x54, 0x32, 0x10 }; + static const byte key3[] = + { 0x41, 0x79, 0x6E, 0xA0, 0x52, 0x61, 0x6E, 0xE4 }; + static const byte cipher3[] = + { 0xE1, 0x13, 0xF4, 0x10, 0x2C, 0xFC, 0xCE, 0x43 }; + const char *r; + + bf_setkey( (void *) &c, + (const unsigned char*)"abcdefghijklmnopqrstuvwxyz", 26, + &bulk_ops ); + encrypt_block( (void *) &c, buffer, plain ); + if( memcmp( buffer, "\x32\x4E\xD0\xFE\xF4\x13\xA2\x03", 8 ) ) + return "Blowfish selftest failed (1)."; + decrypt_block( (void *) &c, buffer, buffer ); + if( memcmp( buffer, plain, 8 ) ) + return "Blowfish selftest failed (2)."; + + bf_setkey( (void *) &c, key3, 8, &bulk_ops ); + encrypt_block( (void *) &c, buffer, plain3 ); + if( memcmp( buffer, cipher3, 8 ) ) + return "Blowfish selftest failed (3)."; + decrypt_block( (void *) &c, buffer, buffer ); + if( memcmp( buffer, plain3, 8 ) ) + return "Blowfish selftest failed (4)."; + + if ( (r = selftest_cbc ()) ) + return r; + + if ( (r = selftest_cfb ()) ) + return r; + + if ( (r = selftest_ctr ()) ) + return r; + + return NULL; +} + + +struct hashset_elem { + u32 val; + short nidx; + char used; +}; + +static inline byte +val_to_hidx(u32 val) +{ + /* bf sboxes are quite random already. */ + return (val >> 24) ^ (val >> 16) ^ (val >> 8) ^ val; +} + +static inline int +add_val(struct hashset_elem hset[256], u32 val, int *midx, + struct hashset_elem *mpool) +{ + struct hashset_elem *elem; + byte hidx; + + hidx = val_to_hidx(val); + elem = &hset[hidx]; + + /* Check if first is in use. */ + if (elem->used == 0) + { + elem->val = val; + elem->nidx = -1; + elem->used = 1; + return 0; + } + + /* Check if first matches. */ + if (elem->val == val) + return 1; + + for (; elem->nidx >= 0; elem = &mpool[elem->nidx]) + { + /* Check if elem matches. */ + if (elem->val == val) + return 1; + } + + elem->nidx = (*midx)++; + elem = &mpool[elem->nidx]; + + elem->val = val; + elem->nidx = -1; + elem->used = 1; + + return 0; +} + +static gcry_err_code_t +do_bf_setkey (BLOWFISH_context *c, const byte *key, unsigned keylen) +{ + struct hashset_elem mempool[4 * 255]; /* Enough entries for the worst case. 
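+     Each of the four S-box tables feeds 256 values into its own 256-slot
+     hash set; the first value landing in a slot lives in hset itself, so
+     at most 255 entries per table can spill into this pool, hence 4 * 255.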
*/ + struct hashset_elem hset[4][256]; + int memidx = 0; + int weak = 0; + int i, j, ret; + u32 data, datal, datar; + static int initialized; + static const char *selftest_failed; + + if( !initialized ) + { + initialized = 1; + selftest_failed = selftest(); + if( selftest_failed ) + log_error ("%s\n", selftest_failed ); + } + if( selftest_failed ) + return GPG_ERR_SELFTEST_FAILED; + + if (keylen < BLOWFISH_KEY_MIN_BITS / 8 || + keylen > BLOWFISH_KEY_MAX_BITS / 8) + return GPG_ERR_INV_KEYLEN; + + memset(hset, 0, sizeof(hset)); + + for(i=0; i < 16+2; i++ ) + c->p[i] = ps[i]; + for(i=0; i < 256; i++ ) + { + c->s0[i] = ks0[i]; + c->s1[i] = ks1[i]; + c->s2[i] = ks2[i]; + c->s3[i] = ks3[i]; + } + + for(i=j=0; i < 16+2; i++ ) + { + data = ((u32)key[j] << 24) | + ((u32)key[(j+1)%keylen] << 16) | + ((u32)key[(j+2)%keylen] << 8) | + ((u32)key[(j+3)%keylen]); + c->p[i] ^= data; + j = (j+4) % keylen; + } + + datal = datar = 0; + for(i=0; i < 16+2; i += 2 ) + { + do_encrypt( c, &datal, &datar ); + c->p[i] = datal; + c->p[i+1] = datar; + } + for(i=0; i < 256; i += 2 ) + { + do_encrypt( c, &datal, &datar ); + c->s0[i] = datal; + c->s0[i+1] = datar; + + /* Add values to hashset, detect duplicates (weak keys). */ + ret = add_val (hset[0], datal, &memidx, mempool); + weak = ret ? 1 : weak; + ret = add_val (hset[0], datar, &memidx, mempool); + weak = ret ? 1 : weak; + } + for(i=0; i < 256; i += 2 ) + { + do_encrypt( c, &datal, &datar ); + c->s1[i] = datal; + c->s1[i+1] = datar; + + /* Add values to hashset, detect duplicates (weak keys). */ + ret = add_val (hset[1], datal, &memidx, mempool); + weak = ret ? 1 : weak; + ret = add_val (hset[1], datar, &memidx, mempool); + weak = ret ? 1 : weak; + } + for(i=0; i < 256; i += 2 ) + { + do_encrypt( c, &datal, &datar ); + c->s2[i] = datal; + c->s2[i+1] = datar; + + /* Add values to hashset, detect duplicates (weak keys). */ + ret = add_val (hset[2], datal, &memidx, mempool); + weak = ret ? 1 : weak; + ret = add_val (hset[2], datar, &memidx, mempool); + weak = ret ? 1 : weak; + } + for(i=0; i < 256; i += 2 ) + { + do_encrypt( c, &datal, &datar ); + c->s3[i] = datal; + c->s3[i+1] = datar; + + /* Add values to hashset, detect duplicates (weak keys). */ + ret = add_val (hset[3], datal, &memidx, mempool); + weak = ret ? 1 : weak; + ret = add_val (hset[3], datar, &memidx, mempool); + weak = ret ? 1 : weak; + } + + /* Clear stack. */ + wipememory(hset, sizeof(hset)); + wipememory(mempool, sizeof(mempool[0]) * memidx); + + _gcry_burn_stack (64); + + /* Check for weak key. A weak key is a key in which a value in + the P-array (here c) occurs more than once per table. */ + if (weak) + return GPG_ERR_WEAK_KEY; + + return GPG_ERR_NO_ERROR; +} + + +static gcry_err_code_t +bf_setkey (void *context, const byte *key, unsigned keylen, + cipher_bulk_ops_t *bulk_ops) +{ + BLOWFISH_context *c = (BLOWFISH_context *) context; + gcry_err_code_t rc = do_bf_setkey (c, key, keylen); + + /* Setup bulk encryption routines. 
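+   When these pointers are non-NULL, cipher.c prefers them over the
+   single-block encrypt/decrypt entry points.  A hedged sketch of the CTR
+   dispatch on the cipher.c side (member names shown for illustration
+   only):
+
+     if (c->bulk.ctr_enc)
+       c->bulk.ctr_enc (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks);
+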
*/
+  memset (bulk_ops, 0, sizeof(*bulk_ops));
+  bulk_ops->cfb_dec = _gcry_blowfish_cfb_dec;
+  bulk_ops->cbc_dec = _gcry_blowfish_cbc_dec;
+  bulk_ops->ctr_enc = _gcry_blowfish_ctr_enc;
+
+  return rc;
+}
+
+
+gcry_cipher_spec_t _gcry_cipher_spec_blowfish =
+  {
+    GCRY_CIPHER_BLOWFISH, {0, 0},
+    "BLOWFISH", NULL, NULL, BLOWFISH_BLOCKSIZE, 128,
+    sizeof (BLOWFISH_context),
+    bf_setkey, encrypt_block, decrypt_block
+  };
diff --git a/comm/third_party/libgcrypt/cipher/bufhelp.h b/comm/third_party/libgcrypt/cipher/bufhelp.h
new file mode 100644
index 0000000000..fa5b2e8ece
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/bufhelp.h
@@ -0,0 +1,385 @@
+/* bufhelp.h - Some buffer manipulation helpers
+ * Copyright (C) 2012-2017 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+#ifndef GCRYPT_BUFHELP_H
+#define GCRYPT_BUFHELP_H
+
+
+#include "g10lib.h"
+#include "bithelp.h"
+
+
+#undef BUFHELP_UNALIGNED_ACCESS
+#if defined(HAVE_GCC_ATTRIBUTE_PACKED) && \
+    defined(HAVE_GCC_ATTRIBUTE_ALIGNED) && \
+    defined(HAVE_GCC_ATTRIBUTE_MAY_ALIAS)
+/* Compiler supports attributes needed for automatically issuing unaligned
+   memory access instructions.
+ */
+# define BUFHELP_UNALIGNED_ACCESS 1
+#endif
+
+
+#ifndef BUFHELP_UNALIGNED_ACCESS
+
+/* Functions for loading and storing unaligned u32 values of different
+   endianness. */
+static inline u32 buf_get_be32(const void *_buf)
+{
+  const byte *in = _buf;
+  return ((u32)in[0] << 24) | ((u32)in[1] << 16) | \
+         ((u32)in[2] << 8) | (u32)in[3];
+}
+
+static inline u32 buf_get_le32(const void *_buf)
+{
+  const byte *in = _buf;
+  return ((u32)in[3] << 24) | ((u32)in[2] << 16) | \
+         ((u32)in[1] << 8) | (u32)in[0];
+}
+
+static inline void buf_put_be32(void *_buf, u32 val)
+{
+  byte *out = _buf;
+  out[0] = val >> 24;
+  out[1] = val >> 16;
+  out[2] = val >> 8;
+  out[3] = val;
+}
+
+static inline void buf_put_le32(void *_buf, u32 val)
+{
+  byte *out = _buf;
+  out[3] = val >> 24;
+  out[2] = val >> 16;
+  out[1] = val >> 8;
+  out[0] = val;
+}
+
+
+/* Functions for loading and storing unaligned u64 values of different
+   endianness.
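+   As with the u32 variants above, these compile to plain byte accesses,
+   so buf_get_be64 over the bytes 01 02 03 04 05 06 07 08 yields
+   0x0102030405060708 on any host endianness and at any alignment.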
*/ +static inline u64 buf_get_be64(const void *_buf) +{ + const byte *in = _buf; + return ((u64)in[0] << 56) | ((u64)in[1] << 48) | \ + ((u64)in[2] << 40) | ((u64)in[3] << 32) | \ + ((u64)in[4] << 24) | ((u64)in[5] << 16) | \ + ((u64)in[6] << 8) | (u64)in[7]; +} + +static inline u64 buf_get_le64(const void *_buf) +{ + const byte *in = _buf; + return ((u64)in[7] << 56) | ((u64)in[6] << 48) | \ + ((u64)in[5] << 40) | ((u64)in[4] << 32) | \ + ((u64)in[3] << 24) | ((u64)in[2] << 16) | \ + ((u64)in[1] << 8) | (u64)in[0]; +} + +static inline void buf_put_be64(void *_buf, u64 val) +{ + byte *out = _buf; + out[0] = val >> 56; + out[1] = val >> 48; + out[2] = val >> 40; + out[3] = val >> 32; + out[4] = val >> 24; + out[5] = val >> 16; + out[6] = val >> 8; + out[7] = val; +} + +static inline void buf_put_le64(void *_buf, u64 val) +{ + byte *out = _buf; + out[7] = val >> 56; + out[6] = val >> 48; + out[5] = val >> 40; + out[4] = val >> 32; + out[3] = val >> 24; + out[2] = val >> 16; + out[1] = val >> 8; + out[0] = val; +} + +#else /*BUFHELP_UNALIGNED_ACCESS*/ + +typedef struct bufhelp_u32_s +{ + u32 a; +} __attribute__((packed, aligned(1), may_alias)) bufhelp_u32_t; + +/* Functions for loading and storing unaligned u32 values of different + endianness. */ +static inline u32 buf_get_be32(const void *_buf) +{ + return be_bswap32(((const bufhelp_u32_t *)_buf)->a); +} + +static inline u32 buf_get_le32(const void *_buf) +{ + return le_bswap32(((const bufhelp_u32_t *)_buf)->a); +} + +static inline void buf_put_be32(void *_buf, u32 val) +{ + bufhelp_u32_t *out = _buf; + out->a = be_bswap32(val); +} + +static inline void buf_put_le32(void *_buf, u32 val) +{ + bufhelp_u32_t *out = _buf; + out->a = le_bswap32(val); +} + + +typedef struct bufhelp_u64_s +{ + u64 a; +} __attribute__((packed, aligned(1), may_alias)) bufhelp_u64_t; + +/* Functions for loading and storing unaligned u64 values of different + endianness. */ +static inline u64 buf_get_be64(const void *_buf) +{ + return be_bswap64(((const bufhelp_u64_t *)_buf)->a); +} + +static inline u64 buf_get_le64(const void *_buf) +{ + return le_bswap64(((const bufhelp_u64_t *)_buf)->a); +} + +static inline void buf_put_be64(void *_buf, u64 val) +{ + bufhelp_u64_t *out = _buf; + out->a = be_bswap64(val); +} + +static inline void buf_put_le64(void *_buf, u64 val) +{ + bufhelp_u64_t *out = _buf; + out->a = le_bswap64(val); +} + +#endif /*BUFHELP_UNALIGNED_ACCESS*/ + + +/* Host-endian get/put macros */ +#ifdef WORDS_BIGENDIAN +# define buf_get_he32 buf_get_be32 +# define buf_put_he32 buf_put_be32 +# define buf_get_he64 buf_get_be64 +# define buf_put_he64 buf_put_be64 +#else +# define buf_get_he32 buf_get_le32 +# define buf_put_he32 buf_put_le32 +# define buf_get_he64 buf_get_le64 +# define buf_put_he64 buf_put_le64 +#endif + + + +/* Optimized function for small buffer copying */ +static inline void +buf_cpy(void *_dst, const void *_src, size_t len) +{ + byte *dst = _dst; + const byte *src = _src; + +#if __GNUC__ >= 4 + if (!__builtin_constant_p (len)) + { + if (UNLIKELY(len == 0)) + return; + memcpy(_dst, _src, len); + return; + } +#endif + + while (len >= sizeof(u64)) + { + buf_put_he64(dst, buf_get_he64(src)); + dst += sizeof(u64); + src += sizeof(u64); + len -= sizeof(u64); + } + + if (len >= sizeof(u32)) + { + buf_put_he32(dst, buf_get_he32(src)); + dst += sizeof(u32); + src += sizeof(u32); + len -= sizeof(u32); + } + + /* Handle tail. 
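+     (Fewer than four bytes remain at this point; a 13-byte copy, for
+     instance, is served by one u64 step, one u32 step and then a single
+     byte here.)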
*/ + for (; len; len--) + *dst++ = *src++; +} + + +/* Optimized function for buffer xoring */ +static inline void +buf_xor(void *_dst, const void *_src1, const void *_src2, size_t len) +{ + byte *dst = _dst; + const byte *src1 = _src1; + const byte *src2 = _src2; + + while (len >= sizeof(u64)) + { + buf_put_he64(dst, buf_get_he64(src1) ^ buf_get_he64(src2)); + dst += sizeof(u64); + src1 += sizeof(u64); + src2 += sizeof(u64); + len -= sizeof(u64); + } + + if (len > sizeof(u32)) + { + buf_put_he32(dst, buf_get_he32(src1) ^ buf_get_he32(src2)); + dst += sizeof(u32); + src1 += sizeof(u32); + src2 += sizeof(u32); + len -= sizeof(u32); + } + + /* Handle tail. */ + for (; len; len--) + *dst++ = *src1++ ^ *src2++; +} + + +/* Optimized function for buffer xoring with two destination buffers. Used + mainly by CFB mode encryption. */ +static inline void +buf_xor_2dst(void *_dst1, void *_dst2, const void *_src, size_t len) +{ + byte *dst1 = _dst1; + byte *dst2 = _dst2; + const byte *src = _src; + + while (len >= sizeof(u64)) + { + u64 temp = buf_get_he64(dst2) ^ buf_get_he64(src); + buf_put_he64(dst2, temp); + buf_put_he64(dst1, temp); + dst2 += sizeof(u64); + dst1 += sizeof(u64); + src += sizeof(u64); + len -= sizeof(u64); + } + + if (len >= sizeof(u32)) + { + u32 temp = buf_get_he32(dst2) ^ buf_get_he32(src); + buf_put_he32(dst2, temp); + buf_put_he32(dst1, temp); + dst2 += sizeof(u32); + dst1 += sizeof(u32); + src += sizeof(u32); + len -= sizeof(u32); + } + + /* Handle tail. */ + for (; len; len--) + *dst1++ = (*dst2++ ^= *src++); +} + + +/* Optimized function for combined buffer xoring and copying. Used by mainly + CBC mode decryption. */ +static inline void +buf_xor_n_copy_2(void *_dst_xor, const void *_src_xor, void *_srcdst_cpy, + const void *_src_cpy, size_t len) +{ + byte *dst_xor = _dst_xor; + byte *srcdst_cpy = _srcdst_cpy; + const byte *src_xor = _src_xor; + const byte *src_cpy = _src_cpy; + + while (len >= sizeof(u64)) + { + u64 temp = buf_get_he64(src_cpy); + buf_put_he64(dst_xor, buf_get_he64(srcdst_cpy) ^ buf_get_he64(src_xor)); + buf_put_he64(srcdst_cpy, temp); + dst_xor += sizeof(u64); + srcdst_cpy += sizeof(u64); + src_xor += sizeof(u64); + src_cpy += sizeof(u64); + len -= sizeof(u64); + } + + if (len >= sizeof(u32)) + { + u32 temp = buf_get_he32(src_cpy); + buf_put_he32(dst_xor, buf_get_he32(srcdst_cpy) ^ buf_get_he32(src_xor)); + buf_put_he32(srcdst_cpy, temp); + dst_xor += sizeof(u32); + srcdst_cpy += sizeof(u32); + src_xor += sizeof(u32); + src_cpy += sizeof(u32); + len -= sizeof(u32); + } + + /* Handle tail. */ + for (; len; len--) + { + byte temp = *src_cpy++; + *dst_xor++ = *srcdst_cpy ^ *src_xor++; + *srcdst_cpy++ = temp; + } +} + + +/* Optimized function for combined buffer xoring and copying. Used by mainly + CFB mode decryption. */ +static inline void +buf_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src, size_t len) +{ + buf_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, len); +} + + +/* Constant-time compare of two buffers. Returns 1 if buffers are equal, + and 0 if buffers differ. */ +static inline int +buf_eq_const(const void *_a, const void *_b, size_t len) +{ + const byte *a = _a; + const byte *b = _b; + int ab, ba; + size_t i; + + /* Constant-time compare. */ + for (i = 0, ab = 0, ba = 0; i < len; i++) + { + /* If a[i] != b[i], either ab or ba will be negative. */ + ab |= a[i] - b[i]; + ba |= b[i] - a[i]; + } + + /* 'ab | ba' is negative when buffers are not equal. 
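+     If some a[i] != b[i], one of the two byte differences is negative and
+     sets the sign bit, which OR never clears; if all bytes match, both
+     accumulators stay zero.  E.g. a = "A", b = "B" gives ab = -1, ba = 1,
+     so (ab | ba) < 0 and the function reports inequality by returning 0.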
*/ + return (ab | ba) >= 0; +} + + +#endif /*GCRYPT_BUFHELP_H*/ diff --git a/comm/third_party/libgcrypt/cipher/camellia-aarch64.S b/comm/third_party/libgcrypt/cipher/camellia-aarch64.S new file mode 100644 index 0000000000..f498086212 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/camellia-aarch64.S @@ -0,0 +1,586 @@ +/* camellia-aarch64.S - ARMv8/AArch64 assembly implementation of Camellia + * cipher + * + * Copyright (C) 2016 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include "asm-common-aarch64.h" + +#if defined(__AARCH64EL__) +#ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS + +.text + +/* struct camellia_ctx: */ +#define key_table 0 + +/* register macros */ +#define CTX x0 +#define RDST x1 +#define RSRC x2 +#define RKEYBITS w3 + +#define RTAB1 x4 +#define RTAB2 x5 +#define RTAB3 x6 +#define RTAB4 x7 +#define RMASK w8 + +#define IL w9 +#define IR w10 + +#define xIL x9 +#define xIR x10 + +#define XL w11 +#define XR w12 +#define YL w13 +#define YR w14 + +#define RT0 w15 +#define RT1 w16 +#define RT2 w17 +#define RT3 w19 + +#define xRT0 x15 +#define xRT1 x16 +#define xRT2 x17 +#define xRT3 x19 + +#ifdef __AARCH64EL__ + #define host_to_be(reg, rtmp) \ + rev reg, reg; + #define be_to_host(reg, rtmp) \ + rev reg, reg; +#else + /* nop on big-endian */ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ +#endif + +#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \ + ldr a, [rin, #0]; \ + ldr b, [rin, #4]; \ + be_to_host(a, rtmp); \ + ldr c, [rin, #8]; \ + be_to_host(b, rtmp); \ + ldr d, [rin, #12]; \ + be_to_host(c, rtmp); \ + be_to_host(d, rtmp); + +#define str_output_aligned_be(rout, a, b, c, d, rtmp) \ + be_to_host(a, rtmp); \ + be_to_host(b, rtmp); \ + str a, [rout, #0]; \ + be_to_host(c, rtmp); \ + str b, [rout, #4]; \ + be_to_host(d, rtmp); \ + str c, [rout, #8]; \ + str d, [rout, #12]; + +/* unaligned word reads/writes allowed */ +#define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \ + ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp) + +#define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \ + str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0) + +/********************************************************************** + 1-way camellia + **********************************************************************/ +#define roundsm(xl, xr, kl, kr, yl, yr) \ + ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \ + and IR, RMASK, xr, lsl#(4); /*sp1110*/ \ + ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \ + and IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \ + and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \ + ldr IR, [RTAB1, xIR]; \ + and RT1, RMASK, xl, lsr#(8 - 4); /*sp3033*/ \ + eor yl, yl, RT2; \ + ldr IL, [RTAB1, xIL]; \ + eor yr, yr, RT3; \ + \ + ldr RT0, [RTAB3, xRT0]; \ + ldr RT1, [RTAB3, xRT1]; \ + \ + and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \ + and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \ + \ + eor IR, IR, RT0; \ 
+ eor IL, IL, RT1; \ + \ + ldr RT2, [RTAB2, xRT2]; \ + and RT0, RMASK, xr, lsr#(8 - 4); /*sp4404*/ \ + ldr RT3, [RTAB2, xRT3]; \ + and RT1, RMASK, xl, lsl#(4); /*sp4404*/ \ + \ + ldr RT0, [RTAB4, xRT0]; \ + ldr RT1, [RTAB4, xRT1]; \ + \ + eor IR, IR, RT2; \ + eor IL, IL, RT3; \ + eor IR, IR, RT0; \ + eor IL, IL, RT1; \ + \ + eor IR, IR, IL; \ + eor yr, yr, IL, ror#8; \ + eor yl, yl, IR; \ + eor yr, yr, IR; + +#define enc_rounds(n) \ + roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR); + +#define dec_rounds(n) \ + roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR); + +/* perform FL and FL⁻¹ */ +#define fls(ll, lr, rl, rr, kll, klr, krl, krr) \ + ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \ + ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \ + and RT0, RT0, ll; \ + ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \ + orr RT2, RT2, rr; \ + ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \ + eor rl, rl, RT2; \ + eor lr, lr, RT0, ror#31; \ + and RT3, RT3, rl; \ + orr RT1, RT1, lr; \ + eor ll, ll, RT1; \ + eor rr, rr, RT3, ror#31; + +#define enc_fls(n) \ + fls(XL, XR, YL, YR, \ + (n) * 2 + 0, (n) * 2 + 1, \ + (n) * 2 + 2, (n) * 2 + 3); + +#define dec_fls(n) \ + fls(XL, XR, YL, YR, \ + (n) * 2 + 2, (n) * 2 + 3, \ + (n) * 2 + 0, (n) * 2 + 1); + +#define inpack(n) \ + ldr_input_be(RSRC, XL, XR, YL, YR, RT0); \ + ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ + ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ + eor XL, XL, RT0; \ + eor XR, XR, RT1; + +#define outunpack(n) \ + ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ + ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ + eor YL, YL, RT0; \ + eor YR, YR, RT1; \ + str_output_be(RDST, YL, YR, XL, XR, RT0, RT1); + +.globl _gcry_camellia_arm_encrypt_block +ELF(.type _gcry_camellia_arm_encrypt_block,@function;) + +_gcry_camellia_arm_encrypt_block: + CFI_STARTPROC() + stp x19, x30, [sp, #-16]! 
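+	/* x19 (RT3) is callee-saved under the AAPCS64; storing it paired
+	 * with x30 also keeps the stack pointer 16-byte aligned. */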
+ CFI_ADJUST_CFA_OFFSET(16) + CFI_REG_ON_STACK(19, 0) + CFI_REG_ON_STACK(30, 8) + + /* input: + * x0: keytable + * x1: dst + * x2: src + * w3: keybitlen + */ + + adr RTAB1, _gcry_camellia_arm_tables; + mov RMASK, #(0xff<<4); /* byte mask */ + add RTAB2, RTAB1, #(1 * 4); + add RTAB3, RTAB1, #(2 * 4); + add RTAB4, RTAB1, #(3 * 4); + + inpack(0); + + enc_rounds(0); + enc_fls(8); + enc_rounds(8); + enc_fls(16); + enc_rounds(16); + + cmp RKEYBITS, #(16 * 8); + bne .Lenc_256; + + outunpack(24); + + CFI_REMEMBER_STATE() + ldp x19, x30, [sp], #16 + CFI_ADJUST_CFA_OFFSET(-16) + CFI_RESTORE(x19) + CFI_RESTORE(x30) + ret; + CFI_RESTORE_STATE() +.ltorg + +.Lenc_256: + enc_fls(24); + enc_rounds(24); + + outunpack(32); + + ldp x19, x30, [sp], #16 + CFI_ADJUST_CFA_OFFSET(-16) + CFI_RESTORE(x19) + CFI_RESTORE(x30) + ret; + CFI_ENDPROC() +.ltorg +ELF(.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block;) + +.globl _gcry_camellia_arm_decrypt_block +ELF(.type _gcry_camellia_arm_decrypt_block,@function;) + +_gcry_camellia_arm_decrypt_block: + CFI_STARTPROC() + stp x19, x30, [sp, #-16]! + CFI_ADJUST_CFA_OFFSET(16) + CFI_REG_ON_STACK(19, 0) + CFI_REG_ON_STACK(30, 8) + + /* input: + * x0: keytable + * x1: dst + * x2: src + * w3: keybitlen + */ + + adr RTAB1, _gcry_camellia_arm_tables; + mov RMASK, #(0xff<<4); /* byte mask */ + add RTAB2, RTAB1, #(1 * 4); + add RTAB3, RTAB1, #(2 * 4); + add RTAB4, RTAB1, #(3 * 4); + + cmp RKEYBITS, #(16 * 8); + bne .Ldec_256; + + inpack(24); + +.Ldec_128: + dec_rounds(16); + dec_fls(16); + dec_rounds(8); + dec_fls(8); + dec_rounds(0); + + outunpack(0); + + CFI_REMEMBER_STATE() + ldp x19, x30, [sp], #16 + CFI_ADJUST_CFA_OFFSET(-16) + CFI_RESTORE(x19) + CFI_RESTORE(x30) + ret; + CFI_RESTORE_STATE() +.ltorg + +.Ldec_256: + inpack(32); + dec_rounds(24); + dec_fls(24); + + b .Ldec_128; + CFI_ENDPROC() +.ltorg +ELF(.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block;) + +/* Encryption/Decryption tables */ +ELF(.type _gcry_camellia_arm_tables,@object;) +.balign 32 +_gcry_camellia_arm_tables: +.Lcamellia_sp1110: +.long 0x70707000 +.Lcamellia_sp0222: + .long 0x00e0e0e0 +.Lcamellia_sp3033: + .long 0x38003838 +.Lcamellia_sp4404: + .long 0x70700070 +.long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c +.long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3 +.long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0 +.long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4 +.long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057 +.long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea +.long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae +.long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023 +.long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b +.long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045 +.long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5 +.long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed +.long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f +.long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d +.long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092 +.long 0x23232300, 0x00464646, 0x91009191, 0x86860086 +.long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af +.long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c +.long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f +.long 0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e +.long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc +.long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e +.long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b +.long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6 +.long 0x0e0e0e00, 0x001c1c1c, 
0x07000707, 0x39390039 +.long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5 +.long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d +.long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9 +.long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a +.long 0x92929200, 0x00252525, 0x49004949, 0x51510051 +.long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c +.long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b +.long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a +.long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb +.long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0 +.long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074 +.long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b +.long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0 +.long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084 +.long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df +.long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb +.long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034 +.long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076 +.long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d +.long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9 +.long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1 +.long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004 +.long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014 +.long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a +.long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de +.long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011 +.long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032 +.long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c +.long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053 +.long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2 +.long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe +.long 0x01010100, 0x00020202, 0x80008080, 0xcfcf00cf +.long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3 +.long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a +.long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024 +.long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8 +.long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060 +.long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069 +.long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa +.long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0 +.long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1 +.long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062 +.long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054 +.long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e +.long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0 +.long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064 +.long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010 +.long 0x12121200, 0x00242424, 0x09000909, 0x00000000 +.long 0x2b2b2b00, 0x00565656, 0x95009595, 0xa3a300a3 +.long 0x20202000, 0x00404040, 0x10001010, 0x75750075 +.long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a +.long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6 +.long 0x84848400, 0x00090909, 0x42004242, 0x09090009 +.long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd +.long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087 +.long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083 +.long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd +.long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090 +.long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073 +.long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6 +.long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d +.long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf +.long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052 +.long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8 +.long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8 +.long 0x31313100, 
0x00626262, 0x98009898, 0xc6c600c6 +.long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081 +.long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f +.long 0x04040400, 0x00080808, 0x02000202, 0x13130013 +.long 0xd7d7d700, 0x00afafaf, 0xeb00ebeb, 0x63630063 +.long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9 +.long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7 +.long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f +.long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc +.long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029 +.long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9 +.long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f +.long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4 +.long 0x32323200, 0x00646464, 0x19001919, 0x78780078 +.long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006 +.long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 0xe7e700e7 +.long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071 +.long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4 +.long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab +.long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088 +.long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d +.long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072 +.long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9 +.long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8 +.long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac +.long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036 +.long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a +.long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c +.long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1 +.long 0x24242400, 0x00484848, 0x12001212, 0x40400040 +.long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3 +.long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb +.long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043 +.long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015 +.long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad +.long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077 +.long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080 +.long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082 +.long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec +.long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027 +.long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5 +.long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085 +.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035 +.long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c +.long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041 +.long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef +.long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093 +.long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019 +.long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021 +.long 0xe0e0e000, 0x00c1c1c1, 0x70007070, 0x0e0e000e +.long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e +.long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065 +.long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd +.long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8 +.long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f +.long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb +.long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce +.long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030 +.long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f +.long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5 +.long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a +.long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1 +.long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca +.long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047 +.long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d +.long 0x09090900, 0x00121212, 0x84008484, 0x01010001 +.long 
0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6 +.long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056 +.long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d +.long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d +.long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066 +.long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc +.long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d +.long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012 +.long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020 +.long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1 +.long 0x33333300, 0x00666666, 0x99009999, 0x99990099 +.long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c +.long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2 +.long 0xf6f6f600, 0x00ededed, 0x7b007b7b, 0x7e7e007e +.long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005 +.long 0x9d9d9d00, 0x003b3b3b, 0xce00cece, 0xb7b700b7 +.long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031 +.long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017 +.long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7 +.long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058 +.long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061 +.long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b +.long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c +.long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f +.long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016 +.long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018 +.long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022 +.long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044 +.long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2 +.long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5 +.long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091 +.long 0x13131300, 0x00262626, 0x89008989, 0x08080008 +.long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8 +.long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc +.long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050 +.long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0 +.long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d +.long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089 +.long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097 +.long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b +.long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095 +.long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff +.long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2 +.long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4 +.long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048 +.long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7 +.long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db +.long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003 +.long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da +.long 0xb4b4b400, 0x00696969, 0x5a005a5a, 0x3f3f003f +.long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094 +.long 0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c +.long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002 +.long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a +.long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033 +.long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067 +.long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3 +.long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f +.long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2 +.long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b +.long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026 +.long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037 +.long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b +.long 0x88888800, 0x00111111, 0x44004444, 0x96960096 +.long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b +.long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 
0xbebe00be +.long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e +.long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079 +.long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c +.long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e +.long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e +.long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5 +.long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6 +.long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd +.long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059 +.long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098 +.long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a +.long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046 +.long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba +.long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025 +.long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042 +.long 0xf1f1f100, 0x00e3e3e3, 0xf800f8f8, 0xa2a200a2 +.long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa +.long 0x40404000, 0x00808080, 0x20002020, 0x07070007 +.long 0x28282800, 0x00505050, 0x14001414, 0x55550055 +.long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee +.long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a +.long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049 +.long 0xc9c9c900, 0x00939393, 0xe400e4e4, 0x68680068 +.long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038 +.long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4 +.long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028 +.long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b +.long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9 +.long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1 +.long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3 +.long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 +.long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 +.long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e +ELF(.size _gcry_camellia_arm_tables,.-_gcry_camellia_arm_tables;) + +#endif /*HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS*/ +#endif /*__AARCH64EL__*/ diff --git a/comm/third_party/libgcrypt/cipher/camellia-aesni-avx-amd64.S b/comm/third_party/libgcrypt/cipher/camellia-aesni-avx-amd64.S new file mode 100644 index 0000000000..64cabaa51b --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/camellia-aesni-avx-amd64.S @@ -0,0 +1,2618 @@ +/* camellia-avx-aesni-amd64.S - AES-NI/AVX implementation of Camellia cipher + * + * Copyright (C) 2013-2015,2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */
+
+#include <config.h>
+
+#ifdef __x86_64
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+ defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \
+ defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+
+#include "asm-common-amd64.h"
+
+#define CAMELLIA_TABLE_BYTE_LEN 272
+
+/* struct CAMELLIA_context: */
+#define key_table 0
+#define key_bitlength CAMELLIA_TABLE_BYTE_LEN
+
+/* register macros */
+#define CTX %rdi
+
+/**********************************************************************
+ helper macros
+ **********************************************************************/
+
+/* Apply an 8-bit transform as two 4-bit vpshufb table lookups: the low
+ * nibble of each byte indexes lo_t, the high nibble indexes hi_t, and
+ * the two results are XORed together. */
+#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \
+ vpand x, mask4bit, tmp0; \
+ vpandn x, mask4bit, x; \
+ vpsrld $4, x, x; \
+ \
+ vpshufb tmp0, lo_t, tmp0; \
+ vpshufb x, hi_t, x; \
+ vpxor tmp0, x, x;
+
+/**********************************************************************
+ 16-way camellia
+ **********************************************************************/
+
+/*
+ * IN:
+ * x0..x7: byte-sliced AB state
+ * mem_cd: register pointer storing CD state
+ * key: index for key material
+ * OUT:
+ * x0..x7: new byte-sliced CD state
+ */
+#define roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \
+ t7, mem_cd, key) \
+ /* \
+ * S-function with AES subbytes \
+ */ \
+ vmovdqa .Linv_shift_row rRIP, t4; \
+ vbroadcastss .L0f0f0f0f rRIP, t7; \
+ vmovdqa .Lpre_tf_lo_s1 rRIP, t0; \
+ vmovdqa .Lpre_tf_hi_s1 rRIP, t1; \
+ \
+ /* AES inverse shift rows */ \
+ vpshufb t4, x0, x0; \
+ vpshufb t4, x7, x7; \
+ vpshufb t4, x1, x1; \
+ vpshufb t4, x4, x4; \
+ vpshufb t4, x2, x2; \
+ vpshufb t4, x5, x5; \
+ vpshufb t4, x3, x3; \
+ vpshufb t4, x6, x6; \
+ \
+ /* prefilter sboxes 1, 2 and 3 */ \
+ vmovdqa .Lpre_tf_lo_s4 rRIP, t2; \
+ vmovdqa .Lpre_tf_hi_s4 rRIP, t3; \
+ filter_8bit(x0, t0, t1, t7, t6); \
+ filter_8bit(x7, t0, t1, t7, t6); \
+ filter_8bit(x1, t0, t1, t7, t6); \
+ filter_8bit(x4, t0, t1, t7, t6); \
+ filter_8bit(x2, t0, t1, t7, t6); \
+ filter_8bit(x5, t0, t1, t7, t6); \
+ \
+ /* prefilter sbox 4 */ \
+ vpxor t4, t4, t4; \
+ filter_8bit(x3, t2, t3, t7, t6); \
+ filter_8bit(x6, t2, t3, t7, t6); \
+ \
+ /* AES subbytes + AES shift rows */ \
+ vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \
+ vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \
+ vaesenclast t4, x0, x0; \
+ vaesenclast t4, x7, x7; \
+ vaesenclast t4, x1, x1; \
+ vaesenclast t4, x4, x4; \
+ vaesenclast t4, x2, x2; \
+ vaesenclast t4, x5, x5; \
+ vaesenclast t4, x3, x3; \
+ vaesenclast t4, x6, x6; \
+ \
+ /* postfilter sboxes 1 and 4 */ \
+ vmovdqa .Lpost_tf_lo_s3 rRIP, t2; \
+ vmovdqa .Lpost_tf_hi_s3 rRIP, t3; \
+ filter_8bit(x0, t0, t1, t7, t6); \
+ filter_8bit(x7, t0, t1, t7, t6); \
+ filter_8bit(x3, t0, t1, t7, t6); \
+ filter_8bit(x6, t0, t1, t7, t6); \
+ \
+ /* postfilter sbox 3 */ \
+ vmovdqa .Lpost_tf_lo_s2 rRIP, t4; \
+ vmovdqa .Lpost_tf_hi_s2 rRIP, t5; \
+ filter_8bit(x2, t2, t3, t7, t6); \
+ filter_8bit(x5, t2, t3, t7, t6); \
+ \
+ vpxor t6, t6, t6; \
+ vmovq key, t0; \
+ \
+ /* postfilter sbox 2 */ \
+ filter_8bit(x1, t4, t5, t7, t2); \
+ filter_8bit(x4, t4, t5, t7, t2); \
+ \
+ vpsrldq $5, t0, t5; \
+ vpsrldq $1, t0, t1; \
+ vpsrldq $2, t0, t2; \
+ vpsrldq $3, t0, t3; \
+ vpsrldq $4, t0, t4; \
+ vpshufb t6, t0, t0; \
+ vpshufb t6, t1, t1; \
+ vpshufb t6, t2, t2; \
+ vpshufb t6, t3, t3; \
+ vpshufb t6, t4, t4; \
+ vpsrldq $2, t5, t7; \
+ vpshufb t6, t7, t7; \
+ \
+ /* P-function */ \
+ vpxor x5, x0, x0; \
+ vpxor x6, x1, x1; \
+ vpxor x7, x2, x2; \
+ vpxor x4, x3, x3; \
+ \
+ vpxor x2, x4, x4; \
+ vpxor x3, x5, x5; \
+ vpxor x0, x6, x6; \
+ vpxor x1, x7, x7; \
+ \
+ vpxor x7, x0, x0; \
+ vpxor x4, x1, x1; \
+ vpxor x5, x2, x2; \
+ vpxor x6, x3, x3; \
+ \
+ vpxor x3, x4, x4; \
+ vpxor x0, x5, x5; \
+ vpxor x1, x6, x6; \
+ vpxor x2, x7, x7; /* note: high and low parts swapped */ \
+ \
+ /* Add key material and result to CD (x becomes new CD) */ \
+ \
+ vpxor t3, x4, x4; \
+ vpxor 0 * 16(mem_cd), x4, x4; \
+ \
+ vpxor t2, x5, x5; \
+ vpxor 1 * 16(mem_cd), x5, x5; \
+ \
+ vpsrldq $1, t5, t3; \
+ vpshufb t6, t5, t5; \
+ vpshufb t6, t3, t6; \
+ \
+ vpxor t1, x6, x6; \
+ vpxor 2 * 16(mem_cd), x6, x6; \
+ \
+ vpxor t0, x7, x7; \
+ vpxor 3 * 16(mem_cd), x7, x7; \
+ \
+ vpxor t7, x0, x0; \
+ vpxor 4 * 16(mem_cd), x0, x0; \
+ \
+ vpxor t6, x1, x1; \
+ vpxor 5 * 16(mem_cd), x1, x1; \
+ \
+ vpxor t5, x2, x2; \
+ vpxor 6 * 16(mem_cd), x2, x2; \
+ \
+ vpxor t4, x3, x3; \
+ vpxor 7 * 16(mem_cd), x3, x3;
+
+/*
+ * IN/OUT:
+ * x0..x7: byte-sliced AB state preloaded
+ * mem_ab: byte-sliced AB state in memory
+ * mem_cd: byte-sliced CD state in memory
+ */
+#define two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i, dir, store_ab) \
+ roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_cd, (key_table + (i) * 8)(CTX)); \
+ \
+ vmovdqu x4, 0 * 16(mem_cd); \
+ vmovdqu x5, 1 * 16(mem_cd); \
+ vmovdqu x6, 2 * 16(mem_cd); \
+ vmovdqu x7, 3 * 16(mem_cd); \
+ vmovdqu x0, 4 * 16(mem_cd); \
+ vmovdqu x1, 5 * 16(mem_cd); \
+ vmovdqu x2, 6 * 16(mem_cd); \
+ vmovdqu x3, 7 * 16(mem_cd); \
+ \
+ roundsm16(x4, x5, x6, x7, x0, x1, x2, x3, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, (key_table + ((i) + (dir)) * 8)(CTX)); \
+ \
+ store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab);
+
+#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */
+
+#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \
+ /* Store new AB state */ \
+ vmovdqu x0, 0 * 16(mem_ab); \
+ vmovdqu x1, 1 * 16(mem_ab); \
+ vmovdqu x2, 2 * 16(mem_ab); \
+ vmovdqu x3, 3 * 16(mem_ab); \
+ vmovdqu x4, 4 * 16(mem_ab); \
+ vmovdqu x5, 5 * 16(mem_ab); \
+ vmovdqu x6, 6 * 16(mem_ab); \
+ vmovdqu x7, 7 * 16(mem_ab);
+
+#define enc_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i) \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store);
+
+#define dec_rounds16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, i) \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \
+ two_roundsm16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \
+ y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store);
+
+/*
+ * IN:
+ * v0..3: byte-sliced 32-bit integers
+ * OUT:
+ * v0..3: (IN <<< 1)
+ *
+ * vpcmpgtb against zero followed by vpabsb extracts the top bit of
+ * every byte as 0/1, vpaddb doubles each byte, and the vpor steps
+ * carry the extracted bits into the neighbouring byte slice, rotating
+ * the byte-sliced 32-bit words left by one bit as a whole.
+ */
+#define rol32_1_16(v0, v1, v2, v3, t0, t1, t2, zero) \
+ vpcmpgtb v0, zero, t0; \
+ vpaddb v0, v0, v0; \
+ vpabsb t0, t0; \
+ \
+ vpcmpgtb v1, zero, t1; \
+ vpaddb v1, v1, v1; \
+ vpabsb t1, t1; \
+ \
+ vpcmpgtb v2, zero, t2; \
+ vpaddb v2, v2, v2; \
+ vpabsb t2, t2; \
+ \
+ vpor t0, v1, v1; \
+ \
+ vpcmpgtb v3,
zero, t0; \ + vpaddb v3, v3, v3; \ + vpabsb t0, t0; \ + \ + vpor t1, v2, v2; \ + vpor t2, v3, v3; \ + vpor t0, v0, v0; + +/* + * IN: + * r: byte-sliced AB state in memory + * l: byte-sliced CD state in memory + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define fls16(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \ + tt1, tt2, tt3, kll, klr, krl, krr) \ + /* \ + * t0 = kll; \ + * t0 &= ll; \ + * lr ^= rol32(t0, 1); \ + */ \ + vpxor tt0, tt0, tt0; \ + vmovd kll, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand l0, t0, t0; \ + vpand l1, t1, t1; \ + vpand l2, t2, t2; \ + vpand l3, t3, t3; \ + \ + rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor l4, t0, l4; \ + vmovdqu l4, 4 * 16(l); \ + vpxor l5, t1, l5; \ + vmovdqu l5, 5 * 16(l); \ + vpxor l6, t2, l6; \ + vmovdqu l6, 6 * 16(l); \ + vpxor l7, t3, l7; \ + vmovdqu l7, 7 * 16(l); \ + \ + /* \ + * t2 = krr; \ + * t2 |= rr; \ + * rl ^= t2; \ + */ \ + \ + vmovd krr, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor 4 * 16(r), t0, t0; \ + vpor 5 * 16(r), t1, t1; \ + vpor 6 * 16(r), t2, t2; \ + vpor 7 * 16(r), t3, t3; \ + \ + vpxor 0 * 16(r), t0, t0; \ + vpxor 1 * 16(r), t1, t1; \ + vpxor 2 * 16(r), t2, t2; \ + vpxor 3 * 16(r), t3, t3; \ + vmovdqu t0, 0 * 16(r); \ + vmovdqu t1, 1 * 16(r); \ + vmovdqu t2, 2 * 16(r); \ + vmovdqu t3, 3 * 16(r); \ + \ + /* \ + * t2 = krl; \ + * t2 &= rl; \ + * rr ^= rol32(t2, 1); \ + */ \ + vmovd krl, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand 0 * 16(r), t0, t0; \ + vpand 1 * 16(r), t1, t1; \ + vpand 2 * 16(r), t2, t2; \ + vpand 3 * 16(r), t3, t3; \ + \ + rol32_1_16(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor 4 * 16(r), t0, t0; \ + vpxor 5 * 16(r), t1, t1; \ + vpxor 6 * 16(r), t2, t2; \ + vpxor 7 * 16(r), t3, t3; \ + vmovdqu t0, 4 * 16(r); \ + vmovdqu t1, 5 * 16(r); \ + vmovdqu t2, 6 * 16(r); \ + vmovdqu t3, 7 * 16(r); \ + \ + /* \ + * t0 = klr; \ + * t0 |= lr; \ + * ll ^= t0; \ + */ \ + \ + vmovd klr, t0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor l4, t0, t0; \ + vpor l5, t1, t1; \ + vpor l6, t2, t2; \ + vpor l7, t3, t3; \ + \ + vpxor l0, t0, l0; \ + vmovdqu l0, 0 * 16(l); \ + vpxor l1, t1, l1; \ + vmovdqu l1, 1 * 16(l); \ + vpxor l2, t2, l2; \ + vmovdqu l2, 2 * 16(l); \ + vpxor l3, t3, l3; \ + vmovdqu l3, 3 * 16(l); + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \ + a3, b3, c3, d3, st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vmovdqu 
.Lshufb_16x16b rRIP, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +#define transpose_8x8b(a, b, c, d, e, f, g, h, t0, t1, t2, t3, t4) \ + vpunpcklbw a, b, t0; \ + vpunpckhbw a, b, b; \ + \ + vpunpcklbw c, d, t1; \ + vpunpckhbw c, d, d; \ + \ + vpunpcklbw e, f, t2; \ + vpunpckhbw e, f, f; \ + \ + vpunpcklbw g, h, t3; \ + vpunpckhbw g, h, h; \ + \ + vpunpcklwd t0, t1, g; \ + vpunpckhwd t0, t1, t0; \ + \ + vpunpcklwd b, d, t1; \ + vpunpckhwd b, d, e; \ + \ + vpunpcklwd t2, t3, c; \ + vpunpckhwd t2, t3, t2; \ + \ + vpunpcklwd f, h, t3; \ + vpunpckhwd f, h, b; \ + \ + vpunpcklwd e, b, t4; \ + vpunpckhwd e, b, b; \ + \ + vpunpcklwd t1, t3, e; \ + vpunpckhwd t1, t3, f; \ + \ + vmovdqa .Ltranspose_8x8_shuf rRIP, t3; \ + \ + vpunpcklwd g, c, d; \ + vpunpckhwd g, c, c; \ + \ + vpunpcklwd t0, t2, t1; \ + vpunpckhwd t0, t2, h; \ + \ + vpunpckhqdq b, h, a; \ + vpshufb t3, a, a; \ + vpunpcklqdq b, h, b; \ + vpshufb t3, b, b; \ + \ + vpunpckhqdq e, d, g; \ + vpshufb t3, g, g; \ + vpunpcklqdq e, d, h; \ + vpshufb t3, h, h; \ + \ + vpunpckhqdq f, c, e; \ + vpshufb t3, e, e; \ + vpunpcklqdq f, c, f; \ + vpshufb t3, f, f; \ + \ + vpunpckhqdq t4, t1, c; \ + vpshufb t3, c, c; \ + vpunpcklqdq t4, t1, d; \ + vpshufb t3, d, d; + +/* load blocks to registers and apply pre-whitening */ +#define inpack16_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio, key) \ + vmovq key, x0; \ + vpshufb .Lpack_bswap rRIP, x0, x0; \ + \ + vpxor 0 * 16(rio), x0, y7; \ + vpxor 1 * 16(rio), x0, y6; \ + vpxor 2 * 16(rio), x0, y5; \ + vpxor 3 * 16(rio), x0, y4; \ + vpxor 4 * 16(rio), x0, y3; \ + vpxor 5 * 16(rio), x0, y2; \ + vpxor 6 * 16(rio), x0, y1; \ + vpxor 7 * 16(rio), x0, y0; \ + vpxor 8 * 16(rio), x0, x7; \ + vpxor 9 * 16(rio), x0, x6; \ + vpxor 10 * 16(rio), x0, x5; \ + vpxor 11 * 16(rio), x0, x4; \ + vpxor 12 * 16(rio), x0, x3; \ + vpxor 13 * 16(rio), x0, x2; \ + vpxor 14 * 16(rio), x0, x1; \ + vpxor 15 * 16(rio), x0, x0; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack16_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd) \ + byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \ + y4, y5, y6, y7, (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 16(mem_ab); \ + vmovdqu x1, 1 * 16(mem_ab); \ + vmovdqu x2, 2 * 16(mem_ab); \ + vmovdqu x3, 3 * 16(mem_ab); \ + vmovdqu x4, 4 * 16(mem_ab); \ + vmovdqu x5, 5 * 16(mem_ab); \ + vmovdqu x6, 6 * 16(mem_ab); \ + vmovdqu x7, 7 * 16(mem_ab); \ + vmovdqu y0, 0 * 16(mem_cd); \ + vmovdqu y1, 1 * 16(mem_cd); \ + vmovdqu y2, 2 * 16(mem_cd); \ + vmovdqu y3, 3 * 16(mem_cd); \ + vmovdqu y4, 4 * 16(mem_cd); \ + vmovdqu y5, 5 * 16(mem_cd); \ + vmovdqu y6, 6 * 16(mem_cd); \ + vmovdqu y7, 7 * 16(mem_cd); + +/* 
de-byteslice, apply post-whitening and store blocks */ +#define outunpack16(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ + y5, y6, y7, key, stack_tmp0, stack_tmp1) \ + byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \ + y3, y7, x3, x7, stack_tmp0, stack_tmp1); \ + \ + vmovdqu x0, stack_tmp0; \ + \ + vmovq key, x0; \ + vpshufb .Lpack_bswap rRIP, x0, x0; \ + \ + vpxor x0, y7, y7; \ + vpxor x0, y6, y6; \ + vpxor x0, y5, y5; \ + vpxor x0, y4, y4; \ + vpxor x0, y3, y3; \ + vpxor x0, y2, y2; \ + vpxor x0, y1, y1; \ + vpxor x0, y0, y0; \ + vpxor x0, x7, x7; \ + vpxor x0, x6, x6; \ + vpxor x0, x5, x5; \ + vpxor x0, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x0, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor stack_tmp0, x0, x0; + +#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio) \ + vmovdqu x0, 0 * 16(rio); \ + vmovdqu x1, 1 * 16(rio); \ + vmovdqu x2, 2 * 16(rio); \ + vmovdqu x3, 3 * 16(rio); \ + vmovdqu x4, 4 * 16(rio); \ + vmovdqu x5, 5 * 16(rio); \ + vmovdqu x6, 6 * 16(rio); \ + vmovdqu x7, 7 * 16(rio); \ + vmovdqu y0, 8 * 16(rio); \ + vmovdqu y1, 9 * 16(rio); \ + vmovdqu y2, 10 * 16(rio); \ + vmovdqu y3, 11 * 16(rio); \ + vmovdqu y4, 12 * 16(rio); \ + vmovdqu y5, 13 * 16(rio); \ + vmovdqu y6, 14 * 16(rio); \ + vmovdqu y7, 15 * 16(rio); + +.text +.align 16 + +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) + +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3); + +.Lpack_bswap: + .long 0x00010203 + .long 0x04050607 + .long 0x80808080 + .long 0x80808080 + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox1, sbox2, sbox3: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s1: + .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86 + .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88 +.Lpre_tf_hi_s1: + .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a + .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox4: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianess(in <<< 1) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s4: + .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25 + .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74 +.Lpre_tf_hi_s4: + .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72 + .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf + +/* + * post-SubByte transform + * + * post-lookup for sbox1, sbox4: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s1: + .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31 + .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1 +.Lpost_tf_hi_s1: + .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8 + .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c + +/* + * post-SubByte transform + * + * post-lookup for sbox2: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) <<< 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s2: + .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 
0x85, 0x83, 0x62
+ .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3
+.Lpost_tf_hi_s2:
+ .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51
+ .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18
+
+/*
+ * post-SubByte transform
+ *
+ * post-lookup for sbox3:
+ * swap_bitendianness(
+ * camellia_h(
+ * isom_map_aes_to_camellia(
+ * swap_bitendianness(
+ * aes_inverse_affine_transform(in)
+ * )
+ * )
+ * )
+ * ) >>> 1
+ *
+ * (note: '⊕ 0x6e' inside camellia_h())
+ */
+.Lpost_tf_lo_s3:
+ .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98
+ .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8
+.Lpost_tf_hi_s3:
+ .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54
+ .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06
+
+/* For isolating SubBytes from AESENCLAST, inverse shift row */
+.Linv_shift_row:
+ .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b
+ .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03
+
+/* shuffle mask for 8x8 byte transpose */
+.Ltranspose_8x8_shuf:
+ .byte 0, 1, 4, 5, 2, 3, 6, 7, 8+0, 8+1, 8+4, 8+5, 8+2, 8+3, 8+6, 8+7
+
+.align 4
+/* 4-bit mask */
+.L0f0f0f0f:
+ .long 0x0f0f0f0f
+
+
+.align 8
+ELF(.type __camellia_enc_blk16,@function;)
+
+__camellia_enc_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rax: temporary storage, 256 bytes
+ * %r8d: 24 for 16 byte key, 32 for larger
+ * %xmm0..%xmm15: 16 plaintext blocks
+ * output:
+ * %xmm0..%xmm15: 16 encrypted blocks, order swapped:
+ * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+ */
+ CFI_STARTPROC();
+
+ leaq 8 * 16(%rax), %rcx;
+
+ leaq (-8 * 8)(CTX, %r8, 8), %r8;
+
+ inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx);
+
+.align 8
+.Lenc_loop:
+ enc_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx, 0);
+
+ cmpq %r8, CTX;
+ je .Lenc_done;
+ leaq (8 * 8)(CTX), CTX;
+
+ fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15,
+ ((key_table) + 0)(CTX),
+ ((key_table) + 4)(CTX),
+ ((key_table) + 8)(CTX),
+ ((key_table) + 12)(CTX));
+ jmp .Lenc_loop;
+
+.align 8
+.Lenc_done:
+ /* load CD for output */
+ vmovdqu 0 * 16(%rcx), %xmm8;
+ vmovdqu 1 * 16(%rcx), %xmm9;
+ vmovdqu 2 * 16(%rcx), %xmm10;
+ vmovdqu 3 * 16(%rcx), %xmm11;
+ vmovdqu 4 * 16(%rcx), %xmm12;
+ vmovdqu 5 * 16(%rcx), %xmm13;
+ vmovdqu 6 * 16(%rcx), %xmm14;
+ vmovdqu 7 * 16(%rcx), %xmm15;
+
+ outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, ((key_table) + 8 * 8)(%r8), (%rax), 1 * 16(%rax));
+
+ ret;
+ CFI_ENDPROC();
+ELF(.size __camellia_enc_blk16,.-__camellia_enc_blk16;)
+
+.align 8
+ELF(.type __camellia_dec_blk16,@function;)
+
+__camellia_dec_blk16:
+ /* input:
+ * %rdi: ctx, CTX
+ * %rax: temporary storage, 256 bytes
+ * %r8d: 24 for 16 byte key, 32 for larger
+ * %xmm0..%xmm15: 16 encrypted blocks
+ * output:
+ * %xmm0..%xmm15: 16 plaintext blocks, order swapped:
+ * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8
+ */
+ CFI_STARTPROC();
+
+ movq %r8, %rcx;
+ movq CTX, %r8
+ leaq (-8 * 8)(CTX, %rcx, 8), CTX;
+
+ leaq 8 * 16(%rax), %rcx;
+
+ inpack16_post(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14,
+ %xmm15, %rax, %rcx);
+
+.align 8
+.Ldec_loop:
+ dec_rounds16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7,
+ %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rax, %rcx, 0); + + cmpq %r8, CTX; + je .Ldec_done; + + fls16(%rax, %xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %rcx, %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, + ((key_table) + 8)(CTX), + ((key_table) + 12)(CTX), + ((key_table) + 0)(CTX), + ((key_table) + 4)(CTX)); + + leaq (-8 * 8)(CTX), CTX; + jmp .Ldec_loop; + +.align 8 +.Ldec_done: + /* load CD for output */ + vmovdqu 0 * 16(%rcx), %xmm8; + vmovdqu 1 * 16(%rcx), %xmm9; + vmovdqu 2 * 16(%rcx), %xmm10; + vmovdqu 3 * 16(%rcx), %xmm11; + vmovdqu 4 * 16(%rcx), %xmm12; + vmovdqu 5 * 16(%rcx), %xmm13; + vmovdqu 6 * 16(%rcx), %xmm14; + vmovdqu 7 * 16(%rcx), %xmm15; + + outunpack16(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, (key_table)(CTX), (%rax), 1 * 16(%rax)); + + ret; + CFI_ENDPROC(); +ELF(.size __camellia_dec_blk16,.-__camellia_dec_blk16;) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +.align 8 +.globl _gcry_camellia_aesni_avx_ctr_enc +ELF(.type _gcry_camellia_aesni_avx_ctr_enc,@function;) + +_gcry_camellia_aesni_avx_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv (big endian, 128bit) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + subq $(16 * 16), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + vmovdqa .Lbswap128_mask rRIP, %xmm14; + + /* load IV and byteswap */ + vmovdqu (%rcx), %xmm15; + vmovdqu %xmm15, 15 * 16(%rax); + vpshufb %xmm14, %xmm15, %xmm0; /* be => le */ + + vpcmpeqd %xmm15, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; /* low: -1, high: 0 */ + + /* construct IVs */ + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm13; + vmovdqu %xmm13, 14 * 16(%rax); + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm13; + vmovdqu %xmm13, 13 * 16(%rax); + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm12; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm11; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm10; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm9; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm8; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm7; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm6; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm5; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm4; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm3; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm2; + inc_le128(%xmm0, %xmm15, %xmm13); + vpshufb %xmm14, %xmm0, %xmm1; + inc_le128(%xmm0, %xmm15, %xmm13); + vmovdqa %xmm0, %xmm13; + vpshufb %xmm14, %xmm0, %xmm0; + inc_le128(%xmm13, %xmm15, %xmm14); + vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; /* le => be */ + vmovdqu %xmm13, (%rcx); + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm15; + vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, 
%xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor 13 * 16(%rax), %xmm15, %xmm13; + vpxor 14 * 16(%rax), %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_enc_blk16; + + vpxor 0 * 16(%rdx), %xmm7, %xmm7; + vpxor 1 * 16(%rdx), %xmm6, %xmm6; + vpxor 2 * 16(%rdx), %xmm5, %xmm5; + vpxor 3 * 16(%rdx), %xmm4, %xmm4; + vpxor 4 * 16(%rdx), %xmm3, %xmm3; + vpxor 5 * 16(%rdx), %xmm2, %xmm2; + vpxor 6 * 16(%rdx), %xmm1, %xmm1; + vpxor 7 * 16(%rdx), %xmm0, %xmm0; + vpxor 8 * 16(%rdx), %xmm15, %xmm15; + vpxor 9 * 16(%rdx), %xmm14, %xmm14; + vpxor 10 * 16(%rdx), %xmm13, %xmm13; + vpxor 11 * 16(%rdx), %xmm12, %xmm12; + vpxor 12 * 16(%rdx), %xmm11, %xmm11; + vpxor 13 * 16(%rdx), %xmm10, %xmm10; + vpxor 14 * 16(%rdx), %xmm9, %xmm9; + vpxor 15 * 16(%rdx), %xmm8, %xmm8; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_ctr_enc,.-_gcry_camellia_aesni_avx_ctr_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx_cbc_dec +ELF(.type _gcry_camellia_aesni_avx_cbc_dec,@function;) + +_gcry_camellia_aesni_avx_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + movq %rcx, %r9; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + inpack16_pre(%xmm0, %xmm1, %xmm2, %xmm3, %xmm4, %xmm5, %xmm6, %xmm7, + %xmm8, %xmm9, %xmm10, %xmm11, %xmm12, %xmm13, %xmm14, + %xmm15, %rdx, (key_table)(CTX, %r8, 8)); + + subq $(16 * 16), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + call __camellia_dec_blk16; + + /* XOR output with IV */ + vpxor (%r9), %xmm7, %xmm7; + vpxor (0 * 16)(%rdx), %xmm6, %xmm6; + vpxor (1 * 16)(%rdx), %xmm5, %xmm5; + vpxor (2 * 16)(%rdx), %xmm4, %xmm4; + vpxor (3 * 16)(%rdx), %xmm3, %xmm3; + vpxor (4 * 16)(%rdx), %xmm2, %xmm2; + vpxor (5 * 16)(%rdx), %xmm1, %xmm1; + vpxor (6 * 16)(%rdx), %xmm0, %xmm0; + vpxor (7 * 16)(%rdx), %xmm15, %xmm15; + vpxor (8 * 16)(%rdx), %xmm14, %xmm14; + vpxor (9 * 16)(%rdx), %xmm13, %xmm13; + vpxor (10 * 16)(%rdx), %xmm12, %xmm12; + vpxor (11 * 16)(%rdx), %xmm11, %xmm11; + vpxor (12 * 16)(%rdx), %xmm10, %xmm10; + vpxor (13 * 16)(%rdx), %xmm9, %xmm9; + vpxor (14 * 16)(%rdx), %xmm8, %xmm8; + movq (15 * 16 + 0)(%rdx), %r10; + movq (15 * 16 + 8)(%rdx), %r11; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + /* store new IV */ + movq %r10, (0)(%r9); + movq %r11, (8)(%r9); + + vzeroall; + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_cbc_dec,.-_gcry_camellia_aesni_avx_cbc_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx_cfb_dec +ELF(.type _gcry_camellia_aesni_avx_cfb_dec,@function;) + +_gcry_camellia_aesni_avx_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: iv + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + subq $(16 * 16), %rsp; + andq $~31, %rsp; + movq 
%rsp, %rax; + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm0; + vpshufb .Lpack_bswap rRIP, %xmm0, %xmm0; + vpxor (%rcx), %xmm0, %xmm15; + vmovdqu 15 * 16(%rdx), %xmm1; + vmovdqu %xmm1, (%rcx); /* store new IV */ + vpxor 0 * 16(%rdx), %xmm0, %xmm14; + vpxor 1 * 16(%rdx), %xmm0, %xmm13; + vpxor 2 * 16(%rdx), %xmm0, %xmm12; + vpxor 3 * 16(%rdx), %xmm0, %xmm11; + vpxor 4 * 16(%rdx), %xmm0, %xmm10; + vpxor 5 * 16(%rdx), %xmm0, %xmm9; + vpxor 6 * 16(%rdx), %xmm0, %xmm8; + vpxor 7 * 16(%rdx), %xmm0, %xmm7; + vpxor 8 * 16(%rdx), %xmm0, %xmm6; + vpxor 9 * 16(%rdx), %xmm0, %xmm5; + vpxor 10 * 16(%rdx), %xmm0, %xmm4; + vpxor 11 * 16(%rdx), %xmm0, %xmm3; + vpxor 12 * 16(%rdx), %xmm0, %xmm2; + vpxor 13 * 16(%rdx), %xmm0, %xmm1; + vpxor 14 * 16(%rdx), %xmm0, %xmm0; + + call __camellia_enc_blk16; + + vpxor 0 * 16(%rdx), %xmm7, %xmm7; + vpxor 1 * 16(%rdx), %xmm6, %xmm6; + vpxor 2 * 16(%rdx), %xmm5, %xmm5; + vpxor 3 * 16(%rdx), %xmm4, %xmm4; + vpxor 4 * 16(%rdx), %xmm3, %xmm3; + vpxor 5 * 16(%rdx), %xmm2, %xmm2; + vpxor 6 * 16(%rdx), %xmm1, %xmm1; + vpxor 7 * 16(%rdx), %xmm0, %xmm0; + vpxor 8 * 16(%rdx), %xmm15, %xmm15; + vpxor 9 * 16(%rdx), %xmm14, %xmm14; + vpxor 10 * 16(%rdx), %xmm13, %xmm13; + vpxor 11 * 16(%rdx), %xmm12, %xmm12; + vpxor 12 * 16(%rdx), %xmm11, %xmm11; + vpxor 13 * 16(%rdx), %xmm10, %xmm10; + vpxor 14 * 16(%rdx), %xmm9, %xmm9; + vpxor 15 * 16(%rdx), %xmm8, %xmm8; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_cfb_dec,.-_gcry_camellia_aesni_avx_cfb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_enc +ELF(.type _gcry_camellia_aesni_avx_ocb_enc,@function;) + +_gcry_camellia_aesni_avx_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rsp); + movq %r11, (16 * 16 + 1 * 8)(%rsp); + movq %r12, (16 * 16 + 2 * 8)(%rsp); + movq %r13, (16 * 16 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8); + + vmovdqu (%rcx), %xmm14; + vmovdqu (%r8), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rdx), xreg; \ + vpxor (lreg), %xmm14, %xmm14; \ + vpxor xreg, %xmm15, %xmm15; \ + vpxor xreg, %xmm14, xreg; \ + vmovdqu %xmm14, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 * 16)(%rax); + OCB_INPUT(1, %r11, %xmm0); + vmovdqu %xmm0, (14 * 16)(%rax); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(8, 
%r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rcx); + vmovdqu %xmm15, (%r8); + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r10d; + cmovel %r10d, %r8d; /* max */ + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm15; + vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor 14 * 16(%rax), %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_enc_blk16; + + vpxor 0 * 16(%rsi), %xmm7, %xmm7; + vpxor 1 * 16(%rsi), %xmm6, %xmm6; + vpxor 2 * 16(%rsi), %xmm5, %xmm5; + vpxor 3 * 16(%rsi), %xmm4, %xmm4; + vpxor 4 * 16(%rsi), %xmm3, %xmm3; + vpxor 5 * 16(%rsi), %xmm2, %xmm2; + vpxor 6 * 16(%rsi), %xmm1, %xmm1; + vpxor 7 * 16(%rsi), %xmm0, %xmm0; + vpxor 8 * 16(%rsi), %xmm15, %xmm15; + vpxor 9 * 16(%rsi), %xmm14, %xmm14; + vpxor 10 * 16(%rsi), %xmm13, %xmm13; + vpxor 11 * 16(%rsi), %xmm12, %xmm12; + vpxor 12 * 16(%rsi), %xmm11, %xmm11; + vpxor 13 * 16(%rsi), %xmm10, %xmm10; + vpxor 14 * 16(%rsi), %xmm9, %xmm9; + vpxor 15 * 16(%rsi), %xmm8, %xmm8; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rsp), %r10; + movq (16 * 16 + 1 * 8)(%rsp), %r11; + movq (16 * 16 + 2 * 8)(%rsp), %r12; + movq (16 * 16 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_ocb_enc,.-_gcry_camellia_aesni_avx_ocb_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_dec +ELF(.type _gcry_camellia_aesni_avx_ocb_dec,@function;) + +_gcry_camellia_aesni_avx_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (16 blocks) + * %rdx: src (16 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[16]) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rsp); + movq %r11, (16 * 16 + 1 * 8)(%rsp); + movq %r12, (16 * 16 + 2 * 8)(%rsp); + movq %r13, (16 * 16 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8); + + vmovdqu (%rcx), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rdx), xreg; \ + vpxor (lreg), %xmm15, %xmm15; \ + vpxor xreg, %xmm15, xreg; \ + vmovdqu %xmm15, (n * 16)(%rsi); + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 
* 16)(%rax); + OCB_INPUT(1, %r11, %xmm14); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + vmovdqu %xmm15, (%rcx); + + movq %r8, %r10; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r9d; + cmovel %r9d, %r8d; /* max */ + + /* inpack16_pre: */ + vmovq (key_table)(CTX, %r8, 8), %xmm15; + vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor %xmm14, %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_dec_blk16; + + vpxor 0 * 16(%rsi), %xmm7, %xmm7; + vpxor 1 * 16(%rsi), %xmm6, %xmm6; + vpxor 2 * 16(%rsi), %xmm5, %xmm5; + vpxor 3 * 16(%rsi), %xmm4, %xmm4; + vpxor 4 * 16(%rsi), %xmm3, %xmm3; + vpxor 5 * 16(%rsi), %xmm2, %xmm2; + vpxor 6 * 16(%rsi), %xmm1, %xmm1; + vpxor 7 * 16(%rsi), %xmm0, %xmm0; + vmovdqu %xmm7, (7 * 16)(%rax); + vpxor 8 * 16(%rsi), %xmm15, %xmm15; + vpxor 9 * 16(%rsi), %xmm14, %xmm14; + vpxor 10 * 16(%rsi), %xmm13, %xmm13; + vpxor 11 * 16(%rsi), %xmm12, %xmm12; + vpxor 12 * 16(%rsi), %xmm11, %xmm11; + vpxor 13 * 16(%rsi), %xmm10, %xmm10; + vpxor 14 * 16(%rsi), %xmm9, %xmm9; + vpxor 15 * 16(%rsi), %xmm8, %xmm8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vpxor (%r10), %xmm7, %xmm7; + vpxor %xmm6, %xmm7, %xmm7; + vpxor %xmm5, %xmm7, %xmm7; + vpxor %xmm4, %xmm7, %xmm7; + vpxor %xmm3, %xmm7, %xmm7; + vpxor %xmm2, %xmm7, %xmm7; + vpxor %xmm1, %xmm7, %xmm7; + vpxor %xmm0, %xmm7, %xmm7; + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm14, %xmm7, %xmm7; + vpxor %xmm13, %xmm7, %xmm7; + vpxor %xmm12, %xmm7, %xmm7; + vpxor %xmm11, %xmm7, %xmm7; + vpxor %xmm10, %xmm7, %xmm7; + vpxor %xmm9, %xmm7, %xmm7; + vpxor %xmm8, %xmm7, %xmm7; + vmovdqu %xmm7, (%r10); + vmovdqu (7 * 16)(%rax), %xmm7; + + write_output(%xmm7, %xmm6, %xmm5, %xmm4, %xmm3, %xmm2, %xmm1, %xmm0, + %xmm15, %xmm14, %xmm13, %xmm12, %xmm11, %xmm10, %xmm9, + %xmm8, %rsi); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rsp), %r10; + movq (16 * 16 + 1 * 8)(%rsp), %r11; + movq (16 * 16 + 2 * 8)(%rsp), %r12; + movq (16 * 16 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_ocb_dec,.-_gcry_camellia_aesni_avx_ocb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx_ocb_auth +ELF(.type _gcry_camellia_aesni_avx_ocb_auth,@function;) + +_gcry_camellia_aesni_avx_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (16 blocks) + * %rdx: 
offset + * %rcx: checksum + * %r8 : L pointers (void *L[16]) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + subq $(16 * 16 + 4 * 8), %rsp; + andq $~31, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 16 + 0 * 8)(%rsp); + movq %r11, (16 * 16 + 1 * 8)(%rsp); + movq %r12, (16 * 16 + 2 * 8)(%rsp); + movq %r13, (16 * 16 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 16 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 16 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 16 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 16 + 3 * 8); + + vmovdqu (%rdx), %xmm15; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */ + +#define OCB_INPUT(n, lreg, xreg) \ + vmovdqu (n * 16)(%rsi), xreg; \ + vpxor (lreg), %xmm15, %xmm15; \ + vpxor xreg, %xmm15, xreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %xmm0); + vmovdqu %xmm0, (15 * 16)(%rax); + OCB_INPUT(1, %r11, %xmm14); + OCB_INPUT(2, %r12, %xmm13); + OCB_INPUT(3, %r13, %xmm12); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %xmm11); + OCB_INPUT(5, %r11, %xmm10); + OCB_INPUT(6, %r12, %xmm9); + OCB_INPUT(7, %r13, %xmm8); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(8, %r10, %xmm7); + OCB_INPUT(9, %r11, %xmm6); + OCB_INPUT(10, %r12, %xmm5); + OCB_INPUT(11, %r13, %xmm4); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(12, %r10, %xmm3); + OCB_INPUT(13, %r11, %xmm2); + OCB_INPUT(14, %r12, %xmm1); + OCB_INPUT(15, %r13, %xmm0); +#undef OCB_INPUT + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r10d; + cmovel %r10d, %r8d; /* max */ + + vmovdqu %xmm15, (%rdx); + + movq %rcx, %r10; + + /* inpack16_pre: */ + vmovq (key_table)(CTX), %xmm15; + vpshufb .Lpack_bswap rRIP, %xmm15, %xmm15; + vpxor %xmm0, %xmm15, %xmm0; + vpxor %xmm1, %xmm15, %xmm1; + vpxor %xmm2, %xmm15, %xmm2; + vpxor %xmm3, %xmm15, %xmm3; + vpxor %xmm4, %xmm15, %xmm4; + vpxor %xmm5, %xmm15, %xmm5; + vpxor %xmm6, %xmm15, %xmm6; + vpxor %xmm7, %xmm15, %xmm7; + vpxor %xmm8, %xmm15, %xmm8; + vpxor %xmm9, %xmm15, %xmm9; + vpxor %xmm10, %xmm15, %xmm10; + vpxor %xmm11, %xmm15, %xmm11; + vpxor %xmm12, %xmm15, %xmm12; + vpxor %xmm13, %xmm15, %xmm13; + vpxor %xmm14, %xmm15, %xmm14; + vpxor 15 * 16(%rax), %xmm15, %xmm15; + + call __camellia_enc_blk16; + + vpxor %xmm7, %xmm6, %xmm6; + vpxor %xmm5, %xmm4, %xmm4; + vpxor %xmm3, %xmm2, %xmm2; + vpxor %xmm1, %xmm0, %xmm0; + vpxor %xmm15, %xmm14, %xmm14; + vpxor %xmm13, %xmm12, %xmm12; + vpxor %xmm11, %xmm10, %xmm10; + vpxor %xmm9, %xmm8, %xmm8; + + vpxor %xmm6, %xmm4, %xmm4; + vpxor %xmm2, %xmm0, %xmm0; + vpxor %xmm14, %xmm12, %xmm12; + vpxor %xmm10, %xmm8, %xmm8; + + vpxor %xmm4, %xmm0, %xmm0; + vpxor %xmm12, %xmm8, %xmm8; + + vpxor %xmm0, %xmm8, %xmm0; + vpxor (%r10), %xmm0, %xmm0; + vmovdqu %xmm0, (%r10); + + vzeroall; + + movq (16 * 16 + 0 * 8)(%rsp), %r10; + movq (16 * 16 + 1 * 8)(%rsp), %r11; + movq (16 * 16 + 2 * 8)(%rsp), %r12; + movq (16 * 16 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_ocb_auth,.-_gcry_camellia_aesni_avx_ocb_auth;) + +/* + * IN: + * ab: 64-bit AB state + * cd: 64-bit 
CD state + */ +#define camellia_f(ab, x, t0, t1, t2, t3, t4, inv_shift_row, sbox4mask, \ + _0f0f0f0fmask, pre_s1lo_mask, pre_s1hi_mask, key) \ + vmovq key, t0; \ + vpxor x, x, t3; \ + \ + vpxor ab, t0, x; \ + \ + /* \ + * S-function with AES subbytes \ + */ \ + \ + /* input rotation for sbox4 (<<< 1) */ \ + vpand x, sbox4mask, t0; \ + vpandn x, sbox4mask, x; \ + vpaddw t0, t0, t1; \ + vpsrlw $7, t0, t0; \ + vpor t0, t1, t0; \ + vpand sbox4mask, t0, t0; \ + vpor t0, x, x; \ + \ + vmovdqa .Lpost_tf_lo_s1 rRIP, t0; \ + vmovdqa .Lpost_tf_hi_s1 rRIP, t1; \ + \ + /* prefilter sboxes */ \ + filter_8bit(x, pre_s1lo_mask, pre_s1hi_mask, _0f0f0f0fmask, t2); \ + \ + /* AES subbytes + AES shift rows + AES inv shift rows */ \ + vaesenclast t3, x, x; \ + \ + /* postfilter sboxes */ \ + filter_8bit(x, t0, t1, _0f0f0f0fmask, t2); \ + \ + /* output rotation for sbox2 (<<< 1) */ \ + /* output rotation for sbox3 (>>> 1) */ \ + vpshufb inv_shift_row, x, t1; \ + vpshufb .Lsp0044440444044404mask rRIP, x, t4; \ + vpshufb .Lsp1110111010011110mask rRIP, x, x; \ + vpaddb t1, t1, t2; \ + vpsrlw $7, t1, t0; \ + vpsllw $7, t1, t3; \ + vpor t0, t2, t0; \ + vpsrlw $1, t1, t1; \ + vpshufb .Lsp0222022222000222mask rRIP, t0, t0; \ + vpor t1, t3, t1; \ + \ + vpxor x, t4, t4; \ + vpshufb .Lsp3033303303303033mask rRIP, t1, t1; \ + vpxor t4, t0, t0; \ + vpxor t1, t0, t0; \ + vpsrldq $8, t0, x; \ + vpxor t0, x, x; + +#define vec_rol128(in, out, nrol, t0) \ + vpshufd $0x4e, in, out; \ + vpsllq $(nrol), in, t0; \ + vpsrlq $(64-(nrol)), out, out; \ + vpaddd t0, out, out; + +#define vec_ror128(in, out, nror, t0) \ + vpshufd $0x4e, in, out; \ + vpsrlq $(nror), in, t0; \ + vpsllq $(64-(nror)), out, out; \ + vpaddd t0, out, out; + + +.align 16 +.Linv_shift_row_and_unpcklbw: + .byte 0x00, 0xff, 0x0d, 0xff, 0x0a, 0xff, 0x07, 0xff + .byte 0x04, 0xff, 0x01, 0xff, 0x0e, 0xff, 0x0b, 0xff +.Lsp0044440444044404mask: + .long 0xffff0404, 0x0404ff04; + .long 0x0d0dff0d, 0x0d0dff0d; +.Lsp1110111010011110mask: + .long 0x000000ff, 0x000000ff; + .long 0x0bffff0b, 0x0b0b0bff; +.Lsp0222022222000222mask: + .long 0xff060606, 0xff060606; + .long 0x0c0cffff, 0xff0c0c0c; +.Lsp3033303303303033mask: + .long 0x04ff0404, 0x04ff0404; + .long 0xff0a0aff, 0x0aff0a0a; +.Lsbox4_input_mask: + .byte 0x00, 0xff, 0x00, 0x00, 0xff, 0x00, 0x00, 0x00; +.Lsigma1: + .long 0x3BCC908B, 0xA09E667F; +.Lsigma2: + .long 0x4CAA73B2, 0xB67AE858; +.Lsigma3: + .long 0xE94F82BE, 0xC6EF372F; +.Lsigma4: + .long 0xF1D36F1C, 0x54FF53A5; +.Lsigma5: + .long 0xDE682D1D, 0x10E527FA; +.Lsigma6: + .long 0xB3E6C1FD, 0xB05688C2; + + +.align 8 +ELF(.type __camellia_avx_setup128,@function;) +__camellia_avx_setup128: + /* input: + * %rdi: ctx, CTX; subkey storage at key_table(CTX) + * %xmm0: key + */ + CFI_STARTPROC(); + +#define cmll_sub(n, ctx) (key_table+((n)*8))(ctx) +#define KL128 %xmm0 +#define KA128 %xmm2 + + vpshufb .Lbswap128_mask rRIP, KL128, KL128; + + vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11; + vmovq .Lsbox4_input_mask rRIP, %xmm12; + vbroadcastss .L0f0f0f0f rRIP, %xmm13; + vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14; + vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15; + + /* + * Generate KA + */ + vpsrldq $8, KL128, %xmm2; + vmovdqa KL128, %xmm3; + vpslldq $8, %xmm3, %xmm3; + vpsrldq $8, %xmm3, %xmm3; + + camellia_f(%xmm2, %xmm4, %xmm1, + %xmm5, %xmm6, %xmm7, %xmm8, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP); + vpxor %xmm4, %xmm3, %xmm3; + camellia_f(%xmm3, %xmm2, %xmm1, + %xmm5, %xmm6, %xmm7, %xmm8, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP); + camellia_f(%xmm2, %xmm3, 
%xmm1, + %xmm5, %xmm6, %xmm7, %xmm8, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP); + vpxor %xmm4, %xmm3, %xmm3; + camellia_f(%xmm3, %xmm4, %xmm1, + %xmm5, %xmm6, %xmm7, %xmm8, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP); + + vpslldq $8, %xmm3, %xmm3; + vpxor %xmm4, %xmm2, %xmm2; + vpsrldq $8, %xmm3, %xmm3; + vpslldq $8, %xmm2, KA128; + vpor %xmm3, KA128, KA128; + + /* + * Generate subkeys + */ + vmovdqu KA128, cmll_sub(24, CTX); + vec_rol128(KL128, %xmm3, 15, %xmm15); + vec_rol128(KA128, %xmm4, 15, %xmm15); + vec_rol128(KA128, %xmm5, 30, %xmm15); + vec_rol128(KL128, %xmm6, 45, %xmm15); + vec_rol128(KA128, %xmm7, 45, %xmm15); + vec_rol128(KL128, %xmm8, 60, %xmm15); + vec_rol128(KA128, %xmm9, 60, %xmm15); + vec_ror128(KL128, %xmm10, 128-77, %xmm15); + + /* absorb kw2 to other subkeys */ + vpslldq $8, KL128, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, KA128, KA128; + vpxor %xmm15, %xmm3, %xmm3; + vpxor %xmm15, %xmm4, %xmm4; + + /* subl(1) ^= subr(1) & ~subr(9); */ + vpandn %xmm15, %xmm5, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm5, %xmm14; + vpslld $1, %xmm14, %xmm11; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm11, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm6, %xmm6; + vpxor %xmm15, %xmm8, %xmm8; + vpxor %xmm15, %xmm9, %xmm9; + + /* subl(1) ^= subr(1) & ~subr(17); */ + vpandn %xmm15, %xmm10, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm10, %xmm14; + vpslld $1, %xmm14, %xmm11; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm11, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpshufd $0x1b, KL128, KL128; + vpshufd $0x1b, KA128, KA128; + vpshufd $0x1b, %xmm3, %xmm3; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm7, %xmm7; + vpshufd $0x1b, %xmm8, %xmm8; + vpshufd $0x1b, %xmm9, %xmm9; + vpshufd $0x1b, %xmm10, %xmm10; + + vmovdqu KL128, cmll_sub(0, CTX); + vpshufd $0x1b, KL128, KL128; + vmovdqu KA128, cmll_sub(2, CTX); + vmovdqu %xmm3, cmll_sub(4, CTX); + vmovdqu %xmm4, cmll_sub(6, CTX); + vmovdqu %xmm5, cmll_sub(8, CTX); + vmovdqu %xmm6, cmll_sub(10, CTX); + vpsrldq $8, %xmm8, %xmm8; + vmovq %xmm7, cmll_sub(12, CTX); + vmovq %xmm8, cmll_sub(13, CTX); + vmovdqu %xmm9, cmll_sub(14, CTX); + vmovdqu %xmm10, cmll_sub(16, CTX); + + vmovdqu cmll_sub(24, CTX), KA128; + + vec_ror128(KL128, %xmm3, 128 - 94, %xmm7); + vec_ror128(KA128, %xmm4, 128 - 94, %xmm7); + vec_ror128(KL128, %xmm5, 128 - 111, %xmm7); + vec_ror128(KA128, %xmm6, 128 - 111, %xmm7); + + vpxor %xmm15, %xmm3, %xmm3; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm5, %xmm5; + vpslldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm6, %xmm6; + + /* absorb kw4 to other subkeys */ + vpslldq $8, %xmm6, %xmm15; + vpxor %xmm15, %xmm5, %xmm5; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm3, %xmm3; + + /* subl(25) ^= subr(25) & ~subr(16); */ + vpshufd $0x1b, cmll_sub(16, CTX), %xmm10; + vpandn %xmm15, %xmm10, %xmm13; + vpslldq $4, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(25) & subl(16), subr(25) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm10, %xmm14; + vpslld $1, %xmm14, %xmm11; + vpsrld $31, %xmm14, %xmm14; + 
vpaddd %xmm11, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpshufd $0x1b, %xmm3, %xmm3; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + + vmovdqu %xmm3, cmll_sub(18, CTX); + vmovdqu %xmm4, cmll_sub(20, CTX); + vmovdqu %xmm5, cmll_sub(22, CTX); + vmovdqu %xmm6, cmll_sub(24, CTX); + + vpshufd $0x1b, cmll_sub(14, CTX), %xmm3; + vpshufd $0x1b, cmll_sub(12, CTX), %xmm4; + vpshufd $0x1b, cmll_sub(10, CTX), %xmm5; + vpshufd $0x1b, cmll_sub(8, CTX), %xmm6; + + vpxor %xmm15, %xmm3, %xmm3; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm5, %xmm5; + + /* subl(25) ^= subr(25) & ~subr(8); */ + vpandn %xmm15, %xmm6, %xmm13; + vpslldq $4, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(25) & subl(8), subr(25) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm6, %xmm14; + vpslld $1, %xmm14, %xmm11; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm11, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpshufd $0x1b, %xmm3, %xmm3; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + + vmovdqu %xmm3, cmll_sub(14, CTX); + vmovdqu %xmm4, cmll_sub(12, CTX); + vmovdqu %xmm5, cmll_sub(10, CTX); + + vpshufd $0x1b, cmll_sub(6, CTX), %xmm6; + vpshufd $0x1b, cmll_sub(4, CTX), %xmm4; + vpshufd $0x1b, cmll_sub(2, CTX), %xmm2; + vpshufd $0x1b, cmll_sub(0, CTX), %xmm0; + + vpxor %xmm15, %xmm6, %xmm6; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm2, %xmm2; + vpxor %xmm15, %xmm0, %xmm0; + + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm2, %xmm2; + vpshufd $0x1b, %xmm0, %xmm0; + + vpsrldq $8, %xmm2, %xmm3; + vpsrldq $8, %xmm4, %xmm5; + vpsrldq $8, %xmm6, %xmm7; + + /* + * key XOR is end of F-function. 
+ */ + vpxor %xmm2, %xmm0, %xmm0; + vpxor %xmm4, %xmm2, %xmm2; + + vmovq %xmm0, cmll_sub(0, CTX); + vmovq %xmm3, cmll_sub(2, CTX); + vpxor %xmm5, %xmm3, %xmm3; + vpxor %xmm6, %xmm4, %xmm4; + vpxor %xmm7, %xmm5, %xmm5; + vmovq %xmm2, cmll_sub(3, CTX); + vmovq %xmm3, cmll_sub(4, CTX); + vmovq %xmm4, cmll_sub(5, CTX); + vmovq %xmm5, cmll_sub(6, CTX); + + vmovq cmll_sub(7, CTX), %xmm7; + vmovq cmll_sub(8, CTX), %xmm8; + vmovq cmll_sub(9, CTX), %xmm9; + vmovq cmll_sub(10, CTX), %xmm10; + /* tl = subl(10) ^ (subr(10) & ~subr(8)); */ + vpandn %xmm10, %xmm8, %xmm15; + vpsrldq $4, %xmm15, %xmm15; + vpxor %xmm15, %xmm10, %xmm0; + /* dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); */ + vpand %xmm8, %xmm0, %xmm15; + vpslld $1, %xmm15, %xmm14; + vpsrld $31, %xmm15, %xmm15; + vpaddd %xmm14, %xmm15, %xmm15; + vpslldq $12, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm0, %xmm0; + + vpxor %xmm0, %xmm6, %xmm6; + vmovq %xmm6, cmll_sub(7, CTX); + + vmovq cmll_sub(11, CTX), %xmm11; + vmovq cmll_sub(12, CTX), %xmm12; + vmovq cmll_sub(13, CTX), %xmm13; + vmovq cmll_sub(14, CTX), %xmm14; + vmovq cmll_sub(15, CTX), %xmm15; + /* tl = subl(7) ^ (subr(7) & ~subr(9)); */ + vpandn %xmm7, %xmm9, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm7, %xmm0; + /* dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); */ + vpand %xmm9, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vpxor %xmm11, %xmm0, %xmm0; + vpxor %xmm12, %xmm10, %xmm10; + vpxor %xmm13, %xmm11, %xmm11; + vpxor %xmm14, %xmm12, %xmm12; + vpxor %xmm15, %xmm13, %xmm13; + vmovq %xmm0, cmll_sub(10, CTX); + vmovq %xmm10, cmll_sub(11, CTX); + vmovq %xmm11, cmll_sub(12, CTX); + vmovq %xmm12, cmll_sub(13, CTX); + vmovq %xmm13, cmll_sub(14, CTX); + + vmovq cmll_sub(16, CTX), %xmm6; + vmovq cmll_sub(17, CTX), %xmm7; + vmovq cmll_sub(18, CTX), %xmm8; + vmovq cmll_sub(19, CTX), %xmm9; + vmovq cmll_sub(20, CTX), %xmm10; + /* tl = subl(18) ^ (subr(18) & ~subr(16)); */ + vpandn %xmm8, %xmm6, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm8, %xmm0; + /* dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); */ + vpand %xmm6, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vpxor %xmm14, %xmm0, %xmm0; + vmovq %xmm0, cmll_sub(15, CTX); + + /* tl = subl(15) ^ (subr(15) & ~subr(17)); */ + vpandn %xmm15, %xmm7, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm15, %xmm0; + /* dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); */ + vpand %xmm7, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vmovq cmll_sub(21, CTX), %xmm1; + vmovq cmll_sub(22, CTX), %xmm2; + vmovq cmll_sub(23, CTX), %xmm3; + vmovq cmll_sub(24, CTX), %xmm4; + + vpxor %xmm9, %xmm0, %xmm0; + vpxor %xmm10, %xmm8, %xmm8; + vpxor %xmm1, %xmm9, %xmm9; + vpxor %xmm2, %xmm10, %xmm10; + vpxor %xmm3, %xmm1, %xmm1; + vpxor %xmm4, %xmm3, %xmm3; + + vmovq %xmm0, cmll_sub(18, CTX); + vmovq %xmm8, cmll_sub(19, CTX); + vmovq %xmm9, cmll_sub(20, CTX); + vmovq %xmm10, cmll_sub(21, CTX); + vmovq %xmm1, cmll_sub(22, CTX); + vmovq %xmm2, cmll_sub(23, CTX); + vmovq %xmm3, cmll_sub(24, CTX); + + /* kw2 and kw4 are unused now. 
*/ + movq $0, cmll_sub(1, CTX); + movq $0, cmll_sub(25, CTX); + + vzeroall; + + ret; + CFI_ENDPROC(); +ELF(.size __camellia_avx_setup128,.-__camellia_avx_setup128;) + +.align 8 +ELF(.type __camellia_avx_setup256,@function;) + +__camellia_avx_setup256: + /* input: + * %rdi: ctx, CTX; subkey storage at key_table(CTX) + * %xmm0 & %xmm1: key + */ + CFI_STARTPROC(); + +#define KL128 %xmm0 +#define KR128 %xmm1 +#define KA128 %xmm2 +#define KB128 %xmm3 + + vpshufb .Lbswap128_mask rRIP, KL128, KL128; + vpshufb .Lbswap128_mask rRIP, KR128, KR128; + + vmovdqa .Linv_shift_row_and_unpcklbw rRIP, %xmm11; + vmovq .Lsbox4_input_mask rRIP, %xmm12; + vbroadcastss .L0f0f0f0f rRIP, %xmm13; + vmovdqa .Lpre_tf_lo_s1 rRIP, %xmm14; + vmovdqa .Lpre_tf_hi_s1 rRIP, %xmm15; + + /* + * Generate KA + */ + vpxor KL128, KR128, %xmm3; + vpsrldq $8, KR128, %xmm6; + vpsrldq $8, %xmm3, %xmm2; + vpslldq $8, %xmm3, %xmm3; + vpsrldq $8, %xmm3, %xmm3; + + camellia_f(%xmm2, %xmm4, %xmm5, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma1 rRIP); + vpxor %xmm4, %xmm3, %xmm3; + camellia_f(%xmm3, %xmm2, %xmm5, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma2 rRIP); + vpxor %xmm6, %xmm2, %xmm2; + camellia_f(%xmm2, %xmm3, %xmm5, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma3 rRIP); + vpxor %xmm4, %xmm3, %xmm3; + vpxor KR128, %xmm3, %xmm3; + camellia_f(%xmm3, %xmm4, %xmm5, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma4 rRIP); + + vpslldq $8, %xmm3, %xmm3; + vpxor %xmm4, %xmm2, %xmm2; + vpsrldq $8, %xmm3, %xmm3; + vpslldq $8, %xmm2, KA128; + vpor %xmm3, KA128, KA128; + + /* + * Generate KB + */ + vpxor KA128, KR128, %xmm3; + vpsrldq $8, %xmm3, %xmm4; + vpslldq $8, %xmm3, %xmm3; + vpsrldq $8, %xmm3, %xmm3; + + camellia_f(%xmm4, %xmm5, %xmm6, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma5 rRIP); + vpxor %xmm5, %xmm3, %xmm3; + + camellia_f(%xmm3, %xmm5, %xmm6, + %xmm7, %xmm8, %xmm9, %xmm10, + %xmm11, %xmm12, %xmm13, %xmm14, %xmm15, .Lsigma6 rRIP); + vpslldq $8, %xmm3, %xmm3; + vpxor %xmm5, %xmm4, %xmm4; + vpsrldq $8, %xmm3, %xmm3; + vpslldq $8, %xmm4, %xmm4; + vpor %xmm3, %xmm4, KB128; + + /* + * Generate subkeys + */ + vmovdqu KB128, cmll_sub(32, CTX); + vec_rol128(KR128, %xmm4, 15, %xmm15); + vec_rol128(KA128, %xmm5, 15, %xmm15); + vec_rol128(KR128, %xmm6, 30, %xmm15); + vec_rol128(KB128, %xmm7, 30, %xmm15); + vec_rol128(KL128, %xmm8, 45, %xmm15); + vec_rol128(KA128, %xmm9, 45, %xmm15); + vec_rol128(KL128, %xmm10, 60, %xmm15); + vec_rol128(KR128, %xmm11, 60, %xmm15); + vec_rol128(KB128, %xmm12, 60, %xmm15); + + /* absorb kw2 to other subkeys */ + vpslldq $8, KL128, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, KB128, KB128; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm5, %xmm5; + + /* subl(1) ^= subr(1) & ~subr(9); */ + vpandn %xmm15, %xmm6, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm6, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm15, %xmm8, %xmm8; + vpxor %xmm15, %xmm9, %xmm9; + + vpshufd $0x1b, KL128, KL128; + vpshufd $0x1b, KB128, KB128; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + 
vpshufd $0x1b, %xmm7, %xmm7; + vpshufd $0x1b, %xmm8, %xmm8; + vpshufd $0x1b, %xmm9, %xmm9; + + vmovdqu KL128, cmll_sub(0, CTX); + vpshufd $0x1b, KL128, KL128; + vmovdqu KB128, cmll_sub(2, CTX); + vmovdqu %xmm4, cmll_sub(4, CTX); + vmovdqu %xmm5, cmll_sub(6, CTX); + vmovdqu %xmm6, cmll_sub(8, CTX); + vmovdqu %xmm7, cmll_sub(10, CTX); + vmovdqu %xmm8, cmll_sub(12, CTX); + vmovdqu %xmm9, cmll_sub(14, CTX); + + vmovdqu cmll_sub(32, CTX), KB128; + + /* subl(1) ^= subr(1) & ~subr(17); */ + vpandn %xmm15, %xmm10, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm10, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm11, %xmm11; + vpxor %xmm15, %xmm12, %xmm12; + + vec_ror128(KL128, %xmm4, 128-77, %xmm14); + vec_ror128(KA128, %xmm5, 128-77, %xmm14); + vec_ror128(KR128, %xmm6, 128-94, %xmm14); + vec_ror128(KA128, %xmm7, 128-94, %xmm14); + vec_ror128(KL128, %xmm8, 128-111, %xmm14); + vec_ror128(KB128, %xmm9, 128-111, %xmm14); + + vpxor %xmm15, %xmm4, %xmm4; + + vpshufd $0x1b, %xmm10, %xmm10; + vpshufd $0x1b, %xmm11, %xmm11; + vpshufd $0x1b, %xmm12, %xmm12; + vpshufd $0x1b, %xmm4, %xmm4; + + vmovdqu %xmm10, cmll_sub(16, CTX); + vmovdqu %xmm11, cmll_sub(18, CTX); + vmovdqu %xmm12, cmll_sub(20, CTX); + vmovdqu %xmm4, cmll_sub(22, CTX); + + /* subl(1) ^= subr(1) & ~subr(25); */ + vpandn %xmm15, %xmm5, %xmm13; + vpslldq $12, %xmm13, %xmm13; + vpsrldq $8, %xmm13, %xmm13; + vpxor %xmm13, %xmm15, %xmm15; + /* dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm5, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm6, %xmm6; + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm15, %xmm8, %xmm8; + vpslldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm9, %xmm9; + + /* absorb kw4 to other subkeys */ + vpslldq $8, %xmm9, %xmm15; + vpxor %xmm15, %xmm8, %xmm8; + vpxor %xmm15, %xmm7, %xmm7; + vpxor %xmm15, %xmm6, %xmm6; + + /* subl(33) ^= subr(33) & ~subr(24); */ + vpandn %xmm15, %xmm5, %xmm14; + vpslldq $4, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm5, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm7, %xmm7; + vpshufd $0x1b, %xmm8, %xmm8; + vpshufd $0x1b, %xmm9, %xmm9; + + vmovdqu %xmm5, cmll_sub(24, CTX); + vmovdqu %xmm6, cmll_sub(26, CTX); + vmovdqu %xmm7, cmll_sub(28, CTX); + vmovdqu %xmm8, cmll_sub(30, CTX); + vmovdqu %xmm9, cmll_sub(32, CTX); + + vpshufd $0x1b, cmll_sub(22, CTX), %xmm0; + vpshufd $0x1b, cmll_sub(20, CTX), %xmm1; + vpshufd $0x1b, cmll_sub(18, CTX), %xmm2; + vpshufd $0x1b, cmll_sub(16, CTX), %xmm3; + vpshufd $0x1b, cmll_sub(14, CTX), %xmm4; + vpshufd $0x1b, cmll_sub(12, CTX), %xmm5; + vpshufd $0x1b, cmll_sub(10, CTX), %xmm6; + vpshufd $0x1b, cmll_sub(8, CTX), %xmm7; + + vpxor %xmm15, %xmm0, %xmm0; + vpxor %xmm15, %xmm1, %xmm1; + vpxor %xmm15, %xmm2, %xmm2; + + /* subl(33) ^= subr(33) & ~subr(24); */ + vpandn %xmm15, %xmm3, 
%xmm14; + vpslldq $4, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm3, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm5, %xmm5; + vpxor %xmm15, %xmm6, %xmm6; + + vpshufd $0x1b, %xmm0, %xmm0; + vpshufd $0x1b, %xmm1, %xmm1; + vpshufd $0x1b, %xmm2, %xmm2; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm5, %xmm5; + vpshufd $0x1b, %xmm6, %xmm6; + + vmovdqu %xmm0, cmll_sub(22, CTX); + vmovdqu %xmm1, cmll_sub(20, CTX); + vmovdqu %xmm2, cmll_sub(18, CTX); + vmovdqu %xmm4, cmll_sub(14, CTX); + vmovdqu %xmm5, cmll_sub(12, CTX); + vmovdqu %xmm6, cmll_sub(10, CTX); + + vpshufd $0x1b, cmll_sub(6, CTX), %xmm6; + vpshufd $0x1b, cmll_sub(4, CTX), %xmm4; + vpshufd $0x1b, cmll_sub(2, CTX), %xmm2; + vpshufd $0x1b, cmll_sub(0, CTX), %xmm0; + + /* subl(33) ^= subr(33) & ~subr(24); */ + vpandn %xmm15, %xmm7, %xmm14; + vpslldq $4, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + /* dw = subl(33) & subl(24), subr(33) ^= CAMELLIA_RL1(dw); */ + vpand %xmm15, %xmm7, %xmm14; + vpslld $1, %xmm14, %xmm13; + vpsrld $31, %xmm14, %xmm14; + vpaddd %xmm13, %xmm14, %xmm14; + vpsrldq $12, %xmm14, %xmm14; + vpslldq $8, %xmm14, %xmm14; + vpxor %xmm14, %xmm15, %xmm15; + + vpxor %xmm15, %xmm6, %xmm6; + vpxor %xmm15, %xmm4, %xmm4; + vpxor %xmm15, %xmm2, %xmm2; + vpxor %xmm15, %xmm0, %xmm0; + + vpshufd $0x1b, %xmm6, %xmm6; + vpshufd $0x1b, %xmm4, %xmm4; + vpshufd $0x1b, %xmm2, %xmm2; + vpshufd $0x1b, %xmm0, %xmm0; + + vpsrldq $8, %xmm2, %xmm3; + vpsrldq $8, %xmm4, %xmm5; + vpsrldq $8, %xmm6, %xmm7; + + /* + * key XOR is end of F-function. 
+ */ + vpxor %xmm2, %xmm0, %xmm0; + vpxor %xmm4, %xmm2, %xmm2; + + vmovq %xmm0, cmll_sub(0, CTX); + vmovq %xmm3, cmll_sub(2, CTX); + vpxor %xmm5, %xmm3, %xmm3; + vpxor %xmm6, %xmm4, %xmm4; + vpxor %xmm7, %xmm5, %xmm5; + vmovq %xmm2, cmll_sub(3, CTX); + vmovq %xmm3, cmll_sub(4, CTX); + vmovq %xmm4, cmll_sub(5, CTX); + vmovq %xmm5, cmll_sub(6, CTX); + + vmovq cmll_sub(7, CTX), %xmm7; + vmovq cmll_sub(8, CTX), %xmm8; + vmovq cmll_sub(9, CTX), %xmm9; + vmovq cmll_sub(10, CTX), %xmm10; + /* tl = subl(10) ^ (subr(10) & ~subr(8)); */ + vpandn %xmm10, %xmm8, %xmm15; + vpsrldq $4, %xmm15, %xmm15; + vpxor %xmm15, %xmm10, %xmm0; + /* dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); */ + vpand %xmm8, %xmm0, %xmm15; + vpslld $1, %xmm15, %xmm14; + vpsrld $31, %xmm15, %xmm15; + vpaddd %xmm14, %xmm15, %xmm15; + vpslldq $12, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm0, %xmm0; + + vpxor %xmm0, %xmm6, %xmm6; + vmovq %xmm6, cmll_sub(7, CTX); + + vmovq cmll_sub(11, CTX), %xmm11; + vmovq cmll_sub(12, CTX), %xmm12; + vmovq cmll_sub(13, CTX), %xmm13; + vmovq cmll_sub(14, CTX), %xmm14; + vmovq cmll_sub(15, CTX), %xmm15; + /* tl = subl(7) ^ (subr(7) & ~subr(9)); */ + vpandn %xmm7, %xmm9, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm7, %xmm0; + /* dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); */ + vpand %xmm9, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vpxor %xmm11, %xmm0, %xmm0; + vpxor %xmm12, %xmm10, %xmm10; + vpxor %xmm13, %xmm11, %xmm11; + vpxor %xmm14, %xmm12, %xmm12; + vpxor %xmm15, %xmm13, %xmm13; + vmovq %xmm0, cmll_sub(10, CTX); + vmovq %xmm10, cmll_sub(11, CTX); + vmovq %xmm11, cmll_sub(12, CTX); + vmovq %xmm12, cmll_sub(13, CTX); + vmovq %xmm13, cmll_sub(14, CTX); + + vmovq cmll_sub(16, CTX), %xmm6; + vmovq cmll_sub(17, CTX), %xmm7; + vmovq cmll_sub(18, CTX), %xmm8; + vmovq cmll_sub(19, CTX), %xmm9; + vmovq cmll_sub(20, CTX), %xmm10; + /* tl = subl(18) ^ (subr(18) & ~subr(16)); */ + vpandn %xmm8, %xmm6, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm8, %xmm0; + /* dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); */ + vpand %xmm6, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vpxor %xmm14, %xmm0, %xmm0; + vmovq %xmm0, cmll_sub(15, CTX); + + /* tl = subl(15) ^ (subr(15) & ~subr(17)); */ + vpandn %xmm15, %xmm7, %xmm1; + vpsrldq $4, %xmm1, %xmm1; + vpxor %xmm1, %xmm15, %xmm0; + /* dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); */ + vpand %xmm7, %xmm0, %xmm1; + vpslld $1, %xmm1, %xmm2; + vpsrld $31, %xmm1, %xmm1; + vpaddd %xmm2, %xmm1, %xmm1; + vpslldq $12, %xmm1, %xmm1; + vpsrldq $8, %xmm1, %xmm1; + vpxor %xmm1, %xmm0, %xmm0; + + vmovq cmll_sub(21, CTX), %xmm1; + vmovq cmll_sub(22, CTX), %xmm2; + vmovq cmll_sub(23, CTX), %xmm3; + vmovq cmll_sub(24, CTX), %xmm4; + + vpxor %xmm9, %xmm0, %xmm0; + vpxor %xmm10, %xmm8, %xmm8; + vpxor %xmm1, %xmm9, %xmm9; + vpxor %xmm2, %xmm10, %xmm10; + vpxor %xmm3, %xmm1, %xmm1; + + vmovq %xmm0, cmll_sub(18, CTX); + vmovq %xmm8, cmll_sub(19, CTX); + vmovq %xmm9, cmll_sub(20, CTX); + vmovq %xmm10, cmll_sub(21, CTX); + vmovq %xmm1, cmll_sub(22, CTX); + + vmovq cmll_sub(25, CTX), %xmm5; + vmovq cmll_sub(26, CTX), %xmm6; + vmovq cmll_sub(27, CTX), %xmm7; + vmovq cmll_sub(28, CTX), %xmm8; + vmovq cmll_sub(29, CTX), %xmm9; + vmovq 
cmll_sub(30, CTX), %xmm10; + vmovq cmll_sub(31, CTX), %xmm11; + vmovq cmll_sub(32, CTX), %xmm12; + + /* tl = subl(26) ^ (subr(26) & ~subr(24)); */ + vpandn %xmm6, %xmm4, %xmm15; + vpsrldq $4, %xmm15, %xmm15; + vpxor %xmm15, %xmm6, %xmm0; + /* dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw); */ + vpand %xmm4, %xmm0, %xmm15; + vpslld $1, %xmm15, %xmm14; + vpsrld $31, %xmm15, %xmm15; + vpaddd %xmm14, %xmm15, %xmm15; + vpslldq $12, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm0, %xmm0; + + vpxor %xmm0, %xmm2, %xmm2; + vmovq %xmm2, cmll_sub(23, CTX); + + /* tl = subl(23) ^ (subr(23) & ~subr(25)); */ + vpandn %xmm3, %xmm5, %xmm15; + vpsrldq $4, %xmm15, %xmm15; + vpxor %xmm15, %xmm3, %xmm0; + /* dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw); */ + vpand %xmm5, %xmm0, %xmm15; + vpslld $1, %xmm15, %xmm14; + vpsrld $31, %xmm15, %xmm15; + vpaddd %xmm14, %xmm15, %xmm15; + vpslldq $12, %xmm15, %xmm15; + vpsrldq $8, %xmm15, %xmm15; + vpxor %xmm15, %xmm0, %xmm0; + + vpxor %xmm7, %xmm0, %xmm0; + vpxor %xmm8, %xmm6, %xmm6; + vpxor %xmm9, %xmm7, %xmm7; + vpxor %xmm10, %xmm8, %xmm8; + vpxor %xmm11, %xmm9, %xmm9; + vpxor %xmm12, %xmm11, %xmm11; + + vmovq %xmm0, cmll_sub(26, CTX); + vmovq %xmm6, cmll_sub(27, CTX); + vmovq %xmm7, cmll_sub(28, CTX); + vmovq %xmm8, cmll_sub(29, CTX); + vmovq %xmm9, cmll_sub(30, CTX); + vmovq %xmm10, cmll_sub(31, CTX); + vmovq %xmm11, cmll_sub(32, CTX); + + /* kw2 and kw4 are unused now. */ + movq $0, cmll_sub(1, CTX); + movq $0, cmll_sub(33, CTX); + + vzeroall; + + ret; + CFI_ENDPROC(); +ELF(.size __camellia_avx_setup256,.-__camellia_avx_setup256;) + +.align 8 +.globl _gcry_camellia_aesni_avx_keygen +ELF(.type _gcry_camellia_aesni_avx_keygen,@function;) + +_gcry_camellia_aesni_avx_keygen: + /* input: + * %rdi: ctx, CTX + * %rsi: key + * %rdx: keylen + */ + CFI_STARTPROC(); + + vzeroupper; + + vmovdqu (%rsi), %xmm0; + cmpl $24, %edx; + jb __camellia_avx_setup128; + je .Lprepare_key192; + + vmovdqu 16(%rsi), %xmm1; + jmp __camellia_avx_setup256; + +.Lprepare_key192: + vpcmpeqd %xmm2, %xmm2, %xmm2; + vmovq 16(%rsi), %xmm1; + + vpxor %xmm1, %xmm2, %xmm2; + vpslldq $8, %xmm2, %xmm2; + vpor %xmm2, %xmm1, %xmm1; + + jmp __camellia_avx_setup256; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx_keygen,.-_gcry_camellia_aesni_avx_keygen;) + +#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)*/ +#endif /*__x86_64*/ diff --git a/comm/third_party/libgcrypt/cipher/camellia-aesni-avx2-amd64.S b/comm/third_party/libgcrypt/cipher/camellia-aesni-avx2-amd64.S new file mode 100644 index 0000000000..f620f04036 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/camellia-aesni-avx2-amd64.S @@ -0,0 +1,1782 @@ +/* camellia-aesni-avx2-amd64.S - AES-NI/AVX2 implementation of Camellia cipher + * + * Copyright (C) 2013-2015,2020 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <config.h> + +#ifdef __x86_64 +#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && \ + defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT) + +#include "asm-common-amd64.h" + +#define CAMELLIA_TABLE_BYTE_LEN 272 + +/* struct CAMELLIA_context: */ +#define key_table 0 +#define key_bitlength CAMELLIA_TABLE_BYTE_LEN + +/* register macros */ +#define CTX %rdi +#define RIO %r8 + +/********************************************************************** + helper macros + **********************************************************************/ +#define filter_8bit(x, lo_t, hi_t, mask4bit, tmp0) \ + vpand x, mask4bit, tmp0; \ + vpandn x, mask4bit, x; \ + vpsrld $4, x, x; \ + \ + vpshufb tmp0, lo_t, tmp0; \ + vpshufb x, hi_t, x; \ + vpxor tmp0, x, x; + +#define ymm0_x xmm0 +#define ymm1_x xmm1 +#define ymm2_x xmm2 +#define ymm3_x xmm3 +#define ymm4_x xmm4 +#define ymm5_x xmm5 +#define ymm6_x xmm6 +#define ymm7_x xmm7 +#define ymm8_x xmm8 +#define ymm9_x xmm9 +#define ymm10_x xmm10 +#define ymm11_x xmm11 +#define ymm12_x xmm12 +#define ymm13_x xmm13 +#define ymm14_x xmm14 +#define ymm15_x xmm15 + +/********************************************************************** + 32-way camellia + **********************************************************************/ + +/* + * IN: + * x0..x7: byte-sliced AB state + * mem_cd: register pointer storing CD state + * key: index for key material + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, t0, t1, t2, t3, t4, t5, t6, \ + t7, mem_cd, key) \ + /* \ + * S-function with AES subbytes \ + */ \ + vbroadcasti128 .Linv_shift_row rRIP, t4; \ + vpbroadcastd .L0f0f0f0f rRIP, t7; \ + vbroadcasti128 .Lpre_tf_lo_s1 rRIP, t5; \ + vbroadcasti128 .Lpre_tf_hi_s1 rRIP, t6; \ + vbroadcasti128 .Lpre_tf_lo_s4 rRIP, t2; \ + vbroadcasti128 .Lpre_tf_hi_s4 rRIP, t3; \ + \ + /* AES inverse shift rows */ \ + vpshufb t4, x0, x0; \ + vpshufb t4, x7, x7; \ + vpshufb t4, x3, x3; \ + vpshufb t4, x6, x6; \ + vpshufb t4, x2, x2; \ + vpshufb t4, x5, x5; \ + vpshufb t4, x1, x1; \ + vpshufb t4, x4, x4; \ + \ + /* prefilter sboxes 1, 2 and 3 */ \ + /* prefilter sbox 4 */ \ + filter_8bit(x0, t5, t6, t7, t4); \ + filter_8bit(x7, t5, t6, t7, t4); \ + vextracti128 $1, x0, t0##_x; \ + vextracti128 $1, x7, t1##_x; \ + filter_8bit(x3, t2, t3, t7, t4); \ + filter_8bit(x6, t2, t3, t7, t4); \ + vextracti128 $1, x3, t3##_x; \ + vextracti128 $1, x6, t2##_x; \ + filter_8bit(x2, t5, t6, t7, t4); \ + filter_8bit(x5, t5, t6, t7, t4); \ + filter_8bit(x1, t5, t6, t7, t4); \ + filter_8bit(x4, t5, t6, t7, t4); \ + \ + vpxor t4##_x, t4##_x, t4##_x; \ + \ + /* AES subbytes + AES shift rows */ \ + vextracti128 $1, x2, t6##_x; \ + vextracti128 $1, x5, t5##_x; \ + vaesenclast t4##_x, x0##_x, x0##_x; \ + vaesenclast t4##_x, t0##_x, t0##_x; \ + vaesenclast t4##_x, x7##_x, x7##_x; \ + vaesenclast t4##_x, t1##_x, t1##_x; \ + vaesenclast t4##_x, x3##_x, x3##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vaesenclast t4##_x, x6##_x, x6##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t0##_x, x0, x0; \ + vinserti128 $1, t1##_x, x7, x7; \ + vinserti128 $1, t3##_x, x3, x3; \ + vinserti128 $1, t2##_x, x6, x6; \ + vextracti128 $1, x1, t3##_x; \ + vextracti128 $1, x4, t2##_x; \ + vbroadcasti128 .Lpost_tf_lo_s1 rRIP, t0; \ + vbroadcasti128 .Lpost_tf_hi_s1 rRIP, t1; \ + vaesenclast t4##_x, x2##_x, x2##_x; \ + vaesenclast t4##_x, t6##_x, t6##_x; \ + vaesenclast t4##_x, x5##_x, x5##_x; \ + vaesenclast 
t4##_x, t5##_x, t5##_x; \ + vaesenclast t4##_x, x1##_x, x1##_x; \ + vaesenclast t4##_x, t3##_x, t3##_x; \ + vaesenclast t4##_x, x4##_x, x4##_x; \ + vaesenclast t4##_x, t2##_x, t2##_x; \ + vinserti128 $1, t6##_x, x2, x2; \ + vinserti128 $1, t5##_x, x5, x5; \ + vinserti128 $1, t3##_x, x1, x1; \ + vinserti128 $1, t2##_x, x4, x4; \ + \ + /* postfilter sboxes 1 and 4 */ \ + vbroadcasti128 .Lpost_tf_lo_s3 rRIP, t2; \ + vbroadcasti128 .Lpost_tf_hi_s3 rRIP, t3; \ + filter_8bit(x0, t0, t1, t7, t4); \ + filter_8bit(x7, t0, t1, t7, t4); \ + filter_8bit(x3, t0, t1, t7, t6); \ + filter_8bit(x6, t0, t1, t7, t6); \ + \ + /* postfilter sbox 3 */ \ + vbroadcasti128 .Lpost_tf_lo_s2 rRIP, t4; \ + vbroadcasti128 .Lpost_tf_hi_s2 rRIP, t5; \ + filter_8bit(x2, t2, t3, t7, t6); \ + filter_8bit(x5, t2, t3, t7, t6); \ + \ + vpbroadcastq key, t0; /* higher 64-bit duplicate ignored */ \ + \ + /* postfilter sbox 2 */ \ + filter_8bit(x1, t4, t5, t7, t2); \ + filter_8bit(x4, t4, t5, t7, t2); \ + vpxor t7, t7, t7; \ + \ + vpsrldq $1, t0, t1; \ + vpsrldq $2, t0, t2; \ + vpshufb t7, t1, t1; \ + vpsrldq $3, t0, t3; \ + \ + /* P-function */ \ + vpxor x5, x0, x0; \ + vpxor x6, x1, x1; \ + vpxor x7, x2, x2; \ + vpxor x4, x3, x3; \ + \ + vpshufb t7, t2, t2; \ + vpsrldq $4, t0, t4; \ + vpshufb t7, t3, t3; \ + vpsrldq $5, t0, t5; \ + vpshufb t7, t4, t4; \ + \ + vpxor x2, x4, x4; \ + vpxor x3, x5, x5; \ + vpxor x0, x6, x6; \ + vpxor x1, x7, x7; \ + \ + vpsrldq $6, t0, t6; \ + vpshufb t7, t5, t5; \ + vpshufb t7, t6, t6; \ + \ + vpxor x7, x0, x0; \ + vpxor x4, x1, x1; \ + vpxor x5, x2, x2; \ + vpxor x6, x3, x3; \ + \ + vpxor x3, x4, x4; \ + vpxor x0, x5, x5; \ + vpxor x1, x6, x6; \ + vpxor x2, x7, x7; /* note: high and low parts swapped */ \ + \ + /* Add key material and result to CD (x becomes new CD) */ \ + \ + vpxor t6, x1, x1; \ + vpxor 5 * 32(mem_cd), x1, x1; \ + \ + vpsrldq $7, t0, t6; \ + vpshufb t7, t0, t0; \ + vpshufb t7, t6, t7; \ + \ + vpxor t7, x0, x0; \ + vpxor 4 * 32(mem_cd), x0, x0; \ + \ + vpxor t5, x2, x2; \ + vpxor 6 * 32(mem_cd), x2, x2; \ + \ + vpxor t4, x3, x3; \ + vpxor 7 * 32(mem_cd), x3, x3; \ + \ + vpxor t3, x4, x4; \ + vpxor 0 * 32(mem_cd), x4, x4; \ + \ + vpxor t2, x5, x5; \ + vpxor 1 * 32(mem_cd), x5, x5; \ + \ + vpxor t1, x6, x6; \ + vpxor 2 * 32(mem_cd), x6, x6; \ + \ + vpxor t0, x7, x7; \ + vpxor 3 * 32(mem_cd), x7, x7; + +/* + * IN/OUT: + * x0..x7: byte-sliced AB state preloaded + * mem_ab: byte-sliced AB state in memory + * mem_cd: byte-sliced CD state in memory + */ +#define two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i, dir, store_ab) \ + roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_cd, (key_table + (i) * 8)(CTX)); \ + \ + vmovdqu x0, 4 * 32(mem_cd); \ + vmovdqu x1, 5 * 32(mem_cd); \ + vmovdqu x2, 6 * 32(mem_cd); \ + vmovdqu x3, 7 * 32(mem_cd); \ + vmovdqu x4, 0 * 32(mem_cd); \ + vmovdqu x5, 1 * 32(mem_cd); \ + vmovdqu x6, 2 * 32(mem_cd); \ + vmovdqu x7, 3 * 32(mem_cd); \ + \ + roundsm32(x4, x5, x6, x7, x0, x1, x2, x3, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, (key_table + ((i) + (dir)) * 8)(CTX)); \ + \ + store_ab(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab); + +#define dummy_store(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) /* do nothing */ + +#define store_ab_state(x0, x1, x2, x3, x4, x5, x6, x7, mem_ab) \ + /* Store new AB state */ \ + vmovdqu x4, 4 * 32(mem_ab); \ + vmovdqu x5, 5 * 32(mem_ab); \ + vmovdqu x6, 6 * 32(mem_ab); \ + vmovdqu x7, 7 * 32(mem_ab); \ + vmovdqu x0, 0 * 32(mem_ab); \ + vmovdqu x1, 1 * 
32(mem_ab); \ + vmovdqu x2, 2 * 32(mem_ab); \ + vmovdqu x3, 3 * 32(mem_ab); + +#define enc_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 2, 1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 4, 1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 6, 1, dummy_store); + +#define dec_rounds32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, i) \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 7, -1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 5, -1, store_ab_state); \ + two_roundsm32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd, (i) + 3, -1, dummy_store); + +/* + * IN: + * v0..3: byte-sliced 32-bit integers + * OUT: + * v0..3: (IN <<< 1) + */ +#define rol32_1_32(v0, v1, v2, v3, t0, t1, t2, zero) \ + vpcmpgtb v0, zero, t0; \ + vpaddb v0, v0, v0; \ + vpabsb t0, t0; \ + \ + vpcmpgtb v1, zero, t1; \ + vpaddb v1, v1, v1; \ + vpabsb t1, t1; \ + \ + vpcmpgtb v2, zero, t2; \ + vpaddb v2, v2, v2; \ + vpabsb t2, t2; \ + \ + vpor t0, v1, v1; \ + \ + vpcmpgtb v3, zero, t0; \ + vpaddb v3, v3, v3; \ + vpabsb t0, t0; \ + \ + vpor t1, v2, v2; \ + vpor t2, v3, v3; \ + vpor t0, v0, v0; + +/* + * IN: + * r: byte-sliced AB state in memory + * l: byte-sliced CD state in memory + * OUT: + * x0..x7: new byte-sliced CD state + */ +#define fls32(l, l0, l1, l2, l3, l4, l5, l6, l7, r, t0, t1, t2, t3, tt0, \ + tt1, tt2, tt3, kll, klr, krl, krr) \ + /* \ + * t0 = kll; \ + * t0 &= ll; \ + * lr ^= rol32(t0, 1); \ + */ \ + vpbroadcastd kll, t0; /* only lowest 32-bit used */ \ + vpxor tt0, tt0, tt0; \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand l0, t0, t0; \ + vpand l1, t1, t1; \ + vpand l2, t2, t2; \ + vpand l3, t3, t3; \ + \ + rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor l4, t0, l4; \ + vpbroadcastd krr, t0; /* only lowest 32-bit used */ \ + vmovdqu l4, 4 * 32(l); \ + vpxor l5, t1, l5; \ + vmovdqu l5, 5 * 32(l); \ + vpxor l6, t2, l6; \ + vmovdqu l6, 6 * 32(l); \ + vpxor l7, t3, l7; \ + vmovdqu l7, 7 * 32(l); \ + \ + /* \ + * t2 = krr; \ + * t2 |= rr; \ + * rl ^= t2; \ + */ \ + \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor 4 * 32(r), t0, t0; \ + vpor 5 * 32(r), t1, t1; \ + vpor 6 * 32(r), t2, t2; \ + vpor 7 * 32(r), t3, t3; \ + \ + vpxor 0 * 32(r), t0, t0; \ + vpxor 1 * 32(r), t1, t1; \ + vpxor 2 * 32(r), t2, t2; \ + vpxor 3 * 32(r), t3, t3; \ + vmovdqu t0, 0 * 32(r); \ + vpbroadcastd krl, t0; /* only lowest 32-bit used */ \ + vmovdqu t1, 1 * 32(r); \ + vmovdqu t2, 2 * 32(r); \ + vmovdqu t3, 3 * 32(r); \ + \ + /* \ + * t2 = krl; \ + * t2 &= rl; \ + * rr ^= rol32(t2, 1); \ + */ \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpand 0 * 32(r), t0, t0; \ + vpand 1 * 32(r), t1, t1; \ + vpand 2 * 
32(r), t2, t2; \ + vpand 3 * 32(r), t3, t3; \ + \ + rol32_1_32(t3, t2, t1, t0, tt1, tt2, tt3, tt0); \ + \ + vpxor 4 * 32(r), t0, t0; \ + vpxor 5 * 32(r), t1, t1; \ + vpxor 6 * 32(r), t2, t2; \ + vpxor 7 * 32(r), t3, t3; \ + vmovdqu t0, 4 * 32(r); \ + vpbroadcastd klr, t0; /* only lowest 32-bit used */ \ + vmovdqu t1, 5 * 32(r); \ + vmovdqu t2, 6 * 32(r); \ + vmovdqu t3, 7 * 32(r); \ + \ + /* \ + * t0 = klr; \ + * t0 |= lr; \ + * ll ^= t0; \ + */ \ + \ + vpshufb tt0, t0, t3; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t2; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t1; \ + vpsrldq $1, t0, t0; \ + vpshufb tt0, t0, t0; \ + \ + vpor l4, t0, t0; \ + vpor l5, t1, t1; \ + vpor l6, t2, t2; \ + vpor l7, t3, t3; \ + \ + vpxor l0, t0, l0; \ + vmovdqu l0, 0 * 32(l); \ + vpxor l1, t1, l1; \ + vmovdqu l1, 1 * 32(l); \ + vpxor l2, t2, l2; \ + vmovdqu l2, 2 * 32(l); \ + vpxor l3, t3, l3; \ + vmovdqu l3, 3 * 32(l); + +#define transpose_4x4(x0, x1, x2, x3, t1, t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +#define byteslice_16x16b_fast(a0, b0, c0, d0, a1, b1, c1, d1, a2, b2, c2, d2, \ + a3, b3, c3, d3, st0, st1) \ + vmovdqu d2, st0; \ + vmovdqu d3, st1; \ + transpose_4x4(a0, a1, a2, a3, d2, d3); \ + transpose_4x4(b0, b1, b2, b3, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu a0, st0; \ + vmovdqu a1, st1; \ + transpose_4x4(c0, c1, c2, c3, a0, a1); \ + transpose_4x4(d0, d1, d2, d3, a0, a1); \ + \ + vbroadcasti128 .Lshufb_16x16b rRIP, a0; \ + vmovdqu st1, a1; \ + vpshufb a0, a2, a2; \ + vpshufb a0, a3, a3; \ + vpshufb a0, b0, b0; \ + vpshufb a0, b1, b1; \ + vpshufb a0, b2, b2; \ + vpshufb a0, b3, b3; \ + vpshufb a0, a1, a1; \ + vpshufb a0, c0, c0; \ + vpshufb a0, c1, c1; \ + vpshufb a0, c2, c2; \ + vpshufb a0, c3, c3; \ + vpshufb a0, d0, d0; \ + vpshufb a0, d1, d1; \ + vpshufb a0, d2, d2; \ + vpshufb a0, d3, d3; \ + vmovdqu d3, st1; \ + vmovdqu st0, d3; \ + vpshufb a0, d3, a0; \ + vmovdqu d2, st0; \ + \ + transpose_4x4(a0, b0, c0, d0, d2, d3); \ + transpose_4x4(a1, b1, c1, d1, d2, d3); \ + vmovdqu st0, d2; \ + vmovdqu st1, d3; \ + \ + vmovdqu b0, st0; \ + vmovdqu b1, st1; \ + transpose_4x4(a2, b2, c2, d2, b0, b1); \ + transpose_4x4(a3, b3, c3, d3, b0, b1); \ + vmovdqu st0, b0; \ + vmovdqu st1, b1; \ + /* does not adjust output bytes inside vectors */ + +/* load blocks to registers and apply pre-whitening */ +#define inpack32_pre(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio, key) \ + vpbroadcastq key, x0; \ + vpshufb .Lpack_bswap rRIP, x0, x0; \ + \ + vpxor 0 * 32(rio), x0, y7; \ + vpxor 1 * 32(rio), x0, y6; \ + vpxor 2 * 32(rio), x0, y5; \ + vpxor 3 * 32(rio), x0, y4; \ + vpxor 4 * 32(rio), x0, y3; \ + vpxor 5 * 32(rio), x0, y2; \ + vpxor 6 * 32(rio), x0, y1; \ + vpxor 7 * 32(rio), x0, y0; \ + vpxor 8 * 32(rio), x0, x7; \ + vpxor 9 * 32(rio), x0, x6; \ + vpxor 10 * 32(rio), x0, x5; \ + vpxor 11 * 32(rio), x0, x4; \ + vpxor 12 * 32(rio), x0, x3; \ + vpxor 13 * 32(rio), x0, x2; \ + vpxor 14 * 32(rio), x0, x1; \ + vpxor 15 * 32(rio), x0, x0; + +/* byteslice pre-whitened blocks and store to temporary memory */ +#define inpack32_post(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, mem_ab, mem_cd) \ + byteslice_16x16b_fast(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, \ + y4, y5, y6, y7, (mem_ab), (mem_cd)); \ + \ + vmovdqu x0, 0 * 32(mem_ab); \ + vmovdqu 
x1, 1 * 32(mem_ab); \ + vmovdqu x2, 2 * 32(mem_ab); \ + vmovdqu x3, 3 * 32(mem_ab); \ + vmovdqu x4, 4 * 32(mem_ab); \ + vmovdqu x5, 5 * 32(mem_ab); \ + vmovdqu x6, 6 * 32(mem_ab); \ + vmovdqu x7, 7 * 32(mem_ab); \ + vmovdqu y0, 0 * 32(mem_cd); \ + vmovdqu y1, 1 * 32(mem_cd); \ + vmovdqu y2, 2 * 32(mem_cd); \ + vmovdqu y3, 3 * 32(mem_cd); \ + vmovdqu y4, 4 * 32(mem_cd); \ + vmovdqu y5, 5 * 32(mem_cd); \ + vmovdqu y6, 6 * 32(mem_cd); \ + vmovdqu y7, 7 * 32(mem_cd); + +/* de-byteslice, apply post-whitening and store blocks */ +#define outunpack32(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, \ + y5, y6, y7, key, stack_tmp0, stack_tmp1) \ + byteslice_16x16b_fast(y0, y4, x0, x4, y1, y5, x1, x5, y2, y6, x2, x6, \ + y3, y7, x3, x7, stack_tmp0, stack_tmp1); \ + \ + vmovdqu x0, stack_tmp0; \ + \ + vpbroadcastq key, x0; \ + vpshufb .Lpack_bswap rRIP, x0, x0; \ + \ + vpxor x0, y7, y7; \ + vpxor x0, y6, y6; \ + vpxor x0, y5, y5; \ + vpxor x0, y4, y4; \ + vpxor x0, y3, y3; \ + vpxor x0, y2, y2; \ + vpxor x0, y1, y1; \ + vpxor x0, y0, y0; \ + vpxor x0, x7, x7; \ + vpxor x0, x6, x6; \ + vpxor x0, x5, x5; \ + vpxor x0, x4, x4; \ + vpxor x0, x3, x3; \ + vpxor x0, x2, x2; \ + vpxor x0, x1, x1; \ + vpxor stack_tmp0, x0, x0; + +#define write_output(x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, \ + y6, y7, rio) \ + vmovdqu x0, 0 * 32(rio); \ + vmovdqu x1, 1 * 32(rio); \ + vmovdqu x2, 2 * 32(rio); \ + vmovdqu x3, 3 * 32(rio); \ + vmovdqu x4, 4 * 32(rio); \ + vmovdqu x5, 5 * 32(rio); \ + vmovdqu x6, 6 * 32(rio); \ + vmovdqu x7, 7 * 32(rio); \ + vmovdqu y0, 8 * 32(rio); \ + vmovdqu y1, 9 * 32(rio); \ + vmovdqu y2, 10 * 32(rio); \ + vmovdqu y3, 11 * 32(rio); \ + vmovdqu y4, 12 * 32(rio); \ + vmovdqu y5, 13 * 32(rio); \ + vmovdqu y6, 14 * 32(rio); \ + vmovdqu y7, 15 * 32(rio); + +.text +.align 32 + +#define SHUFB_BYTES(idx) \ + 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) + +.Lshufb_16x16b: + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) + +.Lpack_bswap: + .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 + .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 + +/* For CTR-mode IV byteswap */ +.Lbswap128_mask: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox1, sbox2, sbox3: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianness(in) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s1: + .byte 0x45, 0xe8, 0x40, 0xed, 0x2e, 0x83, 0x2b, 0x86 + .byte 0x4b, 0xe6, 0x4e, 0xe3, 0x20, 0x8d, 0x25, 0x88 +.Lpre_tf_hi_s1: + .byte 0x00, 0x51, 0xf1, 0xa0, 0x8a, 0xdb, 0x7b, 0x2a + .byte 0x09, 0x58, 0xf8, 0xa9, 0x83, 0xd2, 0x72, 0x23 + +/* + * pre-SubByte transform + * + * pre-lookup for sbox4: + * swap_bitendianness( + * isom_map_camellia_to_aes( + * camellia_f( + * swap_bitendianness(in <<< 1) + * ) + * ) + * ) + * + * (note: '⊕ 0xc5' inside camellia_f()) + */ +.Lpre_tf_lo_s4: + .byte 0x45, 0x40, 0x2e, 0x2b, 0x4b, 0x4e, 0x20, 0x25 + .byte 0x14, 0x11, 0x7f, 0x7a, 0x1a, 0x1f, 0x71, 0x74 +.Lpre_tf_hi_s4: + .byte 0x00, 0xf1, 0x8a, 0x7b, 0x09, 0xf8, 0x83, 0x72 + .byte 0xad, 0x5c, 0x27, 0xd6, 0xa4, 0x55, 0x2e, 0xdf + +/* + * post-SubByte transform + * + * post-lookup for sbox1, sbox4: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ 
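+/* + * The 16-byte tables above and below are applied with filter_8bit(), + * which splits each input byte into nibbles and combines two vpshufb + * lookups; a rough scalar equivalent (illustrative C, not code taken + * from camellia.c) is: + * + * out = lo_t[x & 0x0f] ^ hi_t[x >> 4]; + */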
+.Lpost_tf_lo_s1: + .byte 0x3c, 0xcc, 0xcf, 0x3f, 0x32, 0xc2, 0xc1, 0x31 + .byte 0xdc, 0x2c, 0x2f, 0xdf, 0xd2, 0x22, 0x21, 0xd1 +.Lpost_tf_hi_s1: + .byte 0x00, 0xf9, 0x86, 0x7f, 0xd7, 0x2e, 0x51, 0xa8 + .byte 0xa4, 0x5d, 0x22, 0xdb, 0x73, 0x8a, 0xf5, 0x0c + +/* + * post-SubByte transform + * + * post-lookup for sbox2: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) <<< 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s2: + .byte 0x78, 0x99, 0x9f, 0x7e, 0x64, 0x85, 0x83, 0x62 + .byte 0xb9, 0x58, 0x5e, 0xbf, 0xa5, 0x44, 0x42, 0xa3 +.Lpost_tf_hi_s2: + .byte 0x00, 0xf3, 0x0d, 0xfe, 0xaf, 0x5c, 0xa2, 0x51 + .byte 0x49, 0xba, 0x44, 0xb7, 0xe6, 0x15, 0xeb, 0x18 + +/* + * post-SubByte transform + * + * post-lookup for sbox3: + * swap_bitendianness( + * camellia_h( + * isom_map_aes_to_camellia( + * swap_bitendianness( + * aes_inverse_affine_transform(in) + * ) + * ) + * ) + * ) >>> 1 + * + * (note: '⊕ 0x6e' inside camellia_h()) + */ +.Lpost_tf_lo_s3: + .byte 0x1e, 0x66, 0xe7, 0x9f, 0x19, 0x61, 0xe0, 0x98 + .byte 0x6e, 0x16, 0x97, 0xef, 0x69, 0x11, 0x90, 0xe8 +.Lpost_tf_hi_s3: + .byte 0x00, 0xfc, 0x43, 0xbf, 0xeb, 0x17, 0xa8, 0x54 + .byte 0x52, 0xae, 0x11, 0xed, 0xb9, 0x45, 0xfa, 0x06 + +/* For isolating SubBytes from AESENCLAST, inverse shift row */ +.Linv_shift_row: + .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b + .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 + +.align 4 +/* 4-bit mask */ +.L0f0f0f0f: + .long 0x0f0f0f0f + + +.align 8 +ELF(.type __camellia_enc_blk32,@function;) + +__camellia_enc_blk32: + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 512 bytes + * %r8d: 24 for 16 byte key, 32 for larger + * %ymm0..%ymm15: 32 plaintext blocks + * output: + * %ymm0..%ymm15: 32 encrypted blocks, order swapped: + * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + CFI_STARTPROC(); + + leaq 8 * 32(%rax), %rcx; + + leaq (-8 * 8)(CTX, %r8, 8), %r8; + + inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx); + +.align 8 +.Lenc_loop: + enc_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 0); + + cmpq %r8, CTX; + je .Lenc_done; + leaq (8 * 8)(CTX), CTX; + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table) + 0)(CTX), + ((key_table) + 4)(CTX), + ((key_table) + 8)(CTX), + ((key_table) + 12)(CTX)); + jmp .Lenc_loop; + +.align 8 +.Lenc_done: + /* load CD for output */ + vmovdqu 0 * 32(%rcx), %ymm8; + vmovdqu 1 * 32(%rcx), %ymm9; + vmovdqu 2 * 32(%rcx), %ymm10; + vmovdqu 3 * 32(%rcx), %ymm11; + vmovdqu 4 * 32(%rcx), %ymm12; + vmovdqu 5 * 32(%rcx), %ymm13; + vmovdqu 6 * 32(%rcx), %ymm14; + vmovdqu 7 * 32(%rcx), %ymm15; + + outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, ((key_table) + 8 * 8)(%r8), (%rax), 1 * 32(%rax)); + + ret; + CFI_ENDPROC(); +ELF(.size __camellia_enc_blk32,.-__camellia_enc_blk32;) + +.align 8 +ELF(.type __camellia_dec_blk32,@function;) + +__camellia_dec_blk32: + /* input: + * %rdi: ctx, CTX + * %rax: temporary storage, 512 bytes + * %r8d: 24 for 16 byte key, 32 for larger + * %ymm0..%ymm15: 32 encrypted blocks + * output: + * %ymm0..%ymm15: 32 plaintext blocks, 
order swapped: + * 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 + */ + CFI_STARTPROC(); + + movq %r8, %rcx; + movq CTX, %r8; + leaq (-8 * 8)(CTX, %rcx, 8), CTX; + + leaq 8 * 32(%rax), %rcx; + + inpack32_post(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx); + +.align 8 +.Ldec_loop: + dec_rounds32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rax, %rcx, 0); + + cmpq %r8, CTX; + je .Ldec_done; + + fls32(%rax, %ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %rcx, %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, + ((key_table) + 8)(CTX), + ((key_table) + 12)(CTX), + ((key_table) + 0)(CTX), + ((key_table) + 4)(CTX)); + + leaq (-8 * 8)(CTX), CTX; + jmp .Ldec_loop; + +.align 8 +.Ldec_done: + /* load CD for output */ + vmovdqu 0 * 32(%rcx), %ymm8; + vmovdqu 1 * 32(%rcx), %ymm9; + vmovdqu 2 * 32(%rcx), %ymm10; + vmovdqu 3 * 32(%rcx), %ymm11; + vmovdqu 4 * 32(%rcx), %ymm12; + vmovdqu 5 * 32(%rcx), %ymm13; + vmovdqu 6 * 32(%rcx), %ymm14; + vmovdqu 7 * 32(%rcx), %ymm15; + + outunpack32(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, (key_table)(CTX), (%rax), 1 * 32(%rax)); + + ret; + CFI_ENDPROC(); +ELF(.size __camellia_dec_blk32,.-__camellia_dec_blk32;) + +#define inc_le128(x, minus_one, tmp) \ + vpcmpeqq minus_one, x, tmp; \ + vpsubq minus_one, x, x; \ + vpslldq $8, tmp, tmp; \ + vpsubq tmp, x, x; + +.align 8 +.globl _gcry_camellia_aesni_avx2_ctr_enc +ELF(.type _gcry_camellia_aesni_avx2_ctr_enc,@function;) + +_gcry_camellia_aesni_avx2_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: iv (big endian, 128bit) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + movq 8(%rcx), %r11; + bswapq %r11; + + vzeroupper; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + subq $(16 * 32), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + vpcmpeqd %ymm15, %ymm15, %ymm15; + vpsrldq $8, %ymm15, %ymm15; /* ab: -1:0 ; cd: -1:0 */ + + /* load IV and byteswap */ + vmovdqu (%rcx), %xmm0; + vpshufb .Lbswap128_mask rRIP, %xmm0, %xmm0; + vmovdqa %xmm0, %xmm1; + inc_le128(%xmm0, %xmm15, %xmm14); + vbroadcasti128 .Lbswap128_mask rRIP, %ymm14; + vinserti128 $1, %xmm0, %ymm1, %ymm0; + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 15 * 32(%rax); + + /* check need for handling 64-bit overflow and carry */ + cmpq $(0xffffffffffffffff - 32), %r11; + ja .Lload_ctr_carry; + + /* construct IVs */ + vpaddq %ymm15, %ymm15, %ymm15; /* ab: -2:0 ; cd: -2:0 */ + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 14 * 32(%rax); + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 13 * 32(%rax); + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm12; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm11; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm10; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm9; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm8; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm7; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm6; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm5; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm4; + 
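/* %ymm15 holds -2 in the low qword of each 128-bit lane, so each vpsubq advances both counters in a lane by 2; the 64-bit overflow check above guarantees these additions cannot carry into the high qword on this path. */ +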
vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm3; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm2; + vpsubq %ymm15, %ymm0, %ymm0; + vpshufb %ymm14, %ymm0, %ymm1; + vpsubq %ymm15, %ymm0, %ymm0; /* +30 ; +31 */ + vpsubq %xmm15, %xmm0, %xmm13; /* +32 */ + vpshufb %ymm14, %ymm0, %ymm0; + vpshufb %xmm14, %xmm13, %xmm13; + vmovdqu %xmm13, (%rcx); + + jmp .Lload_ctr_done; + +.align 4 +.Lload_ctr_carry: + /* construct IVs */ + inc_le128(%ymm0, %ymm15, %ymm13); /* ab: le1 ; cd: le2 */ + inc_le128(%ymm0, %ymm15, %ymm13); /* ab: le2 ; cd: le3 */ + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 14 * 32(%rax); + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm13; + vmovdqu %ymm13, 13 * 32(%rax); + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm12; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm11; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm10; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm9; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm8; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm7; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm6; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm5; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm4; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm3; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm2; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vpshufb %ymm14, %ymm0, %ymm1; + inc_le128(%ymm0, %ymm15, %ymm13); + inc_le128(%ymm0, %ymm15, %ymm13); + vextracti128 $1, %ymm0, %xmm13; + vpshufb %ymm14, %ymm0, %ymm0; + inc_le128(%xmm13, %xmm15, %xmm14); + vpshufb .Lbswap128_mask rRIP, %xmm13, %xmm13; + vmovdqu %xmm13, (%rcx); + +.align 4 +.Lload_ctr_done: + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm15; + vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor 13 * 32(%rax), %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_enc_blk32; + + vpxor 0 * 32(%rdx), %ymm7, %ymm7; + vpxor 1 * 32(%rdx), %ymm6, %ymm6; + vpxor 2 * 32(%rdx), %ymm5, %ymm5; + vpxor 3 * 32(%rdx), %ymm4, %ymm4; + vpxor 4 * 32(%rdx), %ymm3, %ymm3; + vpxor 5 * 32(%rdx), %ymm2, %ymm2; + vpxor 6 * 32(%rdx), %ymm1, %ymm1; + vpxor 7 * 32(%rdx), %ymm0, %ymm0; + vpxor 8 * 32(%rdx), %ymm15, %ymm15; + vpxor 9 * 32(%rdx), %ymm14, %ymm14; + vpxor 10 * 32(%rdx), %ymm13, %ymm13; + vpxor 11 * 32(%rdx), %ymm12, %ymm12; + vpxor 12 * 32(%rdx), %ymm11, %ymm11; + vpxor 13 * 32(%rdx), %ymm10, %ymm10; + vpxor 14 * 32(%rdx), %ymm9, %ymm9; + vpxor 15 * 32(%rdx), %ymm8, %ymm8; + leaq 
32 * 16(%rdx), %rdx; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx2_ctr_enc,.-_gcry_camellia_aesni_avx2_ctr_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_cbc_dec +ELF(.type _gcry_camellia_aesni_avx2_cbc_dec,@function;) + +_gcry_camellia_aesni_avx2_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: iv + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + movq %rcx, %r9; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + subq $(16 * 32), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + inpack32_pre(%ymm0, %ymm1, %ymm2, %ymm3, %ymm4, %ymm5, %ymm6, %ymm7, + %ymm8, %ymm9, %ymm10, %ymm11, %ymm12, %ymm13, %ymm14, + %ymm15, %rdx, (key_table)(CTX, %r8, 8)); + + call __camellia_dec_blk32; + + /* XOR output with IV */ + vmovdqu %ymm8, (%rax); + vmovdqu (%r9), %xmm8; + vinserti128 $1, (%rdx), %ymm8, %ymm8; + vpxor %ymm8, %ymm7, %ymm7; + vmovdqu (%rax), %ymm8; + vpxor (0 * 32 + 16)(%rdx), %ymm6, %ymm6; + vpxor (1 * 32 + 16)(%rdx), %ymm5, %ymm5; + vpxor (2 * 32 + 16)(%rdx), %ymm4, %ymm4; + vpxor (3 * 32 + 16)(%rdx), %ymm3, %ymm3; + vpxor (4 * 32 + 16)(%rdx), %ymm2, %ymm2; + vpxor (5 * 32 + 16)(%rdx), %ymm1, %ymm1; + vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm0; + vpxor (7 * 32 + 16)(%rdx), %ymm15, %ymm15; + vpxor (8 * 32 + 16)(%rdx), %ymm14, %ymm14; + vpxor (9 * 32 + 16)(%rdx), %ymm13, %ymm13; + vpxor (10 * 32 + 16)(%rdx), %ymm12, %ymm12; + vpxor (11 * 32 + 16)(%rdx), %ymm11, %ymm11; + vpxor (12 * 32 + 16)(%rdx), %ymm10, %ymm10; + vpxor (13 * 32 + 16)(%rdx), %ymm9, %ymm9; + vpxor (14 * 32 + 16)(%rdx), %ymm8, %ymm8; + movq (15 * 32 + 16 + 0)(%rdx), %rax; + movq (15 * 32 + 16 + 8)(%rdx), %rcx; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + /* store new IV */ + movq %rax, (0)(%r9); + movq %rcx, (8)(%r9); + + vzeroall; + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx2_cbc_dec,.-_gcry_camellia_aesni_avx2_cbc_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_cfb_dec +ELF(.type _gcry_camellia_aesni_avx2_cfb_dec,@function;) + +_gcry_camellia_aesni_avx2_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: iv + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %eax; + cmovel %eax, %r8d; /* max */ + + subq $(16 * 32), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm0; + vpshufb .Lpack_bswap rRIP, %ymm0, %ymm0; + vmovdqu (%rcx), %xmm15; + vinserti128 $1, (%rdx), %ymm15, %ymm15; + vpxor %ymm15, %ymm0, %ymm15; + vmovdqu (15 * 32 + 16)(%rdx), %xmm1; + vmovdqu %xmm1, (%rcx); /* store new IV */ + vpxor (0 * 32 + 16)(%rdx), %ymm0, %ymm14; + vpxor (1 * 32 + 16)(%rdx), %ymm0, %ymm13; + vpxor (2 * 32 + 16)(%rdx), %ymm0, %ymm12; + vpxor (3 * 32 + 16)(%rdx), %ymm0, %ymm11; + vpxor (4 * 32 + 16)(%rdx), %ymm0, %ymm10; + vpxor (5 * 32 + 16)(%rdx), %ymm0, %ymm9; + vpxor (6 * 32 + 16)(%rdx), %ymm0, %ymm8; + vpxor (7 * 32 + 16)(%rdx), %ymm0, %ymm7; + vpxor (8 * 32 + 16)(%rdx), %ymm0, 
%ymm6; + vpxor (9 * 32 + 16)(%rdx), %ymm0, %ymm5; + vpxor (10 * 32 + 16)(%rdx), %ymm0, %ymm4; + vpxor (11 * 32 + 16)(%rdx), %ymm0, %ymm3; + vpxor (12 * 32 + 16)(%rdx), %ymm0, %ymm2; + vpxor (13 * 32 + 16)(%rdx), %ymm0, %ymm1; + vpxor (14 * 32 + 16)(%rdx), %ymm0, %ymm0; + + call __camellia_enc_blk32; + + vpxor 0 * 32(%rdx), %ymm7, %ymm7; + vpxor 1 * 32(%rdx), %ymm6, %ymm6; + vpxor 2 * 32(%rdx), %ymm5, %ymm5; + vpxor 3 * 32(%rdx), %ymm4, %ymm4; + vpxor 4 * 32(%rdx), %ymm3, %ymm3; + vpxor 5 * 32(%rdx), %ymm2, %ymm2; + vpxor 6 * 32(%rdx), %ymm1, %ymm1; + vpxor 7 * 32(%rdx), %ymm0, %ymm0; + vpxor 8 * 32(%rdx), %ymm15, %ymm15; + vpxor 9 * 32(%rdx), %ymm14, %ymm14; + vpxor 10 * 32(%rdx), %ymm13, %ymm13; + vpxor 11 * 32(%rdx), %ymm12, %ymm12; + vpxor 12 * 32(%rdx), %ymm11, %ymm11; + vpxor 13 * 32(%rdx), %ymm10, %ymm10; + vpxor 14 * 32(%rdx), %ymm9, %ymm9; + vpxor 15 * 32(%rdx), %ymm8, %ymm8; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx2_cfb_dec,.-_gcry_camellia_aesni_avx2_cfb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_enc +ELF(.type _gcry_camellia_aesni_avx2_ocb_enc,@function;) + +_gcry_camellia_aesni_avx2_ocb_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[32]) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rsp); + movq %r11, (16 * 32 + 1 * 8)(%rsp); + movq %r12, (16 * 32 + 2 * 8)(%rsp); + movq %r13, (16 * 32 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8); + + vmovdqu (%rcx), %xmm14; + vmovdqu (%r8), %xmm13; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rdx), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128 $1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm13, %ymm13; \ + vpxor yreg, %ymm15, yreg; \ + vmovdqu %ymm15, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, %ymm0); + vmovdqu %ymm0, (13 * 32)(%rax); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r9), %r10; + movq (17 * 8)(%r9), %r11; + movq (18 * 8)(%r9), %r12; + movq (19 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r9), %r10; + movq (21 * 
8)(%r9), %r11; + movq (22 * 8)(%r9), %r12; + movq (23 * 8)(%r9), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r9), %r10; + movq (25 * 8)(%r9), %r11; + movq (26 * 8)(%r9), %r12; + movq (27 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, %ymm2); + movq (28 * 8)(%r9), %r10; + movq (29 * 8)(%r9), %r11; + movq (30 * 8)(%r9), %r12; + movq (31 * 8)(%r9), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vextracti128 $1, %ymm13, %xmm15; + vmovdqu %xmm14, (%rcx); + vpxor %xmm13, %xmm15, %xmm15; + vmovdqu %xmm15, (%r8); + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r10d; + cmovel %r10d, %r8d; /* max */ + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm15; + vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor 13 * 32(%rax), %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_enc_blk32; + + vpxor 0 * 32(%rsi), %ymm7, %ymm7; + vpxor 1 * 32(%rsi), %ymm6, %ymm6; + vpxor 2 * 32(%rsi), %ymm5, %ymm5; + vpxor 3 * 32(%rsi), %ymm4, %ymm4; + vpxor 4 * 32(%rsi), %ymm3, %ymm3; + vpxor 5 * 32(%rsi), %ymm2, %ymm2; + vpxor 6 * 32(%rsi), %ymm1, %ymm1; + vpxor 7 * 32(%rsi), %ymm0, %ymm0; + vpxor 8 * 32(%rsi), %ymm15, %ymm15; + vpxor 9 * 32(%rsi), %ymm14, %ymm14; + vpxor 10 * 32(%rsi), %ymm13, %ymm13; + vpxor 11 * 32(%rsi), %ymm12, %ymm12; + vpxor 12 * 32(%rsi), %ymm11, %ymm11; + vpxor 13 * 32(%rsi), %ymm10, %ymm10; + vpxor 14 * 32(%rsi), %ymm9, %ymm9; + vpxor 15 * 32(%rsi), %ymm8, %ymm8; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + movq (16 * 32 + 0 * 8)(%rsp), %r10; + movq (16 * 32 + 1 * 8)(%rsp), %r11; + movq (16 * 32 + 2 * 8)(%rsp), %r12; + movq (16 * 32 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx2_ocb_enc,.-_gcry_camellia_aesni_avx2_ocb_enc;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_dec +ELF(.type _gcry_camellia_aesni_avx2_ocb_dec,@function;) + +_gcry_camellia_aesni_avx2_ocb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (32 blocks) + * %rdx: src (32 blocks) + * %rcx: offset + * %r8 : checksum + * %r9 : L pointers (void *L[32]) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rsp); + movq %r11, (16 * 32 + 1 * 8)(%rsp); + movq %r12, (16 * 32 + 2 * 8)(%rsp); + movq %r13, (16 * 32 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8); + + vmovdqu (%rcx), %xmm14; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 
32)(%rdx), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128 $1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm15, yreg; \ + vmovdqu %ymm15, (n * 32)(%rsi); + + movq (0 * 8)(%r9), %r10; + movq (1 * 8)(%r9), %r11; + movq (2 * 8)(%r9), %r12; + movq (3 * 8)(%r9), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); + movq (4 * 8)(%r9), %r10; + movq (5 * 8)(%r9), %r11; + movq (6 * 8)(%r9), %r12; + movq (7 * 8)(%r9), %r13; + OCB_INPUT(2, %r10, %r11, %ymm13); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r9), %r10; + movq (9 * 8)(%r9), %r11; + movq (10 * 8)(%r9), %r12; + movq (11 * 8)(%r9), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r9), %r10; + movq (13 * 8)(%r9), %r11; + movq (14 * 8)(%r9), %r12; + movq (15 * 8)(%r9), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r9), %r10; + movq (17 * 8)(%r9), %r11; + movq (18 * 8)(%r9), %r12; + movq (19 * 8)(%r9), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r9), %r10; + movq (21 * 8)(%r9), %r11; + movq (22 * 8)(%r9), %r12; + movq (23 * 8)(%r9), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r9), %r10; + movq (25 * 8)(%r9), %r11; + movq (26 * 8)(%r9), %r12; + movq (27 * 8)(%r9), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, %ymm2); + movq (28 * 8)(%r9), %r10; + movq (29 * 8)(%r9), %r11; + movq (30 * 8)(%r9), %r12; + movq (31 * 8)(%r9), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rcx); + + movq %r8, %r10; + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r9d; + cmovel %r9d, %r8d; /* max */ + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX, %r8, 8), %ymm15; + vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor %ymm13, %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_dec_blk32; + + vpxor 0 * 32(%rsi), %ymm7, %ymm7; + vpxor 1 * 32(%rsi), %ymm6, %ymm6; + vpxor 2 * 32(%rsi), %ymm5, %ymm5; + vpxor 3 * 32(%rsi), %ymm4, %ymm4; + vpxor 4 * 32(%rsi), %ymm3, %ymm3; + vpxor 5 * 32(%rsi), %ymm2, %ymm2; + vpxor 6 * 32(%rsi), %ymm1, %ymm1; + vpxor 7 * 32(%rsi), %ymm0, %ymm0; + vmovdqu %ymm7, (7 * 32)(%rax); + vmovdqu %ymm6, (6 * 32)(%rax); + vpxor 8 * 32(%rsi), %ymm15, %ymm15; + vpxor 9 * 32(%rsi), %ymm14, %ymm14; + vpxor 10 * 32(%rsi), %ymm13, %ymm13; + vpxor 11 * 32(%rsi), %ymm12, %ymm12; + vpxor 12 * 32(%rsi), %ymm11, %ymm11; + vpxor 13 * 32(%rsi), %ymm10, %ymm10; + vpxor 14 * 32(%rsi), %ymm9, %ymm9; + vpxor 15 * 32(%rsi), %ymm8, %ymm8; + + /* Checksum_i = Checksum_{i-1} xor P_i */ + + vpxor %ymm5, %ymm7, %ymm7; + vpxor %ymm4, %ymm6, %ymm6; + vpxor %ymm3, %ymm7, %ymm7; + vpxor %ymm2, %ymm6, %ymm6; + vpxor %ymm1, %ymm7, %ymm7; + vpxor %ymm0, %ymm6, %ymm6; + vpxor %ymm15, %ymm7, %ymm7; + vpxor %ymm14, %ymm6, %ymm6; + vpxor %ymm13, %ymm7, %ymm7; + vpxor %ymm12, %ymm6, %ymm6; + 
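+	/* Continue folding the decrypted blocks into the two checksum
+	   accumulators %ymm7/%ymm6; they are combined and reduced to a
+	   single 128-bit value below before being XORed into (%r10). */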
vpxor %ymm11, %ymm7, %ymm7; + vpxor %ymm10, %ymm6, %ymm6; + vpxor %ymm9, %ymm7, %ymm7; + vpxor %ymm8, %ymm6, %ymm6; + vpxor %ymm7, %ymm6, %ymm7; + + vextracti128 $1, %ymm7, %xmm6; + vpxor %xmm6, %xmm7, %xmm7; + vpxor (%r10), %xmm7, %xmm7; + vmovdqu %xmm7, (%r10); + + vmovdqu 7 * 32(%rax), %ymm7; + vmovdqu 6 * 32(%rax), %ymm6; + + write_output(%ymm7, %ymm6, %ymm5, %ymm4, %ymm3, %ymm2, %ymm1, %ymm0, + %ymm15, %ymm14, %ymm13, %ymm12, %ymm11, %ymm10, %ymm9, + %ymm8, %rsi); + + vzeroall; + + movq (16 * 32 + 0 * 8)(%rsp), %r10; + movq (16 * 32 + 1 * 8)(%rsp), %r11; + movq (16 * 32 + 2 * 8)(%rsp), %r12; + movq (16 * 32 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx2_ocb_dec,.-_gcry_camellia_aesni_avx2_ocb_dec;) + +.align 8 +.globl _gcry_camellia_aesni_avx2_ocb_auth +ELF(.type _gcry_camellia_aesni_avx2_ocb_auth,@function;) + +_gcry_camellia_aesni_avx2_ocb_auth: + /* input: + * %rdi: ctx, CTX + * %rsi: abuf (16 blocks) + * %rdx: offset + * %rcx: checksum + * %r8 : L pointers (void *L[16]) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + subq $(16 * 32 + 4 * 8), %rsp; + andq $~63, %rsp; + movq %rsp, %rax; + + movq %r10, (16 * 32 + 0 * 8)(%rsp); + movq %r11, (16 * 32 + 1 * 8)(%rsp); + movq %r12, (16 * 32 + 2 * 8)(%rsp); + movq %r13, (16 * 32 + 3 * 8)(%rsp); + CFI_REG_ON_STACK(r10, 16 * 32 + 0 * 8); + CFI_REG_ON_STACK(r11, 16 * 32 + 1 * 8); + CFI_REG_ON_STACK(r12, 16 * 32 + 2 * 8); + CFI_REG_ON_STACK(r13, 16 * 32 + 3 * 8); + + vmovdqu (%rdx), %xmm14; + + /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */ + /* Checksum_i = Checksum_{i-1} xor P_i */ + /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */ + +#define OCB_INPUT(n, l0reg, l1reg, yreg) \ + vmovdqu (n * 32)(%rsi), yreg; \ + vpxor (l0reg), %xmm14, %xmm15; \ + vpxor (l1reg), %xmm15, %xmm14; \ + vinserti128 $1, %xmm14, %ymm15, %ymm15; \ + vpxor yreg, %ymm15, yreg; + + movq (0 * 8)(%r8), %r10; + movq (1 * 8)(%r8), %r11; + movq (2 * 8)(%r8), %r12; + movq (3 * 8)(%r8), %r13; + OCB_INPUT(0, %r10, %r11, %ymm0); + vmovdqu %ymm0, (15 * 32)(%rax); + OCB_INPUT(1, %r12, %r13, %ymm0); + vmovdqu %ymm0, (14 * 32)(%rax); + movq (4 * 8)(%r8), %r10; + movq (5 * 8)(%r8), %r11; + movq (6 * 8)(%r8), %r12; + movq (7 * 8)(%r8), %r13; + OCB_INPUT(2, %r10, %r11, %ymm13); + OCB_INPUT(3, %r12, %r13, %ymm12); + movq (8 * 8)(%r8), %r10; + movq (9 * 8)(%r8), %r11; + movq (10 * 8)(%r8), %r12; + movq (11 * 8)(%r8), %r13; + OCB_INPUT(4, %r10, %r11, %ymm11); + OCB_INPUT(5, %r12, %r13, %ymm10); + movq (12 * 8)(%r8), %r10; + movq (13 * 8)(%r8), %r11; + movq (14 * 8)(%r8), %r12; + movq (15 * 8)(%r8), %r13; + OCB_INPUT(6, %r10, %r11, %ymm9); + OCB_INPUT(7, %r12, %r13, %ymm8); + movq (16 * 8)(%r8), %r10; + movq (17 * 8)(%r8), %r11; + movq (18 * 8)(%r8), %r12; + movq (19 * 8)(%r8), %r13; + OCB_INPUT(8, %r10, %r11, %ymm7); + OCB_INPUT(9, %r12, %r13, %ymm6); + movq (20 * 8)(%r8), %r10; + movq (21 * 8)(%r8), %r11; + movq (22 * 8)(%r8), %r12; + movq (23 * 8)(%r8), %r13; + OCB_INPUT(10, %r10, %r11, %ymm5); + OCB_INPUT(11, %r12, %r13, %ymm4); + movq (24 * 8)(%r8), %r10; + movq (25 * 8)(%r8), %r11; + movq (26 * 8)(%r8), %r12; + movq (27 * 8)(%r8), %r13; + OCB_INPUT(12, %r10, %r11, %ymm3); + OCB_INPUT(13, %r12, %r13, %ymm2); + movq (28 * 8)(%r8), %r10; + movq (29 * 8)(%r8), %r11; + movq (30 * 8)(%r8), %r12; + movq (31 * 8)(%r8), %r13; + OCB_INPUT(14, %r10, %r11, %ymm1); + 
OCB_INPUT(15, %r12, %r13, %ymm0); +#undef OCB_INPUT + + vmovdqu %xmm14, (%rdx); + + cmpl $128, key_bitlength(CTX); + movl $32, %r8d; + movl $24, %r10d; + cmovel %r10d, %r8d; /* max */ + + movq %rcx, %r10; + + /* inpack16_pre: */ + vpbroadcastq (key_table)(CTX), %ymm15; + vpshufb .Lpack_bswap rRIP, %ymm15, %ymm15; + vpxor %ymm0, %ymm15, %ymm0; + vpxor %ymm1, %ymm15, %ymm1; + vpxor %ymm2, %ymm15, %ymm2; + vpxor %ymm3, %ymm15, %ymm3; + vpxor %ymm4, %ymm15, %ymm4; + vpxor %ymm5, %ymm15, %ymm5; + vpxor %ymm6, %ymm15, %ymm6; + vpxor %ymm7, %ymm15, %ymm7; + vpxor %ymm8, %ymm15, %ymm8; + vpxor %ymm9, %ymm15, %ymm9; + vpxor %ymm10, %ymm15, %ymm10; + vpxor %ymm11, %ymm15, %ymm11; + vpxor %ymm12, %ymm15, %ymm12; + vpxor %ymm13, %ymm15, %ymm13; + vpxor 14 * 32(%rax), %ymm15, %ymm14; + vpxor 15 * 32(%rax), %ymm15, %ymm15; + + call __camellia_enc_blk32; + + vpxor %ymm7, %ymm6, %ymm6; + vpxor %ymm5, %ymm4, %ymm4; + vpxor %ymm3, %ymm2, %ymm2; + vpxor %ymm1, %ymm0, %ymm0; + vpxor %ymm15, %ymm14, %ymm14; + vpxor %ymm13, %ymm12, %ymm12; + vpxor %ymm11, %ymm10, %ymm10; + vpxor %ymm9, %ymm8, %ymm8; + + vpxor %ymm6, %ymm4, %ymm4; + vpxor %ymm2, %ymm0, %ymm0; + vpxor %ymm14, %ymm12, %ymm12; + vpxor %ymm10, %ymm8, %ymm8; + + vpxor %ymm4, %ymm0, %ymm0; + vpxor %ymm12, %ymm8, %ymm8; + + vpxor %ymm0, %ymm8, %ymm0; + + vextracti128 $1, %ymm0, %xmm1; + vpxor (%r10), %xmm0, %xmm0; + vpxor %xmm0, %xmm1, %xmm0; + vmovdqu %xmm0, (%r10); + + vzeroall; + + movq (16 * 32 + 0 * 8)(%rsp), %r10; + movq (16 * 32 + 1 * 8)(%rsp), %r11; + movq (16 * 32 + 2 * 8)(%rsp), %r12; + movq (16 * 32 + 3 * 8)(%rsp), %r13; + CFI_RESTORE(%r10); + CFI_RESTORE(%r11); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_camellia_aesni_avx2_ocb_auth,.-_gcry_camellia_aesni_avx2_ocb_auth;) + +#endif /*defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)*/ +#endif /*__x86_64*/ diff --git a/comm/third_party/libgcrypt/cipher/camellia-arm.S b/comm/third_party/libgcrypt/cipher/camellia-arm.S new file mode 100644 index 0000000000..a3d87d1109 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/camellia-arm.S @@ -0,0 +1,626 @@ +/* camellia-arm.S - ARM assembly implementation of Camellia cipher + * + * Copyright (C) 2013 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */
+
+#include <config.h>
+
+#if defined(__ARMEL__)
+#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+
+.text
+
+.syntax unified
+.arm
+
+#ifdef __PIC__
+#  define GET_DATA_POINTER(reg, name, rtmp) \
+		ldr reg, 1f; \
+		ldr rtmp, 2f; \
+		b 3f; \
+	1:	.word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+	2:	.word name(GOT); \
+	3:	add reg, pc, reg; \
+		ldr reg, [reg, rtmp];
+#else
+#  define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* struct camellia_ctx: */
+#define key_table 0
+
+/* register macros */
+#define CTX %r0
+#define RTAB1 %ip
+#define RTAB3 %r1
+#define RMASK %lr
+
+#define IL %r2
+#define IR %r3
+
+#define XL %r4
+#define XR %r5
+#define YL %r6
+#define YR %r7
+
+#define RT0 %r8
+#define RT1 %r9
+#define RT2 %r10
+#define RT3 %r11
+
+/* helper macros */
+#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \
+	ldrb rout, [rsrc, #((offs) + 3)]; \
+	ldrb rtmp, [rsrc, #((offs) + 2)]; \
+	orr rout, rout, rtmp, lsl #8; \
+	ldrb rtmp, [rsrc, #((offs) + 1)]; \
+	orr rout, rout, rtmp, lsl #16; \
+	ldrb rtmp, [rsrc, #((offs) + 0)]; \
+	orr rout, rout, rtmp, lsl #24;
+
+#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \
+	mov rtmp0, rin, lsr #8; \
+	strb rin, [rdst, #((offs) + 3)]; \
+	mov rtmp1, rin, lsr #16; \
+	strb rtmp0, [rdst, #((offs) + 2)]; \
+	mov rtmp0, rin, lsr #24; \
+	strb rtmp1, [rdst, #((offs) + 1)]; \
+	strb rtmp0, [rdst, #((offs) + 0)];
+
+#ifdef __ARMEL__
+#ifdef HAVE_ARM_ARCH_V6
+	#define host_to_be(reg, rtmp) \
+		rev reg, reg;
+	#define be_to_host(reg, rtmp) \
+		rev reg, reg;
+#else
+	#define host_to_be(reg, rtmp) \
+		eor rtmp, reg, reg, ror #16; \
+		mov rtmp, rtmp, lsr #8; \
+		bic rtmp, rtmp, #65280; \
+		eor reg, rtmp, reg, ror #8;
+	#define be_to_host(reg, rtmp) \
+		eor rtmp, reg, reg, ror #16; \
+		mov rtmp, rtmp, lsr #8; \
+		bic rtmp, rtmp, #65280; \
+		eor reg, rtmp, reg, ror #8;
+#endif
+#else
+	/* nop on big-endian */
+	#define host_to_be(reg, rtmp) /*_*/
+	#define be_to_host(reg, rtmp) /*_*/
+#endif
+
+#define ldr_input_aligned_be(rin, a, b, c, d, rtmp) \
+	ldr a, [rin, #0]; \
+	ldr b, [rin, #4]; \
+	be_to_host(a, rtmp); \
+	ldr c, [rin, #8]; \
+	be_to_host(b, rtmp); \
+	ldr d, [rin, #12]; \
+	be_to_host(c, rtmp); \
+	be_to_host(d, rtmp);
+
+#define str_output_aligned_be(rout, a, b, c, d, rtmp) \
+	be_to_host(a, rtmp); \
+	be_to_host(b, rtmp); \
+	str a, [rout, #0]; \
+	be_to_host(c, rtmp); \
+	str b, [rout, #4]; \
+	be_to_host(d, rtmp); \
+	str c, [rout, #8]; \
+	str d, [rout, #12];
+
+#ifdef __ARM_FEATURE_UNALIGNED
+	/* unaligned word reads/writes allowed */
+	#define ldr_input_be(rin, ra, rb, rc, rd, rtmp) \
+		ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp)
+
+	#define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+		str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0)
+#else
+	/* need to handle unaligned reads/writes by byte reads */
+	#define ldr_input_be(rin, ra, rb, rc, rd, rtmp0) \
+		tst rin, #3; \
+		beq 1f; \
+			ldr_unaligned_be(ra, rin, 0, rtmp0); \
+			ldr_unaligned_be(rb, rin, 4, rtmp0); \
+			ldr_unaligned_be(rc, rin, 8, rtmp0); \
+			ldr_unaligned_be(rd, rin, 12, rtmp0); \
+			b 2f; \
+	1:;\
+		ldr_input_aligned_be(rin, ra, rb, rc, rd, rtmp0); \
+	2:;
+
+	#define str_output_be(rout, ra, rb, rc, rd, rtmp0, rtmp1) \
+		tst rout, #3; \
+		beq 1f; \
+			str_unaligned_be(ra, rout, 0, rtmp0, rtmp1); \
+			str_unaligned_be(rb, rout, 4, rtmp0, rtmp1); \
+			str_unaligned_be(rc, rout, 8, rtmp0, rtmp1); \
+			str_unaligned_be(rd, rout, 12, rtmp0, rtmp1); \
+			b 2f; \
+	1:;\
+		str_output_aligned_be(rout, ra, rb, rc, rd, rtmp0); \
+	2:;
+#endif
+
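+/* Editor's note: a hedged C equivalent of ldr_unaligned_be() above, for
+ * illustration only (not part of the original file):
+ *
+ *   uint32_t load_be32 (const uint8_t *p)
+ *   {
+ *     return ((uint32_t)p[0] << 24) | ((uint32_t)p[1] << 16)
+ *            | ((uint32_t)p[2] << 8) | (uint32_t)p[3];
+ *   }
+ *
+ * i.e. a big-endian 32-bit word is assembled byte by byte, so the macro
+ * also works for source pointers that are not 4-byte aligned.
+ */
+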
+/********************************************************************** + 1-way camellia + **********************************************************************/ +#define roundsm(xl, xr, kl, kr, yl, yr) \ + ldr RT2, [CTX, #(key_table + ((kl) * 4))]; \ + and IR, RMASK, xr, lsl#(4); /*sp1110*/ \ + ldr RT3, [CTX, #(key_table + ((kr) * 4))]; \ + and IL, RMASK, xl, lsr#(24 - 4); /*sp1110*/ \ + and RT0, RMASK, xr, lsr#(16 - 4); /*sp3033*/ \ + ldr IR, [RTAB1, IR]; \ + and RT1, RMASK, xl, lsr#(8 - 4); /*sp3033*/ \ + eor yl, RT2; \ + ldr IL, [RTAB1, IL]; \ + eor yr, RT3; \ + \ + ldr RT0, [RTAB3, RT0]; \ + add RTAB1, #4; \ + ldr RT1, [RTAB3, RT1]; \ + add RTAB3, #4; \ + \ + and RT2, RMASK, xr, lsr#(24 - 4); /*sp0222*/ \ + and RT3, RMASK, xl, lsr#(16 - 4); /*sp0222*/ \ + \ + eor IR, RT0; \ + eor IL, RT1; \ + \ + ldr RT2, [RTAB1, RT2]; \ + and RT0, RMASK, xr, lsr#(8 - 4); /*sp4404*/ \ + ldr RT3, [RTAB1, RT3]; \ + and RT1, RMASK, xl, lsl#(4); /*sp4404*/ \ + \ + ldr RT0, [RTAB3, RT0]; \ + sub RTAB1, #4; \ + ldr RT1, [RTAB3, RT1]; \ + sub RTAB3, #4; \ + \ + eor IR, RT2; \ + eor IL, RT3; \ + eor IR, RT0; \ + eor IL, RT1; \ + \ + eor IR, IL; \ + eor yr, yr, IL, ror#8; \ + eor yl, IR; \ + eor yr, IR; + +#define enc_rounds(n) \ + roundsm(XL, XR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, XL, XR); + +#define dec_rounds(n) \ + roundsm(XL, XR, ((n) + 7) * 2 + 0, ((n) + 7) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 6) * 2 + 0, ((n) + 6) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 5) * 2 + 0, ((n) + 5) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 4) * 2 + 0, ((n) + 4) * 2 + 1, XL, XR); \ + roundsm(XL, XR, ((n) + 3) * 2 + 0, ((n) + 3) * 2 + 1, YL, YR); \ + roundsm(YL, YR, ((n) + 2) * 2 + 0, ((n) + 2) * 2 + 1, XL, XR); + +/* perform FL and FL⁻¹ */ +#define fls(ll, lr, rl, rr, kll, klr, krl, krr) \ + ldr RT0, [CTX, #(key_table + ((kll) * 4))]; \ + ldr RT2, [CTX, #(key_table + ((krr) * 4))]; \ + and RT0, ll; \ + ldr RT3, [CTX, #(key_table + ((krl) * 4))]; \ + orr RT2, rr; \ + ldr RT1, [CTX, #(key_table + ((klr) * 4))]; \ + eor rl, RT2; \ + eor lr, lr, RT0, ror#31; \ + and RT3, rl; \ + orr RT1, lr; \ + eor ll, RT1; \ + eor rr, rr, RT3, ror#31; + +#define enc_fls(n) \ + fls(XL, XR, YL, YR, \ + (n) * 2 + 0, (n) * 2 + 1, \ + (n) * 2 + 2, (n) * 2 + 3); + +#define dec_fls(n) \ + fls(XL, XR, YL, YR, \ + (n) * 2 + 2, (n) * 2 + 3, \ + (n) * 2 + 0, (n) * 2 + 1); + +#define inpack(n) \ + ldr_input_be(%r2, XL, XR, YL, YR, RT0); \ + ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ + ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ + eor XL, RT0; \ + eor XR, RT1; + +#define outunpack(n) \ + ldr RT0, [CTX, #(key_table + ((n) * 8) + 0)]; \ + ldr RT1, [CTX, #(key_table + ((n) * 8) + 4)]; \ + eor YL, RT0; \ + eor YR, RT1; \ + str_output_be(%r1, YL, YR, XL, XR, RT0, RT1); + +.align 3 +.globl _gcry_camellia_arm_encrypt_block +.type _gcry_camellia_arm_encrypt_block,%function; + +_gcry_camellia_arm_encrypt_block: + /* input: + * %r0: keytable + * %r1: dst + * %r2: src + * %r3: keybitlen + */ + push {%r1, %r4-%r11, %ip, %lr}; + + GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3); + mov RMASK, #0xff; + add RTAB3, RTAB1, #(2 * 4); + push {%r3}; + mov RMASK, RMASK, lsl#4 /* byte mask */ + + inpack(0); + + 
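+	/* 128-bit keys run 18 rounds: three 6-round groups with FL/FL⁻¹
+	   layers in between; 192/256-bit keys add one more FL/FL⁻¹ layer
+	   and six further rounds (see .Lenc_256 below). */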
enc_rounds(0); + enc_fls(8); + enc_rounds(8); + enc_fls(16); + enc_rounds(16); + + pop {RT0}; + cmp RT0, #(16 * 8); + bne .Lenc_256; + + pop {%r1}; + outunpack(24); + + pop {%r4-%r11, %ip, %pc}; +.ltorg + +.Lenc_256: + enc_fls(24); + enc_rounds(24); + + pop {%r1}; + outunpack(32); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_camellia_arm_encrypt_block,.-_gcry_camellia_arm_encrypt_block; + +.align 3 +.globl _gcry_camellia_arm_decrypt_block +.type _gcry_camellia_arm_decrypt_block,%function; + +_gcry_camellia_arm_decrypt_block: + /* input: + * %r0: keytable + * %r1: dst + * %r2: src + * %r3: keybitlen + */ + push {%r1, %r4-%r11, %ip, %lr}; + + GET_DATA_POINTER(RTAB1, .Lcamellia_sp1110, RTAB3); + mov RMASK, #0xff; + add RTAB3, RTAB1, #(2 * 4); + mov RMASK, RMASK, lsl#4 /* byte mask */ + + cmp %r3, #(16 * 8); + bne .Ldec_256; + + inpack(24); + +.Ldec_128: + dec_rounds(16); + dec_fls(16); + dec_rounds(8); + dec_fls(8); + dec_rounds(0); + + pop {%r1}; + outunpack(0); + + pop {%r4-%r11, %ip, %pc}; +.ltorg + +.Ldec_256: + inpack(32); + dec_rounds(24); + dec_fls(24); + + b .Ldec_128; +.ltorg +.size _gcry_camellia_arm_decrypt_block,.-_gcry_camellia_arm_decrypt_block; + +.data + +/* Encryption/Decryption tables */ +.align 5 +.Lcamellia_sp1110: +.long 0x70707000 +.Lcamellia_sp0222: + .long 0x00e0e0e0 +.Lcamellia_sp3033: + .long 0x38003838 +.Lcamellia_sp4404: + .long 0x70700070 +.long 0x82828200, 0x00050505, 0x41004141, 0x2c2c002c +.long 0x2c2c2c00, 0x00585858, 0x16001616, 0xb3b300b3 +.long 0xececec00, 0x00d9d9d9, 0x76007676, 0xc0c000c0 +.long 0xb3b3b300, 0x00676767, 0xd900d9d9, 0xe4e400e4 +.long 0x27272700, 0x004e4e4e, 0x93009393, 0x57570057 +.long 0xc0c0c000, 0x00818181, 0x60006060, 0xeaea00ea +.long 0xe5e5e500, 0x00cbcbcb, 0xf200f2f2, 0xaeae00ae +.long 0xe4e4e400, 0x00c9c9c9, 0x72007272, 0x23230023 +.long 0x85858500, 0x000b0b0b, 0xc200c2c2, 0x6b6b006b +.long 0x57575700, 0x00aeaeae, 0xab00abab, 0x45450045 +.long 0x35353500, 0x006a6a6a, 0x9a009a9a, 0xa5a500a5 +.long 0xeaeaea00, 0x00d5d5d5, 0x75007575, 0xeded00ed +.long 0x0c0c0c00, 0x00181818, 0x06000606, 0x4f4f004f +.long 0xaeaeae00, 0x005d5d5d, 0x57005757, 0x1d1d001d +.long 0x41414100, 0x00828282, 0xa000a0a0, 0x92920092 +.long 0x23232300, 0x00464646, 0x91009191, 0x86860086 +.long 0xefefef00, 0x00dfdfdf, 0xf700f7f7, 0xafaf00af +.long 0x6b6b6b00, 0x00d6d6d6, 0xb500b5b5, 0x7c7c007c +.long 0x93939300, 0x00272727, 0xc900c9c9, 0x1f1f001f +.long 0x45454500, 0x008a8a8a, 0xa200a2a2, 0x3e3e003e +.long 0x19191900, 0x00323232, 0x8c008c8c, 0xdcdc00dc +.long 0xa5a5a500, 0x004b4b4b, 0xd200d2d2, 0x5e5e005e +.long 0x21212100, 0x00424242, 0x90009090, 0x0b0b000b +.long 0xededed00, 0x00dbdbdb, 0xf600f6f6, 0xa6a600a6 +.long 0x0e0e0e00, 0x001c1c1c, 0x07000707, 0x39390039 +.long 0x4f4f4f00, 0x009e9e9e, 0xa700a7a7, 0xd5d500d5 +.long 0x4e4e4e00, 0x009c9c9c, 0x27002727, 0x5d5d005d +.long 0x1d1d1d00, 0x003a3a3a, 0x8e008e8e, 0xd9d900d9 +.long 0x65656500, 0x00cacaca, 0xb200b2b2, 0x5a5a005a +.long 0x92929200, 0x00252525, 0x49004949, 0x51510051 +.long 0xbdbdbd00, 0x007b7b7b, 0xde00dede, 0x6c6c006c +.long 0x86868600, 0x000d0d0d, 0x43004343, 0x8b8b008b +.long 0xb8b8b800, 0x00717171, 0x5c005c5c, 0x9a9a009a +.long 0xafafaf00, 0x005f5f5f, 0xd700d7d7, 0xfbfb00fb +.long 0x8f8f8f00, 0x001f1f1f, 0xc700c7c7, 0xb0b000b0 +.long 0x7c7c7c00, 0x00f8f8f8, 0x3e003e3e, 0x74740074 +.long 0xebebeb00, 0x00d7d7d7, 0xf500f5f5, 0x2b2b002b +.long 0x1f1f1f00, 0x003e3e3e, 0x8f008f8f, 0xf0f000f0 +.long 0xcecece00, 0x009d9d9d, 0x67006767, 0x84840084 +.long 0x3e3e3e00, 0x007c7c7c, 0x1f001f1f, 0xdfdf00df 
+.long 0x30303000, 0x00606060, 0x18001818, 0xcbcb00cb +.long 0xdcdcdc00, 0x00b9b9b9, 0x6e006e6e, 0x34340034 +.long 0x5f5f5f00, 0x00bebebe, 0xaf00afaf, 0x76760076 +.long 0x5e5e5e00, 0x00bcbcbc, 0x2f002f2f, 0x6d6d006d +.long 0xc5c5c500, 0x008b8b8b, 0xe200e2e2, 0xa9a900a9 +.long 0x0b0b0b00, 0x00161616, 0x85008585, 0xd1d100d1 +.long 0x1a1a1a00, 0x00343434, 0x0d000d0d, 0x04040004 +.long 0xa6a6a600, 0x004d4d4d, 0x53005353, 0x14140014 +.long 0xe1e1e100, 0x00c3c3c3, 0xf000f0f0, 0x3a3a003a +.long 0x39393900, 0x00727272, 0x9c009c9c, 0xdede00de +.long 0xcacaca00, 0x00959595, 0x65006565, 0x11110011 +.long 0xd5d5d500, 0x00ababab, 0xea00eaea, 0x32320032 +.long 0x47474700, 0x008e8e8e, 0xa300a3a3, 0x9c9c009c +.long 0x5d5d5d00, 0x00bababa, 0xae00aeae, 0x53530053 +.long 0x3d3d3d00, 0x007a7a7a, 0x9e009e9e, 0xf2f200f2 +.long 0xd9d9d900, 0x00b3b3b3, 0xec00ecec, 0xfefe00fe +.long 0x01010100, 0x00020202, 0x80008080, 0xcfcf00cf +.long 0x5a5a5a00, 0x00b4b4b4, 0x2d002d2d, 0xc3c300c3 +.long 0xd6d6d600, 0x00adadad, 0x6b006b6b, 0x7a7a007a +.long 0x51515100, 0x00a2a2a2, 0xa800a8a8, 0x24240024 +.long 0x56565600, 0x00acacac, 0x2b002b2b, 0xe8e800e8 +.long 0x6c6c6c00, 0x00d8d8d8, 0x36003636, 0x60600060 +.long 0x4d4d4d00, 0x009a9a9a, 0xa600a6a6, 0x69690069 +.long 0x8b8b8b00, 0x00171717, 0xc500c5c5, 0xaaaa00aa +.long 0x0d0d0d00, 0x001a1a1a, 0x86008686, 0xa0a000a0 +.long 0x9a9a9a00, 0x00353535, 0x4d004d4d, 0xa1a100a1 +.long 0x66666600, 0x00cccccc, 0x33003333, 0x62620062 +.long 0xfbfbfb00, 0x00f7f7f7, 0xfd00fdfd, 0x54540054 +.long 0xcccccc00, 0x00999999, 0x66006666, 0x1e1e001e +.long 0xb0b0b000, 0x00616161, 0x58005858, 0xe0e000e0 +.long 0x2d2d2d00, 0x005a5a5a, 0x96009696, 0x64640064 +.long 0x74747400, 0x00e8e8e8, 0x3a003a3a, 0x10100010 +.long 0x12121200, 0x00242424, 0x09000909, 0x00000000 +.long 0x2b2b2b00, 0x00565656, 0x95009595, 0xa3a300a3 +.long 0x20202000, 0x00404040, 0x10001010, 0x75750075 +.long 0xf0f0f000, 0x00e1e1e1, 0x78007878, 0x8a8a008a +.long 0xb1b1b100, 0x00636363, 0xd800d8d8, 0xe6e600e6 +.long 0x84848400, 0x00090909, 0x42004242, 0x09090009 +.long 0x99999900, 0x00333333, 0xcc00cccc, 0xdddd00dd +.long 0xdfdfdf00, 0x00bfbfbf, 0xef00efef, 0x87870087 +.long 0x4c4c4c00, 0x00989898, 0x26002626, 0x83830083 +.long 0xcbcbcb00, 0x00979797, 0xe500e5e5, 0xcdcd00cd +.long 0xc2c2c200, 0x00858585, 0x61006161, 0x90900090 +.long 0x34343400, 0x00686868, 0x1a001a1a, 0x73730073 +.long 0x7e7e7e00, 0x00fcfcfc, 0x3f003f3f, 0xf6f600f6 +.long 0x76767600, 0x00ececec, 0x3b003b3b, 0x9d9d009d +.long 0x05050500, 0x000a0a0a, 0x82008282, 0xbfbf00bf +.long 0x6d6d6d00, 0x00dadada, 0xb600b6b6, 0x52520052 +.long 0xb7b7b700, 0x006f6f6f, 0xdb00dbdb, 0xd8d800d8 +.long 0xa9a9a900, 0x00535353, 0xd400d4d4, 0xc8c800c8 +.long 0x31313100, 0x00626262, 0x98009898, 0xc6c600c6 +.long 0xd1d1d100, 0x00a3a3a3, 0xe800e8e8, 0x81810081 +.long 0x17171700, 0x002e2e2e, 0x8b008b8b, 0x6f6f006f +.long 0x04040400, 0x00080808, 0x02000202, 0x13130013 +.long 0xd7d7d700, 0x00afafaf, 0xeb00ebeb, 0x63630063 +.long 0x14141400, 0x00282828, 0x0a000a0a, 0xe9e900e9 +.long 0x58585800, 0x00b0b0b0, 0x2c002c2c, 0xa7a700a7 +.long 0x3a3a3a00, 0x00747474, 0x1d001d1d, 0x9f9f009f +.long 0x61616100, 0x00c2c2c2, 0xb000b0b0, 0xbcbc00bc +.long 0xdedede00, 0x00bdbdbd, 0x6f006f6f, 0x29290029 +.long 0x1b1b1b00, 0x00363636, 0x8d008d8d, 0xf9f900f9 +.long 0x11111100, 0x00222222, 0x88008888, 0x2f2f002f +.long 0x1c1c1c00, 0x00383838, 0x0e000e0e, 0xb4b400b4 +.long 0x32323200, 0x00646464, 0x19001919, 0x78780078 +.long 0x0f0f0f00, 0x001e1e1e, 0x87008787, 0x06060006 +.long 0x9c9c9c00, 0x00393939, 0x4e004e4e, 
0xe7e700e7 +.long 0x16161600, 0x002c2c2c, 0x0b000b0b, 0x71710071 +.long 0x53535300, 0x00a6a6a6, 0xa900a9a9, 0xd4d400d4 +.long 0x18181800, 0x00303030, 0x0c000c0c, 0xabab00ab +.long 0xf2f2f200, 0x00e5e5e5, 0x79007979, 0x88880088 +.long 0x22222200, 0x00444444, 0x11001111, 0x8d8d008d +.long 0xfefefe00, 0x00fdfdfd, 0x7f007f7f, 0x72720072 +.long 0x44444400, 0x00888888, 0x22002222, 0xb9b900b9 +.long 0xcfcfcf00, 0x009f9f9f, 0xe700e7e7, 0xf8f800f8 +.long 0xb2b2b200, 0x00656565, 0x59005959, 0xacac00ac +.long 0xc3c3c300, 0x00878787, 0xe100e1e1, 0x36360036 +.long 0xb5b5b500, 0x006b6b6b, 0xda00dada, 0x2a2a002a +.long 0x7a7a7a00, 0x00f4f4f4, 0x3d003d3d, 0x3c3c003c +.long 0x91919100, 0x00232323, 0xc800c8c8, 0xf1f100f1 +.long 0x24242400, 0x00484848, 0x12001212, 0x40400040 +.long 0x08080800, 0x00101010, 0x04000404, 0xd3d300d3 +.long 0xe8e8e800, 0x00d1d1d1, 0x74007474, 0xbbbb00bb +.long 0xa8a8a800, 0x00515151, 0x54005454, 0x43430043 +.long 0x60606000, 0x00c0c0c0, 0x30003030, 0x15150015 +.long 0xfcfcfc00, 0x00f9f9f9, 0x7e007e7e, 0xadad00ad +.long 0x69696900, 0x00d2d2d2, 0xb400b4b4, 0x77770077 +.long 0x50505000, 0x00a0a0a0, 0x28002828, 0x80800080 +.long 0xaaaaaa00, 0x00555555, 0x55005555, 0x82820082 +.long 0xd0d0d000, 0x00a1a1a1, 0x68006868, 0xecec00ec +.long 0xa0a0a000, 0x00414141, 0x50005050, 0x27270027 +.long 0x7d7d7d00, 0x00fafafa, 0xbe00bebe, 0xe5e500e5 +.long 0xa1a1a100, 0x00434343, 0xd000d0d0, 0x85850085 +.long 0x89898900, 0x00131313, 0xc400c4c4, 0x35350035 +.long 0x62626200, 0x00c4c4c4, 0x31003131, 0x0c0c000c +.long 0x97979700, 0x002f2f2f, 0xcb00cbcb, 0x41410041 +.long 0x54545400, 0x00a8a8a8, 0x2a002a2a, 0xefef00ef +.long 0x5b5b5b00, 0x00b6b6b6, 0xad00adad, 0x93930093 +.long 0x1e1e1e00, 0x003c3c3c, 0x0f000f0f, 0x19190019 +.long 0x95959500, 0x002b2b2b, 0xca00caca, 0x21210021 +.long 0xe0e0e000, 0x00c1c1c1, 0x70007070, 0x0e0e000e +.long 0xffffff00, 0x00ffffff, 0xff00ffff, 0x4e4e004e +.long 0x64646400, 0x00c8c8c8, 0x32003232, 0x65650065 +.long 0xd2d2d200, 0x00a5a5a5, 0x69006969, 0xbdbd00bd +.long 0x10101000, 0x00202020, 0x08000808, 0xb8b800b8 +.long 0xc4c4c400, 0x00898989, 0x62006262, 0x8f8f008f +.long 0x00000000, 0x00000000, 0x00000000, 0xebeb00eb +.long 0x48484800, 0x00909090, 0x24002424, 0xcece00ce +.long 0xa3a3a300, 0x00474747, 0xd100d1d1, 0x30300030 +.long 0xf7f7f700, 0x00efefef, 0xfb00fbfb, 0x5f5f005f +.long 0x75757500, 0x00eaeaea, 0xba00baba, 0xc5c500c5 +.long 0xdbdbdb00, 0x00b7b7b7, 0xed00eded, 0x1a1a001a +.long 0x8a8a8a00, 0x00151515, 0x45004545, 0xe1e100e1 +.long 0x03030300, 0x00060606, 0x81008181, 0xcaca00ca +.long 0xe6e6e600, 0x00cdcdcd, 0x73007373, 0x47470047 +.long 0xdadada00, 0x00b5b5b5, 0x6d006d6d, 0x3d3d003d +.long 0x09090900, 0x00121212, 0x84008484, 0x01010001 +.long 0x3f3f3f00, 0x007e7e7e, 0x9f009f9f, 0xd6d600d6 +.long 0xdddddd00, 0x00bbbbbb, 0xee00eeee, 0x56560056 +.long 0x94949400, 0x00292929, 0x4a004a4a, 0x4d4d004d +.long 0x87878700, 0x000f0f0f, 0xc300c3c3, 0x0d0d000d +.long 0x5c5c5c00, 0x00b8b8b8, 0x2e002e2e, 0x66660066 +.long 0x83838300, 0x00070707, 0xc100c1c1, 0xcccc00cc +.long 0x02020200, 0x00040404, 0x01000101, 0x2d2d002d +.long 0xcdcdcd00, 0x009b9b9b, 0xe600e6e6, 0x12120012 +.long 0x4a4a4a00, 0x00949494, 0x25002525, 0x20200020 +.long 0x90909000, 0x00212121, 0x48004848, 0xb1b100b1 +.long 0x33333300, 0x00666666, 0x99009999, 0x99990099 +.long 0x73737300, 0x00e6e6e6, 0xb900b9b9, 0x4c4c004c +.long 0x67676700, 0x00cecece, 0xb300b3b3, 0xc2c200c2 +.long 0xf6f6f600, 0x00ededed, 0x7b007b7b, 0x7e7e007e +.long 0xf3f3f300, 0x00e7e7e7, 0xf900f9f9, 0x05050005 +.long 0x9d9d9d00, 0x003b3b3b, 
0xce00cece, 0xb7b700b7 +.long 0x7f7f7f00, 0x00fefefe, 0xbf00bfbf, 0x31310031 +.long 0xbfbfbf00, 0x007f7f7f, 0xdf00dfdf, 0x17170017 +.long 0xe2e2e200, 0x00c5c5c5, 0x71007171, 0xd7d700d7 +.long 0x52525200, 0x00a4a4a4, 0x29002929, 0x58580058 +.long 0x9b9b9b00, 0x00373737, 0xcd00cdcd, 0x61610061 +.long 0xd8d8d800, 0x00b1b1b1, 0x6c006c6c, 0x1b1b001b +.long 0x26262600, 0x004c4c4c, 0x13001313, 0x1c1c001c +.long 0xc8c8c800, 0x00919191, 0x64006464, 0x0f0f000f +.long 0x37373700, 0x006e6e6e, 0x9b009b9b, 0x16160016 +.long 0xc6c6c600, 0x008d8d8d, 0x63006363, 0x18180018 +.long 0x3b3b3b00, 0x00767676, 0x9d009d9d, 0x22220022 +.long 0x81818100, 0x00030303, 0xc000c0c0, 0x44440044 +.long 0x96969600, 0x002d2d2d, 0x4b004b4b, 0xb2b200b2 +.long 0x6f6f6f00, 0x00dedede, 0xb700b7b7, 0xb5b500b5 +.long 0x4b4b4b00, 0x00969696, 0xa500a5a5, 0x91910091 +.long 0x13131300, 0x00262626, 0x89008989, 0x08080008 +.long 0xbebebe00, 0x007d7d7d, 0x5f005f5f, 0xa8a800a8 +.long 0x63636300, 0x00c6c6c6, 0xb100b1b1, 0xfcfc00fc +.long 0x2e2e2e00, 0x005c5c5c, 0x17001717, 0x50500050 +.long 0xe9e9e900, 0x00d3d3d3, 0xf400f4f4, 0xd0d000d0 +.long 0x79797900, 0x00f2f2f2, 0xbc00bcbc, 0x7d7d007d +.long 0xa7a7a700, 0x004f4f4f, 0xd300d3d3, 0x89890089 +.long 0x8c8c8c00, 0x00191919, 0x46004646, 0x97970097 +.long 0x9f9f9f00, 0x003f3f3f, 0xcf00cfcf, 0x5b5b005b +.long 0x6e6e6e00, 0x00dcdcdc, 0x37003737, 0x95950095 +.long 0xbcbcbc00, 0x00797979, 0x5e005e5e, 0xffff00ff +.long 0x8e8e8e00, 0x001d1d1d, 0x47004747, 0xd2d200d2 +.long 0x29292900, 0x00525252, 0x94009494, 0xc4c400c4 +.long 0xf5f5f500, 0x00ebebeb, 0xfa00fafa, 0x48480048 +.long 0xf9f9f900, 0x00f3f3f3, 0xfc00fcfc, 0xf7f700f7 +.long 0xb6b6b600, 0x006d6d6d, 0x5b005b5b, 0xdbdb00db +.long 0x2f2f2f00, 0x005e5e5e, 0x97009797, 0x03030003 +.long 0xfdfdfd00, 0x00fbfbfb, 0xfe00fefe, 0xdada00da +.long 0xb4b4b400, 0x00696969, 0x5a005a5a, 0x3f3f003f +.long 0x59595900, 0x00b2b2b2, 0xac00acac, 0x94940094 +.long 0x78787800, 0x00f0f0f0, 0x3c003c3c, 0x5c5c005c +.long 0x98989800, 0x00313131, 0x4c004c4c, 0x02020002 +.long 0x06060600, 0x000c0c0c, 0x03000303, 0x4a4a004a +.long 0x6a6a6a00, 0x00d4d4d4, 0x35003535, 0x33330033 +.long 0xe7e7e700, 0x00cfcfcf, 0xf300f3f3, 0x67670067 +.long 0x46464600, 0x008c8c8c, 0x23002323, 0xf3f300f3 +.long 0x71717100, 0x00e2e2e2, 0xb800b8b8, 0x7f7f007f +.long 0xbababa00, 0x00757575, 0x5d005d5d, 0xe2e200e2 +.long 0xd4d4d400, 0x00a9a9a9, 0x6a006a6a, 0x9b9b009b +.long 0x25252500, 0x004a4a4a, 0x92009292, 0x26260026 +.long 0xababab00, 0x00575757, 0xd500d5d5, 0x37370037 +.long 0x42424200, 0x00848484, 0x21002121, 0x3b3b003b +.long 0x88888800, 0x00111111, 0x44004444, 0x96960096 +.long 0xa2a2a200, 0x00454545, 0x51005151, 0x4b4b004b +.long 0x8d8d8d00, 0x001b1b1b, 0xc600c6c6, 0xbebe00be +.long 0xfafafa00, 0x00f5f5f5, 0x7d007d7d, 0x2e2e002e +.long 0x72727200, 0x00e4e4e4, 0x39003939, 0x79790079 +.long 0x07070700, 0x000e0e0e, 0x83008383, 0x8c8c008c +.long 0xb9b9b900, 0x00737373, 0xdc00dcdc, 0x6e6e006e +.long 0x55555500, 0x00aaaaaa, 0xaa00aaaa, 0x8e8e008e +.long 0xf8f8f800, 0x00f1f1f1, 0x7c007c7c, 0xf5f500f5 +.long 0xeeeeee00, 0x00dddddd, 0x77007777, 0xb6b600b6 +.long 0xacacac00, 0x00595959, 0x56005656, 0xfdfd00fd +.long 0x0a0a0a00, 0x00141414, 0x05000505, 0x59590059 +.long 0x36363600, 0x006c6c6c, 0x1b001b1b, 0x98980098 +.long 0x49494900, 0x00929292, 0xa400a4a4, 0x6a6a006a +.long 0x2a2a2a00, 0x00545454, 0x15001515, 0x46460046 +.long 0x68686800, 0x00d0d0d0, 0x34003434, 0xbaba00ba +.long 0x3c3c3c00, 0x00787878, 0x1e001e1e, 0x25250025 +.long 0x38383800, 0x00707070, 0x1c001c1c, 0x42420042 +.long 0xf1f1f100, 
0x00e3e3e3, 0xf800f8f8, 0xa2a200a2 +.long 0xa4a4a400, 0x00494949, 0x52005252, 0xfafa00fa +.long 0x40404000, 0x00808080, 0x20002020, 0x07070007 +.long 0x28282800, 0x00505050, 0x14001414, 0x55550055 +.long 0xd3d3d300, 0x00a7a7a7, 0xe900e9e9, 0xeeee00ee +.long 0x7b7b7b00, 0x00f6f6f6, 0xbd00bdbd, 0x0a0a000a +.long 0xbbbbbb00, 0x00777777, 0xdd00dddd, 0x49490049 +.long 0xc9c9c900, 0x00939393, 0xe400e4e4, 0x68680068 +.long 0x43434300, 0x00868686, 0xa100a1a1, 0x38380038 +.long 0xc1c1c100, 0x00838383, 0xe000e0e0, 0xa4a400a4 +.long 0x15151500, 0x002a2a2a, 0x8a008a8a, 0x28280028 +.long 0xe3e3e300, 0x00c7c7c7, 0xf100f1f1, 0x7b7b007b +.long 0xadadad00, 0x005b5b5b, 0xd600d6d6, 0xc9c900c9 +.long 0xf4f4f400, 0x00e9e9e9, 0x7a007a7a, 0xc1c100c1 +.long 0x77777700, 0x00eeeeee, 0xbb00bbbb, 0xe3e300e3 +.long 0xc7c7c700, 0x008f8f8f, 0xe300e3e3, 0xf4f400f4 +.long 0x80808000, 0x00010101, 0x40004040, 0xc7c700c7 +.long 0x9e9e9e00, 0x003d3d3d, 0x4f004f4f, 0x9e9e009e + +#endif /*HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS*/ +#endif /*__ARM_ARCH >= 6*/ diff --git a/comm/third_party/libgcrypt/cipher/camellia-glue.c b/comm/third_party/libgcrypt/cipher/camellia-glue.c new file mode 100644 index 0000000000..6577b6516a --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/camellia-glue.c @@ -0,0 +1,1097 @@ +/* camellia-glue.c - Glue for the Camellia cipher + * Copyright (C) 2007 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +/* I put all the libgcrypt-specific stuff in this file to keep the + camellia.c/camellia.h files exactly as provided by NTT. If they + update their code, this should make it easier to bring the changes + in. - dshaw + + There is one small change which needs to be done: Include the + following code at the top of camellia.h: */ +#if 0 + +/* To use Camellia with libraries it is often useful to keep the name + * space of the library clean. The following macro is thus useful: + * + * #define CAMELLIA_EXT_SYM_PREFIX foo_ + * + * This prefixes all external symbols with "foo_". 
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+#endif
+#ifdef CAMELLIA_EXT_SYM_PREFIX
+#define CAMELLIA_PREFIX1(x,y) x ## y
+#define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y)
+#define CAMELLIA_PREFIX(x) CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x)
+#define Camellia_Ekeygen CAMELLIA_PREFIX(Camellia_Ekeygen)
+#define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock)
+#define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock)
+#define camellia_decrypt128 CAMELLIA_PREFIX(camellia_decrypt128)
+#define camellia_decrypt256 CAMELLIA_PREFIX(camellia_decrypt256)
+#define camellia_encrypt128 CAMELLIA_PREFIX(camellia_encrypt128)
+#define camellia_encrypt256 CAMELLIA_PREFIX(camellia_encrypt256)
+#define camellia_setup128 CAMELLIA_PREFIX(camellia_setup128)
+#define camellia_setup192 CAMELLIA_PREFIX(camellia_setup192)
+#define camellia_setup256 CAMELLIA_PREFIX(camellia_setup256)
+#endif /*CAMELLIA_EXT_SYM_PREFIX*/
+
+#endif /* Code sample. */
+
+
+#include <config.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "camellia.h"
+#include "bufhelp.h"
+#include "cipher-internal.h"
+#include "cipher-selftest.h"
+
+/* Helper macro to force alignment to 16 bytes. */
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_16  __attribute__ ((aligned (16)))
+#else
+# define ATTR_ALIGNED_16
+#endif
+
+/* USE_AESNI_AVX indicates whether to compile with Intel AES-NI/AVX code. */
+#undef USE_AESNI_AVX
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_AESNI_AVX 1
+# endif
+#endif
+
+/* USE_AESNI_AVX2 indicates whether to compile with Intel AES-NI/AVX2 code. */
+#undef USE_AESNI_AVX2
+#if defined(ENABLE_AESNI_SUPPORT) && defined(ENABLE_AVX2_SUPPORT)
+# if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+#  define USE_AESNI_AVX2 1
+# endif
+#endif
+
+typedef struct
+{
+  KEY_TABLE_TYPE keytable;
+  int keybitlength;
+#ifdef USE_AESNI_AVX
+  unsigned int use_aesni_avx:1; /* AES-NI/AVX implementation shall be used. */
+#endif /*USE_AESNI_AVX*/
+#ifdef USE_AESNI_AVX2
+  unsigned int use_aesni_avx2:1;/* AES-NI/AVX2 implementation shall be used. */
+#endif /*USE_AESNI_AVX2*/
+} CAMELLIA_context;
+
+/* Assembly implementations use SystemV ABI, ABI conversion and additional
+ * stack to store XMM6-XMM15 needed on Win64. */
+#undef ASM_FUNC_ABI
+#undef ASM_EXTRA_STACK
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+# ifdef HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS
+#  define ASM_FUNC_ABI __attribute__((sysv_abi))
+#  define ASM_EXTRA_STACK (10 * 16)
+# else
+#  define ASM_FUNC_ABI
+#  define ASM_EXTRA_STACK 0
+# endif
+#endif
+
+#ifdef USE_AESNI_AVX
+/* Assembler implementations of Camellia using AES-NI and AVX.  Process data
+   in 16 blocks at the same time.
+ */
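+/* Editor's note: the OCB entry points below receive a per-block table of
+ * L-offset pointers so that the assembly never has to compute ntz(i)
+ * itself.  A minimal, hypothetical C sketch of how a caller could fill
+ * such a table (the real callers later in this file unroll the fixed
+ * ntz pattern instead; the names here are illustrative only):
+ */
+#if 0
+#include <stdint.h>
+
+/* Number of trailing zero bits in a non-zero block index. */
+static unsigned int
+ntz64 (uint64_t i)
+{
+  unsigned int n = 0;
+  while ((i & 1) == 0)
+    {
+      i >>= 1;
+      n++;
+    }
+  return n;
+}
+
+/* Fill LS[0..15] with the L-offsets for blocks BLKN+1 .. BLKN+16;
+   OCB chains Offset_i = Offset_{i-1} xor L_{ntz(i)}.  Assumes L[]
+   holds enough precomputed offsets for the given block range. */
+static void
+fill_L_table (uint64_t blkn, const unsigned char *L[], const void *Ls[16])
+{
+  unsigned int j;
+
+  for (j = 1; j <= 16; j++)
+    Ls[j - 1] = L[ntz64 (blkn + j)];
+}
+#endif /* Editorial sketch. */
+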
+extern void _gcry_camellia_aesni_avx_ctr_enc(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_cbc_dec(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_cfb_dec(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_enc(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *offset,
+                                             unsigned char *checksum,
+                                             const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_dec(CAMELLIA_context *ctx,
+                                             unsigned char *out,
+                                             const unsigned char *in,
+                                             unsigned char *offset,
+                                             unsigned char *checksum,
+                                             const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_ocb_auth(CAMELLIA_context *ctx,
+                                              const unsigned char *abuf,
+                                              unsigned char *offset,
+                                              unsigned char *checksum,
+                                              const u64 Ls[16]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx_keygen(CAMELLIA_context *ctx,
+                                            const unsigned char *key,
+                                            unsigned int keylen) ASM_FUNC_ABI;
+#endif
+
+#ifdef USE_AESNI_AVX2
+/* Assembler implementations of Camellia using AES-NI and AVX2.  Process data
+   in 32 blocks at the same time.
+ */
+extern void _gcry_camellia_aesni_avx2_ctr_enc(CAMELLIA_context *ctx,
+                                              unsigned char *out,
+                                              const unsigned char *in,
+                                              unsigned char *ctr) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_cbc_dec(CAMELLIA_context *ctx,
+                                              unsigned char *out,
+                                              const unsigned char *in,
+                                              unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_cfb_dec(CAMELLIA_context *ctx,
+                                              unsigned char *out,
+                                              const unsigned char *in,
+                                              unsigned char *iv) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_enc(CAMELLIA_context *ctx,
+                                              unsigned char *out,
+                                              const unsigned char *in,
+                                              unsigned char *offset,
+                                              unsigned char *checksum,
+                                              const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_dec(CAMELLIA_context *ctx,
+                                              unsigned char *out,
+                                              const unsigned char *in,
+                                              unsigned char *offset,
+                                              unsigned char *checksum,
+                                              const u64 Ls[32]) ASM_FUNC_ABI;
+
+extern void _gcry_camellia_aesni_avx2_ocb_auth(CAMELLIA_context *ctx,
+                                               const unsigned char *abuf,
+                                               unsigned char *offset,
+                                               unsigned char *checksum,
+                                               const u64 Ls[32]) ASM_FUNC_ABI;
+#endif
+
+static const char *selftest(void);
+
+static void _gcry_camellia_ctr_enc (void *context, unsigned char *ctr,
+                                    void *outbuf_arg, const void *inbuf_arg,
+                                    size_t nblocks);
+static void _gcry_camellia_cbc_dec (void *context, unsigned char *iv,
+                                    void *outbuf_arg, const void *inbuf_arg,
+                                    size_t nblocks);
+static void _gcry_camellia_cfb_dec (void *context, unsigned char *iv,
+                                    void *outbuf_arg, const void *inbuf_arg,
+                                    size_t nblocks);
+static size_t _gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg,
+                                        const void *inbuf_arg, size_t nblocks,
+                                        int encrypt);
+static size_t _gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg,
+                                       size_t nblocks);
+
+static gcry_err_code_t
+camellia_setkey(void *c, const byte *key, unsigned keylen,
+                cipher_bulk_ops_t *bulk_ops)
+{
+  CAMELLIA_context *ctx=c;
+  static int initialized=0;
+  static const char *selftest_failed=NULL;
+#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2)
+  unsigned int hwf = _gcry_get_hw_features ();
+#endif
+
+  if(keylen!=16 && keylen!=24 && keylen!=32)
+    return GPG_ERR_INV_KEYLEN;
+
+  if(!initialized)
+    {
+      initialized=1;
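+      /* The self-tests run only once; a failure is latched in
+         SELFTEST_FAILED and makes every later setkey call fail with
+         GPG_ERR_SELFTEST_FAILED. */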
selftest_failed=selftest(); + if(selftest_failed) + log_error("%s\n",selftest_failed); + } + + if(selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + +#ifdef USE_AESNI_AVX + ctx->use_aesni_avx = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX); +#endif +#ifdef USE_AESNI_AVX2 + ctx->use_aesni_avx2 = (hwf & HWF_INTEL_AESNI) && (hwf & HWF_INTEL_AVX2); +#endif + + ctx->keybitlength=keylen*8; + + /* Setup bulk encryption routines. */ + memset (bulk_ops, 0, sizeof(*bulk_ops)); + bulk_ops->cbc_dec = _gcry_camellia_cbc_dec; + bulk_ops->cfb_dec = _gcry_camellia_cfb_dec; + bulk_ops->ctr_enc = _gcry_camellia_ctr_enc; + bulk_ops->ocb_crypt = _gcry_camellia_ocb_crypt; + bulk_ops->ocb_auth = _gcry_camellia_ocb_auth; + + if (0) + { } +#ifdef USE_AESNI_AVX + else if (ctx->use_aesni_avx) + _gcry_camellia_aesni_avx_keygen(ctx, key, keylen); + else +#endif + { + Camellia_Ekeygen(ctx->keybitlength,key,ctx->keytable); + _gcry_burn_stack + ((19+34+34)*sizeof(u32)+2*sizeof(void*) /* camellia_setup256 */ + +(4+32)*sizeof(u32)+2*sizeof(void*) /* camellia_setup192 */ + +0+sizeof(int)+2*sizeof(void*) /* Camellia_Ekeygen */ + +3*2*sizeof(void*) /* Function calls. */ + ); + } + + return 0; +} + +#ifdef USE_ARM_ASM + +/* Assembly implementations of Camellia. */ +extern void _gcry_camellia_arm_encrypt_block(const KEY_TABLE_TYPE keyTable, + byte *outbuf, const byte *inbuf, + const int keybits); + +extern void _gcry_camellia_arm_decrypt_block(const KEY_TABLE_TYPE keyTable, + byte *outbuf, const byte *inbuf, + const int keybits); + +static void Camellia_EncryptBlock(const int keyBitLength, + const unsigned char *plaintext, + const KEY_TABLE_TYPE keyTable, + unsigned char *cipherText) +{ + _gcry_camellia_arm_encrypt_block(keyTable, cipherText, plaintext, + keyBitLength); +} + +static void Camellia_DecryptBlock(const int keyBitLength, + const unsigned char *cipherText, + const KEY_TABLE_TYPE keyTable, + unsigned char *plaintext) +{ + _gcry_camellia_arm_decrypt_block(keyTable, plaintext, cipherText, + keyBitLength); +} + +#ifdef __aarch64__ +# define CAMELLIA_encrypt_stack_burn_size (0) +# define CAMELLIA_decrypt_stack_burn_size (0) +#else +# define CAMELLIA_encrypt_stack_burn_size (15*4) +# define CAMELLIA_decrypt_stack_burn_size (15*4) +#endif + +static unsigned int +camellia_encrypt(void *c, byte *outbuf, const byte *inbuf) +{ + CAMELLIA_context *ctx = c; + Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); + return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size); +} + +static unsigned int +camellia_decrypt(void *c, byte *outbuf, const byte *inbuf) +{ + CAMELLIA_context *ctx=c; + Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); + return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size); +} + +#else /*USE_ARM_ASM*/ + +static unsigned int +camellia_encrypt(void *c, byte *outbuf, const byte *inbuf) +{ + CAMELLIA_context *ctx=c; + + Camellia_EncryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); + +#define CAMELLIA_encrypt_stack_burn_size \ + (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \ + +4*sizeof(u32)+4*sizeof(u32) \ + +2*sizeof(u32*)+4*sizeof(u32) \ + +2*2*sizeof(void*) /* Function calls. 
*/ \ + ) + + return /*burn_stack*/ (CAMELLIA_encrypt_stack_burn_size); +} + +static unsigned int +camellia_decrypt(void *c, byte *outbuf, const byte *inbuf) +{ + CAMELLIA_context *ctx=c; + + Camellia_DecryptBlock(ctx->keybitlength,inbuf,ctx->keytable,outbuf); + +#define CAMELLIA_decrypt_stack_burn_size \ + (sizeof(int)+2*sizeof(unsigned char *)+sizeof(void*/*KEY_TABLE_TYPE*/) \ + +4*sizeof(u32)+4*sizeof(u32) \ + +2*sizeof(u32*)+4*sizeof(u32) \ + +2*2*sizeof(void*) /* Function calls. */ \ + ) + + return /*burn_stack*/ (CAMELLIA_decrypt_stack_burn_size); +} + +#endif /*!USE_ARM_ASM*/ + +/* Bulk encryption of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size CAMELLIA_BLOCK_SIZE. */ +static void +_gcry_camellia_ctr_enc(void *context, unsigned char *ctr, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + CAMELLIA_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[CAMELLIA_BLOCK_SIZE]; + int burn_stack_depth = CAMELLIA_encrypt_stack_burn_size; + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + _gcry_camellia_aesni_avx2_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 32; + outbuf += 32 * CAMELLIA_BLOCK_SIZE; + inbuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + _gcry_camellia_aesni_avx_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 16; + outbuf += 16 * CAMELLIA_BLOCK_SIZE; + inbuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + /* TODO: use caching instead? */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + Camellia_EncryptBlock(ctx->keybitlength, ctr, ctx->keytable, tmpbuf); + /* XOR the input with the encrypted counter and store in output. */ + cipher_block_xor(outbuf, tmpbuf, inbuf, CAMELLIA_BLOCK_SIZE); + outbuf += CAMELLIA_BLOCK_SIZE; + inbuf += CAMELLIA_BLOCK_SIZE; + /* Increment the counter. */ + cipher_block_add(ctr, 1, CAMELLIA_BLOCK_SIZE); + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk decryption of complete blocks in CBC mode. This function is only + intended for the bulk encryption feature of cipher.c. 
*/ +static void +_gcry_camellia_cbc_dec(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + CAMELLIA_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char savebuf[CAMELLIA_BLOCK_SIZE]; + int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size; + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + _gcry_camellia_aesni_avx2_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 32; + outbuf += 32 * CAMELLIA_BLOCK_SIZE; + inbuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 + + 2 * sizeof(void *) + ASM_EXTRA_STACK;; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + _gcry_camellia_aesni_avx_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 16; + outbuf += 16 * CAMELLIA_BLOCK_SIZE; + inbuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + Camellia_DecryptBlock(ctx->keybitlength, inbuf, ctx->keytable, savebuf); + + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, + CAMELLIA_BLOCK_SIZE); + inbuf += CAMELLIA_BLOCK_SIZE; + outbuf += CAMELLIA_BLOCK_SIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +static void +_gcry_camellia_cfb_dec(void *context, unsigned char *iv, + void *outbuf_arg, const void *inbuf_arg, + size_t nblocks) +{ + CAMELLIA_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth = CAMELLIA_decrypt_stack_burn_size; + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + _gcry_camellia_aesni_avx2_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 32; + outbuf += 32 * CAMELLIA_BLOCK_SIZE; + inbuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + 16 + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + + /* Process data in 16 block chunks. 
*/ + while (nblocks >= 16) + { + _gcry_camellia_aesni_avx_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 16; + outbuf += 16 * CAMELLIA_BLOCK_SIZE; + inbuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + Camellia_EncryptBlock(ctx->keybitlength, iv, ctx->keytable, iv); + cipher_block_xor_n_copy(outbuf, iv, inbuf, CAMELLIA_BLOCK_SIZE); + outbuf += CAMELLIA_BLOCK_SIZE; + inbuf += CAMELLIA_BLOCK_SIZE; + } + + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk encryption/decryption of complete blocks in OCB mode. */ +static size_t +_gcry_camellia_ocb_crypt (gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt) +{ +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) + CAMELLIA_context *ctx = (void *)&c->context.c; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + int burn_stack_depth; + u64 blkn = c->u_mode.ocb.data_nblocks; + + burn_stack_depth = encrypt ? CAMELLIA_encrypt_stack_burn_size : + CAMELLIA_decrypt_stack_burn_size; +#else + (void)c; + (void)outbuf_arg; + (void)inbuf_arg; + (void)encrypt; +#endif + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + u64 Ls[32]; + unsigned int n = 32 - (blkn % 32); + u64 *l; + int i; + + if (nblocks >= 32) + { + for (i = 0; i < 32; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4]; + Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(31 + n) % 32]; + + /* Process data in 32 block chunks. */ + while (nblocks >= 32) + { + blkn += 32; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32); + + if (encrypt) + _gcry_camellia_aesni_avx2_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_camellia_aesni_avx2_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 32; + outbuf += 32 * CAMELLIA_BLOCK_SIZE; + inbuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + u64 Ls[16]; + unsigned int n = 16 - (blkn % 16); + u64 *l; + int i; + + if (nblocks >= 16) + { + for (i = 0; i < 16; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). 
*/ + Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(15 + n) % 16]; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + blkn += 16; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); + + if (encrypt) + _gcry_camellia_aesni_avx_ocb_enc(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + else + _gcry_camellia_aesni_avx_ocb_dec(ctx, outbuf, inbuf, c->u_iv.iv, + c->u_ctr.ctr, Ls); + + nblocks -= 16; + outbuf += 16 * CAMELLIA_BLOCK_SIZE; + inbuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) + c->u_mode.ocb.data_nblocks = blkn; + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; +} + +/* Bulk authentication of complete blocks in OCB mode. */ +static size_t +_gcry_camellia_ocb_auth (gcry_cipher_hd_t c, const void *abuf_arg, + size_t nblocks) +{ +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) + CAMELLIA_context *ctx = (void *)&c->context.c; + const unsigned char *abuf = abuf_arg; + int burn_stack_depth; + u64 blkn = c->u_mode.ocb.aad_nblocks; + + burn_stack_depth = CAMELLIA_encrypt_stack_burn_size; +#else + (void)c; + (void)abuf_arg; +#endif + +#ifdef USE_AESNI_AVX2 + if (ctx->use_aesni_avx2) + { + int did_use_aesni_avx2 = 0; + u64 Ls[32]; + unsigned int n = 32 - (blkn % 32); + u64 *l; + int i; + + if (nblocks >= 32) + { + for (i = 0; i < 32; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(i + 0 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + Ls[(15 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[4]; + Ls[(23 + n) % 32] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(31 + n) % 32]; + + /* Process data in 32 block chunks. 
*/ + while (nblocks >= 32) + { + blkn += 32; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 32); + + _gcry_camellia_aesni_avx2_ocb_auth(ctx, abuf, + c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 32; + abuf += 32 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx2 = 1; + } + } + + if (did_use_aesni_avx2) + { + int avx2_burn_stack_depth = 32 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx2_burn_stack_depth) + burn_stack_depth = avx2_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#ifdef USE_AESNI_AVX + if (ctx->use_aesni_avx) + { + int did_use_aesni_avx = 0; + u64 Ls[16]; + unsigned int n = 16 - (blkn % 16); + u64 *l; + int i; + + if (nblocks >= 16) + { + for (i = 0; i < 16; i += 8) + { + /* Use u64 to store pointers for x32 support (assembly function + * assumes 64-bit pointers). */ + Ls[(i + 0 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 1 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 2 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 3 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[2]; + Ls[(i + 4 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + Ls[(i + 5 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[1]; + Ls[(i + 6 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[0]; + } + + Ls[(7 + n) % 16] = (uintptr_t)(void *)c->u_mode.ocb.L[3]; + l = &Ls[(15 + n) % 16]; + + /* Process data in 16 block chunks. */ + while (nblocks >= 16) + { + blkn += 16; + *l = (uintptr_t)(void *)ocb_get_l(c, blkn - blkn % 16); + + _gcry_camellia_aesni_avx_ocb_auth(ctx, abuf, + c->u_mode.ocb.aad_offset, + c->u_mode.ocb.aad_sum, Ls); + + nblocks -= 16; + abuf += 16 * CAMELLIA_BLOCK_SIZE; + did_use_aesni_avx = 1; + } + } + + if (did_use_aesni_avx) + { + int avx_burn_stack_depth = 16 * CAMELLIA_BLOCK_SIZE + + 2 * sizeof(void *) + ASM_EXTRA_STACK; + + if (burn_stack_depth < avx_burn_stack_depth) + burn_stack_depth = avx_burn_stack_depth; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if defined(USE_AESNI_AVX) || defined(USE_AESNI_AVX2) + c->u_mode.ocb.aad_nblocks = blkn; + + if (burn_stack_depth) + _gcry_burn_stack (burn_stack_depth + 4 * sizeof(void *)); +#endif + + return nblocks; +} + +/* Run the self-tests for CAMELLIA-CTR-128, tests IV increment of bulk CTR + encryption. Returns NULL on success. */ +static const char* +selftest_ctr_128 (void) +{ + const int nblocks = 32+16+1; + const int blocksize = CAMELLIA_BLOCK_SIZE; + const int context_size = sizeof(CAMELLIA_context); + + return _gcry_selftest_helper_ctr("CAMELLIA", &camellia_setkey, + &camellia_encrypt, nblocks, blocksize, context_size); +} + +/* Run the self-tests for CAMELLIA-CBC-128, tests bulk CBC decryption. + Returns NULL on success. */ +static const char* +selftest_cbc_128 (void) +{ + const int nblocks = 32+16+2; + const int blocksize = CAMELLIA_BLOCK_SIZE; + const int context_size = sizeof(CAMELLIA_context); + + return _gcry_selftest_helper_cbc("CAMELLIA", &camellia_setkey, + &camellia_encrypt, nblocks, blocksize, context_size); +} + +/* Run the self-tests for CAMELLIA-CFB-128, tests bulk CFB decryption. + Returns NULL on success. 
+ */
+static const char*
+selftest_cfb_128 (void)
+{
+  const int nblocks = 32+16+2;
+  const int blocksize = CAMELLIA_BLOCK_SIZE;
+  const int context_size = sizeof(CAMELLIA_context);
+
+  return _gcry_selftest_helper_cfb("CAMELLIA", &camellia_setkey,
+           &camellia_encrypt, nblocks, blocksize, context_size);
+}
+
+static const char *
+selftest(void)
+{
+  CAMELLIA_context ctx;
+  byte scratch[16];
+  cipher_bulk_ops_t bulk_ops;
+  const char *r;
+
+  /* These test vectors are from RFC-3713 */
+  static const byte plaintext[]=
+    {
+      0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
+      0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
+    };
+  static const byte key_128[]=
+    {
+      0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,
+      0xfe,0xdc,0xba,0x98,0x76,0x54,0x32,0x10
+    };
+  static const byte ciphertext_128[]=
+    {
+      0x67,0x67,0x31,0x38,0x54,0x96,0x69,0x73,
+      0x08,0x57,0x06,0x56,0x48,0xea,0xbe,0x43
+    };
+  static const byte key_192[]=
+    {
+      0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,0x98,
+      0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,0x66,0x77
+    };
+  static const byte ciphertext_192[]=
+    {
+      0xb4,0x99,0x34,0x01,0xb3,0xe9,0x96,0xf8,
+      0x4e,0xe5,0xce,0xe7,0xd7,0x9b,0x09,0xb9
+    };
+  static const byte key_256[]=
+    {
+      0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef,0xfe,0xdc,0xba,
+      0x98,0x76,0x54,0x32,0x10,0x00,0x11,0x22,0x33,0x44,0x55,
+      0x66,0x77,0x88,0x99,0xaa,0xbb,0xcc,0xdd,0xee,0xff
+    };
+  static const byte ciphertext_256[]=
+    {
+      0x9a,0xcc,0x23,0x7d,0xff,0x16,0xd7,0x6c,
+      0x20,0xef,0x7c,0x91,0x9e,0x3a,0x75,0x09
+    };
+
+  camellia_setkey(&ctx,key_128,sizeof(key_128),&bulk_ops);
+  camellia_encrypt(&ctx,scratch,plaintext);
+  if(memcmp(scratch,ciphertext_128,sizeof(ciphertext_128))!=0)
+    return "CAMELLIA-128 test encryption failed.";
+  camellia_decrypt(&ctx,scratch,scratch);
+  if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
+    return "CAMELLIA-128 test decryption failed.";
+
+  camellia_setkey(&ctx,key_192,sizeof(key_192),&bulk_ops);
+  camellia_encrypt(&ctx,scratch,plaintext);
+  if(memcmp(scratch,ciphertext_192,sizeof(ciphertext_192))!=0)
+    return "CAMELLIA-192 test encryption failed.";
+  camellia_decrypt(&ctx,scratch,scratch);
+  if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
+    return "CAMELLIA-192 test decryption failed.";
+
+  camellia_setkey(&ctx,key_256,sizeof(key_256),&bulk_ops);
+  camellia_encrypt(&ctx,scratch,plaintext);
+  if(memcmp(scratch,ciphertext_256,sizeof(ciphertext_256))!=0)
+    return "CAMELLIA-256 test encryption failed.";
+  camellia_decrypt(&ctx,scratch,scratch);
+  if(memcmp(scratch,plaintext,sizeof(plaintext))!=0)
+    return "CAMELLIA-256 test decryption failed.";
+
+  if ( (r = selftest_ctr_128 ()) )
+    return r;
+
+  if ( (r = selftest_cbc_128 ()) )
+    return r;
+
+  if ( (r = selftest_cfb_128 ()) )
+    return r;
+
+  return NULL;
+}
+
+/* These oids are from
+   <http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications_oid.html>,
+   retrieved May 1, 2007.
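+
+   Each list below pairs an OID with the cipher mode that OID implies;
+   the lists are terminated by a NULL entry.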
+ */
+
+static gcry_cipher_oid_spec_t camellia128_oids[] =
+  {
+    {"1.2.392.200011.61.1.1.1.2", GCRY_CIPHER_MODE_CBC},
+    {"0.3.4401.5.3.1.9.1", GCRY_CIPHER_MODE_ECB},
+    {"0.3.4401.5.3.1.9.3", GCRY_CIPHER_MODE_OFB},
+    {"0.3.4401.5.3.1.9.4", GCRY_CIPHER_MODE_CFB},
+    { NULL }
+  };
+
+static gcry_cipher_oid_spec_t camellia192_oids[] =
+  {
+    {"1.2.392.200011.61.1.1.1.3", GCRY_CIPHER_MODE_CBC},
+    {"0.3.4401.5.3.1.9.21", GCRY_CIPHER_MODE_ECB},
+    {"0.3.4401.5.3.1.9.23", GCRY_CIPHER_MODE_OFB},
+    {"0.3.4401.5.3.1.9.24", GCRY_CIPHER_MODE_CFB},
+    { NULL }
+  };
+
+static gcry_cipher_oid_spec_t camellia256_oids[] =
+  {
+    {"1.2.392.200011.61.1.1.1.4", GCRY_CIPHER_MODE_CBC},
+    {"0.3.4401.5.3.1.9.41", GCRY_CIPHER_MODE_ECB},
+    {"0.3.4401.5.3.1.9.43", GCRY_CIPHER_MODE_OFB},
+    {"0.3.4401.5.3.1.9.44", GCRY_CIPHER_MODE_CFB},
+    { NULL }
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_camellia128 =
+  {
+    GCRY_CIPHER_CAMELLIA128, {0, 0},
+    "CAMELLIA128",NULL,camellia128_oids,CAMELLIA_BLOCK_SIZE,128,
+    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_camellia192 =
+  {
+    GCRY_CIPHER_CAMELLIA192, {0, 0},
+    "CAMELLIA192",NULL,camellia192_oids,CAMELLIA_BLOCK_SIZE,192,
+    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_camellia256 =
+  {
+    GCRY_CIPHER_CAMELLIA256, {0, 0},
+    "CAMELLIA256",NULL,camellia256_oids,CAMELLIA_BLOCK_SIZE,256,
+    sizeof(CAMELLIA_context),camellia_setkey,camellia_encrypt,camellia_decrypt
+  };
diff --git a/comm/third_party/libgcrypt/cipher/camellia.c b/comm/third_party/libgcrypt/cipher/camellia.c
new file mode 100644
index 0000000000..e7085a7ec8
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia.c
@@ -0,0 +1,1413 @@
+/* camellia.h ver 1.2.0
+ *
+ * Copyright (C) 2006,2007
+ * NTT (Nippon Telegraph and Telephone Corporation).
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Algorithm Specification
+ *  http://info.isl.ntt.co.jp/crypt/eng/camellia/specifications.html
+ */
+
+#include <config.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "bufhelp.h"
+#include "camellia.h"
+
+typedef byte u8;
+
+/* key constants */
+
+#define CAMELLIA_SIGMA1L (0xA09E667FL)
+#define CAMELLIA_SIGMA1R (0x3BCC908BL)
+#define CAMELLIA_SIGMA2L (0xB67AE858L)
+#define CAMELLIA_SIGMA2R (0x4CAA73B2L)
+#define CAMELLIA_SIGMA3L (0xC6EF372FL)
+#define CAMELLIA_SIGMA3R (0xE94F82BEL)
+#define CAMELLIA_SIGMA4L (0x54FF53A5L)
+#define CAMELLIA_SIGMA4R (0xF1D36F1CL)
+#define CAMELLIA_SIGMA5L (0x10E527FAL)
+#define CAMELLIA_SIGMA5R (0xDE682D1DL)
+#define CAMELLIA_SIGMA6L (0xB05688C2L)
+#define CAMELLIA_SIGMA6R (0xB3E6C1FDL)
+
+/*
+ * macros
+ */
+
+
+#if defined(_MSC_VER)
+
+# define SWAP(x) (_lrotl(x, 8) & 0x00ff00ff | _lrotr(x, 8) & 0xff00ff00)
+# define GETU32(p) SWAP(*((u32 *)(p)))
+# define PUTU32(ct, st) {*((u32 *)(ct)) = SWAP((st));}
+
+#else /* not MS-VC */
+
+# define GETU32(pt) buf_get_be32(pt)
+# define PUTU32(ct, st) buf_put_be32(ct, st)
+
+#endif
+
+#define CamelliaSubkeyL(INDEX) (subkey[(INDEX)*2])
+#define CamelliaSubkeyR(INDEX) (subkey[(INDEX)*2 + 1])
+
+/* rotation right shift 1byte */
+#define CAMELLIA_RR8(x) (((x) >> 8) + ((x) << 24))
+/* rotation left shift 1bit */
+#define CAMELLIA_RL1(x) (((x) << 1) + ((x) >> 31))
+/* rotation left shift 1byte */
+#define CAMELLIA_RL8(x) (((x) << 8) + ((x) >> 24))
+
+#define CAMELLIA_ROLDQ(ll, lr, rl, rr, w0, w1, bits) \
+  do { \
+    w0 = ll; \
+    ll = (ll << bits) + (lr >> (32 - bits)); \
+    lr = (lr << bits) + (rl >> (32 - bits)); \
+    rl = (rl << bits) + (rr >> (32 - bits)); \
+    rr = (rr << bits) + (w0 >> (32 - bits)); \
+  } while(0)
+
+#define CAMELLIA_ROLDQo32(ll, lr, rl, rr, w0, w1, bits) \
+  do { \
+    w0 = ll; \
+    w1 = lr; \
+    ll = (lr << (bits - 32)) + (rl >> (64 - bits)); \
+    lr = (rl << (bits - 32)) + (rr >> (64 - bits)); \
+    rl = (rr << (bits - 32)) + (w0 >> (64 - bits)); \
+    rr = (w0 << (bits - 32)) + (w1 >> (64 - bits)); \
+  } while(0)
+
+#define CAMELLIA_SP1110(INDEX) (camellia_sp1110[(INDEX)])
+#define CAMELLIA_SP0222(INDEX) (camellia_sp0222[(INDEX)])
+#define CAMELLIA_SP3033(INDEX) (camellia_sp3033[(INDEX)])
+#define CAMELLIA_SP4404(INDEX) (camellia_sp4404[(INDEX)])
+
+#define CAMELLIA_F(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+  do { \
+    il = xl ^ kl; \
+    ir = xr ^ kr; \
+    t0 = il >> 16; \
+    t1 = ir >> 16; \
+    yl = CAMELLIA_SP1110(ir & 0xff) \
+      ^ CAMELLIA_SP0222((t1 >> 8) & 0xff) \
+      ^ CAMELLIA_SP3033(t1 & 0xff) \
+      ^ CAMELLIA_SP4404((ir >> 8) & 0xff); \
+    yr = CAMELLIA_SP1110((t0 >> 8) & 0xff) \
+      ^ CAMELLIA_SP0222(t0 & 0xff) \
+      ^ CAMELLIA_SP3033((il >> 8) & 0xff) \
+      ^ CAMELLIA_SP4404(il & 0xff); \
+    yl ^= yr; \
+    yr = CAMELLIA_RR8(yr); \
+    yr ^= yl; \
+  } while(0)
+
+
+/*
+ * for speed up
+ *
+ */
+#define CAMELLIA_FLS(ll, lr, rl, rr, kll, klr, krl, krr, t0, t1, t2, t3) \
+  do { \
+    t0 = kll; \
+    t0 &= ll; \
+    lr ^= CAMELLIA_RL1(t0); \
+    t1 = klr; \
+    t1 |= lr; \
+    ll ^= t1; \
+    \
+    t2 = krr; \
+    t2 |= rr; \
+    rl ^= t2; \
+    t3 = krl; \
+    t3 &= rl; \
+    rr ^= CAMELLIA_RL1(t3); \
+  } while(0)
+
+#define CAMELLIA_ROUNDSM(xl, xr, kl, kr, yl, yr, il, ir, t0, t1) \
+  do { \
+    yl ^= kl; \
+    yr ^= kr; \
+    ir = CAMELLIA_SP1110(xr & 0xff) \
+      ^ CAMELLIA_SP0222((xr >> 24) & 0xff) \
+      ^ CAMELLIA_SP3033((xr >> 16) & 0xff) \
+      ^ CAMELLIA_SP4404((xr >> 8) & 0xff); \
+    il = CAMELLIA_SP1110((xl >> 24) & 0xff) \
+      ^ CAMELLIA_SP0222((xl >> 16) & 0xff) \
+      ^ CAMELLIA_SP3033((xl >> 8) & 0xff) \
+      ^ CAMELLIA_SP4404(xl & 0xff); \
+    ir ^=
il; \ + il = CAMELLIA_RR8(il); \ + il ^= ir; \ + yl ^= ir; \ + yr ^= il; \ + } while(0) + + +static const u32 camellia_sp1110[256] = { + 0x70707000,0x82828200,0x2c2c2c00,0xececec00, + 0xb3b3b300,0x27272700,0xc0c0c000,0xe5e5e500, + 0xe4e4e400,0x85858500,0x57575700,0x35353500, + 0xeaeaea00,0x0c0c0c00,0xaeaeae00,0x41414100, + 0x23232300,0xefefef00,0x6b6b6b00,0x93939300, + 0x45454500,0x19191900,0xa5a5a500,0x21212100, + 0xededed00,0x0e0e0e00,0x4f4f4f00,0x4e4e4e00, + 0x1d1d1d00,0x65656500,0x92929200,0xbdbdbd00, + 0x86868600,0xb8b8b800,0xafafaf00,0x8f8f8f00, + 0x7c7c7c00,0xebebeb00,0x1f1f1f00,0xcecece00, + 0x3e3e3e00,0x30303000,0xdcdcdc00,0x5f5f5f00, + 0x5e5e5e00,0xc5c5c500,0x0b0b0b00,0x1a1a1a00, + 0xa6a6a600,0xe1e1e100,0x39393900,0xcacaca00, + 0xd5d5d500,0x47474700,0x5d5d5d00,0x3d3d3d00, + 0xd9d9d900,0x01010100,0x5a5a5a00,0xd6d6d600, + 0x51515100,0x56565600,0x6c6c6c00,0x4d4d4d00, + 0x8b8b8b00,0x0d0d0d00,0x9a9a9a00,0x66666600, + 0xfbfbfb00,0xcccccc00,0xb0b0b000,0x2d2d2d00, + 0x74747400,0x12121200,0x2b2b2b00,0x20202000, + 0xf0f0f000,0xb1b1b100,0x84848400,0x99999900, + 0xdfdfdf00,0x4c4c4c00,0xcbcbcb00,0xc2c2c200, + 0x34343400,0x7e7e7e00,0x76767600,0x05050500, + 0x6d6d6d00,0xb7b7b700,0xa9a9a900,0x31313100, + 0xd1d1d100,0x17171700,0x04040400,0xd7d7d700, + 0x14141400,0x58585800,0x3a3a3a00,0x61616100, + 0xdedede00,0x1b1b1b00,0x11111100,0x1c1c1c00, + 0x32323200,0x0f0f0f00,0x9c9c9c00,0x16161600, + 0x53535300,0x18181800,0xf2f2f200,0x22222200, + 0xfefefe00,0x44444400,0xcfcfcf00,0xb2b2b200, + 0xc3c3c300,0xb5b5b500,0x7a7a7a00,0x91919100, + 0x24242400,0x08080800,0xe8e8e800,0xa8a8a800, + 0x60606000,0xfcfcfc00,0x69696900,0x50505000, + 0xaaaaaa00,0xd0d0d000,0xa0a0a000,0x7d7d7d00, + 0xa1a1a100,0x89898900,0x62626200,0x97979700, + 0x54545400,0x5b5b5b00,0x1e1e1e00,0x95959500, + 0xe0e0e000,0xffffff00,0x64646400,0xd2d2d200, + 0x10101000,0xc4c4c400,0x00000000,0x48484800, + 0xa3a3a300,0xf7f7f700,0x75757500,0xdbdbdb00, + 0x8a8a8a00,0x03030300,0xe6e6e600,0xdadada00, + 0x09090900,0x3f3f3f00,0xdddddd00,0x94949400, + 0x87878700,0x5c5c5c00,0x83838300,0x02020200, + 0xcdcdcd00,0x4a4a4a00,0x90909000,0x33333300, + 0x73737300,0x67676700,0xf6f6f600,0xf3f3f300, + 0x9d9d9d00,0x7f7f7f00,0xbfbfbf00,0xe2e2e200, + 0x52525200,0x9b9b9b00,0xd8d8d800,0x26262600, + 0xc8c8c800,0x37373700,0xc6c6c600,0x3b3b3b00, + 0x81818100,0x96969600,0x6f6f6f00,0x4b4b4b00, + 0x13131300,0xbebebe00,0x63636300,0x2e2e2e00, + 0xe9e9e900,0x79797900,0xa7a7a700,0x8c8c8c00, + 0x9f9f9f00,0x6e6e6e00,0xbcbcbc00,0x8e8e8e00, + 0x29292900,0xf5f5f500,0xf9f9f900,0xb6b6b600, + 0x2f2f2f00,0xfdfdfd00,0xb4b4b400,0x59595900, + 0x78787800,0x98989800,0x06060600,0x6a6a6a00, + 0xe7e7e700,0x46464600,0x71717100,0xbababa00, + 0xd4d4d400,0x25252500,0xababab00,0x42424200, + 0x88888800,0xa2a2a200,0x8d8d8d00,0xfafafa00, + 0x72727200,0x07070700,0xb9b9b900,0x55555500, + 0xf8f8f800,0xeeeeee00,0xacacac00,0x0a0a0a00, + 0x36363600,0x49494900,0x2a2a2a00,0x68686800, + 0x3c3c3c00,0x38383800,0xf1f1f100,0xa4a4a400, + 0x40404000,0x28282800,0xd3d3d300,0x7b7b7b00, + 0xbbbbbb00,0xc9c9c900,0x43434300,0xc1c1c100, + 0x15151500,0xe3e3e300,0xadadad00,0xf4f4f400, + 0x77777700,0xc7c7c700,0x80808000,0x9e9e9e00, +}; + +static const u32 camellia_sp0222[256] = { + 0x00e0e0e0,0x00050505,0x00585858,0x00d9d9d9, + 0x00676767,0x004e4e4e,0x00818181,0x00cbcbcb, + 0x00c9c9c9,0x000b0b0b,0x00aeaeae,0x006a6a6a, + 0x00d5d5d5,0x00181818,0x005d5d5d,0x00828282, + 0x00464646,0x00dfdfdf,0x00d6d6d6,0x00272727, + 0x008a8a8a,0x00323232,0x004b4b4b,0x00424242, + 0x00dbdbdb,0x001c1c1c,0x009e9e9e,0x009c9c9c, + 
0x003a3a3a,0x00cacaca,0x00252525,0x007b7b7b, + 0x000d0d0d,0x00717171,0x005f5f5f,0x001f1f1f, + 0x00f8f8f8,0x00d7d7d7,0x003e3e3e,0x009d9d9d, + 0x007c7c7c,0x00606060,0x00b9b9b9,0x00bebebe, + 0x00bcbcbc,0x008b8b8b,0x00161616,0x00343434, + 0x004d4d4d,0x00c3c3c3,0x00727272,0x00959595, + 0x00ababab,0x008e8e8e,0x00bababa,0x007a7a7a, + 0x00b3b3b3,0x00020202,0x00b4b4b4,0x00adadad, + 0x00a2a2a2,0x00acacac,0x00d8d8d8,0x009a9a9a, + 0x00171717,0x001a1a1a,0x00353535,0x00cccccc, + 0x00f7f7f7,0x00999999,0x00616161,0x005a5a5a, + 0x00e8e8e8,0x00242424,0x00565656,0x00404040, + 0x00e1e1e1,0x00636363,0x00090909,0x00333333, + 0x00bfbfbf,0x00989898,0x00979797,0x00858585, + 0x00686868,0x00fcfcfc,0x00ececec,0x000a0a0a, + 0x00dadada,0x006f6f6f,0x00535353,0x00626262, + 0x00a3a3a3,0x002e2e2e,0x00080808,0x00afafaf, + 0x00282828,0x00b0b0b0,0x00747474,0x00c2c2c2, + 0x00bdbdbd,0x00363636,0x00222222,0x00383838, + 0x00646464,0x001e1e1e,0x00393939,0x002c2c2c, + 0x00a6a6a6,0x00303030,0x00e5e5e5,0x00444444, + 0x00fdfdfd,0x00888888,0x009f9f9f,0x00656565, + 0x00878787,0x006b6b6b,0x00f4f4f4,0x00232323, + 0x00484848,0x00101010,0x00d1d1d1,0x00515151, + 0x00c0c0c0,0x00f9f9f9,0x00d2d2d2,0x00a0a0a0, + 0x00555555,0x00a1a1a1,0x00414141,0x00fafafa, + 0x00434343,0x00131313,0x00c4c4c4,0x002f2f2f, + 0x00a8a8a8,0x00b6b6b6,0x003c3c3c,0x002b2b2b, + 0x00c1c1c1,0x00ffffff,0x00c8c8c8,0x00a5a5a5, + 0x00202020,0x00898989,0x00000000,0x00909090, + 0x00474747,0x00efefef,0x00eaeaea,0x00b7b7b7, + 0x00151515,0x00060606,0x00cdcdcd,0x00b5b5b5, + 0x00121212,0x007e7e7e,0x00bbbbbb,0x00292929, + 0x000f0f0f,0x00b8b8b8,0x00070707,0x00040404, + 0x009b9b9b,0x00949494,0x00212121,0x00666666, + 0x00e6e6e6,0x00cecece,0x00ededed,0x00e7e7e7, + 0x003b3b3b,0x00fefefe,0x007f7f7f,0x00c5c5c5, + 0x00a4a4a4,0x00373737,0x00b1b1b1,0x004c4c4c, + 0x00919191,0x006e6e6e,0x008d8d8d,0x00767676, + 0x00030303,0x002d2d2d,0x00dedede,0x00969696, + 0x00262626,0x007d7d7d,0x00c6c6c6,0x005c5c5c, + 0x00d3d3d3,0x00f2f2f2,0x004f4f4f,0x00191919, + 0x003f3f3f,0x00dcdcdc,0x00797979,0x001d1d1d, + 0x00525252,0x00ebebeb,0x00f3f3f3,0x006d6d6d, + 0x005e5e5e,0x00fbfbfb,0x00696969,0x00b2b2b2, + 0x00f0f0f0,0x00313131,0x000c0c0c,0x00d4d4d4, + 0x00cfcfcf,0x008c8c8c,0x00e2e2e2,0x00757575, + 0x00a9a9a9,0x004a4a4a,0x00575757,0x00848484, + 0x00111111,0x00454545,0x001b1b1b,0x00f5f5f5, + 0x00e4e4e4,0x000e0e0e,0x00737373,0x00aaaaaa, + 0x00f1f1f1,0x00dddddd,0x00595959,0x00141414, + 0x006c6c6c,0x00929292,0x00545454,0x00d0d0d0, + 0x00787878,0x00707070,0x00e3e3e3,0x00494949, + 0x00808080,0x00505050,0x00a7a7a7,0x00f6f6f6, + 0x00777777,0x00939393,0x00868686,0x00838383, + 0x002a2a2a,0x00c7c7c7,0x005b5b5b,0x00e9e9e9, + 0x00eeeeee,0x008f8f8f,0x00010101,0x003d3d3d, +}; + +static const u32 camellia_sp3033[256] = { + 0x38003838,0x41004141,0x16001616,0x76007676, + 0xd900d9d9,0x93009393,0x60006060,0xf200f2f2, + 0x72007272,0xc200c2c2,0xab00abab,0x9a009a9a, + 0x75007575,0x06000606,0x57005757,0xa000a0a0, + 0x91009191,0xf700f7f7,0xb500b5b5,0xc900c9c9, + 0xa200a2a2,0x8c008c8c,0xd200d2d2,0x90009090, + 0xf600f6f6,0x07000707,0xa700a7a7,0x27002727, + 0x8e008e8e,0xb200b2b2,0x49004949,0xde00dede, + 0x43004343,0x5c005c5c,0xd700d7d7,0xc700c7c7, + 0x3e003e3e,0xf500f5f5,0x8f008f8f,0x67006767, + 0x1f001f1f,0x18001818,0x6e006e6e,0xaf00afaf, + 0x2f002f2f,0xe200e2e2,0x85008585,0x0d000d0d, + 0x53005353,0xf000f0f0,0x9c009c9c,0x65006565, + 0xea00eaea,0xa300a3a3,0xae00aeae,0x9e009e9e, + 0xec00ecec,0x80008080,0x2d002d2d,0x6b006b6b, + 0xa800a8a8,0x2b002b2b,0x36003636,0xa600a6a6, + 0xc500c5c5,0x86008686,0x4d004d4d,0x33003333, + 
0xfd00fdfd,0x66006666,0x58005858,0x96009696, + 0x3a003a3a,0x09000909,0x95009595,0x10001010, + 0x78007878,0xd800d8d8,0x42004242,0xcc00cccc, + 0xef00efef,0x26002626,0xe500e5e5,0x61006161, + 0x1a001a1a,0x3f003f3f,0x3b003b3b,0x82008282, + 0xb600b6b6,0xdb00dbdb,0xd400d4d4,0x98009898, + 0xe800e8e8,0x8b008b8b,0x02000202,0xeb00ebeb, + 0x0a000a0a,0x2c002c2c,0x1d001d1d,0xb000b0b0, + 0x6f006f6f,0x8d008d8d,0x88008888,0x0e000e0e, + 0x19001919,0x87008787,0x4e004e4e,0x0b000b0b, + 0xa900a9a9,0x0c000c0c,0x79007979,0x11001111, + 0x7f007f7f,0x22002222,0xe700e7e7,0x59005959, + 0xe100e1e1,0xda00dada,0x3d003d3d,0xc800c8c8, + 0x12001212,0x04000404,0x74007474,0x54005454, + 0x30003030,0x7e007e7e,0xb400b4b4,0x28002828, + 0x55005555,0x68006868,0x50005050,0xbe00bebe, + 0xd000d0d0,0xc400c4c4,0x31003131,0xcb00cbcb, + 0x2a002a2a,0xad00adad,0x0f000f0f,0xca00caca, + 0x70007070,0xff00ffff,0x32003232,0x69006969, + 0x08000808,0x62006262,0x00000000,0x24002424, + 0xd100d1d1,0xfb00fbfb,0xba00baba,0xed00eded, + 0x45004545,0x81008181,0x73007373,0x6d006d6d, + 0x84008484,0x9f009f9f,0xee00eeee,0x4a004a4a, + 0xc300c3c3,0x2e002e2e,0xc100c1c1,0x01000101, + 0xe600e6e6,0x25002525,0x48004848,0x99009999, + 0xb900b9b9,0xb300b3b3,0x7b007b7b,0xf900f9f9, + 0xce00cece,0xbf00bfbf,0xdf00dfdf,0x71007171, + 0x29002929,0xcd00cdcd,0x6c006c6c,0x13001313, + 0x64006464,0x9b009b9b,0x63006363,0x9d009d9d, + 0xc000c0c0,0x4b004b4b,0xb700b7b7,0xa500a5a5, + 0x89008989,0x5f005f5f,0xb100b1b1,0x17001717, + 0xf400f4f4,0xbc00bcbc,0xd300d3d3,0x46004646, + 0xcf00cfcf,0x37003737,0x5e005e5e,0x47004747, + 0x94009494,0xfa00fafa,0xfc00fcfc,0x5b005b5b, + 0x97009797,0xfe00fefe,0x5a005a5a,0xac00acac, + 0x3c003c3c,0x4c004c4c,0x03000303,0x35003535, + 0xf300f3f3,0x23002323,0xb800b8b8,0x5d005d5d, + 0x6a006a6a,0x92009292,0xd500d5d5,0x21002121, + 0x44004444,0x51005151,0xc600c6c6,0x7d007d7d, + 0x39003939,0x83008383,0xdc00dcdc,0xaa00aaaa, + 0x7c007c7c,0x77007777,0x56005656,0x05000505, + 0x1b001b1b,0xa400a4a4,0x15001515,0x34003434, + 0x1e001e1e,0x1c001c1c,0xf800f8f8,0x52005252, + 0x20002020,0x14001414,0xe900e9e9,0xbd00bdbd, + 0xdd00dddd,0xe400e4e4,0xa100a1a1,0xe000e0e0, + 0x8a008a8a,0xf100f1f1,0xd600d6d6,0x7a007a7a, + 0xbb00bbbb,0xe300e3e3,0x40004040,0x4f004f4f, +}; + +static const u32 camellia_sp4404[256] = { + 0x70700070,0x2c2c002c,0xb3b300b3,0xc0c000c0, + 0xe4e400e4,0x57570057,0xeaea00ea,0xaeae00ae, + 0x23230023,0x6b6b006b,0x45450045,0xa5a500a5, + 0xeded00ed,0x4f4f004f,0x1d1d001d,0x92920092, + 0x86860086,0xafaf00af,0x7c7c007c,0x1f1f001f, + 0x3e3e003e,0xdcdc00dc,0x5e5e005e,0x0b0b000b, + 0xa6a600a6,0x39390039,0xd5d500d5,0x5d5d005d, + 0xd9d900d9,0x5a5a005a,0x51510051,0x6c6c006c, + 0x8b8b008b,0x9a9a009a,0xfbfb00fb,0xb0b000b0, + 0x74740074,0x2b2b002b,0xf0f000f0,0x84840084, + 0xdfdf00df,0xcbcb00cb,0x34340034,0x76760076, + 0x6d6d006d,0xa9a900a9,0xd1d100d1,0x04040004, + 0x14140014,0x3a3a003a,0xdede00de,0x11110011, + 0x32320032,0x9c9c009c,0x53530053,0xf2f200f2, + 0xfefe00fe,0xcfcf00cf,0xc3c300c3,0x7a7a007a, + 0x24240024,0xe8e800e8,0x60600060,0x69690069, + 0xaaaa00aa,0xa0a000a0,0xa1a100a1,0x62620062, + 0x54540054,0x1e1e001e,0xe0e000e0,0x64640064, + 0x10100010,0x00000000,0xa3a300a3,0x75750075, + 0x8a8a008a,0xe6e600e6,0x09090009,0xdddd00dd, + 0x87870087,0x83830083,0xcdcd00cd,0x90900090, + 0x73730073,0xf6f600f6,0x9d9d009d,0xbfbf00bf, + 0x52520052,0xd8d800d8,0xc8c800c8,0xc6c600c6, + 0x81810081,0x6f6f006f,0x13130013,0x63630063, + 0xe9e900e9,0xa7a700a7,0x9f9f009f,0xbcbc00bc, + 0x29290029,0xf9f900f9,0x2f2f002f,0xb4b400b4, + 0x78780078,0x06060006,0xe7e700e7,0x71710071, + 
0xd4d400d4,0xabab00ab,0x88880088,0x8d8d008d,
+    0x72720072,0xb9b900b9,0xf8f800f8,0xacac00ac,
+    0x36360036,0x2a2a002a,0x3c3c003c,0xf1f100f1,
+    0x40400040,0xd3d300d3,0xbbbb00bb,0x43430043,
+    0x15150015,0xadad00ad,0x77770077,0x80800080,
+    0x82820082,0xecec00ec,0x27270027,0xe5e500e5,
+    0x85850085,0x35350035,0x0c0c000c,0x41410041,
+    0xefef00ef,0x93930093,0x19190019,0x21210021,
+    0x0e0e000e,0x4e4e004e,0x65650065,0xbdbd00bd,
+    0xb8b800b8,0x8f8f008f,0xebeb00eb,0xcece00ce,
+    0x30300030,0x5f5f005f,0xc5c500c5,0x1a1a001a,
+    0xe1e100e1,0xcaca00ca,0x47470047,0x3d3d003d,
+    0x01010001,0xd6d600d6,0x56560056,0x4d4d004d,
+    0x0d0d000d,0x66660066,0xcccc00cc,0x2d2d002d,
+    0x12120012,0x20200020,0xb1b100b1,0x99990099,
+    0x4c4c004c,0xc2c200c2,0x7e7e007e,0x05050005,
+    0xb7b700b7,0x31310031,0x17170017,0xd7d700d7,
+    0x58580058,0x61610061,0x1b1b001b,0x1c1c001c,
+    0x0f0f000f,0x16160016,0x18180018,0x22220022,
+    0x44440044,0xb2b200b2,0xb5b500b5,0x91910091,
+    0x08080008,0xa8a800a8,0xfcfc00fc,0x50500050,
+    0xd0d000d0,0x7d7d007d,0x89890089,0x97970097,
+    0x5b5b005b,0x95950095,0xffff00ff,0xd2d200d2,
+    0xc4c400c4,0x48480048,0xf7f700f7,0xdbdb00db,
+    0x03030003,0xdada00da,0x3f3f003f,0x94940094,
+    0x5c5c005c,0x02020002,0x4a4a004a,0x33330033,
+    0x67670067,0xf3f300f3,0x7f7f007f,0xe2e200e2,
+    0x9b9b009b,0x26260026,0x37370037,0x3b3b003b,
+    0x96960096,0x4b4b004b,0xbebe00be,0x2e2e002e,
+    0x79790079,0x8c8c008c,0x6e6e006e,0x8e8e008e,
+    0xf5f500f5,0xb6b600b6,0xfdfd00fd,0x59590059,
+    0x98980098,0x6a6a006a,0x46460046,0xbaba00ba,
+    0x25250025,0x42420042,0xa2a200a2,0xfafa00fa,
+    0x07070007,0x55550055,0xeeee00ee,0x0a0a000a,
+    0x49490049,0x68680068,0x38380038,0xa4a400a4,
+    0x28280028,0x7b7b007b,0xc9c900c9,0xc1c100c1,
+    0xe3e300e3,0xf4f400f4,0xc7c700c7,0x9e9e009e,
+};
+
+
+/**
+ * Stuff related to the Camellia key schedule
+ */
+#define subl(x) subL[(x)]
+#define subr(x) subR[(x)]
+
+void camellia_setup128(const unsigned char *key, u32 *subkey)
+{
+  u32 kll, klr, krl, krr;
+  u32 il, ir, t0, t1, w0, w1;
+  u32 kw4l, kw4r, dw, tl, tr;
+  u32 subL[26];
+  u32 subR[26];
+
+  /**
+   *  k == kll || klr || krl || krr (|| is concatenation)
+   */
+  kll = GETU32(key     );
+  klr = GETU32(key +  4);
+  krl = GETU32(key +  8);
+  krr = GETU32(key + 12);
+  /**
+   * generate KL dependent subkeys
+   */
+  subl(0) = kll; subr(0) = klr;
+  subl(1) = krl; subr(1) = krr;
+  CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+  subl(4) = kll; subr(4) = klr;
+  subl(5) = krl; subr(5) = krr;
+  CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30);
+  subl(10) = kll; subr(10) = klr;
+  subl(11) = krl; subr(11) = krr;
+  CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+  subl(13) = krl; subr(13) = krr;
+  CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+  subl(16) = kll; subr(16) = klr;
+  subl(17) = krl; subr(17) = krr;
+  CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+  subl(18) = kll; subr(18) = klr;
+  subl(19) = krl; subr(19) = krr;
+  CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+  subl(22) = kll; subr(22) = klr;
+  subl(23) = krl; subr(23) = krr;
+
+  /* generate KA */
+  kll = subl(0); klr = subr(0);
+  krl = subl(1); krr = subr(1);
+  CAMELLIA_F(kll, klr,
+             CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+             w0, w1, il, ir, t0, t1);
+  krl ^= w0; krr ^= w1;
+  CAMELLIA_F(krl, krr,
+             CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+             kll, klr, il, ir, t0, t1);
+  CAMELLIA_F(kll, klr,
+             CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+             krl, krr, il, ir, t0, t1);
+  krl ^= w0; krr ^= w1;
+  CAMELLIA_F(krl, krr,
+             CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+             w0, w1, il, ir, t0, t1);
+  kll ^= w0; klr ^= w1;
+
+  /* generate KA dependent subkeys */
+  subl(2) =
kll; subr(2) = klr; + subl(3) = krl; subr(3) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(6) = kll; subr(6) = klr; + subl(7) = krl; subr(7) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(8) = kll; subr(8) = klr; + subl(9) = krl; subr(9) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(12) = kll; subr(12) = klr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15); + subl(14) = kll; subr(14) = klr; + subl(15) = krl; subr(15) = krr; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34); + subl(20) = kll; subr(20) = klr; + subl(21) = krl; subr(21) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17); + subl(24) = kll; subr(24) = klr; + subl(25) = krl; subr(25) = krr; + + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); subr(3) ^= subr(1); + subl(5) ^= subl(1); subr(5) ^= subr(1); + subl(7) ^= subl(1); subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); subr(11) ^= subr(1); + subl(13) ^= subl(1); subr(13) ^= subr(1); + subl(15) ^= subl(1); subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); subr(19) ^= subr(1); + subl(21) ^= subl(1); subr(21) ^= subr(1); + subl(23) ^= subl(1); subr(23) ^= subr(1); + subl(24) ^= subl(1); subr(24) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(25); kw4r = subr(25); + subl(22) ^= kw4l; subr(22) ^= kw4r; + subl(20) ^= kw4l; subr(20) ^= kw4r; + subl(18) ^= kw4l; subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; subr(14) ^= kw4r; + subl(12) ^= kw4l; subr(12) ^= kw4r; + subl(10) ^= kw4l; subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; subr(6) ^= kw4r; + subl(4) ^= kw4l; subr(4) ^= kw4r; + subl(2) ^= kw4l; subr(2) ^= kw4r; + subl(0) ^= kw4l; subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + CamelliaSubkeyL(16) = subl(16); + 
CamelliaSubkeyR(16) = subr(16);
+  CamelliaSubkeyL(17) = subl(17);
+  CamelliaSubkeyR(17) = subr(17);
+  tl = subl(15) ^ (subr(15) & ~subr(17));
+  dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw);
+  CamelliaSubkeyL(18) = tl ^ subl(19);
+  CamelliaSubkeyR(18) = tr ^ subr(19);
+  CamelliaSubkeyL(19) = subl(18) ^ subl(20);
+  CamelliaSubkeyR(19) = subr(18) ^ subr(20);
+  CamelliaSubkeyL(20) = subl(19) ^ subl(21);
+  CamelliaSubkeyR(20) = subr(19) ^ subr(21);
+  CamelliaSubkeyL(21) = subl(20) ^ subl(22);
+  CamelliaSubkeyR(21) = subr(20) ^ subr(22);
+  CamelliaSubkeyL(22) = subl(21) ^ subl(23);
+  CamelliaSubkeyR(22) = subr(21) ^ subr(23);
+  CamelliaSubkeyL(23) = subl(22);
+  CamelliaSubkeyR(23) = subr(22);
+  CamelliaSubkeyL(24) = subl(24) ^ subl(23);
+  CamelliaSubkeyR(24) = subr(24) ^ subr(23);
+
+  return;
+}
+
+void camellia_setup256(const unsigned char *key, u32 *subkey)
+{
+  u32 kll,klr,krl,krr;        /* left half of key */
+  u32 krll,krlr,krrl,krrr;    /* right half of key */
+  u32 il, ir, t0, t1, w0, w1; /* temporary variables */
+  u32 kw4l, kw4r, dw, tl, tr;
+  u32 subL[34];
+  u32 subR[34];
+
+  /**
+   *  key = (kll || klr || krl || krr || krll || krlr || krrl || krrr)
+   *  (|| is concatenation)
+   */
+
+  kll  = GETU32(key     );
+  klr  = GETU32(key +  4);
+  krl  = GETU32(key +  8);
+  krr  = GETU32(key + 12);
+  krll = GETU32(key + 16);
+  krlr = GETU32(key + 20);
+  krrl = GETU32(key + 24);
+  krrr = GETU32(key + 28);
+
+  /* generate KL dependent subkeys */
+  subl(0) = kll; subr(0) = klr;
+  subl(1) = krl; subr(1) = krr;
+  CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 45);
+  subl(12) = kll; subr(12) = klr;
+  subl(13) = krl; subr(13) = krr;
+  CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 15);
+  subl(16) = kll; subr(16) = klr;
+  subl(17) = krl; subr(17) = krr;
+  CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 17);
+  subl(22) = kll; subr(22) = klr;
+  subl(23) = krl; subr(23) = krr;
+  CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 34);
+  subl(30) = kll; subr(30) = klr;
+  subl(31) = krl; subr(31) = krr;
+
+  /* generate KR dependent subkeys */
+  CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15);
+  subl(4) = krll; subr(4) = krlr;
+  subl(5) = krrl; subr(5) = krrr;
+  CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 15);
+  subl(8) = krll; subr(8) = krlr;
+  subl(9) = krrl; subr(9) = krrr;
+  CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30);
+  subl(18) = krll; subr(18) = krlr;
+  subl(19) = krrl; subr(19) = krrr;
+  CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34);
+  subl(26) = krll; subr(26) = krlr;
+  subl(27) = krrl; subr(27) = krrr;
+  CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 34);
+
+  /* generate KA */
+  kll = subl(0) ^ krll; klr = subr(0) ^ krlr;
+  krl = subl(1) ^ krrl; krr = subr(1) ^ krrr;
+  CAMELLIA_F(kll, klr,
+             CAMELLIA_SIGMA1L, CAMELLIA_SIGMA1R,
+             w0, w1, il, ir, t0, t1);
+  krl ^= w0; krr ^= w1;
+  CAMELLIA_F(krl, krr,
+             CAMELLIA_SIGMA2L, CAMELLIA_SIGMA2R,
+             kll, klr, il, ir, t0, t1);
+  kll ^= krll; klr ^= krlr;
+  CAMELLIA_F(kll, klr,
+             CAMELLIA_SIGMA3L, CAMELLIA_SIGMA3R,
+             krl, krr, il, ir, t0, t1);
+  krl ^= w0 ^ krrl; krr ^= w1 ^ krrr;
+  CAMELLIA_F(krl, krr,
+             CAMELLIA_SIGMA4L, CAMELLIA_SIGMA4R,
+             w0, w1, il, ir, t0, t1);
+  kll ^= w0; klr ^= w1;
+
+  /* generate KB */
+  krll ^= kll; krlr ^= klr;
+  krrl ^= krl; krrr ^= krr;
+  CAMELLIA_F(krll, krlr,
+             CAMELLIA_SIGMA5L, CAMELLIA_SIGMA5R,
+             w0, w1, il, ir, t0, t1);
+  krrl ^= w0; krrr ^= w1;
+  CAMELLIA_F(krrl, krrr,
+             CAMELLIA_SIGMA6L, CAMELLIA_SIGMA6R,
+             w0, w1, il, ir, t0, t1);
+  krll ^= w0; krlr ^= w1;
+
+  /* generate KA dependent subkeys */
+  CAMELLIA_ROLDQ(kll, klr, krl, 
krr, w0, w1, 15); + subl(6) = kll; subr(6) = klr; + subl(7) = krl; subr(7) = krr; + CAMELLIA_ROLDQ(kll, klr, krl, krr, w0, w1, 30); + subl(14) = kll; subr(14) = klr; + subl(15) = krl; subr(15) = krr; + subl(24) = klr; subr(24) = krl; + subl(25) = krr; subr(25) = kll; + CAMELLIA_ROLDQo32(kll, klr, krl, krr, w0, w1, 49); + subl(28) = kll; subr(28) = klr; + subl(29) = krl; subr(29) = krr; + + /* generate KB dependent subkeys */ + subl(2) = krll; subr(2) = krlr; + subl(3) = krrl; subr(3) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(10) = krll; subr(10) = krlr; + subl(11) = krrl; subr(11) = krrr; + CAMELLIA_ROLDQ(krll, krlr, krrl, krrr, w0, w1, 30); + subl(20) = krll; subr(20) = krlr; + subl(21) = krrl; subr(21) = krrr; + CAMELLIA_ROLDQo32(krll, krlr, krrl, krrr, w0, w1, 51); + subl(32) = krll; subr(32) = krlr; + subl(33) = krrl; subr(33) = krrr; + + /* absorb kw2 to other subkeys */ + subl(3) ^= subl(1); subr(3) ^= subr(1); + subl(5) ^= subl(1); subr(5) ^= subr(1); + subl(7) ^= subl(1); subr(7) ^= subr(1); + subl(1) ^= subr(1) & ~subr(9); + dw = subl(1) & subl(9), subr(1) ^= CAMELLIA_RL1(dw); + subl(11) ^= subl(1); subr(11) ^= subr(1); + subl(13) ^= subl(1); subr(13) ^= subr(1); + subl(15) ^= subl(1); subr(15) ^= subr(1); + subl(1) ^= subr(1) & ~subr(17); + dw = subl(1) & subl(17), subr(1) ^= CAMELLIA_RL1(dw); + subl(19) ^= subl(1); subr(19) ^= subr(1); + subl(21) ^= subl(1); subr(21) ^= subr(1); + subl(23) ^= subl(1); subr(23) ^= subr(1); + subl(1) ^= subr(1) & ~subr(25); + dw = subl(1) & subl(25), subr(1) ^= CAMELLIA_RL1(dw); + subl(27) ^= subl(1); subr(27) ^= subr(1); + subl(29) ^= subl(1); subr(29) ^= subr(1); + subl(31) ^= subl(1); subr(31) ^= subr(1); + subl(32) ^= subl(1); subr(32) ^= subr(1); + + /* absorb kw4 to other subkeys */ + kw4l = subl(33); kw4r = subr(33); + subl(30) ^= kw4l; subr(30) ^= kw4r; + subl(28) ^= kw4l; subr(28) ^= kw4r; + subl(26) ^= kw4l; subr(26) ^= kw4r; + kw4l ^= kw4r & ~subr(24); + dw = kw4l & subl(24), kw4r ^= CAMELLIA_RL1(dw); + subl(22) ^= kw4l; subr(22) ^= kw4r; + subl(20) ^= kw4l; subr(20) ^= kw4r; + subl(18) ^= kw4l; subr(18) ^= kw4r; + kw4l ^= kw4r & ~subr(16); + dw = kw4l & subl(16), kw4r ^= CAMELLIA_RL1(dw); + subl(14) ^= kw4l; subr(14) ^= kw4r; + subl(12) ^= kw4l; subr(12) ^= kw4r; + subl(10) ^= kw4l; subr(10) ^= kw4r; + kw4l ^= kw4r & ~subr(8); + dw = kw4l & subl(8), kw4r ^= CAMELLIA_RL1(dw); + subl(6) ^= kw4l; subr(6) ^= kw4r; + subl(4) ^= kw4l; subr(4) ^= kw4r; + subl(2) ^= kw4l; subr(2) ^= kw4r; + subl(0) ^= kw4l; subr(0) ^= kw4r; + + /* key XOR is end of F-function */ + CamelliaSubkeyL(0) = subl(0) ^ subl(2); + CamelliaSubkeyR(0) = subr(0) ^ subr(2); + CamelliaSubkeyL(2) = subl(3); + CamelliaSubkeyR(2) = subr(3); + CamelliaSubkeyL(3) = subl(2) ^ subl(4); + CamelliaSubkeyR(3) = subr(2) ^ subr(4); + CamelliaSubkeyL(4) = subl(3) ^ subl(5); + CamelliaSubkeyR(4) = subr(3) ^ subr(5); + CamelliaSubkeyL(5) = subl(4) ^ subl(6); + CamelliaSubkeyR(5) = subr(4) ^ subr(6); + CamelliaSubkeyL(6) = subl(5) ^ subl(7); + CamelliaSubkeyR(6) = subr(5) ^ subr(7); + tl = subl(10) ^ (subr(10) & ~subr(8)); + dw = tl & subl(8), tr = subr(10) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(7) = subl(6) ^ tl; + CamelliaSubkeyR(7) = subr(6) ^ tr; + CamelliaSubkeyL(8) = subl(8); + CamelliaSubkeyR(8) = subr(8); + CamelliaSubkeyL(9) = subl(9); + CamelliaSubkeyR(9) = subr(9); + tl = subl(7) ^ (subr(7) & ~subr(9)); + dw = tl & subl(9), tr = subr(7) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(10) = tl ^ subl(11); + CamelliaSubkeyR(10) = tr ^ subr(11); + 
CamelliaSubkeyL(11) = subl(10) ^ subl(12); + CamelliaSubkeyR(11) = subr(10) ^ subr(12); + CamelliaSubkeyL(12) = subl(11) ^ subl(13); + CamelliaSubkeyR(12) = subr(11) ^ subr(13); + CamelliaSubkeyL(13) = subl(12) ^ subl(14); + CamelliaSubkeyR(13) = subr(12) ^ subr(14); + CamelliaSubkeyL(14) = subl(13) ^ subl(15); + CamelliaSubkeyR(14) = subr(13) ^ subr(15); + tl = subl(18) ^ (subr(18) & ~subr(16)); + dw = tl & subl(16), tr = subr(18) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(15) = subl(14) ^ tl; + CamelliaSubkeyR(15) = subr(14) ^ tr; + CamelliaSubkeyL(16) = subl(16); + CamelliaSubkeyR(16) = subr(16); + CamelliaSubkeyL(17) = subl(17); + CamelliaSubkeyR(17) = subr(17); + tl = subl(15) ^ (subr(15) & ~subr(17)); + dw = tl & subl(17), tr = subr(15) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(18) = tl ^ subl(19); + CamelliaSubkeyR(18) = tr ^ subr(19); + CamelliaSubkeyL(19) = subl(18) ^ subl(20); + CamelliaSubkeyR(19) = subr(18) ^ subr(20); + CamelliaSubkeyL(20) = subl(19) ^ subl(21); + CamelliaSubkeyR(20) = subr(19) ^ subr(21); + CamelliaSubkeyL(21) = subl(20) ^ subl(22); + CamelliaSubkeyR(21) = subr(20) ^ subr(22); + CamelliaSubkeyL(22) = subl(21) ^ subl(23); + CamelliaSubkeyR(22) = subr(21) ^ subr(23); + tl = subl(26) ^ (subr(26) & ~subr(24)); + dw = tl & subl(24), tr = subr(26) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(23) = subl(22) ^ tl; + CamelliaSubkeyR(23) = subr(22) ^ tr; + CamelliaSubkeyL(24) = subl(24); + CamelliaSubkeyR(24) = subr(24); + CamelliaSubkeyL(25) = subl(25); + CamelliaSubkeyR(25) = subr(25); + tl = subl(23) ^ (subr(23) & ~subr(25)); + dw = tl & subl(25), tr = subr(23) ^ CAMELLIA_RL1(dw); + CamelliaSubkeyL(26) = tl ^ subl(27); + CamelliaSubkeyR(26) = tr ^ subr(27); + CamelliaSubkeyL(27) = subl(26) ^ subl(28); + CamelliaSubkeyR(27) = subr(26) ^ subr(28); + CamelliaSubkeyL(28) = subl(27) ^ subl(29); + CamelliaSubkeyR(28) = subr(27) ^ subr(29); + CamelliaSubkeyL(29) = subl(28) ^ subl(30); + CamelliaSubkeyR(29) = subr(28) ^ subr(30); + CamelliaSubkeyL(30) = subl(29) ^ subl(31); + CamelliaSubkeyR(30) = subr(29) ^ subr(31); + CamelliaSubkeyL(31) = subl(30); + CamelliaSubkeyR(31) = subr(30); + CamelliaSubkeyL(32) = subl(32) ^ subl(31); + CamelliaSubkeyR(32) = subr(32) ^ subr(31); + + return; +} + +void camellia_setup192(const unsigned char *key, u32 *subkey) +{ + unsigned char kk[32]; + u32 krll, krlr, krrl,krrr; + + memcpy(kk, key, 24); + memcpy((unsigned char *)&krll, key+16,4); + memcpy((unsigned char *)&krlr, key+20,4); + krrl = ~krll; + krrr = ~krlr; + memcpy(kk+24, (unsigned char *)&krrl, 4); + memcpy(kk+28, (unsigned char *)&krrr, 4); + camellia_setup256(kk, subkey); + return; +} + + +#ifndef USE_ARM_ASM +/** + * Stuff related to camellia encryption/decryption + * + * "io" must be 4byte aligned and big-endian data. 
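+ *
+ * Round structure: 128-bit keys run 18 Feistel rounds with FL/FL^-1
+ * layers after rounds 6 and 12 (camellia_encrypt128); 192- and
+ * 256-bit keys run 24 rounds with layers after rounds 6, 12 and 18
+ * (camellia_encrypt256, which also serves 192-bit keys).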
+ */
+void camellia_encrypt128(const u32 *subkey, u32 *blocks)
+{
+  u32 il, ir, t0, t1;
+  u32 io[4];
+
+  io[0] = blocks[0];
+  io[1] = blocks[1];
+  io[2] = blocks[2];
+  io[3] = blocks[3];
+
+  /* pre whitening but absorb kw2*/
+  io[0] ^= CamelliaSubkeyL(0);
+  io[1] ^= CamelliaSubkeyR(0);
+  /* main iteration */
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+                   io[0],io[1],il,ir,t0,t1);
+
+  CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+               CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+               CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+               t0,t1,il,ir);
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+                   io[0],io[1],il,ir,t0,t1);
+
+  CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+               CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+               CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+               t0,t1,il,ir);
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+                   io[0],io[1],il,ir,t0,t1);
+
+  /* post whitening but kw4 */
+  io[2] ^= CamelliaSubkeyL(24);
+  io[3] ^= CamelliaSubkeyR(24);
+
+  t0 = io[0];
+  t1 = io[1];
+  io[0] = io[2];
+  io[1] = io[3];
+  io[2] = t0;
+  io[3] = t1;
+
+  blocks[0] = io[0];
+  blocks[1] = io[1];
+  blocks[2] = io[2];
+  blocks[3] = io[3];
+
+  return;
+}
+
+void camellia_decrypt128(const u32 *subkey, u32 *blocks)
+{
+  u32 il,ir,t0,t1;               /* temporary variables */
+  u32 io[4];
+
+  io[0] = blocks[0];
+  io[1] = blocks[1];
+  io[2] = blocks[2];
+  io[3] = blocks[3];
+
+  /* pre whitening but absorb kw2*/
+  io[0] ^= CamelliaSubkeyL(24);
+  io[1] ^= CamelliaSubkeyR(24);
+
+  /* main iteration */
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+                   io[0],io[1],il,ir,t0,t1);
+
+  CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+               CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+               CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+               t0,t1,il,ir);
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+                   io[0],io[1],il,ir,t0,t1);
+
+  CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+               CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+               CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+               t0,t1,il,ir);
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+                   io[0],io[1],il,ir,t0,t1);
+
+  /* post whitening but kw4 */
+  io[2] ^= CamelliaSubkeyL(0);
+  io[3] ^= CamelliaSubkeyR(0);
+
+  t0 = io[0];
+  t1 = io[1];
+  io[0] = io[2];
+  io[1] = io[3];
+  io[2] = t0;
+  io[3] = t1;
+
+  blocks[0] = io[0];
+  blocks[1] = io[1];
+  blocks[2] = io[2];
+  blocks[3] = io[3];
+
+  return;
+}
+
+/**
+ * stuff for 192 and 256bit encryption/decryption
+ */
+void camellia_encrypt256(const u32 *subkey, u32 *blocks)
+{
+  u32 il,ir,t0,t1;               /* temporary variables */
+  u32 io[4];
+
+  io[0] = blocks[0];
+  io[1] = blocks[1];
+  io[2] = blocks[2];
+  io[3] = blocks[3];
+
+  /* pre whitening but absorb kw2*/
+  io[0] ^= CamelliaSubkeyL(0);
+  io[1] ^= CamelliaSubkeyR(0);
+
+  /* main iteration */
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(2),CamelliaSubkeyR(2),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(3),CamelliaSubkeyR(3),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(4),CamelliaSubkeyR(4),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(5),CamelliaSubkeyR(5),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(6),CamelliaSubkeyR(6),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(7),CamelliaSubkeyR(7),
+                   io[0],io[1],il,ir,t0,t1);
+
+  CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+               CamelliaSubkeyL(8),CamelliaSubkeyR(8),
+               CamelliaSubkeyL(9),CamelliaSubkeyR(9),
+               t0,t1,il,ir);
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(10),CamelliaSubkeyR(10),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(11),CamelliaSubkeyR(11),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(12),CamelliaSubkeyR(12),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(13),CamelliaSubkeyR(13),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(14),CamelliaSubkeyR(14),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(15),CamelliaSubkeyR(15),
+                   io[0],io[1],il,ir,t0,t1);
+
+  CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+               CamelliaSubkeyL(16),CamelliaSubkeyR(16),
+               CamelliaSubkeyL(17),CamelliaSubkeyR(17),
+               t0,t1,il,ir);
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(18),CamelliaSubkeyR(18),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(19),CamelliaSubkeyR(19),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+                   io[0],io[1],il,ir,t0,t1);
+
+  CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+               CamelliaSubkeyL(24),CamelliaSubkeyR(24),
+               CamelliaSubkeyL(25),CamelliaSubkeyR(25),
+               t0,t1,il,ir);
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(26),CamelliaSubkeyR(26),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(27),CamelliaSubkeyR(27),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(28),CamelliaSubkeyR(28),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(29),CamelliaSubkeyR(29),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(30),CamelliaSubkeyR(30),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(31),CamelliaSubkeyR(31),
+                   io[0],io[1],il,ir,t0,t1);
+
+  /* post whitening but kw4 */
+  io[2] ^= CamelliaSubkeyL(32);
+  io[3] ^= CamelliaSubkeyR(32);
+
+  t0 = io[0];
+  t1 = io[1];
+  io[0] = io[2];
+  io[1] = io[3];
+  io[2] = t0;
+  io[3] = t1;
+
+  blocks[0] = io[0];
+  blocks[1] = io[1];
+  blocks[2] = io[2];
+  blocks[3] = io[3];
+
+  return;
+}
+
+void camellia_decrypt256(const u32 *subkey, u32 *blocks)
+{
+  u32 il,ir,t0,t1;               /* temporary variables */
+  u32 io[4];
+
+  io[0] = blocks[0];
+  io[1] = blocks[1];
+  io[2] = blocks[2];
+  io[3] = blocks[3];
+
+  /* pre whitening but absorb kw2*/
+  io[0] ^= CamelliaSubkeyL(32);
+  io[1] ^= CamelliaSubkeyR(32);
+
+  /* main iteration */
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(31),CamelliaSubkeyR(31),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(30),CamelliaSubkeyR(30),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(29),CamelliaSubkeyR(29),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(28),CamelliaSubkeyR(28),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(27),CamelliaSubkeyR(27),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(26),CamelliaSubkeyR(26),
+                   io[0],io[1],il,ir,t0,t1);
+
+  CAMELLIA_FLS(io[0],io[1],io[2],io[3],
+               CamelliaSubkeyL(25),CamelliaSubkeyR(25),
+               CamelliaSubkeyL(24),CamelliaSubkeyR(24),
+               t0,t1,il,ir);
+
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(23),CamelliaSubkeyR(23),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(22),CamelliaSubkeyR(22),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+                   CamelliaSubkeyL(21),CamelliaSubkeyR(21),
+                   io[2],io[3],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[2],io[3],
+                   CamelliaSubkeyL(20),CamelliaSubkeyR(20),
+                   io[0],io[1],il,ir,t0,t1);
+  CAMELLIA_ROUNDSM(io[0],io[1],
+
CamelliaSubkeyL(19),CamelliaSubkeyR(19), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(18),CamelliaSubkeyR(18), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(17),CamelliaSubkeyR(17), + CamelliaSubkeyL(16),CamelliaSubkeyR(16), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(15),CamelliaSubkeyR(15), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(14),CamelliaSubkeyR(14), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(13),CamelliaSubkeyR(13), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(12),CamelliaSubkeyR(12), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(11),CamelliaSubkeyR(11), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(10),CamelliaSubkeyR(10), + io[0],io[1],il,ir,t0,t1); + + CAMELLIA_FLS(io[0],io[1],io[2],io[3], + CamelliaSubkeyL(9),CamelliaSubkeyR(9), + CamelliaSubkeyL(8),CamelliaSubkeyR(8), + t0,t1,il,ir); + + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(7),CamelliaSubkeyR(7), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(6),CamelliaSubkeyR(6), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(5),CamelliaSubkeyR(5), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(4),CamelliaSubkeyR(4), + io[0],io[1],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[0],io[1], + CamelliaSubkeyL(3),CamelliaSubkeyR(3), + io[2],io[3],il,ir,t0,t1); + CAMELLIA_ROUNDSM(io[2],io[3], + CamelliaSubkeyL(2),CamelliaSubkeyR(2), + io[0],io[1],il,ir,t0,t1); + + /* post whitening but kw4 */ + io[2] ^= CamelliaSubkeyL(0); + io[3] ^= CamelliaSubkeyR(0); + + t0 = io[0]; + t1 = io[1]; + io[0] = io[2]; + io[1] = io[3]; + io[2] = t0; + io[3] = t1; + + blocks[0] = io[0]; + blocks[1] = io[1]; + blocks[2] = io[2]; + blocks[3] = io[3]; + + return; +} +#endif /*!USE_ARM_ASM*/ + + +/*** + * + * API for compatibility + */ + +void Camellia_Ekeygen(const int keyBitLength, + const unsigned char *rawKey, + KEY_TABLE_TYPE keyTable) +{ + switch(keyBitLength) { + case 128: + camellia_setup128(rawKey, keyTable); + break; + case 192: + camellia_setup192(rawKey, keyTable); + break; + case 256: + camellia_setup256(rawKey, keyTable); + break; + default: + break; + } +} + + +#ifndef USE_ARM_ASM +void Camellia_EncryptBlock(const int keyBitLength, + const unsigned char *plaintext, + const KEY_TABLE_TYPE keyTable, + unsigned char *ciphertext) +{ + u32 tmp[4]; + + tmp[0] = GETU32(plaintext); + tmp[1] = GETU32(plaintext + 4); + tmp[2] = GETU32(plaintext + 8); + tmp[3] = GETU32(plaintext + 12); + + switch (keyBitLength) { + case 128: + camellia_encrypt128(keyTable, tmp); + break; + case 192: + /* fall through */ + case 256: + camellia_encrypt256(keyTable, tmp); + break; + default: + break; + } + + PUTU32(ciphertext, tmp[0]); + PUTU32(ciphertext + 4, tmp[1]); + PUTU32(ciphertext + 8, tmp[2]); + PUTU32(ciphertext + 12, tmp[3]); +} + +void Camellia_DecryptBlock(const int keyBitLength, + const unsigned char *ciphertext, + const KEY_TABLE_TYPE keyTable, + unsigned char *plaintext) +{ + u32 tmp[4]; + + tmp[0] = GETU32(ciphertext); + tmp[1] = GETU32(ciphertext + 4); + tmp[2] = GETU32(ciphertext + 8); + tmp[3] = GETU32(ciphertext + 12); + + switch (keyBitLength) { + case 128: + camellia_decrypt128(keyTable, tmp); + break; + case 192: + /* fall through */ + case 256: + camellia_decrypt256(keyTable, tmp); + break; + default: + break; 
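+    /* Unsupported key lengths leave tmp unchanged. */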
+  }
+  PUTU32(plaintext, tmp[0]);
+  PUTU32(plaintext + 4, tmp[1]);
+  PUTU32(plaintext + 8, tmp[2]);
+  PUTU32(plaintext + 12, tmp[3]);
+}
+#endif /*!USE_ARM_ASM*/
diff --git a/comm/third_party/libgcrypt/cipher/camellia.h b/comm/third_party/libgcrypt/cipher/camellia.h
new file mode 100644
index 0000000000..d7a1e6f4a0
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/camellia.h
@@ -0,0 +1,95 @@
+/* camellia.h ver 1.2.0
+ *
+ * Copyright (C) 2006,2007
+ * NTT (Nippon Telegraph and Telephone Corporation).
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef HEADER_CAMELLIA_H
+#define HEADER_CAMELLIA_H
+
+/* To use Camellia with libraries it is often useful to keep the name
+ * space of the library clean.  The following macro is thus useful:
+ *
+ *   #define CAMELLIA_EXT_SYM_PREFIX foo_
+ *
+ * This prefixes all external symbols with "foo_".
+ */
+#ifdef HAVE_CONFIG_H
+#include <config.h>
+/* USE_ARM_ASM indicates whether to use ARM assembly code. */
+# undef USE_ARM_ASM
+# if defined(__ARMEL__)
+#  ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS
+#   define USE_ARM_ASM 1
+#  endif
+# endif
+# if defined(__AARCH64EL__)
+#  ifdef HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS
+#   define USE_ARM_ASM 1
+#  endif
+# endif
+#endif
+#ifdef CAMELLIA_EXT_SYM_PREFIX
+#define CAMELLIA_PREFIX1(x,y) x ## y
+#define CAMELLIA_PREFIX2(x,y) CAMELLIA_PREFIX1(x,y)
+#define CAMELLIA_PREFIX(x) CAMELLIA_PREFIX2(CAMELLIA_EXT_SYM_PREFIX,x)
+#define Camellia_Ekeygen      CAMELLIA_PREFIX(Camellia_Ekeygen)
+#define Camellia_EncryptBlock CAMELLIA_PREFIX(Camellia_EncryptBlock)
+#define Camellia_DecryptBlock CAMELLIA_PREFIX(Camellia_DecryptBlock)
+#define camellia_decrypt128   CAMELLIA_PREFIX(camellia_decrypt128)
+#define camellia_decrypt256   CAMELLIA_PREFIX(camellia_decrypt256)
+#define camellia_encrypt128   CAMELLIA_PREFIX(camellia_encrypt128)
+#define camellia_encrypt256   CAMELLIA_PREFIX(camellia_encrypt256)
+#define camellia_setup128     CAMELLIA_PREFIX(camellia_setup128)
+#define camellia_setup192     CAMELLIA_PREFIX(camellia_setup192)
+#define camellia_setup256     CAMELLIA_PREFIX(camellia_setup256)
+#endif /*CAMELLIA_EXT_SYM_PREFIX*/
+
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define CAMELLIA_BLOCK_SIZE 16
+#define CAMELLIA_TABLE_BYTE_LEN 272
+#define CAMELLIA_TABLE_WORD_LEN (CAMELLIA_TABLE_BYTE_LEN / 4)
+
+typedef unsigned int KEY_TABLE_TYPE[CAMELLIA_TABLE_WORD_LEN];
+
+
+void Camellia_Ekeygen(const int keyBitLength,
+                      const unsigned char *rawKey,
+                      KEY_TABLE_TYPE keyTable);
+
+#ifndef USE_ARM_ASM
+void Camellia_EncryptBlock(const int keyBitLength,
+                           const unsigned char *plaintext,
+                           const KEY_TABLE_TYPE keyTable,
+                           unsigned char *cipherText);
+
+void Camellia_DecryptBlock(const int keyBitLength,
+                           const unsigned char *cipherText,
+                           const KEY_TABLE_TYPE keyTable,
+                           unsigned char *plaintext);
+#endif /*!USE_ARM_ASM*/
+
+
+#ifdef __cplusplus
+}
+#endif
+
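+/* Example (editorial sketch, not upstream code): encrypting one
+ * 16-byte block through the compatibility API above, assuming a
+ * 128-bit raw key in `rawKey' and the input block in `in':
+ *
+ *   KEY_TABLE_TYPE kt;
+ *   unsigned char out[CAMELLIA_BLOCK_SIZE];
+ *   Camellia_Ekeygen(128, rawKey, kt);
+ *   Camellia_EncryptBlock(128, in, kt, out);
+ */
+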
+#endif /* HEADER_CAMELLIA_H */
diff --git a/comm/third_party/libgcrypt/cipher/cast5-amd64.S b/comm/third_party/libgcrypt/cipher/cast5-amd64.S
new file mode 100644
index 0000000000..82f678901d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cast5-amd64.S
@@ -0,0 +1,663 @@
+/* cast5-amd64.S - AMD64 assembly implementation of CAST5 cipher
+ *
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+     defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) && defined(USE_CAST5)
+
+#include "asm-common-amd64.h"
+
+.text
+
+.extern _gcry_cast5_s1to4;
+
+#define s1 0
+#define s2 (s1 + (4 * 256))
+#define s3 (s2 + (4 * 256))
+#define s4 (s3 + (4 * 256))
+
+/* structure of CAST5_context: */
+#define Km 0
+#define Kr (Km + (16 * 4))
+
+/* register macros */
+#define CTX %rdi
+#define RIO %rsi
+#define RTAB %r8
+
+#define RLR0 %r9
+#define RLR1 %r10
+#define RLR2 %r11
+#define RLR3 %r12
+
+#define RLR0d %r9d
+#define RLR1d %r10d
+#define RLR2d %r11d
+#define RLR3d %r12d
+
+#define RX0 %rax
+#define RX1 %rbx
+#define RX2 %rdx
+
+#define RX0d %eax
+#define RX1d %ebx
+#define RX2d %edx
+
+#define RX0bl %al
+#define RX1bl %bl
+#define RX2bl %dl
+
+#define RX0bh %ah
+#define RX1bh %bh
+#define RX2bh %dh
+
+#define RKR %rcx
+#define RKRd %ecx
+#define RKRbl %cl
+
+#define RT0 %rbp
+#define RT1 %rsi
+
+#define RT0d %ebp
+#define RT1d %esi
+
+#define RKM0d %r13d
+#define RKM1d %r14d
+
+/***********************************************************************
+ * 1-way cast5
+ ***********************************************************************/
+#define dummy(x)
+
+#define shr_kr(none) \
+	shrq $8, RKR;
+
+#define F(km, load_next_kr, op0, op1, op2, op3) \
+	op0 ## l RLR0d, km ## d; \
+	roll RKRbl, km ## d; \
+	rorq $32, RLR0; \
+	movzbl km ## bh, RT0d; \
+	movzbl km ## bl, RT1d; \
+	roll $16, km ## d; \
+	movl s1(RTAB,RT0,4), RT0d; \
+	op1 ## l s2(RTAB,RT1,4), RT0d; \
+	load_next_kr(kr_next); \
+	movzbl km ## bh, RT1d; \
+	movzbl km ## bl, km ## d; \
+	op2 ## l s3(RTAB,RT1,4), RT0d; \
+	op3 ## l s4(RTAB,km,4), RT0d; \
+	xorq RT0, RLR0;
+
+#define F1(km, load_next_kr) \
+	F(##km, load_next_kr, add, xor, sub, add)
+#define F2(km, load_next_kr) \
+	F(##km, load_next_kr, xor, sub, add, xor)
+#define F3(km, load_next_kr) \
+	F(##km, load_next_kr, sub, add, xor, sub)
+
+#define get_round_km(n, km) \
+	movl Km+4*(n)(CTX), km;
+
+#define get_round_kr_enc(n) \
+	movq $0x1010101010101010, RKR; \
+	\
+	/* merge rorl rk and rorl $16 */ \
+	xorq Kr+(n)(CTX), RKR;
+
+#define get_round_kr_dec(n) \
+	movq $0x1010101010101010, RKR; \
+	\
+	/* merge rorl rk and rorl $16 */ \
+	xorq Kr+(n - 7)(CTX), RKR; \
+	bswapq RKR;
+
+#define round_enc(n, FA, FB, fn1, fn2) \
+	get_round_km(n + 1, RX2d); \
+	FA(RX0, fn1); \
+	get_round_km(n + 2, RX0d); \
+	FB(RX2, fn2);
+
+#define 
round_enc_last(n, FXA, FXB) \ + get_round_km(n + 1, RX2d); \ + \ + FXA(RX0, shr_kr); \ + FXB(RX2, dummy); + +#define round_enc_1(n, FA, FB) \ + round_enc(n, FA, FB, shr_kr, shr_kr) + +#define round_enc_2(n, FA, FB) \ + round_enc(n, FA, FB, shr_kr, dummy) + +#define round_dec(n, FA, FB, fn1, fn2) \ + get_round_km(n - 1, RX2d); \ + FA(RX0, fn1); \ + get_round_km(n - 2, RX0d); \ + FB(RX2, fn2); + +#define round_dec_last(n, FXA, FXB) \ + get_round_km(n - 1, RX2d); \ + FXA(RX0, shr_kr); \ + FXB(RX2, dummy); + +#define round_dec_1(n, FA, FB) \ + round_dec(n, FA, FB, shr_kr, shr_kr) + +#define round_dec_2(n, FA, FB) \ + round_dec(n, FA, FB, shr_kr, dummy) + +#define read_block() \ + movq (RIO), RLR0; \ + bswapq RLR0; + +#define write_block() \ + bswapq RLR0; \ + rorq $32, RLR0; \ + movq RLR0, (RIO); + +.align 8 +.globl _gcry_cast5_amd64_encrypt_block +ELF(.type _gcry_cast5_amd64_encrypt_block,@function;) + +_gcry_cast5_amd64_encrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + + movq %rsi, %r10; + + GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); + + movq %rdx, RIO; + read_block(); + + get_round_km(0, RX0d); + get_round_kr_enc(0); + round_enc_1(0, F1, F2); + round_enc_1(2, F3, F1); + round_enc_1(4, F2, F3); + round_enc_2(6, F1, F2); + get_round_kr_enc(8); + round_enc_1(8, F3, F1); + round_enc_1(10, F2, F3); + round_enc_1(12, F1, F2); + round_enc_last(14, F3, F1); + + movq %r10, RIO; + write_block(); + + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_cast5_amd64_encrypt_block,.-_gcry_cast5_amd64_encrypt_block;) + +.align 8 +.globl _gcry_cast5_amd64_decrypt_block +ELF(.type _gcry_cast5_amd64_decrypt_block,@function;) + +_gcry_cast5_amd64_decrypt_block: + /* input: + * %rdi: ctx, CTX + * %rsi: dst + * %rdx: src + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + + movq %rsi, %r10; + + GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); + + movq %rdx, RIO; + read_block(); + + get_round_km(15, RX0d); + get_round_kr_dec(15); + round_dec_1(15, F1, F3); + round_dec_1(13, F2, F1); + round_dec_1(11, F3, F2); + round_dec_2(9, F1, F3); + get_round_kr_dec(7); + round_dec_1(7, F2, F1); + round_dec_1(5, F3, F2); + round_dec_1(3, F1, F3); + round_dec_last(1, F2, F1); + + movq %r10, RIO; + write_block(); + + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_cast5_amd64_decrypt_block,.-_gcry_cast5_amd64_decrypt_block;) + +/********************************************************************** + 4-way cast5, four blocks parallel + **********************************************************************/ +#define F_tail(rlr, rx, op1, op2, op3) \ + movzbl rx ## bh, RT0d; \ + movzbl rx ## bl, RT1d; \ + roll $16, rx ## d; \ + movl s1(RTAB,RT0,4), RT0d; \ + op1 ## l s2(RTAB,RT1,4), RT0d; \ + movzbl rx ## bh, RT1d; \ + movzbl rx ## bl, rx ## d; \ + op2 ## l s3(RTAB,RT1,4), RT0d; \ + op3 ## l s4(RTAB,rx,4), RT0d; \ + xorq RT0, rlr; + +#define F4(km, load_next_kr, op0, op1, op2, op3) \ + movl km, RX0d; \ + op0 ## l RLR0d, RX0d; \ + roll RKRbl, RX0d; \ + rorq $32, RLR0; \ + \ + movl km, RX1d; \ + op0 ## l RLR1d, RX1d; \ + roll RKRbl, RX1d; \ + rorq $32, RLR1; \ + \ + movl km, RX2d; \ + op0 ## l RLR2d, RX2d; \ + roll RKRbl, RX2d; \ + rorq $32, RLR2; \ + \ + F_tail(RLR0, RX0, op1, op2, op3); \ 
+ F_tail(RLR1, RX1, op1, op2, op3); \ + F_tail(RLR2, RX2, op1, op2, op3); \ + \ + movl km, RX0d; \ + op0 ## l RLR3d, RX0d; \ + roll RKRbl, RX0d; \ + load_next_kr(); \ + rorq $32, RLR3; \ + \ + F_tail(RLR3, RX0, op1, op2, op3); + +#define F4_1(km, load_next_kr) \ + F4(km, load_next_kr, add, xor, sub, add) +#define F4_2(km, load_next_kr) \ + F4(km, load_next_kr, xor, sub, add, xor) +#define F4_3(km, load_next_kr) \ + F4(km, load_next_kr, sub, add, xor, sub) + +#define round_enc4(n, FA, FB, fn1, fn2) \ + get_round_km(n + 1, RKM1d); \ + FA(RKM0d, fn1); \ + get_round_km(n + 2, RKM0d); \ + FB(RKM1d, fn2); + +#define round_enc_last4(n, FXA, FXB) \ + get_round_km(n + 1, RKM1d); \ + FXA(RKM0d, shr_kr); \ + FXB(RKM1d, dummy); + +#define round_enc4_1(n, FA, FB) \ + round_enc4(n, FA, FB, shr_kr, shr_kr); + +#define round_enc4_2(n, FA, FB) \ + round_enc4(n, FA, FB, shr_kr, dummy); + +#define round_dec4(n, FA, FB, fn1, fn2) \ + get_round_km(n - 1, RKM1d); \ + FA(RKM0d, fn1); \ + get_round_km(n - 2, RKM0d); \ + FB(RKM1d, fn2); + +#define round_dec_last4(n, FXA, FXB) \ + get_round_km(n - 1, RKM1d); \ + FXA(RKM0d, shr_kr); \ + FXB(RKM1d, dummy); + +#define round_dec4_1(n, FA, FB) \ + round_dec4(n, FA, FB, shr_kr, shr_kr); + +#define round_dec4_2(n, FA, FB) \ + round_dec4(n, FA, FB, shr_kr, dummy); + +#define inbswap_block4(a, b, c, d) \ + bswapq a; \ + bswapq b; \ + bswapq c; \ + bswapq d; + +#define outbswap_block4(a, b, c, d) \ + bswapq a; \ + bswapq b; \ + bswapq c; \ + bswapq d; \ + rorq $32, a; \ + rorq $32, b; \ + rorq $32, c; \ + rorq $32, d; + +.align 8 +ELF(.type __cast5_enc_blk4,@function;) + +__cast5_enc_blk4: + /* input: + * %rdi: ctx, CTX + * RLR0,RLR1,RLR2,RLR3: four input plaintext blocks + * output: + * RLR0,RLR1,RLR2,RLR3: four output ciphertext blocks + */ + CFI_STARTPROC(); + GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); + + get_round_km(0, RKM0d); + get_round_kr_enc(0); + round_enc4_1(0, F4_1, F4_2); + round_enc4_1(2, F4_3, F4_1); + round_enc4_1(4, F4_2, F4_3); + round_enc4_2(6, F4_1, F4_2); + get_round_kr_enc(8); + round_enc4_1(8, F4_3, F4_1); + round_enc4_1(10, F4_2, F4_3); + round_enc4_1(12, F4_1, F4_2); + round_enc_last4(14, F4_3, F4_1); + + outbswap_block4(RLR0, RLR1, RLR2, RLR3); + ret; + CFI_ENDPROC(); +ELF(.size __cast5_enc_blk4,.-__cast5_enc_blk4;) + +.align 8 +ELF(.type __cast5_dec_blk4,@function;) + +__cast5_dec_blk4: + /* input: + * %rdi: ctx, CTX + * RLR0,RLR1,RLR2,RLR3: four input ciphertext blocks + * output: + * RLR0,RLR1,RLR2,RLR3: four output plaintext blocks + */ + CFI_STARTPROC(); + GET_EXTERN_POINTER(_gcry_cast5_s1to4, RTAB); + + inbswap_block4(RLR0, RLR1, RLR2, RLR3); + + get_round_km(15, RKM0d); + get_round_kr_dec(15); + round_dec4_1(15, F4_1, F4_3); + round_dec4_1(13, F4_2, F4_1); + round_dec4_1(11, F4_3, F4_2); + round_dec4_2(9, F4_1, F4_3); + get_round_kr_dec(7); + round_dec4_1(7, F4_2, F4_1); + round_dec4_1(5, F4_3, F4_2); + round_dec4_1(3, F4_1, F4_3); + round_dec_last4(1, F4_2, F4_1); + + outbswap_block4(RLR0, RLR1, RLR2, RLR3); + CFI_ENDPROC(); + ret; +ELF(.size __cast5_dec_blk4,.-__cast5_dec_blk4;) + +.align 8 +.globl _gcry_cast5_amd64_ctr_enc +ELF(.type _gcry_cast5_amd64_ctr_enc,@function;) +_gcry_cast5_amd64_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (big endian, 64bit) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + pushq %r14; + CFI_PUSH(%r14); + + pushq %rsi; 
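The counter handling continued below is easiest to read against its C equivalent: the IV is loaded and byte-swapped, four consecutive counter values are built with leaq, encrypted in parallel by __cast5_enc_blk4, and the result is XORed over the source as keystream. A minimal sketch of that dataflow, assuming hypothetical load64/store64 and endian helpers plus a one-block cast5_encrypt_u64 (none of these are upstream functions):

    u64 ctr = be_to_host64 (load64 (iv));   /* IV is a big-endian counter */
    for (int i = 0; i < 4; i++)
      store64 (dst + 8 * i,
               load64 (src + 8 * i) ^ cast5_encrypt_u64 (ctx, ctr + i));
    store64 (iv, host_to_be64 (ctr + 4));   /* stored-back IV = old IV + 4 */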
+ CFI_PUSH(%rsi); + pushq %rdx; + CFI_PUSH(%rdx); + + /* load IV and byteswap */ + movq (%rcx), RX0; + bswapq RX0; + movq RX0, RLR0; + + /* construct IVs */ + leaq 1(RX0), RLR1; + leaq 2(RX0), RLR2; + leaq 3(RX0), RLR3; + leaq 4(RX0), RX0; + bswapq RX0; + + /* store new IV */ + movq RX0, (%rcx); + + call __cast5_enc_blk4; + + popq %r14; /*src*/ + CFI_POP_TMP_REG(); + popq %r13; /*dst*/ + CFI_POP_TMP_REG(); + + /* XOR key-stream with plaintext */ + xorq 0 * 8(%r14), RLR0; + xorq 1 * 8(%r14), RLR1; + xorq 2 * 8(%r14), RLR2; + xorq 3 * 8(%r14), RLR3; + movq RLR0, 0 * 8(%r13); + movq RLR1, 1 * 8(%r13); + movq RLR2, 2 * 8(%r13); + movq RLR3, 3 * 8(%r13); + + popq %r14; + CFI_POP(%r14); + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret + CFI_ENDPROC(); +ELF(.size _gcry_cast5_amd64_ctr_enc,.-_gcry_cast5_amd64_ctr_enc;) + +.align 8 +.globl _gcry_cast5_amd64_cbc_dec +ELF(.type _gcry_cast5_amd64_cbc_dec,@function;) +_gcry_cast5_amd64_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (64bit) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + pushq %r14; + CFI_PUSH(%r14); + + pushq %rcx; + CFI_PUSH(%rcx); + pushq %rsi; + CFI_PUSH(%rsi); + pushq %rdx; + CFI_PUSH(%rdx); + + /* load input */ + movq 0 * 8(%rdx), RLR0; + movq 1 * 8(%rdx), RLR1; + movq 2 * 8(%rdx), RLR2; + movq 3 * 8(%rdx), RLR3; + + call __cast5_dec_blk4; + + popq RX0; /*src*/ + CFI_POP_TMP_REG(); + popq RX1; /*dst*/ + CFI_POP_TMP_REG(); + popq RX2; /*iv*/ + CFI_POP_TMP_REG(); + + movq 3 * 8(RX0), %r14; + xorq (RX2), RLR0; + xorq 0 * 8(RX0), RLR1; + xorq 1 * 8(RX0), RLR2; + xorq 2 * 8(RX0), RLR3; + movq %r14, (RX2); /* store new IV */ + + movq RLR0, 0 * 8(RX1); + movq RLR1, 1 * 8(RX1); + movq RLR2, 2 * 8(RX1); + movq RLR3, 3 * 8(RX1); + + popq %r14; + CFI_POP(%r14); + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_cast5_amd64_cbc_dec,.-_gcry_cast5_amd64_cbc_dec;) + +.align 8 +.globl _gcry_cast5_amd64_cfb_dec +ELF(.type _gcry_cast5_amd64_cfb_dec,@function;) +_gcry_cast5_amd64_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (4 blocks) + * %rdx: src (4 blocks) + * %rcx: iv (64bit) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + pushq %r14; + CFI_PUSH(%r14); + + pushq %rsi; + CFI_PUSH(%rsi); + pushq %rdx; + CFI_PUSH(%rdx); + + /* Load input */ + movq (%rcx), RLR0; + movq 0 * 8(%rdx), RLR1; + movq 1 * 8(%rdx), RLR2; + movq 2 * 8(%rdx), RLR3; + + inbswap_block4(RLR0, RLR1, RLR2, RLR3); + + /* Update IV */ + movq 3 * 8(%rdx), %rdx; + movq %rdx, (%rcx); + + call __cast5_enc_blk4; + + popq %rdx; /*src*/ + CFI_POP_TMP_REG(); + popq %rcx; /*dst*/ + CFI_POP_TMP_REG(); + + xorq 0 * 8(%rdx), RLR0; + xorq 1 * 8(%rdx), RLR1; + xorq 2 * 8(%rdx), RLR2; + xorq 3 * 8(%rdx), RLR3; + movq RLR0, 0 * 8(%rcx); + movq RLR1, 1 * 8(%rcx); + movq RLR2, 2 * 8(%rcx); + movq RLR3, 3 * 8(%rcx); + + popq %r14; + CFI_POP(%r14); + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size 
_gcry_cast5_amd64_cfb_dec,.-_gcry_cast5_amd64_cfb_dec;) + +#endif /*defined(USE_CAST5)*/ +#endif /*__x86_64*/ diff --git a/comm/third_party/libgcrypt/cipher/cast5-arm.S b/comm/third_party/libgcrypt/cipher/cast5-arm.S new file mode 100644 index 0000000000..76ddd2e335 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cast5-arm.S @@ -0,0 +1,728 @@ +/* cast5-arm.S - ARM assembly implementation of CAST5 cipher + * + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> + +#if defined(__ARMEL__) +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +.text + +.syntax unified +.arm + +.extern _gcry_cast5_s1to4; + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + +/* structure of crypto context */ +#define Km 0 +#define Kr (Km + (16 * 4)) +#define Kr_arm_enc (Kr + (16)) +#define Kr_arm_dec (Kr_arm_enc + (16)) + +/* register macros */ +#define CTX %r0 +#define Rs1 %r7 +#define Rs2 %r8 +#define Rs3 %r9 +#define Rs4 %r10 +#define RMASK %r11 +#define RKM %r1 +#define RKR %r2 + +#define RL0 %r3 +#define RR0 %r4 + +#define RL1 %r9 +#define RR1 %r10 + +#define RT0 %lr +#define RT1 %ip +#define RT2 %r5 +#define RT3 %r6 + +/* helper macros */ +#define ldr_unaligned_le(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 0)]; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 3)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_le(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 0)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 1)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 2)]; \ + strb rtmp0, [rdst, #((offs) + 3)]; + +#define ldr_unaligned_be(rout, rsrc, offs, rtmp) \ + ldrb rout, [rsrc, #((offs) + 3)]; \ + ldrb rtmp, [rsrc, #((offs) + 2)]; \ + orr rout, rout, rtmp, lsl #8; \ + ldrb rtmp, [rsrc, #((offs) + 1)]; \ + orr rout, rout, rtmp, lsl #16; \ + ldrb rtmp, [rsrc, #((offs) + 0)]; \ + orr rout, rout, rtmp, lsl #24; + +#define str_unaligned_be(rin, rdst, offs, rtmp0, rtmp1) \ + mov rtmp0, rin, lsr #8; \ + strb rin, [rdst, #((offs) + 3)]; \ + mov rtmp1, rin, lsr #16; \ + strb rtmp0, [rdst, #((offs) + 2)]; \ + mov rtmp0, rin, lsr #24; \ + strb rtmp1, [rdst, #((offs) + 1)]; \ + strb rtmp0, [rdst, #((offs) + 0)]; + +#ifdef __ARMEL__ + #define ldr_unaligned_host ldr_unaligned_le + #define str_unaligned_host str_unaligned_le + + /* bswap on little-endian */ +#ifdef HAVE_ARM_ARCH_V6 + #define host_to_be(reg, rtmp) \ + rev reg, reg; + #define be_to_host(reg, rtmp) \ + rev reg, 
reg; +#else + #define host_to_be(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; + #define be_to_host(reg, rtmp) \ + eor rtmp, reg, reg, ror #16; \ + mov rtmp, rtmp, lsr #8; \ + bic rtmp, rtmp, #65280; \ + eor reg, rtmp, reg, ror #8; +#endif +#else + #define ldr_unaligned_host ldr_unaligned_be + #define str_unaligned_host str_unaligned_be + + /* nop on big-endian */ + #define host_to_be(reg, rtmp) /*_*/ + #define be_to_host(reg, rtmp) /*_*/ +#endif + +#define host_to_host(x, y) /*_*/ + +/********************************************************************** + 1-way cast5 + **********************************************************************/ + +#define dummy(n) /*_*/ + +#define load_kr(n) \ + ldr RKR, [CTX, #(Kr_arm_enc + (n))]; /* Kr[n] */ + +#define load_dec_kr(n) \ + ldr RKR, [CTX, #(Kr_arm_dec + (n) - 3)]; /* Kr[n] */ + +#define load_km(n) \ + ldr RKM, [CTX, #(Km + (n) * 4)]; /* Km[n] */ + +#define shift_kr(dummy) \ + mov RKR, RKR, lsr #8; + +#define F(n, rl, rr, op1, op2, op3, op4, dec, loadkm, shiftkr, loadkr) \ + op1 RKM, rr; \ + mov RKM, RKM, ror RKR; \ + \ + and RT0, RMASK, RKM, ror #(24); \ + and RT1, RMASK, RKM, lsr #(16); \ + and RT2, RMASK, RKM, lsr #(8); \ + ldr RT0, [Rs1, RT0]; \ + and RT3, RMASK, RKM; \ + ldr RT1, [Rs2, RT1]; \ + shiftkr(RKR); \ + \ + ldr RT2, [Rs3, RT2]; \ + \ + op2 RT0, RT1; \ + ldr RT3, [Rs4, RT3]; \ + op3 RT0, RT2; \ + loadkm((n) + (1 - ((dec) * 2))); \ + op4 RT0, RT3; \ + loadkr((n) + (1 - ((dec) * 2))); \ + eor rl, RT0; + +#define F1(n, rl, rr, dec, loadkm, shiftkr, loadkr) \ + F(n, rl, rr, add, eor, sub, add, dec, loadkm, shiftkr, loadkr) +#define F2(n, rl, rr, dec, loadkm, shiftkr, loadkr) \ + F(n, rl, rr, eor, sub, add, eor, dec, loadkm, shiftkr, loadkr) +#define F3(n, rl, rr, dec, loadkm, shiftkr, loadkr) \ + F(n, rl, rr, sub, add, eor, sub, dec, loadkm, shiftkr, loadkr) + +#define enc_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ + Fx(n, rl, rr, 0, loadkm, shiftkr, loadkr) + +#define dec_round(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ + Fx(n, rl, rr, 1, loadkm, shiftkr, loadkr) + +#define read_block_aligned(rin, offs, l0, r0, convert, rtmp) \ + ldr l0, [rin, #((offs) + 0)]; \ + ldr r0, [rin, #((offs) + 4)]; \ + convert(l0, rtmp); \ + convert(r0, rtmp); + +#define write_block_aligned(rout, offs, l0, r0, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + str l0, [rout, #((offs) + 0)]; \ + str r0, [rout, #((offs) + 4)]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads allowed */ + #define read_block(rin, offs, l0, r0, rtmp0) \ + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0) + + #define write_block(rout, offs, r0, l0, rtmp0, rtmp1) \ + write_block_aligned(rout, offs, r0, l0, be_to_host, rtmp0) + + #define read_block_host(rin, offs, l0, r0, rtmp0) \ + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0) + + #define write_block_host(rout, offs, r0, l0, rtmp0, rtmp1) \ + write_block_aligned(rout, offs, r0, l0, host_to_host, rtmp0) +#else + /* need to handle unaligned reads by byte reads */ + #define read_block(rin, offs, l0, r0, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_be(l0, rin, (offs) + 0, rtmp0); \ + ldr_unaligned_be(r0, rin, (offs) + 4, rtmp0); \ + b 2f; \ + 1:;\ + read_block_aligned(rin, offs, l0, r0, host_to_be, rtmp0); \ + 2:; + + #define write_block(rout, offs, l0, r0, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_be(l0, rout, (offs) + 0, rtmp0, rtmp1); \ + str_unaligned_be(r0, rout, 
(offs) + 4, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block_aligned(rout, offs, l0, r0, be_to_host, rtmp0); \ + 2:; + + #define read_block_host(rin, offs, l0, r0, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_host(l0, rin, (offs) + 0, rtmp0); \ + ldr_unaligned_host(r0, rin, (offs) + 4, rtmp0); \ + b 2f; \ + 1:;\ + read_block_aligned(rin, offs, l0, r0, host_to_host, rtmp0); \ + 2:; + + #define write_block_host(rout, offs, l0, r0, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_host(l0, rout, (offs) + 0, rtmp0, rtmp1); \ + str_unaligned_host(r0, rout, (offs) + 4, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block_aligned(rout, offs, l0, r0, host_to_host, rtmp0); \ + 2:; +#endif + +.align 3 +.globl _gcry_cast5_arm_encrypt_block +.type _gcry_cast5_arm_encrypt_block,%function; + +_gcry_cast5_arm_encrypt_block: + /* input: + * %r0: CTX + * %r1: dst + * %r2: src + */ + push {%r1, %r4-%r11, %ip, %lr}; + + GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); + mov RMASK, #(0xff << 2); + add Rs2, Rs1, #(0x100*4); + add Rs3, Rs1, #(0x100*4*2); + add Rs4, Rs1, #(0x100*4*3); + + read_block(%r2, 0, RL0, RR0, RT0); + + load_km(0); + load_kr(0); + enc_round(0, F1, RL0, RR0, load_km, shift_kr, dummy); + enc_round(1, F2, RR0, RL0, load_km, shift_kr, dummy); + enc_round(2, F3, RL0, RR0, load_km, shift_kr, dummy); + enc_round(3, F1, RR0, RL0, load_km, dummy, load_kr); + enc_round(4, F2, RL0, RR0, load_km, shift_kr, dummy); + enc_round(5, F3, RR0, RL0, load_km, shift_kr, dummy); + enc_round(6, F1, RL0, RR0, load_km, shift_kr, dummy); + enc_round(7, F2, RR0, RL0, load_km, dummy, load_kr); + enc_round(8, F3, RL0, RR0, load_km, shift_kr, dummy); + enc_round(9, F1, RR0, RL0, load_km, shift_kr, dummy); + enc_round(10, F2, RL0, RR0, load_km, shift_kr, dummy); + enc_round(11, F3, RR0, RL0, load_km, dummy, load_kr); + enc_round(12, F1, RL0, RR0, load_km, shift_kr, dummy); + enc_round(13, F2, RR0, RL0, load_km, shift_kr, dummy); + enc_round(14, F3, RL0, RR0, load_km, shift_kr, dummy); + enc_round(15, F1, RR0, RL0, dummy, dummy, dummy); + + ldr %r1, [%sp], #4; + write_block(%r1, 0, RR0, RL0, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_encrypt_block,.-_gcry_cast5_arm_encrypt_block; + +.align 3 +.globl _gcry_cast5_arm_decrypt_block +.type _gcry_cast5_arm_decrypt_block,%function; + +_gcry_cast5_arm_decrypt_block: + /* input: + * %r0: CTX + * %r1: dst + * %r2: src + */ + push {%r1, %r4-%r11, %ip, %lr}; + + GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); + mov RMASK, #(0xff << 2); + add Rs2, Rs1, #(0x100 * 4); + add Rs3, Rs1, #(0x100 * 4 * 2); + add Rs4, Rs1, #(0x100 * 4 * 3); + + read_block(%r2, 0, RL0, RR0, RT0); + + load_km(15); + load_dec_kr(15); + dec_round(15, F1, RL0, RR0, load_km, shift_kr, dummy); + dec_round(14, F3, RR0, RL0, load_km, shift_kr, dummy); + dec_round(13, F2, RL0, RR0, load_km, shift_kr, dummy); + dec_round(12, F1, RR0, RL0, load_km, dummy, load_dec_kr); + dec_round(11, F3, RL0, RR0, load_km, shift_kr, dummy); + dec_round(10, F2, RR0, RL0, load_km, shift_kr, dummy); + dec_round(9, F1, RL0, RR0, load_km, shift_kr, dummy); + dec_round(8, F3, RR0, RL0, load_km, dummy, load_dec_kr); + dec_round(7, F2, RL0, RR0, load_km, shift_kr, dummy); + dec_round(6, F1, RR0, RL0, load_km, shift_kr, dummy); + dec_round(5, F3, RL0, RR0, load_km, shift_kr, dummy); + dec_round(4, F2, RR0, RL0, load_km, dummy, load_dec_kr); + dec_round(3, F1, RL0, RR0, load_km, shift_kr, dummy); + dec_round(2, F3, RR0, RL0, load_km, shift_kr, dummy); + dec_round(1, F2, RL0, RR0, load_km, shift_kr, 
dummy); + dec_round(0, F1, RR0, RL0, dummy, dummy, dummy); + + ldr %r1, [%sp], #4; + write_block(%r1, 0, RR0, RL0, RT0, RT1); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_decrypt_block,.-_gcry_cast5_arm_decrypt_block; + +/********************************************************************** + 2-way cast5 + **********************************************************************/ + +#define F_2w(n, rl0, rr0, rl1, rr1, op1, op2, op3, op4, dec, loadkm, shiftkr, \ + loadkr) \ + op1 RT3, RKM, rr0; \ + op1 RKM, RKM, rr1; \ + mov RT3, RT3, ror RKR; \ + mov RKM, RKM, ror RKR; \ + \ + and RT0, RMASK, RT3, ror #(24); \ + and RT1, RMASK, RT3, lsr #(16); \ + and RT2, RMASK, RT3, lsr #(8); \ + and RT3, RMASK, RT3; \ + \ + ldr RT0, [Rs1, RT0]; \ + add RT2, #(0x100 * 4); \ + ldr RT1, [Rs2, RT1]; \ + add RT3, #(0x100 * 4 * 2); \ + \ + ldr RT2, [Rs2, RT2]; \ + \ + op2 RT0, RT1; \ + ldr RT3, [Rs2, RT3]; \ + and RT1, RMASK, RKM, ror #(24); \ + op3 RT0, RT2; \ + and RT2, RMASK, RKM, lsr #(16); \ + op4 RT0, RT3; \ + and RT3, RMASK, RKM, lsr #(8); \ + eor rl0, RT0; \ + add RT3, #(0x100 * 4); \ + ldr RT1, [Rs1, RT1]; \ + and RT0, RMASK, RKM; \ + ldr RT2, [Rs2, RT2]; \ + add RT0, #(0x100 * 4 * 2); \ + \ + ldr RT3, [Rs2, RT3]; \ + \ + op2 RT1, RT2; \ + ldr RT0, [Rs2, RT0]; \ + op3 RT1, RT3; \ + loadkm((n) + (1 - ((dec) * 2))); \ + op4 RT1, RT0; \ + loadkr((n) + (1 - ((dec) * 2))); \ + shiftkr(RKR); \ + eor rl1, RT1; + +#define F1_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \ + F_2w(n, rl0, rr0, rl1, rr1, add, eor, sub, add, dec, \ + loadkm, shiftkr, loadkr) +#define F2_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \ + F_2w(n, rl0, rr0, rl1, rr1, eor, sub, add, eor, dec, \ + loadkm, shiftkr, loadkr) +#define F3_2w(n, rl0, rr0, rl1, rr1, dec, loadkm, shiftkr, loadkr) \ + F_2w(n, rl0, rr0, rl1, rr1, sub, add, eor, sub, dec, \ + loadkm, shiftkr, loadkr) + +#define enc_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ + Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 0, loadkm, shiftkr, loadkr) + +#define dec_round2(n, Fx, rl, rr, loadkm, shiftkr, loadkr) \ + Fx##_2w(n, rl##0, rr##0, rl##1, rr##1, 1, loadkm, shiftkr, loadkr) + +#define read_block2_aligned(rin, l0, r0, l1, r1, convert, rtmp) \ + ldr l0, [rin, #(0)]; \ + ldr r0, [rin, #(4)]; \ + convert(l0, rtmp); \ + ldr l1, [rin, #(8)]; \ + convert(r0, rtmp); \ + ldr r1, [rin, #(12)]; \ + convert(l1, rtmp); \ + convert(r1, rtmp); + +#define write_block2_aligned(rout, l0, r0, l1, r1, convert, rtmp) \ + convert(l0, rtmp); \ + convert(r0, rtmp); \ + convert(l1, rtmp); \ + str l0, [rout, #(0)]; \ + convert(r1, rtmp); \ + str r0, [rout, #(4)]; \ + str l1, [rout, #(8)]; \ + str r1, [rout, #(12)]; + +#ifdef __ARM_FEATURE_UNALIGNED + /* unaligned word reads allowed */ + #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0) + + #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0) + + #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0) + + #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0) +#else + /* need to handle unaligned reads by byte reads */ + #define read_block2(rin, l0, r0, l1, r1, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_be(l0, rin, 0, rtmp0); \ + ldr_unaligned_be(r0, rin, 4, rtmp0); \ + ldr_unaligned_be(l1, rin, 8, rtmp0); \ + ldr_unaligned_be(r1, rin, 12, 
rtmp0); \ + b 2f; \ + 1:;\ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_be, rtmp0); \ + 2:; + + #define write_block2(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_be(l0, rout, 0, rtmp0, rtmp1); \ + str_unaligned_be(r0, rout, 4, rtmp0, rtmp1); \ + str_unaligned_be(l1, rout, 8, rtmp0, rtmp1); \ + str_unaligned_be(r1, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block2_aligned(rout, l0, r0, l1, r1, be_to_host, rtmp0); \ + 2:; + + #define read_block2_host(rin, l0, r0, l1, r1, rtmp0) \ + tst rin, #3; \ + beq 1f; \ + ldr_unaligned_host(l0, rin, 0, rtmp0); \ + ldr_unaligned_host(r0, rin, 4, rtmp0); \ + ldr_unaligned_host(l1, rin, 8, rtmp0); \ + ldr_unaligned_host(r1, rin, 12, rtmp0); \ + b 2f; \ + 1:;\ + read_block2_aligned(rin, l0, r0, l1, r1, host_to_host, rtmp0); \ + 2:; + + #define write_block2_host(rout, l0, r0, l1, r1, rtmp0, rtmp1) \ + tst rout, #3; \ + beq 1f; \ + str_unaligned_host(l0, rout, 0, rtmp0, rtmp1); \ + str_unaligned_host(r0, rout, 4, rtmp0, rtmp1); \ + str_unaligned_host(l1, rout, 8, rtmp0, rtmp1); \ + str_unaligned_host(r1, rout, 12, rtmp0, rtmp1); \ + b 2f; \ + 1:;\ + write_block2_aligned(rout, l0, r0, l1, r1, host_to_host, rtmp0); \ + 2:; +#endif + +.align 3 +.type _gcry_cast5_arm_enc_blk2,%function; + +_gcry_cast5_arm_enc_blk2: + /* input: + * preloaded: CTX + * [RL0, RR0], [RL1, RR1]: src + * output: + * [RR0, RL0], [RR1, RL1]: dst + */ + push {%lr}; + + GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); + mov RMASK, #(0xff << 2); + add Rs2, Rs1, #(0x100 * 4); + + load_km(0); + load_kr(0); + enc_round2(0, F1, RL, RR, load_km, shift_kr, dummy); + enc_round2(1, F2, RR, RL, load_km, shift_kr, dummy); + enc_round2(2, F3, RL, RR, load_km, shift_kr, dummy); + enc_round2(3, F1, RR, RL, load_km, dummy, load_kr); + enc_round2(4, F2, RL, RR, load_km, shift_kr, dummy); + enc_round2(5, F3, RR, RL, load_km, shift_kr, dummy); + enc_round2(6, F1, RL, RR, load_km, shift_kr, dummy); + enc_round2(7, F2, RR, RL, load_km, dummy, load_kr); + enc_round2(8, F3, RL, RR, load_km, shift_kr, dummy); + enc_round2(9, F1, RR, RL, load_km, shift_kr, dummy); + enc_round2(10, F2, RL, RR, load_km, shift_kr, dummy); + enc_round2(11, F3, RR, RL, load_km, dummy, load_kr); + enc_round2(12, F1, RL, RR, load_km, shift_kr, dummy); + enc_round2(13, F2, RR, RL, load_km, shift_kr, dummy); + enc_round2(14, F3, RL, RR, load_km, shift_kr, dummy); + enc_round2(15, F1, RR, RL, dummy, dummy, dummy); + + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); + + pop {%pc}; +.ltorg +.size _gcry_cast5_arm_enc_blk2,.-_gcry_cast5_arm_enc_blk2; + +.align 3 +.globl _gcry_cast5_arm_cfb_dec; +.type _gcry_cast5_arm_cfb_dec,%function; + +_gcry_cast5_arm_cfb_dec: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit) + */ + push {%r1, %r2, %r4-%r11, %ip, %lr}; + + mov %lr, %r3; + + /* Load input (iv/%r3 is aligned, src/%r2 might not be) */ + ldm %r3, {RL0, RR0}; + host_to_be(RL0, RT1); + host_to_be(RR0, RT1); + read_block(%r2, 0, RL1, RR1, %ip); + + /* Update IV, load src[1] and save to iv[0] */ + read_block_host(%r2, 8, %r5, %r6, %r7); + stm %lr, {%r5, %r6}; + + bl _gcry_cast5_arm_enc_blk2; + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r0: dst, %r1: %src */ + pop {%r0, %r1}; + + /* dst = src ^ result */ + read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr); + eor %r5, %r4; + eor %r6, %r3; + eor %r7, %r10; + eor %r8, %r9; + write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2); + + pop {%r4-%r11, 
%ip, %pc}; +.ltorg +.size _gcry_cast5_arm_cfb_dec,.-_gcry_cast5_arm_cfb_dec; + +.align 3 +.globl _gcry_cast5_arm_ctr_enc; +.type _gcry_cast5_arm_ctr_enc,%function; + +_gcry_cast5_arm_ctr_enc: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit, big-endian) + */ + push {%r1, %r2, %r4-%r11, %ip, %lr}; + + mov %lr, %r3; + + /* Load IV (big => host endian) */ + read_block_aligned(%lr, 0, RL0, RR0, be_to_host, RT1); + + /* Construct IVs */ + adds RR1, RR0, #1; /* +1 */ + adc RL1, RL0, #0; + adds %r6, RR1, #1; /* +2 */ + adc %r5, RL1, #0; + + /* Store new IV (host => big-endian) */ + write_block_aligned(%lr, 0, %r5, %r6, host_to_be, RT1); + + bl _gcry_cast5_arm_enc_blk2; + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r0: dst, %r1: %src */ + pop {%r0, %r1}; + + /* XOR key-stream with plaintext */ + read_block2_host(%r1, %r5, %r6, %r7, %r8, %lr); + eor %r5, %r4; + eor %r6, %r3; + eor %r7, %r10; + eor %r8, %r9; + write_block2_host(%r0, %r5, %r6, %r7, %r8, %r1, %r2); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_ctr_enc,.-_gcry_cast5_arm_ctr_enc; + +.align 3 +.type _gcry_cast5_arm_dec_blk2,%function; + +_gcry_cast5_arm_dec_blk2: + /* input: + * preloaded: CTX + * [RL0, RR0], [RL1, RR1]: src + * output: + * [RR0, RL0], [RR1, RL1]: dst + */ + + GET_DATA_POINTER(Rs1, _gcry_cast5_s1to4, Rs2); + mov RMASK, #(0xff << 2); + add Rs2, Rs1, #(0x100 * 4); + + load_km(15); + load_dec_kr(15); + dec_round2(15, F1, RL, RR, load_km, shift_kr, dummy); + dec_round2(14, F3, RR, RL, load_km, shift_kr, dummy); + dec_round2(13, F2, RL, RR, load_km, shift_kr, dummy); + dec_round2(12, F1, RR, RL, load_km, dummy, load_dec_kr); + dec_round2(11, F3, RL, RR, load_km, shift_kr, dummy); + dec_round2(10, F2, RR, RL, load_km, shift_kr, dummy); + dec_round2(9, F1, RL, RR, load_km, shift_kr, dummy); + dec_round2(8, F3, RR, RL, load_km, dummy, load_dec_kr); + dec_round2(7, F2, RL, RR, load_km, shift_kr, dummy); + dec_round2(6, F1, RR, RL, load_km, shift_kr, dummy); + dec_round2(5, F3, RL, RR, load_km, shift_kr, dummy); + dec_round2(4, F2, RR, RL, load_km, dummy, load_dec_kr); + dec_round2(3, F1, RL, RR, load_km, shift_kr, dummy); + dec_round2(2, F3, RR, RL, load_km, shift_kr, dummy); + dec_round2(1, F2, RL, RR, load_km, shift_kr, dummy); + dec_round2(0, F1, RR, RL, dummy, dummy, dummy); + + host_to_be(RR0, RT0); + host_to_be(RL0, RT0); + host_to_be(RR1, RT0); + host_to_be(RL1, RT0); + + b .Ldec_cbc_tail; +.ltorg +.size _gcry_cast5_arm_dec_blk2,.-_gcry_cast5_arm_dec_blk2; + +.align 3 +.globl _gcry_cast5_arm_cbc_dec; +.type _gcry_cast5_arm_cbc_dec,%function; + +_gcry_cast5_arm_cbc_dec: + /* input: + * %r0: CTX + * %r1: dst (2 blocks) + * %r2: src (2 blocks) + * %r3: iv (64bit) + */ + push {%r1-%r11, %ip, %lr}; + + read_block2(%r2, RL0, RR0, RL1, RR1, RT0); + + /* dec_blk2 is only used by cbc_dec, jump directly in/out instead + * of function call. */ + b _gcry_cast5_arm_dec_blk2; +.Ldec_cbc_tail: + /* result in RR0:RL0, RR1:RL1 = %r4:%r3, %r10:%r9 */ + + /* %r0: dst, %r1: %src, %r2: iv */ + pop {%r0-%r2}; + + /* load IV+1 (src[0]) to %r7:%r8. Might be unaligned. */ + read_block_host(%r1, 0, %r7, %r8, %r5); + /* load IV (iv[0]) to %r5:%r6. 'iv' is aligned. */ + ldm %r2, {%r5, %r6}; + + /* out[1] ^= IV+1 */ + eor %r10, %r7; + eor %r9, %r8; + /* out[0] ^= IV */ + eor %r4, %r5; + eor %r3, %r6; + + /* load IV+2 (src[1]) to %r7:%r8. Might be unaligned. */ + read_block_host(%r1, 8, %r7, %r8, %r5); + /* store IV+2 to iv[0] (aligned). 
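In other words, the tail implements the standard CBC-decrypt relation for two 8-byte blocks: block 0 is XORed with the incoming IV, block 1 with ciphertext block 0, and ciphertext block 1 is written back as the IV for the next call. A sketch with the same hypothetical load64/store64 helpers as the CTR note in cast5-amd64.S, plus a hypothetical one-block cast5_decrypt_u64:

    u64 c0 = load64 (src), c1 = load64 (src + 8);
    store64 (dst,     cast5_decrypt_u64 (ctx, c0) ^ load64 (iv));
    store64 (dst + 8, cast5_decrypt_u64 (ctx, c1) ^ c0);
    store64 (iv, c1);   // last ciphertext block chains into the next call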
*/ + stm %r2, {%r7, %r8}; + + /* store result to dst[0-3]. Might be unaligned. */ + write_block2_host(%r0, %r4, %r3, %r10, %r9, %r5, %r6); + + pop {%r4-%r11, %ip, %pc}; +.ltorg +.size _gcry_cast5_arm_cbc_dec,.-_gcry_cast5_arm_cbc_dec; + +#endif /*HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS*/ +#endif /*__ARMEL__*/ diff --git a/comm/third_party/libgcrypt/cipher/cast5.c b/comm/third_party/libgcrypt/cipher/cast5.c new file mode 100644 index 0000000000..837ea0fe57 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cast5.c @@ -0,0 +1,1238 @@ +/* cast5.c - CAST5 cipher (RFC2144) + * Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + */ + +/* Test vectors: + * + * 128-bit key = 01 23 45 67 12 34 56 78 23 45 67 89 34 56 78 9A + * plaintext = 01 23 45 67 89 AB CD EF + * ciphertext = 23 8B 4F E5 84 7E 44 B2 + * + * 80-bit key = 01 23 45 67 12 34 56 78 23 45 + * = 01 23 45 67 12 34 56 78 23 45 00 00 00 00 00 00 + * plaintext = 01 23 45 67 89 AB CD EF + * ciphertext = EB 6A 71 1A 2C 02 27 1B + * + * 40-bit key = 01 23 45 67 12 + * = 01 23 45 67 12 00 00 00 00 00 00 00 00 00 00 00 + * plaintext = 01 23 45 67 89 AB CD EF + * ciphertext = 7A C8 16 D1 6E 9B 30 2E + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "g10lib.h" +#include "types.h" +#include "cipher.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher-internal.h" +#include "cipher-selftest.h" + +/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ +#undef USE_AMD64_ASM +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AMD64_ASM 1 +#endif + +/* USE_ARM_ASM indicates whether to use ARM assembly code. 
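As an aside, the RFC 2144 test vectors quoted at the top of this file can be exercised from outside the library through the public API. A minimal check for the 128-bit vector, assuming an installed libgcrypt with CAST5 enabled and omitting error handling:

    #include <gcrypt.h>
    #include <string.h>
    #include <assert.h>

    static void
    check_cast5_vector (void)
    {
      static const unsigned char key[16] =
        { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78,
          0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9a };
      static const unsigned char plain[8] =
        { 0x01, 0x23, 0x45, 0x67, 0x89, 0xab, 0xcd, 0xef };
      static const unsigned char expect[8] =
        { 0x23, 0x8b, 0x4f, 0xe5, 0x84, 0x7e, 0x44, 0xb2 };
      unsigned char out[8];
      gcry_cipher_hd_t hd;

      // assumes gcry_check_version () was called once at program start
      gcry_cipher_open (&hd, GCRY_CIPHER_CAST5, GCRY_CIPHER_MODE_ECB, 0);
      gcry_cipher_setkey (hd, key, sizeof key);
      gcry_cipher_encrypt (hd, out, sizeof out, plain, sizeof plain);
      assert (memcmp (out, expect, 8) == 0);
      gcry_cipher_close (hd);
    }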
*/ +#undef USE_ARM_ASM +#if defined(__ARMEL__) +# ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS +# define USE_ARM_ASM 1 +# endif +#endif + +#define CAST5_BLOCKSIZE 8 + +typedef struct { + u32 Km[16]; + byte Kr[16]; +#ifdef USE_ARM_ASM + u32 Kr_arm_enc[16 / sizeof(u32)]; + u32 Kr_arm_dec[16 / sizeof(u32)]; +#endif +} CAST5_context; + +static gcry_err_code_t cast_setkey (void *c, const byte *key, unsigned keylen, + cipher_bulk_ops_t *bulk_ops); +static unsigned int encrypt_block (void *c, byte *outbuf, const byte *inbuf); +static unsigned int decrypt_block (void *c, byte *outbuf, const byte *inbuf); + + + +#define s1 _gcry_cast5_s1to4[0] +#define s2 _gcry_cast5_s1to4[1] +#define s3 _gcry_cast5_s1to4[2] +#define s4 _gcry_cast5_s1to4[3] + +const u32 _gcry_cast5_s1to4[4][256] = { { +0x30fb40d4, 0x9fa0ff0b, 0x6beccd2f, 0x3f258c7a, 0x1e213f2f, 0x9c004dd3, 0x6003e540, 0xcf9fc949, +0xbfd4af27, 0x88bbbdb5, 0xe2034090, 0x98d09675, 0x6e63a0e0, 0x15c361d2, 0xc2e7661d, 0x22d4ff8e, +0x28683b6f, 0xc07fd059, 0xff2379c8, 0x775f50e2, 0x43c340d3, 0xdf2f8656, 0x887ca41a, 0xa2d2bd2d, +0xa1c9e0d6, 0x346c4819, 0x61b76d87, 0x22540f2f, 0x2abe32e1, 0xaa54166b, 0x22568e3a, 0xa2d341d0, +0x66db40c8, 0xa784392f, 0x004dff2f, 0x2db9d2de, 0x97943fac, 0x4a97c1d8, 0x527644b7, 0xb5f437a7, +0xb82cbaef, 0xd751d159, 0x6ff7f0ed, 0x5a097a1f, 0x827b68d0, 0x90ecf52e, 0x22b0c054, 0xbc8e5935, +0x4b6d2f7f, 0x50bb64a2, 0xd2664910, 0xbee5812d, 0xb7332290, 0xe93b159f, 0xb48ee411, 0x4bff345d, +0xfd45c240, 0xad31973f, 0xc4f6d02e, 0x55fc8165, 0xd5b1caad, 0xa1ac2dae, 0xa2d4b76d, 0xc19b0c50, +0x882240f2, 0x0c6e4f38, 0xa4e4bfd7, 0x4f5ba272, 0x564c1d2f, 0xc59c5319, 0xb949e354, 0xb04669fe, +0xb1b6ab8a, 0xc71358dd, 0x6385c545, 0x110f935d, 0x57538ad5, 0x6a390493, 0xe63d37e0, 0x2a54f6b3, +0x3a787d5f, 0x6276a0b5, 0x19a6fcdf, 0x7a42206a, 0x29f9d4d5, 0xf61b1891, 0xbb72275e, 0xaa508167, +0x38901091, 0xc6b505eb, 0x84c7cb8c, 0x2ad75a0f, 0x874a1427, 0xa2d1936b, 0x2ad286af, 0xaa56d291, +0xd7894360, 0x425c750d, 0x93b39e26, 0x187184c9, 0x6c00b32d, 0x73e2bb14, 0xa0bebc3c, 0x54623779, +0x64459eab, 0x3f328b82, 0x7718cf82, 0x59a2cea6, 0x04ee002e, 0x89fe78e6, 0x3fab0950, 0x325ff6c2, +0x81383f05, 0x6963c5c8, 0x76cb5ad6, 0xd49974c9, 0xca180dcf, 0x380782d5, 0xc7fa5cf6, 0x8ac31511, +0x35e79e13, 0x47da91d0, 0xf40f9086, 0xa7e2419e, 0x31366241, 0x051ef495, 0xaa573b04, 0x4a805d8d, +0x548300d0, 0x00322a3c, 0xbf64cddf, 0xba57a68e, 0x75c6372b, 0x50afd341, 0xa7c13275, 0x915a0bf5, +0x6b54bfab, 0x2b0b1426, 0xab4cc9d7, 0x449ccd82, 0xf7fbf265, 0xab85c5f3, 0x1b55db94, 0xaad4e324, +0xcfa4bd3f, 0x2deaa3e2, 0x9e204d02, 0xc8bd25ac, 0xeadf55b3, 0xd5bd9e98, 0xe31231b2, 0x2ad5ad6c, +0x954329de, 0xadbe4528, 0xd8710f69, 0xaa51c90f, 0xaa786bf6, 0x22513f1e, 0xaa51a79b, 0x2ad344cc, +0x7b5a41f0, 0xd37cfbad, 0x1b069505, 0x41ece491, 0xb4c332e6, 0x032268d4, 0xc9600acc, 0xce387e6d, +0xbf6bb16c, 0x6a70fb78, 0x0d03d9c9, 0xd4df39de, 0xe01063da, 0x4736f464, 0x5ad328d8, 0xb347cc96, +0x75bb0fc3, 0x98511bfb, 0x4ffbcc35, 0xb58bcf6a, 0xe11f0abc, 0xbfc5fe4a, 0xa70aec10, 0xac39570a, +0x3f04442f, 0x6188b153, 0xe0397a2e, 0x5727cb79, 0x9ceb418f, 0x1cacd68d, 0x2ad37c96, 0x0175cb9d, +0xc69dff09, 0xc75b65f0, 0xd9db40d8, 0xec0e7779, 0x4744ead4, 0xb11c3274, 0xdd24cb9e, 0x7e1c54bd, +0xf01144f9, 0xd2240eb1, 0x9675b3fd, 0xa3ac3755, 0xd47c27af, 0x51c85f4d, 0x56907596, 0xa5bb15e6, +0x580304f0, 0xca042cf1, 0x011a37ea, 0x8dbfaadb, 0x35ba3e4a, 0x3526ffa0, 0xc37b4d09, 0xbc306ed9, +0x98a52666, 0x5648f725, 0xff5e569d, 0x0ced63d0, 0x7c63b2cf, 0x700b45e1, 0xd5ea50f1, 0x85a92872, +0xaf1fbda7, 0xd4234870, 0xa7870bf3, 0x2d3b4d79, 
0x42e04198, 0x0cd0ede7, 0x26470db8, 0xf881814c, +0x474d6ad7, 0x7c0c5e5c, 0xd1231959, 0x381b7298, 0xf5d2f4db, 0xab838653, 0x6e2f1e23, 0x83719c9e, +0xbd91e046, 0x9a56456e, 0xdc39200c, 0x20c8c571, 0x962bda1c, 0xe1e696ff, 0xb141ab08, 0x7cca89b9, +0x1a69e783, 0x02cc4843, 0xa2f7c579, 0x429ef47d, 0x427b169c, 0x5ac9f049, 0xdd8f0f00, 0x5c8165bf +}, { +0x1f201094, 0xef0ba75b, 0x69e3cf7e, 0x393f4380, 0xfe61cf7a, 0xeec5207a, 0x55889c94, 0x72fc0651, +0xada7ef79, 0x4e1d7235, 0xd55a63ce, 0xde0436ba, 0x99c430ef, 0x5f0c0794, 0x18dcdb7d, 0xa1d6eff3, +0xa0b52f7b, 0x59e83605, 0xee15b094, 0xe9ffd909, 0xdc440086, 0xef944459, 0xba83ccb3, 0xe0c3cdfb, +0xd1da4181, 0x3b092ab1, 0xf997f1c1, 0xa5e6cf7b, 0x01420ddb, 0xe4e7ef5b, 0x25a1ff41, 0xe180f806, +0x1fc41080, 0x179bee7a, 0xd37ac6a9, 0xfe5830a4, 0x98de8b7f, 0x77e83f4e, 0x79929269, 0x24fa9f7b, +0xe113c85b, 0xacc40083, 0xd7503525, 0xf7ea615f, 0x62143154, 0x0d554b63, 0x5d681121, 0xc866c359, +0x3d63cf73, 0xcee234c0, 0xd4d87e87, 0x5c672b21, 0x071f6181, 0x39f7627f, 0x361e3084, 0xe4eb573b, +0x602f64a4, 0xd63acd9c, 0x1bbc4635, 0x9e81032d, 0x2701f50c, 0x99847ab4, 0xa0e3df79, 0xba6cf38c, +0x10843094, 0x2537a95e, 0xf46f6ffe, 0xa1ff3b1f, 0x208cfb6a, 0x8f458c74, 0xd9e0a227, 0x4ec73a34, +0xfc884f69, 0x3e4de8df, 0xef0e0088, 0x3559648d, 0x8a45388c, 0x1d804366, 0x721d9bfd, 0xa58684bb, +0xe8256333, 0x844e8212, 0x128d8098, 0xfed33fb4, 0xce280ae1, 0x27e19ba5, 0xd5a6c252, 0xe49754bd, +0xc5d655dd, 0xeb667064, 0x77840b4d, 0xa1b6a801, 0x84db26a9, 0xe0b56714, 0x21f043b7, 0xe5d05860, +0x54f03084, 0x066ff472, 0xa31aa153, 0xdadc4755, 0xb5625dbf, 0x68561be6, 0x83ca6b94, 0x2d6ed23b, +0xeccf01db, 0xa6d3d0ba, 0xb6803d5c, 0xaf77a709, 0x33b4a34c, 0x397bc8d6, 0x5ee22b95, 0x5f0e5304, +0x81ed6f61, 0x20e74364, 0xb45e1378, 0xde18639b, 0x881ca122, 0xb96726d1, 0x8049a7e8, 0x22b7da7b, +0x5e552d25, 0x5272d237, 0x79d2951c, 0xc60d894c, 0x488cb402, 0x1ba4fe5b, 0xa4b09f6b, 0x1ca815cf, +0xa20c3005, 0x8871df63, 0xb9de2fcb, 0x0cc6c9e9, 0x0beeff53, 0xe3214517, 0xb4542835, 0x9f63293c, +0xee41e729, 0x6e1d2d7c, 0x50045286, 0x1e6685f3, 0xf33401c6, 0x30a22c95, 0x31a70850, 0x60930f13, +0x73f98417, 0xa1269859, 0xec645c44, 0x52c877a9, 0xcdff33a6, 0xa02b1741, 0x7cbad9a2, 0x2180036f, +0x50d99c08, 0xcb3f4861, 0xc26bd765, 0x64a3f6ab, 0x80342676, 0x25a75e7b, 0xe4e6d1fc, 0x20c710e6, +0xcdf0b680, 0x17844d3b, 0x31eef84d, 0x7e0824e4, 0x2ccb49eb, 0x846a3bae, 0x8ff77888, 0xee5d60f6, +0x7af75673, 0x2fdd5cdb, 0xa11631c1, 0x30f66f43, 0xb3faec54, 0x157fd7fa, 0xef8579cc, 0xd152de58, +0xdb2ffd5e, 0x8f32ce19, 0x306af97a, 0x02f03ef8, 0x99319ad5, 0xc242fa0f, 0xa7e3ebb0, 0xc68e4906, +0xb8da230c, 0x80823028, 0xdcdef3c8, 0xd35fb171, 0x088a1bc8, 0xbec0c560, 0x61a3c9e8, 0xbca8f54d, +0xc72feffa, 0x22822e99, 0x82c570b4, 0xd8d94e89, 0x8b1c34bc, 0x301e16e6, 0x273be979, 0xb0ffeaa6, +0x61d9b8c6, 0x00b24869, 0xb7ffce3f, 0x08dc283b, 0x43daf65a, 0xf7e19798, 0x7619b72f, 0x8f1c9ba4, +0xdc8637a0, 0x16a7d3b1, 0x9fc393b7, 0xa7136eeb, 0xc6bcc63e, 0x1a513742, 0xef6828bc, 0x520365d6, +0x2d6a77ab, 0x3527ed4b, 0x821fd216, 0x095c6e2e, 0xdb92f2fb, 0x5eea29cb, 0x145892f5, 0x91584f7f, +0x5483697b, 0x2667a8cc, 0x85196048, 0x8c4bacea, 0x833860d4, 0x0d23e0f9, 0x6c387e8a, 0x0ae6d249, +0xb284600c, 0xd835731d, 0xdcb1c647, 0xac4c56ea, 0x3ebd81b3, 0x230eabb0, 0x6438bc87, 0xf0b5b1fa, +0x8f5ea2b3, 0xfc184642, 0x0a036b7a, 0x4fb089bd, 0x649da589, 0xa345415e, 0x5c038323, 0x3e5d3bb9, +0x43d79572, 0x7e6dd07c, 0x06dfdf1e, 0x6c6cc4ef, 0x7160a539, 0x73bfbe70, 0x83877605, 0x4523ecf1 +}, { +0x8defc240, 0x25fa5d9f, 0xeb903dbf, 0xe810c907, 0x47607fff, 0x369fe44b, 0x8c1fc644, 0xaececa90, 
+0xbeb1f9bf, 0xeefbcaea, 0xe8cf1950, 0x51df07ae, 0x920e8806, 0xf0ad0548, 0xe13c8d83, 0x927010d5, +0x11107d9f, 0x07647db9, 0xb2e3e4d4, 0x3d4f285e, 0xb9afa820, 0xfade82e0, 0xa067268b, 0x8272792e, +0x553fb2c0, 0x489ae22b, 0xd4ef9794, 0x125e3fbc, 0x21fffcee, 0x825b1bfd, 0x9255c5ed, 0x1257a240, +0x4e1a8302, 0xbae07fff, 0x528246e7, 0x8e57140e, 0x3373f7bf, 0x8c9f8188, 0xa6fc4ee8, 0xc982b5a5, +0xa8c01db7, 0x579fc264, 0x67094f31, 0xf2bd3f5f, 0x40fff7c1, 0x1fb78dfc, 0x8e6bd2c1, 0x437be59b, +0x99b03dbf, 0xb5dbc64b, 0x638dc0e6, 0x55819d99, 0xa197c81c, 0x4a012d6e, 0xc5884a28, 0xccc36f71, +0xb843c213, 0x6c0743f1, 0x8309893c, 0x0feddd5f, 0x2f7fe850, 0xd7c07f7e, 0x02507fbf, 0x5afb9a04, +0xa747d2d0, 0x1651192e, 0xaf70bf3e, 0x58c31380, 0x5f98302e, 0x727cc3c4, 0x0a0fb402, 0x0f7fef82, +0x8c96fdad, 0x5d2c2aae, 0x8ee99a49, 0x50da88b8, 0x8427f4a0, 0x1eac5790, 0x796fb449, 0x8252dc15, +0xefbd7d9b, 0xa672597d, 0xada840d8, 0x45f54504, 0xfa5d7403, 0xe83ec305, 0x4f91751a, 0x925669c2, +0x23efe941, 0xa903f12e, 0x60270df2, 0x0276e4b6, 0x94fd6574, 0x927985b2, 0x8276dbcb, 0x02778176, +0xf8af918d, 0x4e48f79e, 0x8f616ddf, 0xe29d840e, 0x842f7d83, 0x340ce5c8, 0x96bbb682, 0x93b4b148, +0xef303cab, 0x984faf28, 0x779faf9b, 0x92dc560d, 0x224d1e20, 0x8437aa88, 0x7d29dc96, 0x2756d3dc, +0x8b907cee, 0xb51fd240, 0xe7c07ce3, 0xe566b4a1, 0xc3e9615e, 0x3cf8209d, 0x6094d1e3, 0xcd9ca341, +0x5c76460e, 0x00ea983b, 0xd4d67881, 0xfd47572c, 0xf76cedd9, 0xbda8229c, 0x127dadaa, 0x438a074e, +0x1f97c090, 0x081bdb8a, 0x93a07ebe, 0xb938ca15, 0x97b03cff, 0x3dc2c0f8, 0x8d1ab2ec, 0x64380e51, +0x68cc7bfb, 0xd90f2788, 0x12490181, 0x5de5ffd4, 0xdd7ef86a, 0x76a2e214, 0xb9a40368, 0x925d958f, +0x4b39fffa, 0xba39aee9, 0xa4ffd30b, 0xfaf7933b, 0x6d498623, 0x193cbcfa, 0x27627545, 0x825cf47a, +0x61bd8ba0, 0xd11e42d1, 0xcead04f4, 0x127ea392, 0x10428db7, 0x8272a972, 0x9270c4a8, 0x127de50b, +0x285ba1c8, 0x3c62f44f, 0x35c0eaa5, 0xe805d231, 0x428929fb, 0xb4fcdf82, 0x4fb66a53, 0x0e7dc15b, +0x1f081fab, 0x108618ae, 0xfcfd086d, 0xf9ff2889, 0x694bcc11, 0x236a5cae, 0x12deca4d, 0x2c3f8cc5, +0xd2d02dfe, 0xf8ef5896, 0xe4cf52da, 0x95155b67, 0x494a488c, 0xb9b6a80c, 0x5c8f82bc, 0x89d36b45, +0x3a609437, 0xec00c9a9, 0x44715253, 0x0a874b49, 0xd773bc40, 0x7c34671c, 0x02717ef6, 0x4feb5536, +0xa2d02fff, 0xd2bf60c4, 0xd43f03c0, 0x50b4ef6d, 0x07478cd1, 0x006e1888, 0xa2e53f55, 0xb9e6d4bc, +0xa2048016, 0x97573833, 0xd7207d67, 0xde0f8f3d, 0x72f87b33, 0xabcc4f33, 0x7688c55d, 0x7b00a6b0, +0x947b0001, 0x570075d2, 0xf9bb88f8, 0x8942019e, 0x4264a5ff, 0x856302e0, 0x72dbd92b, 0xee971b69, +0x6ea22fde, 0x5f08ae2b, 0xaf7a616d, 0xe5c98767, 0xcf1febd2, 0x61efc8c2, 0xf1ac2571, 0xcc8239c2, +0x67214cb8, 0xb1e583d1, 0xb7dc3e62, 0x7f10bdce, 0xf90a5c38, 0x0ff0443d, 0x606e6dc6, 0x60543a49, +0x5727c148, 0x2be98a1d, 0x8ab41738, 0x20e1be24, 0xaf96da0f, 0x68458425, 0x99833be5, 0x600d457d, +0x282f9350, 0x8334b362, 0xd91d1120, 0x2b6d8da0, 0x642b1e31, 0x9c305a00, 0x52bce688, 0x1b03588a, +0xf7baefd5, 0x4142ed9c, 0xa4315c11, 0x83323ec5, 0xdfef4636, 0xa133c501, 0xe9d3531c, 0xee353783 +}, { +0x9db30420, 0x1fb6e9de, 0xa7be7bef, 0xd273a298, 0x4a4f7bdb, 0x64ad8c57, 0x85510443, 0xfa020ed1, +0x7e287aff, 0xe60fb663, 0x095f35a1, 0x79ebf120, 0xfd059d43, 0x6497b7b1, 0xf3641f63, 0x241e4adf, +0x28147f5f, 0x4fa2b8cd, 0xc9430040, 0x0cc32220, 0xfdd30b30, 0xc0a5374f, 0x1d2d00d9, 0x24147b15, +0xee4d111a, 0x0fca5167, 0x71ff904c, 0x2d195ffe, 0x1a05645f, 0x0c13fefe, 0x081b08ca, 0x05170121, +0x80530100, 0xe83e5efe, 0xac9af4f8, 0x7fe72701, 0xd2b8ee5f, 0x06df4261, 0xbb9e9b8a, 0x7293ea25, +0xce84ffdf, 0xf5718801, 0x3dd64b04, 0xa26f263b, 
0x7ed48400, 0x547eebe6, 0x446d4ca0, 0x6cf3d6f5, +0x2649abdf, 0xaea0c7f5, 0x36338cc1, 0x503f7e93, 0xd3772061, 0x11b638e1, 0x72500e03, 0xf80eb2bb, +0xabe0502e, 0xec8d77de, 0x57971e81, 0xe14f6746, 0xc9335400, 0x6920318f, 0x081dbb99, 0xffc304a5, +0x4d351805, 0x7f3d5ce3, 0xa6c866c6, 0x5d5bcca9, 0xdaec6fea, 0x9f926f91, 0x9f46222f, 0x3991467d, +0xa5bf6d8e, 0x1143c44f, 0x43958302, 0xd0214eeb, 0x022083b8, 0x3fb6180c, 0x18f8931e, 0x281658e6, +0x26486e3e, 0x8bd78a70, 0x7477e4c1, 0xb506e07c, 0xf32d0a25, 0x79098b02, 0xe4eabb81, 0x28123b23, +0x69dead38, 0x1574ca16, 0xdf871b62, 0x211c40b7, 0xa51a9ef9, 0x0014377b, 0x041e8ac8, 0x09114003, +0xbd59e4d2, 0xe3d156d5, 0x4fe876d5, 0x2f91a340, 0x557be8de, 0x00eae4a7, 0x0ce5c2ec, 0x4db4bba6, +0xe756bdff, 0xdd3369ac, 0xec17b035, 0x06572327, 0x99afc8b0, 0x56c8c391, 0x6b65811c, 0x5e146119, +0x6e85cb75, 0xbe07c002, 0xc2325577, 0x893ff4ec, 0x5bbfc92d, 0xd0ec3b25, 0xb7801ab7, 0x8d6d3b24, +0x20c763ef, 0xc366a5fc, 0x9c382880, 0x0ace3205, 0xaac9548a, 0xeca1d7c7, 0x041afa32, 0x1d16625a, +0x6701902c, 0x9b757a54, 0x31d477f7, 0x9126b031, 0x36cc6fdb, 0xc70b8b46, 0xd9e66a48, 0x56e55a79, +0x026a4ceb, 0x52437eff, 0x2f8f76b4, 0x0df980a5, 0x8674cde3, 0xedda04eb, 0x17a9be04, 0x2c18f4df, +0xb7747f9d, 0xab2af7b4, 0xefc34d20, 0x2e096b7c, 0x1741a254, 0xe5b6a035, 0x213d42f6, 0x2c1c7c26, +0x61c2f50f, 0x6552daf9, 0xd2c231f8, 0x25130f69, 0xd8167fa2, 0x0418f2c8, 0x001a96a6, 0x0d1526ab, +0x63315c21, 0x5e0a72ec, 0x49bafefd, 0x187908d9, 0x8d0dbd86, 0x311170a7, 0x3e9b640c, 0xcc3e10d7, +0xd5cad3b6, 0x0caec388, 0xf73001e1, 0x6c728aff, 0x71eae2a1, 0x1f9af36e, 0xcfcbd12f, 0xc1de8417, +0xac07be6b, 0xcb44a1d8, 0x8b9b0f56, 0x013988c3, 0xb1c52fca, 0xb4be31cd, 0xd8782806, 0x12a3a4e2, +0x6f7de532, 0x58fd7eb6, 0xd01ee900, 0x24adffc2, 0xf4990fc5, 0x9711aac5, 0x001d7b95, 0x82e5e7d2, +0x109873f6, 0x00613096, 0xc32d9521, 0xada121ff, 0x29908415, 0x7fbb977f, 0xaf9eb3db, 0x29c9ed2a, +0x5ce2a465, 0xa730f32c, 0xd0aa3fe8, 0x8a5cc091, 0xd49e2ce7, 0x0ce454a9, 0xd60acd86, 0x015f1919, +0x77079103, 0xdea03af6, 0x78a8565e, 0xdee356df, 0x21f05cbe, 0x8b75e387, 0xb3c50651, 0xb8a5c3ef, +0xd8eeb6d2, 0xe523be77, 0xc2154529, 0x2f69efdf, 0xafe67afb, 0xf470c4b2, 0xf3e0eb5b, 0xd6cc9876, +0x39e4460c, 0x1fda8538, 0x1987832f, 0xca007367, 0xa99144f8, 0x296b299e, 0x492fc295, 0x9266beab, +0xb5676e69, 0x9bd3ddda, 0xdf7e052f, 0xdb25701c, 0x1b5e51ee, 0xf65324e6, 0x6afce36c, 0x0316cc04, +0x8644213e, 0xb7dc59d0, 0x7965291f, 0xccd6fd43, 0x41823979, 0x932bcdf6, 0xb657c34d, 0x4edfd282, +0x7ae5290c, 0x3cb9536b, 0x851e20fe, 0x9833557e, 0x13ecf0b0, 0xd3ffb372, 0x3f85c5c1, 0x0aef7ed2 +} }; +static const u32 s5[256] = { +0x7ec90c04, 0x2c6e74b9, 0x9b0e66df, 0xa6337911, 0xb86a7fff, 0x1dd358f5, 0x44dd9d44, 0x1731167f, +0x08fbf1fa, 0xe7f511cc, 0xd2051b00, 0x735aba00, 0x2ab722d8, 0x386381cb, 0xacf6243a, 0x69befd7a, +0xe6a2e77f, 0xf0c720cd, 0xc4494816, 0xccf5c180, 0x38851640, 0x15b0a848, 0xe68b18cb, 0x4caadeff, +0x5f480a01, 0x0412b2aa, 0x259814fc, 0x41d0efe2, 0x4e40b48d, 0x248eb6fb, 0x8dba1cfe, 0x41a99b02, +0x1a550a04, 0xba8f65cb, 0x7251f4e7, 0x95a51725, 0xc106ecd7, 0x97a5980a, 0xc539b9aa, 0x4d79fe6a, +0xf2f3f763, 0x68af8040, 0xed0c9e56, 0x11b4958b, 0xe1eb5a88, 0x8709e6b0, 0xd7e07156, 0x4e29fea7, +0x6366e52d, 0x02d1c000, 0xc4ac8e05, 0x9377f571, 0x0c05372a, 0x578535f2, 0x2261be02, 0xd642a0c9, +0xdf13a280, 0x74b55bd2, 0x682199c0, 0xd421e5ec, 0x53fb3ce8, 0xc8adedb3, 0x28a87fc9, 0x3d959981, +0x5c1ff900, 0xfe38d399, 0x0c4eff0b, 0x062407ea, 0xaa2f4fb1, 0x4fb96976, 0x90c79505, 0xb0a8a774, +0xef55a1ff, 0xe59ca2c2, 0xa6b62d27, 0xe66a4263, 0xdf65001f, 0x0ec50966, 
0xdfdd55bc, 0x29de0655, +0x911e739a, 0x17af8975, 0x32c7911c, 0x89f89468, 0x0d01e980, 0x524755f4, 0x03b63cc9, 0x0cc844b2, +0xbcf3f0aa, 0x87ac36e9, 0xe53a7426, 0x01b3d82b, 0x1a9e7449, 0x64ee2d7e, 0xcddbb1da, 0x01c94910, +0xb868bf80, 0x0d26f3fd, 0x9342ede7, 0x04a5c284, 0x636737b6, 0x50f5b616, 0xf24766e3, 0x8eca36c1, +0x136e05db, 0xfef18391, 0xfb887a37, 0xd6e7f7d4, 0xc7fb7dc9, 0x3063fcdf, 0xb6f589de, 0xec2941da, +0x26e46695, 0xb7566419, 0xf654efc5, 0xd08d58b7, 0x48925401, 0xc1bacb7f, 0xe5ff550f, 0xb6083049, +0x5bb5d0e8, 0x87d72e5a, 0xab6a6ee1, 0x223a66ce, 0xc62bf3cd, 0x9e0885f9, 0x68cb3e47, 0x086c010f, +0xa21de820, 0xd18b69de, 0xf3f65777, 0xfa02c3f6, 0x407edac3, 0xcbb3d550, 0x1793084d, 0xb0d70eba, +0x0ab378d5, 0xd951fb0c, 0xded7da56, 0x4124bbe4, 0x94ca0b56, 0x0f5755d1, 0xe0e1e56e, 0x6184b5be, +0x580a249f, 0x94f74bc0, 0xe327888e, 0x9f7b5561, 0xc3dc0280, 0x05687715, 0x646c6bd7, 0x44904db3, +0x66b4f0a3, 0xc0f1648a, 0x697ed5af, 0x49e92ff6, 0x309e374f, 0x2cb6356a, 0x85808573, 0x4991f840, +0x76f0ae02, 0x083be84d, 0x28421c9a, 0x44489406, 0x736e4cb8, 0xc1092910, 0x8bc95fc6, 0x7d869cf4, +0x134f616f, 0x2e77118d, 0xb31b2be1, 0xaa90b472, 0x3ca5d717, 0x7d161bba, 0x9cad9010, 0xaf462ba2, +0x9fe459d2, 0x45d34559, 0xd9f2da13, 0xdbc65487, 0xf3e4f94e, 0x176d486f, 0x097c13ea, 0x631da5c7, +0x445f7382, 0x175683f4, 0xcdc66a97, 0x70be0288, 0xb3cdcf72, 0x6e5dd2f3, 0x20936079, 0x459b80a5, +0xbe60e2db, 0xa9c23101, 0xeba5315c, 0x224e42f2, 0x1c5c1572, 0xf6721b2c, 0x1ad2fff3, 0x8c25404e, +0x324ed72f, 0x4067b7fd, 0x0523138e, 0x5ca3bc78, 0xdc0fd66e, 0x75922283, 0x784d6b17, 0x58ebb16e, +0x44094f85, 0x3f481d87, 0xfcfeae7b, 0x77b5ff76, 0x8c2302bf, 0xaaf47556, 0x5f46b02a, 0x2b092801, +0x3d38f5f7, 0x0ca81f36, 0x52af4a8a, 0x66d5e7c0, 0xdf3b0874, 0x95055110, 0x1b5ad7a8, 0xf61ed5ad, +0x6cf6e479, 0x20758184, 0xd0cefa65, 0x88f7be58, 0x4a046826, 0x0ff6f8f3, 0xa09c7f70, 0x5346aba0, +0x5ce96c28, 0xe176eda3, 0x6bac307f, 0x376829d2, 0x85360fa9, 0x17e3fe2a, 0x24b79767, 0xf5a96b20, +0xd6cd2595, 0x68ff1ebf, 0x7555442c, 0xf19f06be, 0xf9e0659a, 0xeeb9491d, 0x34010718, 0xbb30cab8, +0xe822fe15, 0x88570983, 0x750e6249, 0xda627e55, 0x5e76ffa8, 0xb1534546, 0x6d47de08, 0xefe9e7d4 +}; +static const u32 s6[256] = { +0xf6fa8f9d, 0x2cac6ce1, 0x4ca34867, 0xe2337f7c, 0x95db08e7, 0x016843b4, 0xeced5cbc, 0x325553ac, +0xbf9f0960, 0xdfa1e2ed, 0x83f0579d, 0x63ed86b9, 0x1ab6a6b8, 0xde5ebe39, 0xf38ff732, 0x8989b138, +0x33f14961, 0xc01937bd, 0xf506c6da, 0xe4625e7e, 0xa308ea99, 0x4e23e33c, 0x79cbd7cc, 0x48a14367, +0xa3149619, 0xfec94bd5, 0xa114174a, 0xeaa01866, 0xa084db2d, 0x09a8486f, 0xa888614a, 0x2900af98, +0x01665991, 0xe1992863, 0xc8f30c60, 0x2e78ef3c, 0xd0d51932, 0xcf0fec14, 0xf7ca07d2, 0xd0a82072, +0xfd41197e, 0x9305a6b0, 0xe86be3da, 0x74bed3cd, 0x372da53c, 0x4c7f4448, 0xdab5d440, 0x6dba0ec3, +0x083919a7, 0x9fbaeed9, 0x49dbcfb0, 0x4e670c53, 0x5c3d9c01, 0x64bdb941, 0x2c0e636a, 0xba7dd9cd, +0xea6f7388, 0xe70bc762, 0x35f29adb, 0x5c4cdd8d, 0xf0d48d8c, 0xb88153e2, 0x08a19866, 0x1ae2eac8, +0x284caf89, 0xaa928223, 0x9334be53, 0x3b3a21bf, 0x16434be3, 0x9aea3906, 0xefe8c36e, 0xf890cdd9, +0x80226dae, 0xc340a4a3, 0xdf7e9c09, 0xa694a807, 0x5b7c5ecc, 0x221db3a6, 0x9a69a02f, 0x68818a54, +0xceb2296f, 0x53c0843a, 0xfe893655, 0x25bfe68a, 0xb4628abc, 0xcf222ebf, 0x25ac6f48, 0xa9a99387, +0x53bddb65, 0xe76ffbe7, 0xe967fd78, 0x0ba93563, 0x8e342bc1, 0xe8a11be9, 0x4980740d, 0xc8087dfc, +0x8de4bf99, 0xa11101a0, 0x7fd37975, 0xda5a26c0, 0xe81f994f, 0x9528cd89, 0xfd339fed, 0xb87834bf, +0x5f04456d, 0x22258698, 0xc9c4c83b, 0x2dc156be, 0x4f628daa, 0x57f55ec5, 0xe2220abe, 0xd2916ebf, 
+0x4ec75b95, 0x24f2c3c0, 0x42d15d99, 0xcd0d7fa0, 0x7b6e27ff, 0xa8dc8af0, 0x7345c106, 0xf41e232f, +0x35162386, 0xe6ea8926, 0x3333b094, 0x157ec6f2, 0x372b74af, 0x692573e4, 0xe9a9d848, 0xf3160289, +0x3a62ef1d, 0xa787e238, 0xf3a5f676, 0x74364853, 0x20951063, 0x4576698d, 0xb6fad407, 0x592af950, +0x36f73523, 0x4cfb6e87, 0x7da4cec0, 0x6c152daa, 0xcb0396a8, 0xc50dfe5d, 0xfcd707ab, 0x0921c42f, +0x89dff0bb, 0x5fe2be78, 0x448f4f33, 0x754613c9, 0x2b05d08d, 0x48b9d585, 0xdc049441, 0xc8098f9b, +0x7dede786, 0xc39a3373, 0x42410005, 0x6a091751, 0x0ef3c8a6, 0x890072d6, 0x28207682, 0xa9a9f7be, +0xbf32679d, 0xd45b5b75, 0xb353fd00, 0xcbb0e358, 0x830f220a, 0x1f8fb214, 0xd372cf08, 0xcc3c4a13, +0x8cf63166, 0x061c87be, 0x88c98f88, 0x6062e397, 0x47cf8e7a, 0xb6c85283, 0x3cc2acfb, 0x3fc06976, +0x4e8f0252, 0x64d8314d, 0xda3870e3, 0x1e665459, 0xc10908f0, 0x513021a5, 0x6c5b68b7, 0x822f8aa0, +0x3007cd3e, 0x74719eef, 0xdc872681, 0x073340d4, 0x7e432fd9, 0x0c5ec241, 0x8809286c, 0xf592d891, +0x08a930f6, 0x957ef305, 0xb7fbffbd, 0xc266e96f, 0x6fe4ac98, 0xb173ecc0, 0xbc60b42a, 0x953498da, +0xfba1ae12, 0x2d4bd736, 0x0f25faab, 0xa4f3fceb, 0xe2969123, 0x257f0c3d, 0x9348af49, 0x361400bc, +0xe8816f4a, 0x3814f200, 0xa3f94043, 0x9c7a54c2, 0xbc704f57, 0xda41e7f9, 0xc25ad33a, 0x54f4a084, +0xb17f5505, 0x59357cbe, 0xedbd15c8, 0x7f97c5ab, 0xba5ac7b5, 0xb6f6deaf, 0x3a479c3a, 0x5302da25, +0x653d7e6a, 0x54268d49, 0x51a477ea, 0x5017d55b, 0xd7d25d88, 0x44136c76, 0x0404a8c8, 0xb8e5a121, +0xb81a928a, 0x60ed5869, 0x97c55b96, 0xeaec991b, 0x29935913, 0x01fdb7f1, 0x088e8dfa, 0x9ab6f6f5, +0x3b4cbf9f, 0x4a5de3ab, 0xe6051d35, 0xa0e1d855, 0xd36b4cf1, 0xf544edeb, 0xb0e93524, 0xbebb8fbd, +0xa2d762cf, 0x49c92f54, 0x38b5f331, 0x7128a454, 0x48392905, 0xa65b1db8, 0x851c97bd, 0xd675cf2f +}; +static const u32 s7[256] = { +0x85e04019, 0x332bf567, 0x662dbfff, 0xcfc65693, 0x2a8d7f6f, 0xab9bc912, 0xde6008a1, 0x2028da1f, +0x0227bce7, 0x4d642916, 0x18fac300, 0x50f18b82, 0x2cb2cb11, 0xb232e75c, 0x4b3695f2, 0xb28707de, +0xa05fbcf6, 0xcd4181e9, 0xe150210c, 0xe24ef1bd, 0xb168c381, 0xfde4e789, 0x5c79b0d8, 0x1e8bfd43, +0x4d495001, 0x38be4341, 0x913cee1d, 0x92a79c3f, 0x089766be, 0xbaeeadf4, 0x1286becf, 0xb6eacb19, +0x2660c200, 0x7565bde4, 0x64241f7a, 0x8248dca9, 0xc3b3ad66, 0x28136086, 0x0bd8dfa8, 0x356d1cf2, +0x107789be, 0xb3b2e9ce, 0x0502aa8f, 0x0bc0351e, 0x166bf52a, 0xeb12ff82, 0xe3486911, 0xd34d7516, +0x4e7b3aff, 0x5f43671b, 0x9cf6e037, 0x4981ac83, 0x334266ce, 0x8c9341b7, 0xd0d854c0, 0xcb3a6c88, +0x47bc2829, 0x4725ba37, 0xa66ad22b, 0x7ad61f1e, 0x0c5cbafa, 0x4437f107, 0xb6e79962, 0x42d2d816, +0x0a961288, 0xe1a5c06e, 0x13749e67, 0x72fc081a, 0xb1d139f7, 0xf9583745, 0xcf19df58, 0xbec3f756, +0xc06eba30, 0x07211b24, 0x45c28829, 0xc95e317f, 0xbc8ec511, 0x38bc46e9, 0xc6e6fa14, 0xbae8584a, +0xad4ebc46, 0x468f508b, 0x7829435f, 0xf124183b, 0x821dba9f, 0xaff60ff4, 0xea2c4e6d, 0x16e39264, +0x92544a8b, 0x009b4fc3, 0xaba68ced, 0x9ac96f78, 0x06a5b79a, 0xb2856e6e, 0x1aec3ca9, 0xbe838688, +0x0e0804e9, 0x55f1be56, 0xe7e5363b, 0xb3a1f25d, 0xf7debb85, 0x61fe033c, 0x16746233, 0x3c034c28, +0xda6d0c74, 0x79aac56c, 0x3ce4e1ad, 0x51f0c802, 0x98f8f35a, 0x1626a49f, 0xeed82b29, 0x1d382fe3, +0x0c4fb99a, 0xbb325778, 0x3ec6d97b, 0x6e77a6a9, 0xcb658b5c, 0xd45230c7, 0x2bd1408b, 0x60c03eb7, +0xb9068d78, 0xa33754f4, 0xf430c87d, 0xc8a71302, 0xb96d8c32, 0xebd4e7be, 0xbe8b9d2d, 0x7979fb06, +0xe7225308, 0x8b75cf77, 0x11ef8da4, 0xe083c858, 0x8d6b786f, 0x5a6317a6, 0xfa5cf7a0, 0x5dda0033, +0xf28ebfb0, 0xf5b9c310, 0xa0eac280, 0x08b9767a, 0xa3d9d2b0, 0x79d34217, 0x021a718d, 0x9ac6336a, +0x2711fd60, 0x438050e3, 
0x069908a8, 0x3d7fedc4, 0x826d2bef, 0x4eeb8476, 0x488dcf25, 0x36c9d566, +0x28e74e41, 0xc2610aca, 0x3d49a9cf, 0xbae3b9df, 0xb65f8de6, 0x92aeaf64, 0x3ac7d5e6, 0x9ea80509, +0xf22b017d, 0xa4173f70, 0xdd1e16c3, 0x15e0d7f9, 0x50b1b887, 0x2b9f4fd5, 0x625aba82, 0x6a017962, +0x2ec01b9c, 0x15488aa9, 0xd716e740, 0x40055a2c, 0x93d29a22, 0xe32dbf9a, 0x058745b9, 0x3453dc1e, +0xd699296e, 0x496cff6f, 0x1c9f4986, 0xdfe2ed07, 0xb87242d1, 0x19de7eae, 0x053e561a, 0x15ad6f8c, +0x66626c1c, 0x7154c24c, 0xea082b2a, 0x93eb2939, 0x17dcb0f0, 0x58d4f2ae, 0x9ea294fb, 0x52cf564c, +0x9883fe66, 0x2ec40581, 0x763953c3, 0x01d6692e, 0xd3a0c108, 0xa1e7160e, 0xe4f2dfa6, 0x693ed285, +0x74904698, 0x4c2b0edd, 0x4f757656, 0x5d393378, 0xa132234f, 0x3d321c5d, 0xc3f5e194, 0x4b269301, +0xc79f022f, 0x3c997e7e, 0x5e4f9504, 0x3ffafbbd, 0x76f7ad0e, 0x296693f4, 0x3d1fce6f, 0xc61e45be, +0xd3b5ab34, 0xf72bf9b7, 0x1b0434c0, 0x4e72b567, 0x5592a33d, 0xb5229301, 0xcfd2a87f, 0x60aeb767, +0x1814386b, 0x30bcc33d, 0x38a0c07d, 0xfd1606f2, 0xc363519b, 0x589dd390, 0x5479f8e6, 0x1cb8d647, +0x97fd61a9, 0xea7759f4, 0x2d57539d, 0x569a58cf, 0xe84e63ad, 0x462e1b78, 0x6580f87e, 0xf3817914, +0x91da55f4, 0x40a230f3, 0xd1988f35, 0xb6e318d2, 0x3ffa50bc, 0x3d40f021, 0xc3c0bdae, 0x4958c24c, +0x518f36b2, 0x84b1d370, 0x0fedce83, 0x878ddada, 0xf2a279c7, 0x94e01be8, 0x90716f4b, 0x954b8aa3 +}; +static const u32 s8[256] = { +0xe216300d, 0xbbddfffc, 0xa7ebdabd, 0x35648095, 0x7789f8b7, 0xe6c1121b, 0x0e241600, 0x052ce8b5, +0x11a9cfb0, 0xe5952f11, 0xece7990a, 0x9386d174, 0x2a42931c, 0x76e38111, 0xb12def3a, 0x37ddddfc, +0xde9adeb1, 0x0a0cc32c, 0xbe197029, 0x84a00940, 0xbb243a0f, 0xb4d137cf, 0xb44e79f0, 0x049eedfd, +0x0b15a15d, 0x480d3168, 0x8bbbde5a, 0x669ded42, 0xc7ece831, 0x3f8f95e7, 0x72df191b, 0x7580330d, +0x94074251, 0x5c7dcdfa, 0xabbe6d63, 0xaa402164, 0xb301d40a, 0x02e7d1ca, 0x53571dae, 0x7a3182a2, +0x12a8ddec, 0xfdaa335d, 0x176f43e8, 0x71fb46d4, 0x38129022, 0xce949ad4, 0xb84769ad, 0x965bd862, +0x82f3d055, 0x66fb9767, 0x15b80b4e, 0x1d5b47a0, 0x4cfde06f, 0xc28ec4b8, 0x57e8726e, 0x647a78fc, +0x99865d44, 0x608bd593, 0x6c200e03, 0x39dc5ff6, 0x5d0b00a3, 0xae63aff2, 0x7e8bd632, 0x70108c0c, +0xbbd35049, 0x2998df04, 0x980cf42a, 0x9b6df491, 0x9e7edd53, 0x06918548, 0x58cb7e07, 0x3b74ef2e, +0x522fffb1, 0xd24708cc, 0x1c7e27cd, 0xa4eb215b, 0x3cf1d2e2, 0x19b47a38, 0x424f7618, 0x35856039, +0x9d17dee7, 0x27eb35e6, 0xc9aff67b, 0x36baf5b8, 0x09c467cd, 0xc18910b1, 0xe11dbf7b, 0x06cd1af8, +0x7170c608, 0x2d5e3354, 0xd4de495a, 0x64c6d006, 0xbcc0c62c, 0x3dd00db3, 0x708f8f34, 0x77d51b42, +0x264f620f, 0x24b8d2bf, 0x15c1b79e, 0x46a52564, 0xf8d7e54e, 0x3e378160, 0x7895cda5, 0x859c15a5, +0xe6459788, 0xc37bc75f, 0xdb07ba0c, 0x0676a3ab, 0x7f229b1e, 0x31842e7b, 0x24259fd7, 0xf8bef472, +0x835ffcb8, 0x6df4c1f2, 0x96f5b195, 0xfd0af0fc, 0xb0fe134c, 0xe2506d3d, 0x4f9b12ea, 0xf215f225, +0xa223736f, 0x9fb4c428, 0x25d04979, 0x34c713f8, 0xc4618187, 0xea7a6e98, 0x7cd16efc, 0x1436876c, +0xf1544107, 0xbedeee14, 0x56e9af27, 0xa04aa441, 0x3cf7c899, 0x92ecbae6, 0xdd67016d, 0x151682eb, +0xa842eedf, 0xfdba60b4, 0xf1907b75, 0x20e3030f, 0x24d8c29e, 0xe139673b, 0xefa63fb8, 0x71873054, +0xb6f2cf3b, 0x9f326442, 0xcb15a4cc, 0xb01a4504, 0xf1e47d8d, 0x844a1be5, 0xbae7dfdc, 0x42cbda70, +0xcd7dae0a, 0x57e85b7a, 0xd53f5af6, 0x20cf4d8c, 0xcea4d428, 0x79d130a4, 0x3486ebfb, 0x33d3cddc, +0x77853b53, 0x37effcb5, 0xc5068778, 0xe580b3e6, 0x4e68b8f4, 0xc5c8b37e, 0x0d809ea2, 0x398feb7c, +0x132a4f94, 0x43b7950e, 0x2fee7d1c, 0x223613bd, 0xdd06caa2, 0x37df932b, 0xc4248289, 0xacf3ebc3, +0x5715f6b7, 0xef3478dd, 0xf267616f, 0xc148cbe4, 
0x9052815e, 0x5e410fab, 0xb48a2465, 0x2eda7fa4, +0xe87b40e4, 0xe98ea084, 0x5889e9e1, 0xefd390fc, 0xdd07d35b, 0xdb485694, 0x38d7e5b2, 0x57720101, +0x730edebc, 0x5b643113, 0x94917e4f, 0x503c2fba, 0x646f1282, 0x7523d24a, 0xe0779695, 0xf9c17a8f, +0x7a5b2121, 0xd187b896, 0x29263a4d, 0xba510cdf, 0x81f47c9f, 0xad1163ed, 0xea7b5965, 0x1a00726e, +0x11403092, 0x00da6d77, 0x4a0cdd61, 0xad1f4603, 0x605bdfb0, 0x9eedc364, 0x22ebe6a8, 0xcee7d28a, +0xa0e736a0, 0x5564a6b9, 0x10853209, 0xc7eb8f37, 0x2de705ca, 0x8951570f, 0xdf09822b, 0xbd691a6c, +0xaa12e4f2, 0x87451c0f, 0xe0f6a27a, 0x3ada4819, 0x4cf1764f, 0x0d771c2b, 0x67cdb156, 0x350d8384, +0x5938fa0f, 0x42399ef3, 0x36997b07, 0x0e84093d, 0x4aa93e61, 0x8360d87b, 0x1fa98b0c, 0x1149382c, +0xe97625a5, 0x0614d1b7, 0x0e25244b, 0x0c768347, 0x589e8d82, 0x0d2059d1, 0xa466bb1e, 0xf8da0a82, +0x04f19130, 0xba6e4ec0, 0x99265164, 0x1ee7230d, 0x50b2ad80, 0xeaee6801, 0x8db2a283, 0xea8bf59e +}; + + +#ifdef USE_AMD64_ASM + +/* Assembly implementations of CAST5. */ +extern void _gcry_cast5_amd64_encrypt_block(CAST5_context *c, byte *outbuf, + const byte *inbuf); + +extern void _gcry_cast5_amd64_decrypt_block(CAST5_context *c, byte *outbuf, + const byte *inbuf); + +/* These assembly implementations process four blocks in parallel. */ +extern void _gcry_cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, + const byte *in, byte *iv); + +static void +do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_cast5_amd64_encrypt_block (context, outbuf, inbuf); +} + +static void +do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_cast5_amd64_decrypt_block (context, outbuf, inbuf); +} + +static void +cast5_amd64_ctr_enc(CAST5_context *ctx, byte *out, const byte *in, byte *ctr) +{ + _gcry_cast5_amd64_ctr_enc (ctx, out, in, ctr); +} + +static void +cast5_amd64_cbc_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv) +{ + _gcry_cast5_amd64_cbc_dec (ctx, out, in, iv); +} + +static void +cast5_amd64_cfb_dec(CAST5_context *ctx, byte *out, const byte *in, byte *iv) +{ + _gcry_cast5_amd64_cfb_dec (ctx, out, in, iv); +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (2*8); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (2*8); +} + +#elif defined(USE_ARM_ASM) + +/* ARM assembly implementations of CAST5. */ +extern void _gcry_cast5_arm_encrypt_block(CAST5_context *c, byte *outbuf, + const byte *inbuf); + +extern void _gcry_cast5_arm_decrypt_block(CAST5_context *c, byte *outbuf, + const byte *inbuf); + +/* These assembly implementations process two blocks in parallel. 
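The AMD64 wrappers above return a fixed stack-burn amount (2*8 bytes) instead of scrubbing the stack themselves; the generic cipher layer is expected to do that afterwards. A minimal sketch of this calling convention, with a hypothetical caller name (not the actual cipher.c code path):

static void
example_encrypt_one (CAST5_context *c, byte *out, const byte *in)
{
  unsigned int nburn;

  /* Block functions return the stack-burn depth in bytes.  */
  nburn = encrypt_block (c, out, in);

  /* Overwrite whatever key-dependent data the call left on the stack.  */
  if (nburn)
    _gcry_burn_stack (nburn);
}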
*/ +extern void _gcry_cast5_arm_ctr_enc(CAST5_context *ctx, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_cast5_arm_cbc_dec(CAST5_context *ctx, byte *out, + const byte *in, byte *iv); + +extern void _gcry_cast5_arm_cfb_dec(CAST5_context *ctx, byte *out, + const byte *in, byte *iv); + +static void +do_encrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_cast5_arm_encrypt_block (context, outbuf, inbuf); +} + +static void +do_decrypt_block (CAST5_context *context, byte *outbuf, const byte *inbuf) +{ + _gcry_cast5_arm_decrypt_block (context, outbuf, inbuf); +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (10*4); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (10*4); +} + +#else /*USE_ARM_ASM*/ + +#define F1(D,m,r) ( (I = ((m) + (D))), (I=rol(I,(r))), \ + (((s1[I >> 24] ^ s2[(I>>16)&0xff]) - s3[(I>>8)&0xff]) + s4[I&0xff]) ) +#define F2(D,m,r) ( (I = ((m) ^ (D))), (I=rol(I,(r))), \ + (((s1[I >> 24] - s2[(I>>16)&0xff]) + s3[(I>>8)&0xff]) ^ s4[I&0xff]) ) +#define F3(D,m,r) ( (I = ((m) - (D))), (I=rol(I,(r))), \ + (((s1[I >> 24] + s2[(I>>16)&0xff]) ^ s3[(I>>8)&0xff]) - s4[I&0xff]) ) + +static void +do_encrypt_block( CAST5_context *c, byte *outbuf, const byte *inbuf ) +{ + u32 l, r, t; + u32 I; /* used by the Fx macros */ + u32 *Km; + u32 Kr; + + Km = c->Km; + Kr = buf_get_le32(c->Kr + 0); + + /* (L0,R0) <-- (m1...m64). (Split the plaintext into left and + * right 32-bit halves L0 = m1...m32 and R0 = m33...m64.) + */ + l = buf_get_be32(inbuf + 0); + r = buf_get_be32(inbuf + 4); + + /* (16 rounds) for i from 1 to 16, compute Li and Ri as follows: + * Li = Ri-1; + * Ri = Li-1 ^ f(Ri-1,Kmi,Kri), where f is defined in Section 2.2 + * Rounds 1, 4, 7, 10, 13, and 16 use f function Type 1. + * Rounds 2, 5, 8, 11, and 14 use f function Type 2. + * Rounds 3, 6, 9, 12, and 15 use f function Type 3. + */ + + t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr = buf_get_le32(c->Kr + 4); + t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr = buf_get_le32(c->Kr + 8); + t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr = buf_get_le32(c->Kr + 12); + t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31); + + /* c1...c64 <-- (R16,L16). (Exchange final blocks L16, R16 and + * concatenate to form the ciphertext.) 
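De-sugared for readability, the type-1 f-function computed by the F1 macro above looks like this as a plain function (illustration only; the macro form with the shared I temporary is what the cipher actually compiles, and rol is the rotate-left helper from bithelp.h):

static u32
cast5_f1 (u32 d, u32 km, u32 kr)
{
  u32 I = rol (km + d, kr);   /* mix with the masking key, rotate by Kr */

  return ((s1[I >> 24] ^ s2[(I >> 16) & 0xff])
          - s3[(I >> 8) & 0xff]) + s4[I & 0xff];
}

Each round line is then the plain Feistel step "t = l; l = r; r = t ^ cast5_f1 (r, Km[i], kr_i);", with F2 and F3 merely cycling the +, ^ and - operators through the key mixing and the s-box combination.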
*/ + buf_put_be32(outbuf + 0, r); + buf_put_be32(outbuf + 4, l); +} + +static unsigned int +encrypt_block (void *context , byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_encrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (20+4*sizeof(void*)); +} + + +static void +do_encrypt_block_3( CAST5_context *c, byte *outbuf, const byte *inbuf ) +{ + u32 l0, r0, t0, l1, r1, t1, l2, r2, t2; + u32 I; /* used by the Fx macros */ + u32 *Km; + u32 Kr; + + Km = c->Km; + Kr = buf_get_le32(c->Kr + 0); + + l0 = buf_get_be32(inbuf + 0); + r0 = buf_get_be32(inbuf + 4); + l1 = buf_get_be32(inbuf + 8); + r1 = buf_get_be32(inbuf + 12); + l2 = buf_get_be32(inbuf + 16); + r2 = buf_get_be32(inbuf + 20); + + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31); + Kr = buf_get_le32(c->Kr + 4); + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31); + Kr = buf_get_le32(c->Kr + 8); + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 8], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31); + Kr = buf_get_le32(c->Kr + 12); + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr 
& 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31); + + buf_put_be32(outbuf + 0, r0); + buf_put_be32(outbuf + 4, l0); + buf_put_be32(outbuf + 8, r1); + buf_put_be32(outbuf + 12, l1); + buf_put_be32(outbuf + 16, r2); + buf_put_be32(outbuf + 20, l2); +} + + +static void +do_decrypt_block (CAST5_context *c, byte *outbuf, const byte *inbuf ) +{ + u32 l, r, t; + u32 I; + u32 *Km; + u32 Kr; + + Km = c->Km; + Kr = buf_get_be32(c->Kr + 12); + + l = buf_get_be32(inbuf + 0); + r = buf_get_be32(inbuf + 4); + + t = l; l = r; r = t ^ F1(r, Km[15], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F3(r, Km[14], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F2(r, Km[13], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F1(r, Km[12], Kr & 31); Kr = buf_get_be32(c->Kr + 8); + t = l; l = r; r = t ^ F3(r, Km[11], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F2(r, Km[10], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F1(r, Km[ 9], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F3(r, Km[ 8], Kr & 31); Kr = buf_get_be32(c->Kr + 4); + t = l; l = r; r = t ^ F2(r, Km[ 7], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F1(r, Km[ 6], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F3(r, Km[ 5], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F2(r, Km[ 4], Kr & 31); Kr = buf_get_be32(c->Kr + 0); + t = l; l = r; r = t ^ F1(r, Km[ 3], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F3(r, Km[ 2], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F2(r, Km[ 1], Kr & 31); Kr >>= 8; + t = l; l = r; r = t ^ F1(r, Km[ 0], Kr & 31); + + buf_put_be32(outbuf + 0, r); + buf_put_be32(outbuf + 4, l); +} + +static unsigned int +decrypt_block (void *context, byte *outbuf, const byte *inbuf) +{ + CAST5_context *c = (CAST5_context *) context; + do_decrypt_block (c, outbuf, inbuf); + return /*burn_stack*/ (20+4*sizeof(void*)); +} + + +static void +do_decrypt_block_3 (CAST5_context *c, byte *outbuf, const byte *inbuf ) +{ + u32 l0, r0, t0, l1, r1, t1, l2, r2, t2; + u32 I; + u32 *Km; + u32 Kr; + + Km = c->Km; + Kr = buf_get_be32(c->Kr + 12); + + l0 = buf_get_be32(inbuf + 0); + r0 = buf_get_be32(inbuf + 4); + l1 = buf_get_be32(inbuf + 8); + r1 = buf_get_be32(inbuf + 12); + l2 = buf_get_be32(inbuf + 16); + r2 = buf_get_be32(inbuf + 20); + + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[15], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[15], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[15], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[14], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[14], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[14], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[13], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[13], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[13], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[12], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[12], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[12], Kr & 31); + Kr = buf_get_be32(c->Kr + 8); + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[11], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[11], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[11], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[10], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[10], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[10], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 9], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 9], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 9], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 8], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 
8], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 8], Kr & 31); + Kr = buf_get_be32(c->Kr + 4); + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 7], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 7], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 7], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 6], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 6], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 6], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 5], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 5], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 5], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 4], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 4], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 4], Kr & 31); + Kr = buf_get_be32(c->Kr + 0); + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 3], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 3], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 3], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F3(r0, Km[ 2], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F3(r1, Km[ 2], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F3(r2, Km[ 2], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F2(r0, Km[ 1], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F2(r1, Km[ 1], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F2(r2, Km[ 1], Kr & 31); + Kr >>= 8; + t0 = l0; l0 = r0; r0 = t0 ^ F1(r0, Km[ 0], Kr & 31); + t1 = l1; l1 = r1; r1 = t1 ^ F1(r1, Km[ 0], Kr & 31); + t2 = l2; l2 = r2; r2 = t2 ^ F1(r2, Km[ 0], Kr & 31); + + buf_put_be32(outbuf + 0, r0); + buf_put_be32(outbuf + 4, l0); + buf_put_be32(outbuf + 8, r1); + buf_put_be32(outbuf + 12, l1); + buf_put_be32(outbuf + 16, r2); + buf_put_be32(outbuf + 20, l2); +} + +#endif /*!USE_ARM_ASM*/ + + +/* Bulk encryption of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size CAST5_BLOCKSIZE. */ +static void +_gcry_cast5_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + CAST5_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[CAST5_BLOCKSIZE * 3]; + int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 8 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + cast5_amd64_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 4; + outbuf += 4 * CAST5_BLOCKSIZE; + inbuf += 4 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_cast5_arm_ctr_enc(ctx, outbuf, inbuf, ctr); + + nblocks -= 2; + outbuf += 2 * CAST5_BLOCKSIZE; + inbuf += 2 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM) + for ( ;nblocks >= 3; nblocks -= 3) + { + /* Prepare the counter blocks. */ + cipher_block_cpy (tmpbuf + 0, ctr, CAST5_BLOCKSIZE); + cipher_block_cpy (tmpbuf + 8, ctr, CAST5_BLOCKSIZE); + cipher_block_cpy (tmpbuf + 16, ctr, CAST5_BLOCKSIZE); + cipher_block_add (tmpbuf + 8, 1, CAST5_BLOCKSIZE); + cipher_block_add (tmpbuf + 16, 2, CAST5_BLOCKSIZE); + cipher_block_add (ctr, 3, CAST5_BLOCKSIZE); + /* Encrypt the counter. */ + do_encrypt_block_3(ctx, tmpbuf, tmpbuf); + /* XOR the input with the encrypted counter and store in output. 
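The counter preparation above relies on cipher_block_add treating the 8-byte counter block as one big-endian integer, so adding 1, 2 or 3 yields consecutive counter values. A self-contained byte-wise sketch of that semantic (the real helper in cipher-internal.h is word-optimized):

static void
ctr_add_be (unsigned char *ctr, size_t blocksize, unsigned int add)
{
  unsigned int carry = add;
  size_t i;

  /* Big-endian addition: start at the least significant (last) byte
     and propagate the carry towards the front.  */
  for (i = blocksize; i > 0 && carry; i--)
    {
      carry += ctr[i - 1];
      ctr[i - 1] = carry & 0xff;
      carry >>= 8;
    }
}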
*/ + buf_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE * 3); + outbuf += CAST5_BLOCKSIZE * 3; + inbuf += CAST5_BLOCKSIZE * 3; + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + do_encrypt_block(ctx, tmpbuf, ctr); + /* XOR the input with the encrypted counter and store in output. */ + cipher_block_xor(outbuf, tmpbuf, inbuf, CAST5_BLOCKSIZE); + outbuf += CAST5_BLOCKSIZE; + inbuf += CAST5_BLOCKSIZE; + /* Increment the counter. */ + cipher_block_add (ctr, 1, CAST5_BLOCKSIZE); + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CBC mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +static void +_gcry_cast5_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + CAST5_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char savebuf[CAST5_BLOCKSIZE * 3]; + int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 8 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + cast5_amd64_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 4; + outbuf += 4 * CAST5_BLOCKSIZE; + inbuf += 4 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_cast5_arm_cbc_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * CAST5_BLOCKSIZE; + inbuf += 2 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM) + for ( ;nblocks >= 3; nblocks -= 3) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + do_decrypt_block_3 (ctx, savebuf, inbuf); + + cipher_block_xor_1 (savebuf + 0, iv, CAST5_BLOCKSIZE); + cipher_block_xor_1 (savebuf + 8, inbuf, CAST5_BLOCKSIZE * 2); + cipher_block_cpy (iv, inbuf + 16, CAST5_BLOCKSIZE); + buf_cpy (outbuf, savebuf, CAST5_BLOCKSIZE * 3); + inbuf += CAST5_BLOCKSIZE * 3; + outbuf += CAST5_BLOCKSIZE * 3; + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* INBUF is needed later and it may be identical to OUTBUF, so store + the intermediate result to SAVEBUF. */ + do_decrypt_block (ctx, savebuf, inbuf); + + cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, CAST5_BLOCKSIZE); + inbuf += CAST5_BLOCKSIZE; + outbuf += CAST5_BLOCKSIZE; + } + + wipememory(savebuf, sizeof(savebuf)); + _gcry_burn_stack(burn_stack_depth); +} + +/* Bulk decryption of complete blocks in CFB mode. This function is only + intended for the bulk encryption feature of cipher.c. */ +static void +_gcry_cast5_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + CAST5_context *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[CAST5_BLOCKSIZE * 3]; + int burn_stack_depth = (20 + 4 * sizeof(void*)) + 4 * CAST5_BLOCKSIZE; + +#ifdef USE_AMD64_ASM + { + if (nblocks >= 4) + burn_stack_depth += 8 * sizeof(void*); + + /* Process data in 4 block chunks. */ + while (nblocks >= 4) + { + cast5_amd64_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 4; + outbuf += 4 * CAST5_BLOCKSIZE; + inbuf += 4 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... 
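The CBC-decrypt loops above keep the block-decryption result in SAVEBUF because the call may be in-place: OUTBUF can alias INBUF, yet the raw ciphertext block must survive to become the next IV. A single-block sketch of the same dance (standalone, assuming <string.h> for memcpy; the real loop fuses these steps into cipher_block_xor_n_copy_2 without the extra copy):

static void
cbc_dec_one (CAST5_context *ctx, byte *iv, byte *outbuf, const byte *inbuf)
{
  byte savebuf[CAST5_BLOCKSIZE];
  byte ctmp[CAST5_BLOCKSIZE];
  int i;

  do_decrypt_block (ctx, savebuf, inbuf);   /* savebuf = D_K(C_i)        */
  memcpy (ctmp, inbuf, CAST5_BLOCKSIZE);    /* keep C_i; the next line
                                               may overwrite it          */
  for (i = 0; i < CAST5_BLOCKSIZE; i++)
    outbuf[i] = savebuf[i] ^ iv[i];         /* P_i = D_K(C_i) ^ IV       */
  memcpy (iv, ctmp, CAST5_BLOCKSIZE);       /* C_i is the next IV        */
}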
*/ + } +#elif defined(USE_ARM_ASM) + { + /* Process data in 2 block chunks. */ + while (nblocks >= 2) + { + _gcry_cast5_arm_cfb_dec(ctx, outbuf, inbuf, iv); + + nblocks -= 2; + outbuf += 2 * CAST5_BLOCKSIZE; + inbuf += 2 * CAST5_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + +#if !defined(USE_AMD64_ASM) && !defined(USE_ARM_ASM) + for ( ;nblocks >= 3; nblocks -= 3 ) + { + cipher_block_cpy (tmpbuf + 0, iv, CAST5_BLOCKSIZE); + cipher_block_cpy (tmpbuf + 8, inbuf + 0, CAST5_BLOCKSIZE * 2); + cipher_block_cpy (iv, inbuf + 16, CAST5_BLOCKSIZE); + do_encrypt_block_3 (ctx, tmpbuf, tmpbuf); + buf_xor (outbuf, inbuf, tmpbuf, CAST5_BLOCKSIZE * 3); + outbuf += CAST5_BLOCKSIZE * 3; + inbuf += CAST5_BLOCKSIZE * 3; + } +#endif + + for ( ;nblocks; nblocks-- ) + { + do_encrypt_block(ctx, iv, iv); + cipher_block_xor_n_copy(outbuf, iv, inbuf, CAST5_BLOCKSIZE); + outbuf += CAST5_BLOCKSIZE; + inbuf += CAST5_BLOCKSIZE; + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Run the self-tests for CAST5-CTR, tests IV increment of bulk CTR + encryption. Returns NULL on success. */ +static const char * +selftest_ctr (void) +{ + const int nblocks = 4+1; + const int blocksize = CAST5_BLOCKSIZE; + const int context_size = sizeof(CAST5_context); + + return _gcry_selftest_helper_ctr("CAST5", &cast_setkey, + &encrypt_block, nblocks, blocksize, context_size); +} + + +/* Run the self-tests for CAST5-CBC, tests bulk CBC decryption. + Returns NULL on success. */ +static const char * +selftest_cbc (void) +{ + const int nblocks = 4+2; + const int blocksize = CAST5_BLOCKSIZE; + const int context_size = sizeof(CAST5_context); + + return _gcry_selftest_helper_cbc("CAST5", &cast_setkey, + &encrypt_block, nblocks, blocksize, context_size); +} + + +/* Run the self-tests for CAST5-CFB, tests bulk CFB decryption. + Returns NULL on success.
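Unrolled, the single-block tail of the CFB-decrypt loop above computes P_i = C_i ^ E_K(C_{i-1}); note that CFB decryption still runs the block cipher in the encryption direction, which is why do_encrypt_block appears there. A standalone sketch (assuming <string.h>; the real cipher_block_xor_n_copy performs the xor and the IV update in one pass):

static void
cfb_dec_one (CAST5_context *ctx, byte *iv, byte *outbuf, const byte *inbuf)
{
  byte keystream[CAST5_BLOCKSIZE];
  byte ctmp[CAST5_BLOCKSIZE];
  int i;

  do_encrypt_block (ctx, keystream, iv);    /* keystream = E_K(IV)       */
  memcpy (ctmp, inbuf, CAST5_BLOCKSIZE);    /* keep C_i in case OUTBUF
                                               aliases INBUF             */
  for (i = 0; i < CAST5_BLOCKSIZE; i++)
    outbuf[i] = keystream[i] ^ ctmp[i];     /* P_i = E_K(IV) ^ C_i       */
  memcpy (iv, ctmp, CAST5_BLOCKSIZE);       /* C_i feeds the next block  */
}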
*/ +static const char * +selftest_cfb (void) +{ + const int nblocks = 4+2; + const int blocksize = CAST5_BLOCKSIZE; + const int context_size = sizeof(CAST5_context); + + return _gcry_selftest_helper_cfb("CAST5", &cast_setkey, + &encrypt_block, nblocks, blocksize, context_size); +} + + +static const char* +selftest(void) +{ + CAST5_context c; + cipher_bulk_ops_t bulk_ops; + static const byte key[16] = + { 0x01, 0x23, 0x45, 0x67, 0x12, 0x34, 0x56, 0x78, + 0x23, 0x45, 0x67, 0x89, 0x34, 0x56, 0x78, 0x9A }; + static const byte plain[8] = + { 0x01, 0x23, 0x45, 0x67, 0x89, 0xAB, 0xCD, 0xEF }; + static const byte cipher[8] = + { 0x23, 0x8B, 0x4F, 0xE5, 0x84, 0x7E, 0x44, 0xB2 }; + byte buffer[8]; + const char *r; + + cast_setkey( &c, key, 16, &bulk_ops ); + encrypt_block( &c, buffer, plain ); + if( memcmp( buffer, cipher, 8 ) ) + return "1"; + decrypt_block( &c, buffer, buffer ); + if( memcmp( buffer, plain, 8 ) ) + return "2"; + +#if 0 /* full maintenance test */ + { + int i; + byte a0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78, + 0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A }; + byte b0[16] = { 0x01,0x23,0x45,0x67,0x12,0x34,0x56,0x78, + 0x23,0x45,0x67,0x89,0x34,0x56,0x78,0x9A }; + byte a1[16] = { 0xEE,0xA9,0xD0,0xA2,0x49,0xFD,0x3B,0xA6, + 0xB3,0x43,0x6F,0xB8,0x9D,0x6D,0xCA,0x92 }; + byte b1[16] = { 0xB2,0xC9,0x5E,0xB0,0x0C,0x31,0xAD,0x71, + 0x80,0xAC,0x05,0xB8,0xE8,0x3D,0x69,0x6E }; + + for(i=0; i < 1000000; i++ ) { + cast_setkey( &c, b0, 16, &bulk_ops ); + encrypt_block( &c, a0, a0 ); + encrypt_block( &c, a0+8, a0+8 ); + cast_setkey( &c, a0, 16, &bulk_ops ); + encrypt_block( &c, b0, b0 ); + encrypt_block( &c, b0+8, b0+8 ); + } + if( memcmp( a0, a1, 16 ) || memcmp( b0, b1, 16 ) ) + return "3"; + + } +#endif + + if ( (r = selftest_cbc ()) ) + return r; + + if ( (r = selftest_cfb ()) ) + return r; + + if ( (r = selftest_ctr ()) ) + return r; + + return NULL; +} + + +static void +key_schedule( u32 *x, u32 *z, u32 *k ) +{ + +#define xi(i) ((x[(i)/4] >> (8*(3-((i)%4)))) & 0xff) +#define zi(i) ((z[(i)/4] >> (8*(3-((i)%4)))) & 0xff) + + z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)]; + z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)]; + z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)]; + z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)]; + k[0] = s5[zi( 8)]^s6[zi( 9)]^s7[zi( 7)]^s8[zi( 6)]^s5[zi( 2)]; + k[1] = s5[zi(10)]^s6[zi(11)]^s7[zi( 5)]^s8[zi( 4)]^s6[zi( 6)]; + k[2] = s5[zi(12)]^s6[zi(13)]^s7[zi( 3)]^s8[zi( 2)]^s7[zi( 9)]; + k[3] = s5[zi(14)]^s6[zi(15)]^s7[zi( 1)]^s8[zi( 0)]^s8[zi(12)]; + + x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)]; + x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)]; + x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)]; + x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)]; + k[4] = s5[xi( 3)]^s6[xi( 2)]^s7[xi(12)]^s8[xi(13)]^s5[xi( 8)]; + k[5] = s5[xi( 1)]^s6[xi( 0)]^s7[xi(14)]^s8[xi(15)]^s6[xi(13)]; + k[6] = s5[xi( 7)]^s6[xi( 6)]^s7[xi( 8)]^s8[xi( 9)]^s7[xi( 3)]; + k[7] = s5[xi( 5)]^s6[xi( 4)]^s7[xi(10)]^s8[xi(11)]^s8[xi( 7)]; + + z[0] = x[0] ^ s5[xi(13)]^s6[xi(15)]^s7[xi(12)]^s8[xi(14)]^s7[xi( 8)]; + z[1] = x[2] ^ s5[zi( 0)]^s6[zi( 2)]^s7[zi( 1)]^s8[zi( 3)]^s8[xi(10)]; + z[2] = x[3] ^ s5[zi( 7)]^s6[zi( 6)]^s7[zi( 5)]^s8[zi( 4)]^s5[xi( 9)]; + z[3] = x[1] ^ s5[zi(10)]^s6[zi( 9)]^s7[zi(11)]^s8[zi( 8)]^s6[xi(11)]; + k[8] = s5[zi( 3)]^s6[zi( 2)]^s7[zi(12)]^s8[zi(13)]^s5[zi( 9)]; + k[9] = s5[zi( 1)]^s6[zi( 
0)]^s7[zi(14)]^s8[zi(15)]^s6[zi(12)]; + k[10]= s5[zi( 7)]^s6[zi( 6)]^s7[zi( 8)]^s8[zi( 9)]^s7[zi( 2)]; + k[11]= s5[zi( 5)]^s6[zi( 4)]^s7[zi(10)]^s8[zi(11)]^s8[zi( 6)]; + + x[0] = z[2] ^ s5[zi( 5)]^s6[zi( 7)]^s7[zi( 4)]^s8[zi( 6)]^s7[zi( 0)]; + x[1] = z[0] ^ s5[xi( 0)]^s6[xi( 2)]^s7[xi( 1)]^s8[xi( 3)]^s8[zi( 2)]; + x[2] = z[1] ^ s5[xi( 7)]^s6[xi( 6)]^s7[xi( 5)]^s8[xi( 4)]^s5[zi( 1)]; + x[3] = z[3] ^ s5[xi(10)]^s6[xi( 9)]^s7[xi(11)]^s8[xi( 8)]^s6[zi( 3)]; + k[12]= s5[xi( 8)]^s6[xi( 9)]^s7[xi( 7)]^s8[xi( 6)]^s5[xi( 3)]; + k[13]= s5[xi(10)]^s6[xi(11)]^s7[xi( 5)]^s8[xi( 4)]^s6[xi( 7)]; + k[14]= s5[xi(12)]^s6[xi(13)]^s7[xi( 3)]^s8[xi( 2)]^s7[xi( 8)]; + k[15]= s5[xi(14)]^s6[xi(15)]^s7[xi( 1)]^s8[xi( 0)]^s8[xi(13)]; + +#undef xi +#undef zi +} + + +static gcry_err_code_t +do_cast_setkey( CAST5_context *c, const byte *key, unsigned keylen ) +{ + static int initialized; + static const char* selftest_failed; + int i; + u32 x[4]; + u32 z[4]; + u32 k[16]; + + if( !initialized ) + { + initialized = 1; + selftest_failed = selftest(); + if( selftest_failed ) + log_error ("CAST5 selftest failed (%s).\n", selftest_failed ); + } + if( selftest_failed ) + return GPG_ERR_SELFTEST_FAILED; + + if( keylen != 16 ) + return GPG_ERR_INV_KEYLEN; + + x[0] = buf_get_be32(key + 0); + x[1] = buf_get_be32(key + 4); + x[2] = buf_get_be32(key + 8); + x[3] = buf_get_be32(key + 12); + + key_schedule( x, z, k ); + for(i=0; i < 16; i++ ) + c->Km[i] = k[i]; + key_schedule( x, z, k ); + for(i=0; i < 16; i++ ) + c->Kr[i] = k[i] & 0x1f; + +#ifdef USE_ARM_ASM + for (i = 0; i < 4; i++) + { + byte Kr_arm[4]; + + /* Convert rotate left to rotate right and add shift left + * by 2. */ + Kr_arm[0] = ((32 - c->Kr[4 * i + 0]) - 2) & 0x1f; + Kr_arm[1] = ((32 - c->Kr[4 * i + 1]) - 2) & 0x1f; + Kr_arm[2] = ((32 - c->Kr[4 * i + 2]) - 2) & 0x1f; + Kr_arm[3] = ((32 - c->Kr[4 * i + 3]) - 2) & 0x1f; + + /* Endian friendly store. */ + c->Kr_arm_enc[i] = Kr_arm[0] | + (Kr_arm[1] << 8) | + (Kr_arm[2] << 16) | + (Kr_arm[3] << 24); + c->Kr_arm_dec[i] = Kr_arm[3] | + (Kr_arm[2] << 8) | + (Kr_arm[1] << 16) | + (Kr_arm[0] << 24); + + wipememory(Kr_arm, sizeof(Kr_arm)); + } +#endif + + wipememory(x, sizeof x); + wipememory(z, sizeof z); + wipememory(k, sizeof k); + +#undef xi +#undef zi + return GPG_ERR_NO_ERROR; +} + +static gcry_err_code_t +cast_setkey (void *context, const byte *key, unsigned keylen, + cipher_bulk_ops_t *bulk_ops) +{ + CAST5_context *c = (CAST5_context *) context; + gcry_err_code_t rc = do_cast_setkey (c, key, keylen); + + /* Setup bulk encryption routines. */ + memset (bulk_ops, 0, sizeof(*bulk_ops)); + bulk_ops->cfb_dec = _gcry_cast5_cfb_dec; + bulk_ops->cbc_dec = _gcry_cast5_cbc_dec; + bulk_ops->ctr_enc = _gcry_cast5_ctr_enc; + + return rc; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_cast5 = + { + GCRY_CIPHER_CAST5, {0, 0}, + "CAST5", NULL, NULL, CAST5_BLOCKSIZE, 128, sizeof (CAST5_context), + cast_setkey, encrypt_block, decrypt_block + }; diff --git a/comm/third_party/libgcrypt/cipher/chacha20-aarch64.S b/comm/third_party/libgcrypt/cipher/chacha20-aarch64.S new file mode 100644 index 0000000000..b8f9724a37 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/chacha20-aarch64.S @@ -0,0 +1,648 @@ +/* chacha20-aarch64.S - ARMv8/AArch64 accelerated chacha20 blocks function + * + * Copyright (C) 2017-2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. 
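The ARM key-schedule conversion in do_cast_setkey above relies on the identity that a left-rotate by r equals a right-rotate by (32 - r) mod 32; the extra "- 2" folds in a 2-bit adjustment for a shift-left that, per the comment, the ARM round code performs (presumably for s-box index scaling — an inference, the code only states the formula). A sketch of the identity itself:

static u32
rol32 (u32 x, unsigned int r)
{
  r &= 31;
  return r ? ((x << r) | (x >> (32 - r))) : x;
}

static u32
ror32 (u32 x, unsigned int r)
{
  r &= 31;
  return r ? ((x >> r) | (x << (32 - r))) : x;
}

/* For all x and r: rol32 (x, r) == ror32 (x, (32 - r) & 31).  */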
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on D. J. Bernstein reference implementation at + * http://cr.yp.to/chacha.html: + * + * chacha-regs.c version 20080118 + * D. J. Bernstein + * Public domain. + */ + +#include "asm-common-aarch64.h" + +#if defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) && \ + defined(USE_CHACHA20) + +.cpu generic+simd + +.text + +#include "asm-poly1305-aarch64.h" + +/* register macros */ +#define INPUT x0 +#define DST x1 +#define SRC x2 +#define NBLKS x3 +#define ROUND x4 +#define INPUT_CTR x5 +#define INPUT_POS x6 +#define CTR x7 + +/* vector registers */ +#define X0 v16 +#define X1 v17 +#define X2 v18 +#define X3 v19 +#define X4 v20 +#define X5 v21 +#define X6 v22 +#define X7 v23 +#define X8 v24 +#define X9 v25 +#define X10 v26 +#define X11 v27 +#define X12 v28 +#define X13 v29 +#define X14 v30 +#define X15 v31 + +#define VCTR v0 +#define VTMP0 v1 +#define VTMP1 v2 +#define VTMP2 v3 +#define VTMP3 v4 +#define X12_TMP v5 +#define X13_TMP v6 +#define ROT8 v7 + +/********************************************************************** + helper macros + **********************************************************************/ + +#define _(...)
__VA_ARGS__ + +#define vpunpckldq(s1, s2, dst) \ + zip1 dst.4s, s2.4s, s1.4s; + +#define vpunpckhdq(s1, s2, dst) \ + zip2 dst.4s, s2.4s, s1.4s; + +#define vpunpcklqdq(s1, s2, dst) \ + zip1 dst.2d, s2.2d, s1.2d; + +#define vpunpckhqdq(s1, s2, dst) \ + zip2 dst.2d, s2.2d, s1.2d; + +/* 4x4 32-bit integer matrix transpose */ +#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \ + vpunpckhdq(x1, x0, t2); \ + vpunpckldq(x1, x0, x0); \ + \ + vpunpckldq(x3, x2, t1); \ + vpunpckhdq(x3, x2, x2); \ + \ + vpunpckhqdq(t1, x0, x1); \ + vpunpcklqdq(t1, x0, x0); \ + \ + vpunpckhqdq(x2, t2, x3); \ + vpunpcklqdq(x2, t2, x2); + +#define clear(x) \ + eor x.16b, x.16b, x.16b; + +/********************************************************************** + 4-way chacha20 + **********************************************************************/ + +#define XOR(d,s1,s2) \ + eor d.16b, s2.16b, s1.16b; + +#define PLUS(ds,s) \ + add ds.4s, ds.4s, s.4s; + +#define ROTATE4(dst1,dst2,dst3,dst4,c,src1,src2,src3,src4,iop1,iop2,iop3) \ + shl dst1.4s, src1.4s, #(c); \ + shl dst2.4s, src2.4s, #(c); \ + iop1; \ + shl dst3.4s, src3.4s, #(c); \ + shl dst4.4s, src4.4s, #(c); \ + iop2; \ + sri dst1.4s, src1.4s, #(32 - (c)); \ + sri dst2.4s, src2.4s, #(32 - (c)); \ + iop3; \ + sri dst3.4s, src3.4s, #(32 - (c)); \ + sri dst4.4s, src4.4s, #(32 - (c)); + +#define ROTATE4_8(dst1,dst2,dst3,dst4,src1,src2,src3,src4,iop1,iop2,iop3) \ + tbl dst1.16b, {src1.16b}, ROT8.16b; \ + iop1; \ + tbl dst2.16b, {src2.16b}, ROT8.16b; \ + iop2; \ + tbl dst3.16b, {src3.16b}, ROT8.16b; \ + iop3; \ + tbl dst4.16b, {src4.16b}, ROT8.16b; + +#define ROTATE4_16(dst1,dst2,dst3,dst4,src1,src2,src3,src4,iop1) \ + rev32 dst1.8h, src1.8h; \ + rev32 dst2.8h, src2.8h; \ + iop1; \ + rev32 dst3.8h, src3.8h; \ + rev32 dst4.8h, src4.8h; + +#define QUARTERROUND4(a1,b1,c1,d1,a2,b2,c2,d2,a3,b3,c3,d3,a4,b4,c4,d4,ign,tmp1,tmp2,tmp3,tmp4,\ + iop1,iop2,iop3,iop4,iop5,iop6,iop7,iop8,iop9,iop10,iop11,iop12,iop13,iop14,\ + iop15,iop16,iop17,iop18,iop19,iop20,iop21,iop22,iop23,iop24,iop25,iop26,\ + iop27,iop28,iop29) \ + PLUS(a1,b1); PLUS(a2,b2); iop1; \ + PLUS(a3,b3); PLUS(a4,b4); iop2; \ + XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); iop3; \ + XOR(tmp3,d3,a3); XOR(tmp4,d4,a4); iop4; \ + ROTATE4_16(d1, d2, d3, d4, tmp1, tmp2, tmp3, tmp4, _(iop5)); \ + iop6; \ + PLUS(c1,d1); PLUS(c2,d2); iop7; \ + PLUS(c3,d3); PLUS(c4,d4); iop8; \ + XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); iop9; \ + XOR(tmp3,b3,c3); XOR(tmp4,b4,c4); iop10; \ + ROTATE4(b1, b2, b3, b4, 12, tmp1, tmp2, tmp3, tmp4, \ + _(iop11), _(iop12), _(iop13)); iop14; \ + PLUS(a1,b1); PLUS(a2,b2); iop15; \ + PLUS(a3,b3); PLUS(a4,b4); iop16; \ + XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); iop17; \ + XOR(tmp3,d3,a3); XOR(tmp4,d4,a4); iop18; \ + ROTATE4_8(d1, d2, d3, d4, tmp1, tmp2, tmp3, tmp4, \ + _(iop19), _(iop20), _(iop21)); iop22; \ + PLUS(c1,d1); PLUS(c2,d2); iop23; \ + PLUS(c3,d3); PLUS(c4,d4); iop24; \ + XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); iop25; \ + XOR(tmp3,b3,c3); XOR(tmp4,b4,c4); iop26; \ + ROTATE4(b1, b2, b3, b4, 7, tmp1, tmp2, tmp3, tmp4, \ + _(iop27), _(iop28), _(iop29)); + +.align 4 +.globl _gcry_chacha20_aarch64_blocks4_data_inc_counter +_gcry_chacha20_aarch64_blocks4_data_inc_counter: + .long 0,1,2,3 + +.align 4 +.globl _gcry_chacha20_aarch64_blocks4_data_rot8 +_gcry_chacha20_aarch64_blocks4_data_rot8: + .byte 3,0,1,2 + .byte 7,4,5,6 + .byte 11,8,9,10 + .byte 15,12,13,14 + +.align 3 +.globl _gcry_chacha20_aarch64_blocks4 +ELF(.type _gcry_chacha20_aarch64_blocks4,%function;) + +_gcry_chacha20_aarch64_blocks4: + /* input: + * x0: input + * x1: dst + * 
x2: src + * x3: nblks (multiple of 4) + */ + CFI_STARTPROC() + + GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8); + add INPUT_CTR, INPUT, #(12*4); + ld1 {ROT8.16b}, [CTR]; + GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter); + mov INPUT_POS, INPUT; + ld1 {VCTR.16b}, [CTR]; + +.Loop4: + /* Construct counter vectors X12 and X13 */ + + ld1 {X15.16b}, [INPUT_CTR]; + mov ROUND, #20; + ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; + + dup X12.4s, X15.s[0]; + dup X13.4s, X15.s[1]; + ldr CTR, [INPUT_CTR]; + add X12.4s, X12.4s, VCTR.4s; + dup X0.4s, VTMP1.s[0]; + dup X1.4s, VTMP1.s[1]; + dup X2.4s, VTMP1.s[2]; + dup X3.4s, VTMP1.s[3]; + dup X14.4s, X15.s[2]; + cmhi VTMP0.4s, VCTR.4s, X12.4s; + dup X15.4s, X15.s[3]; + add CTR, CTR, #4; /* Update counter */ + dup X4.4s, VTMP2.s[0]; + dup X5.4s, VTMP2.s[1]; + dup X6.4s, VTMP2.s[2]; + dup X7.4s, VTMP2.s[3]; + sub X13.4s, X13.4s, VTMP0.4s; + dup X8.4s, VTMP3.s[0]; + dup X9.4s, VTMP3.s[1]; + dup X10.4s, VTMP3.s[2]; + dup X11.4s, VTMP3.s[3]; + mov X12_TMP.16b, X12.16b; + mov X13_TMP.16b, X13.16b; + str CTR, [INPUT_CTR]; + +.Lround2: + subs ROUND, ROUND, #2 + QUARTERROUND4(X0, X4, X8, X12, X1, X5, X9, X13, + X2, X6, X10, X14, X3, X7, X11, X15, + tmp:=,VTMP0,VTMP1,VTMP2,VTMP3, + ,,,,,,,,,,,,,,,,,,,,,,,,,,,,) + QUARTERROUND4(X0, X5, X10, X15, X1, X6, X11, X12, + X2, X7, X8, X13, X3, X4, X9, X14, + tmp:=,VTMP0,VTMP1,VTMP2,VTMP3, + ,,,,,,,,,,,,,,,,,,,,,,,,,,,,) + b.ne .Lround2; + + ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS], #32; + + PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ + PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ + + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ + PLUS(X0, VTMP2); + PLUS(X1, VTMP3); + PLUS(X2, X12_TMP); + PLUS(X3, X13_TMP); + + dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ + dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ + dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ + dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ + ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; + mov INPUT_POS, INPUT; + PLUS(X4, VTMP2); + PLUS(X5, VTMP3); + PLUS(X6, X12_TMP); + PLUS(X7, X13_TMP); + + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ + dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ + dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ + PLUS(X8, VTMP2); + PLUS(X9, VTMP3); + PLUS(X10, X12_TMP); + PLUS(X11, X13_TMP); + PLUS(X14, VTMP0); + PLUS(X15, VTMP1); + + transpose_4x4(X0, X1, X2, X3, VTMP0, VTMP1, VTMP2); + transpose_4x4(X4, X5, X6, X7, VTMP0, VTMP1, VTMP2); + transpose_4x4(X8, X9, X10, X11, VTMP0, VTMP1, VTMP2); + transpose_4x4(X12, X13, X14, X15, VTMP0, VTMP1, VTMP2); + + subs NBLKS, NBLKS, #4; + + ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; + ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32; + eor VTMP0.16b, X0.16b, VTMP0.16b; + eor VTMP1.16b, X4.16b, VTMP1.16b; + eor VTMP2.16b, X8.16b, VTMP2.16b; + eor VTMP3.16b, X12.16b, VTMP3.16b; + eor X12_TMP.16b, X1.16b, X12_TMP.16b; + eor X13_TMP.16b, X5.16b, X13_TMP.16b; + st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; + ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; + st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32; + ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32; + eor VTMP0.16b, X9.16b, VTMP0.16b; + eor VTMP1.16b, X13.16b, VTMP1.16b; + eor VTMP2.16b, X2.16b, VTMP2.16b; + eor VTMP3.16b, X6.16b, VTMP3.16b; + eor 
X12_TMP.16b, X10.16b, X12_TMP.16b; + eor X13_TMP.16b, X14.16b, X13_TMP.16b; + st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; + ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; + st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32; + eor VTMP0.16b, X3.16b, VTMP0.16b; + eor VTMP1.16b, X7.16b, VTMP1.16b; + eor VTMP2.16b, X11.16b, VTMP2.16b; + eor VTMP3.16b, X15.16b, VTMP3.16b; + st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; + + b.ne .Loop4; + + /* clear the used vector registers and stack */ + clear(VTMP0); + clear(VTMP1); + clear(VTMP2); + clear(VTMP3); + clear(X12_TMP); + clear(X13_TMP); + clear(X0); + clear(X1); + clear(X2); + clear(X3); + clear(X4); + clear(X5); + clear(X6); + clear(X7); + clear(X8); + clear(X9); + clear(X10); + clear(X11); + clear(X12); + clear(X13); + clear(X14); + clear(X15); + + eor x0, x0, x0 + ret + CFI_ENDPROC() +ELF(.size _gcry_chacha20_aarch64_blocks4, .-_gcry_chacha20_aarch64_blocks4;) + +/********************************************************************** + 4-way stitched chacha20-poly1305 + **********************************************************************/ + +.align 3 +.globl _gcry_chacha20_poly1305_aarch64_blocks4 +ELF(.type _gcry_chacha20_poly1305_aarch64_blocks4,%function;) + +_gcry_chacha20_poly1305_aarch64_blocks4: + /* input: + * x0: input + * x1: dst + * x2: src + * x3: nblks (multiple of 4) + * x4: poly1305-state + * x5: poly1305-src + */ + CFI_STARTPROC() + POLY1305_PUSH_REGS() + + mov POLY_RSTATE, x4; + mov POLY_RSRC, x5; + + GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_rot8); + add INPUT_CTR, INPUT, #(12*4); + ld1 {ROT8.16b}, [CTR]; + GET_DATA_POINTER(CTR, _gcry_chacha20_aarch64_blocks4_data_inc_counter); + mov INPUT_POS, INPUT; + ld1 {VCTR.16b}, [CTR]; + + POLY1305_LOAD_STATE() + +.Loop_poly4: + /* Construct counter vectors X12 and X13 */ + + ld1 {X15.16b}, [INPUT_CTR]; + ld1 {VTMP1.16b-VTMP3.16b}, [INPUT_POS]; + + dup X12.4s, X15.s[0]; + dup X13.4s, X15.s[1]; + ldr CTR, [INPUT_CTR]; + add X12.4s, X12.4s, VCTR.4s; + dup X0.4s, VTMP1.s[0]; + dup X1.4s, VTMP1.s[1]; + dup X2.4s, VTMP1.s[2]; + dup X3.4s, VTMP1.s[3]; + dup X14.4s, X15.s[2]; + cmhi VTMP0.4s, VCTR.4s, X12.4s; + dup X15.4s, X15.s[3]; + add CTR, CTR, #4; /* Update counter */ + dup X4.4s, VTMP2.s[0]; + dup X5.4s, VTMP2.s[1]; + dup X6.4s, VTMP2.s[2]; + dup X7.4s, VTMP2.s[3]; + sub X13.4s, X13.4s, VTMP0.4s; + dup X8.4s, VTMP3.s[0]; + dup X9.4s, VTMP3.s[1]; + dup X10.4s, VTMP3.s[2]; + dup X11.4s, VTMP3.s[3]; + mov X12_TMP.16b, X12.16b; + mov X13_TMP.16b, X13.16b; + str CTR, [INPUT_CTR]; + + mov ROUND, #20 +.Lround4_with_poly1305_outer: + mov POLY_CHACHA_ROUND, #6; +.Lround4_with_poly1305_inner1: + POLY1305_BLOCK_PART1(0 * 16) + QUARTERROUND4(X0, X4, X8, X12, X1, X5, X9, X13, + X2, X6, X10, X14, X3, X7, X11, X15, + tmp:=,VTMP0,VTMP1,VTMP2,VTMP3, + POLY1305_BLOCK_PART2(0 * 16), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6(), + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8(), + POLY1305_BLOCK_PART9(), + POLY1305_BLOCK_PART10(), + POLY1305_BLOCK_PART11(), + POLY1305_BLOCK_PART12(), + POLY1305_BLOCK_PART13(), + POLY1305_BLOCK_PART14(), + POLY1305_BLOCK_PART15(), + POLY1305_BLOCK_PART16(), + POLY1305_BLOCK_PART17(), + POLY1305_BLOCK_PART18(), + POLY1305_BLOCK_PART19(), + POLY1305_BLOCK_PART20(), + POLY1305_BLOCK_PART21(), + POLY1305_BLOCK_PART22(), + POLY1305_BLOCK_PART23(), + POLY1305_BLOCK_PART24(), + POLY1305_BLOCK_PART25(), + POLY1305_BLOCK_PART26(), + POLY1305_BLOCK_PART27(), + POLY1305_BLOCK_PART28(), + POLY1305_BLOCK_PART29(), + POLY1305_BLOCK_PART1(1 
* 16)) + POLY1305_BLOCK_PART2(1 * 16) + QUARTERROUND4(X0, X5, X10, X15, X1, X6, X11, X12, + X2, X7, X8, X13, X3, X4, X9, X14, + tmp:=,VTMP0,VTMP1,VTMP2,VTMP3, + _(add POLY_RSRC, POLY_RSRC, #(2*16)), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6(), + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8(), + POLY1305_BLOCK_PART9(), + POLY1305_BLOCK_PART10(), + POLY1305_BLOCK_PART11(), + POLY1305_BLOCK_PART12(), + POLY1305_BLOCK_PART13(), + POLY1305_BLOCK_PART14(), + POLY1305_BLOCK_PART15(), + POLY1305_BLOCK_PART16(), + POLY1305_BLOCK_PART17(), + POLY1305_BLOCK_PART18(), + POLY1305_BLOCK_PART19(), + POLY1305_BLOCK_PART20(), + POLY1305_BLOCK_PART21(), + POLY1305_BLOCK_PART22(), + POLY1305_BLOCK_PART23(), + POLY1305_BLOCK_PART24(), + POLY1305_BLOCK_PART25(), + POLY1305_BLOCK_PART26(), + POLY1305_BLOCK_PART27(), + POLY1305_BLOCK_PART28(), + POLY1305_BLOCK_PART29(), + _(subs POLY_CHACHA_ROUND, POLY_CHACHA_ROUND, #2)); + b.ne .Lround4_with_poly1305_inner1; + + mov POLY_CHACHA_ROUND, #4; +.Lround4_with_poly1305_inner2: + POLY1305_BLOCK_PART1(0 * 16) + QUARTERROUND4(X0, X4, X8, X12, X1, X5, X9, X13, + X2, X6, X10, X14, X3, X7, X11, X15, + tmp:=,VTMP0,VTMP1,VTMP2,VTMP3,, + POLY1305_BLOCK_PART2(0 * 16),, + _(add POLY_RSRC, POLY_RSRC, #(1*16)),, + POLY1305_BLOCK_PART3(),, + POLY1305_BLOCK_PART4(),, + POLY1305_BLOCK_PART5(),, + POLY1305_BLOCK_PART6(),, + POLY1305_BLOCK_PART7(),, + POLY1305_BLOCK_PART8(),, + POLY1305_BLOCK_PART9(),, + POLY1305_BLOCK_PART10(),, + POLY1305_BLOCK_PART11(),, + POLY1305_BLOCK_PART12(),, + POLY1305_BLOCK_PART13(),, + POLY1305_BLOCK_PART14(),) + POLY1305_BLOCK_PART15() + QUARTERROUND4(X0, X5, X10, X15, X1, X6, X11, X12, + X2, X7, X8, X13, X3, X4, X9, X14, + tmp:=,VTMP0,VTMP1,VTMP2,VTMP3, + POLY1305_BLOCK_PART16(),, + POLY1305_BLOCK_PART17(),, + POLY1305_BLOCK_PART18(),, + POLY1305_BLOCK_PART19(),, + POLY1305_BLOCK_PART20(),, + POLY1305_BLOCK_PART21(),, + POLY1305_BLOCK_PART22(),, + POLY1305_BLOCK_PART23(),, + POLY1305_BLOCK_PART24(),, + POLY1305_BLOCK_PART25(),, + POLY1305_BLOCK_PART26(),, + POLY1305_BLOCK_PART27(),, + POLY1305_BLOCK_PART28(),, + POLY1305_BLOCK_PART29(), + _(subs POLY_CHACHA_ROUND, POLY_CHACHA_ROUND, #2),) + b.ne .Lround4_with_poly1305_inner2; + + subs ROUND, ROUND, #10 + b.ne .Lround4_with_poly1305_outer; + + ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS], #32; + + PLUS(X12, X12_TMP); /* INPUT + 12 * 4 + counter */ + PLUS(X13, X13_TMP); /* INPUT + 13 * 4 + counter */ + + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 0 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 1 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 2 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 3 * 4 */ + PLUS(X0, VTMP2); + PLUS(X1, VTMP3); + PLUS(X2, X12_TMP); + PLUS(X3, X13_TMP); + + dup VTMP2.4s, VTMP1.s[0]; /* INPUT + 4 * 4 */ + dup VTMP3.4s, VTMP1.s[1]; /* INPUT + 5 * 4 */ + dup X12_TMP.4s, VTMP1.s[2]; /* INPUT + 6 * 4 */ + dup X13_TMP.4s, VTMP1.s[3]; /* INPUT + 7 * 4 */ + ld1 {VTMP0.16b, VTMP1.16b}, [INPUT_POS]; + mov INPUT_POS, INPUT; + PLUS(X4, VTMP2); + PLUS(X5, VTMP3); + PLUS(X6, X12_TMP); + PLUS(X7, X13_TMP); + + dup VTMP2.4s, VTMP0.s[0]; /* INPUT + 8 * 4 */ + dup VTMP3.4s, VTMP0.s[1]; /* INPUT + 9 * 4 */ + dup X12_TMP.4s, VTMP0.s[2]; /* INPUT + 10 * 4 */ + dup X13_TMP.4s, VTMP0.s[3]; /* INPUT + 11 * 4 */ + dup VTMP0.4s, VTMP1.s[2]; /* INPUT + 14 * 4 */ + dup VTMP1.4s, VTMP1.s[3]; /* INPUT + 15 * 4 */ + PLUS(X8, VTMP2); + PLUS(X9, VTMP3); + PLUS(X10, X12_TMP); + PLUS(X11, X13_TMP); + PLUS(X14, VTMP0); + PLUS(X15, VTMP1); + + transpose_4x4(X0, X1, X2, 
X3, VTMP0, VTMP1, VTMP2); + transpose_4x4(X4, X5, X6, X7, VTMP0, VTMP1, VTMP2); + transpose_4x4(X8, X9, X10, X11, VTMP0, VTMP1, VTMP2); + transpose_4x4(X12, X13, X14, X15, VTMP0, VTMP1, VTMP2); + + subs NBLKS, NBLKS, #4; + + ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; + ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32; + eor VTMP0.16b, X0.16b, VTMP0.16b; + eor VTMP1.16b, X4.16b, VTMP1.16b; + eor VTMP2.16b, X8.16b, VTMP2.16b; + eor VTMP3.16b, X12.16b, VTMP3.16b; + eor X12_TMP.16b, X1.16b, X12_TMP.16b; + eor X13_TMP.16b, X5.16b, X13_TMP.16b; + st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; + ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; + st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32; + ld1 {X12_TMP.16b-X13_TMP.16b}, [SRC], #32; + eor VTMP0.16b, X9.16b, VTMP0.16b; + eor VTMP1.16b, X13.16b, VTMP1.16b; + eor VTMP2.16b, X2.16b, VTMP2.16b; + eor VTMP3.16b, X6.16b, VTMP3.16b; + eor X12_TMP.16b, X10.16b, X12_TMP.16b; + eor X13_TMP.16b, X14.16b, X13_TMP.16b; + st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; + ld1 {VTMP0.16b-VTMP3.16b}, [SRC], #64; + st1 {X12_TMP.16b-X13_TMP.16b}, [DST], #32; + eor VTMP0.16b, X3.16b, VTMP0.16b; + eor VTMP1.16b, X7.16b, VTMP1.16b; + eor VTMP2.16b, X11.16b, VTMP2.16b; + eor VTMP3.16b, X15.16b, VTMP3.16b; + st1 {VTMP0.16b-VTMP3.16b}, [DST], #64; + + b.ne .Loop_poly4; + + POLY1305_STORE_STATE() + + /* clear the used vector registers and stack */ + clear(VTMP0); + clear(VTMP1); + clear(VTMP2); + clear(VTMP3); + clear(X12_TMP); + clear(X13_TMP); + clear(X0); + clear(X1); + clear(X2); + clear(X3); + clear(X4); + clear(X5); + clear(X6); + clear(X7); + clear(X8); + clear(X9); + clear(X10); + clear(X11); + clear(X12); + clear(X13); + clear(X14); + clear(X15); + + eor x0, x0, x0 + POLY1305_POP_REGS() + ret + CFI_ENDPROC() +ELF(.size _gcry_chacha20_poly1305_aarch64_blocks4, .-_gcry_chacha20_poly1305_aarch64_blocks4;) + +#endif diff --git a/comm/third_party/libgcrypt/cipher/chacha20-amd64-avx2.S b/comm/third_party/libgcrypt/cipher/chacha20-amd64-avx2.S new file mode 100644 index 0000000000..51e107be83 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/chacha20-amd64-avx2.S @@ -0,0 +1,601 @@ +/* chacha20-amd64-avx2.S - AVX2 implementation of ChaCha20 cipher + * + * Copyright (C) 2017-2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +/* + * Based on D. J. Bernstein reference implementation at + * http://cr.yp.to/chacha.html: + * + * chacha-regs.c version 20080118 + * D. J. Bernstein + * Public domain.
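Before the AVX2 variant, it helps to pin down what one lane of the vectorized QUARTERROUND computes. In scalar C (a sketch; the 16/12/8/7 rotation amounts are the standard ChaCha20 ones, realized in the AArch64 code above by ROTATE4_16's rev32, the ROT8 table lookup, and ROTATE4's shl/sri pairs):

#define QR_ROTL(x, n) (((u32)(x) << (n)) | ((u32)(x) >> (32 - (n))))

static void
chacha_quarterround (u32 *a, u32 *b, u32 *c, u32 *d)
{
  *a += *b; *d ^= *a; *d = QR_ROTL (*d, 16);
  *c += *d; *b ^= *c; *b = QR_ROTL (*b, 12);
  *a += *b; *d ^= *a; *d = QR_ROTL (*d, 8);
  *c += *d; *b ^= *c; *b = QR_ROTL (*b, 7);
}

The NEON code runs four such quarterrounds across four blocks at once, which is why the by-16 and by-8 rotations get dedicated forms (a halfword byte-reverse and a table lookup) while 12 and 7 fall back to shift-and-insert pairs.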
+ */ + +#ifdef __x86_64 +#include <config.h> +#if defined(HAVE_GCC_INLINE_ASM_AVX2) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) + +.text + +#include "asm-common-amd64.h" +#include "asm-poly1305-amd64.h" + +/* register macros */ +#define INPUT %rdi +#define DST %rsi +#define SRC %rdx +#define NBLKS %rcx +#define ROUND %eax + +/* stack structure */ +#define STACK_VEC_X12 (32) +#define STACK_VEC_X13 (32 + STACK_VEC_X12) +#define STACK_TMP (32 + STACK_VEC_X13) +#define STACK_TMP1 (32 + STACK_TMP) + +#define STACK_MAX (32 + STACK_TMP1) + +/* vector registers */ +#define X0 %ymm0 +#define X1 %ymm1 +#define X2 %ymm2 +#define X3 %ymm3 +#define X4 %ymm4 +#define X5 %ymm5 +#define X6 %ymm6 +#define X7 %ymm7 +#define X8 %ymm8 +#define X9 %ymm9 +#define X10 %ymm10 +#define X11 %ymm11 +#define X12 %ymm12 +#define X13 %ymm13 +#define X14 %ymm14 +#define X15 %ymm15 + +#define X0h %xmm0 +#define X1h %xmm1 +#define X2h %xmm2 +#define X3h %xmm3 +#define X4h %xmm4 +#define X5h %xmm5 +#define X6h %xmm6 +#define X7h %xmm7 +#define X8h %xmm8 +#define X9h %xmm9 +#define X10h %xmm10 +#define X11h %xmm11 +#define X12h %xmm12 +#define X13h %xmm13 +#define X14h %xmm14 +#define X15h %xmm15 + +/********************************************************************** + helper macros + **********************************************************************/ + +/* 4x4 32-bit integer matrix transpose */ +#define transpose_4x4(x0,x1,x2,x3,t1,t2) \ + vpunpckhdq x1, x0, t2; \ + vpunpckldq x1, x0, x0; \ + \ + vpunpckldq x3, x2, t1; \ + vpunpckhdq x3, x2, x2; \ + \ + vpunpckhqdq t1, x0, x1; \ + vpunpcklqdq t1, x0, x0; \ + \ + vpunpckhqdq x2, t2, x3; \ + vpunpcklqdq x2, t2, x2; + +/* 2x2 128-bit matrix transpose */ +#define transpose_16byte_2x2(x0,x1,t1) \ + vmovdqa x0, t1; \ + vperm2i128 $0x20, x1, x0, x0; \ + vperm2i128 $0x31, x1, t1, x1; + +/* xor register with unaligned src and save to unaligned dst */ +#define xor_src_dst(dst, src, offset, xreg) \ + vpxor offset(src), xreg, xreg; \ + vmovdqu xreg, offset(dst); + +/********************************************************************** + 8-way chacha20 + **********************************************************************/ + +#define ROTATE2(v1,v2,c,tmp) \ + vpsrld $(32 - (c)), v1, tmp; \ + vpslld $(c), v1, v1; \ + vpaddb tmp, v1, v1; \ + vpsrld $(32 - (c)), v2, tmp; \ + vpslld $(c), v2, v2; \ + vpaddb tmp, v2, v2; + +#define ROTATE_SHUF_2(v1,v2,shuf) \ + vpshufb shuf, v1, v1; \ + vpshufb shuf, v2, v2; + +#define XOR(ds,s) \ + vpxor s, ds, ds; + +#define PLUS(ds,s) \ + vpaddd s, ds, ds; + +#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,\ + interleave_op1,interleave_op2,\ + interleave_op3,interleave_op4) \ + vbroadcasti128 .Lshuf_rol16 rRIP, tmp1; \ + interleave_op1; \ + PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \ + ROTATE_SHUF_2(d1, d2, tmp1); \ + interleave_op2; \ + PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \ + ROTATE2(b1, b2, 12, tmp1); \ + vbroadcasti128 .Lshuf_rol8 rRIP, tmp1; \ + interleave_op3; \ + PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \ + ROTATE_SHUF_2(d1, d2, tmp1); \ + interleave_op4; \ + PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \ + ROTATE2(b1, b2, 7, tmp1); + +.align 32 +chacha20_data: +.Lshuf_rol16: + .byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13 +.Lshuf_rol8: + .byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14 +.Linc_counter: + .byte 0,1,2,3,4,5,6,7 +.Lunsigned_cmp: + .long 0x80000000 + +.align 8 +.globl _gcry_chacha20_amd64_avx2_blocks8 +ELF(.type
_gcry_chacha20_amd64_avx2_blocks8,@function;) + +_gcry_chacha20_amd64_avx2_blocks8: + /* input: + * %rdi: input + * %rsi: dst + * %rdx: src + * %rcx: nblks (multiple of 8) + */ + CFI_STARTPROC(); + + vzeroupper; + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + subq $STACK_MAX, %rsp; + andq $~31, %rsp; + +.Loop8: + mov $20, ROUND; + + /* Construct counter vectors X12 and X13 */ + vpmovzxbd .Linc_counter rRIP, X0; + vpbroadcastd .Lunsigned_cmp rRIP, X2; + vpbroadcastd (12 * 4)(INPUT), X12; + vpbroadcastd (13 * 4)(INPUT), X13; + vpaddd X0, X12, X12; + vpxor X2, X0, X0; + vpxor X2, X12, X1; + vpcmpgtd X1, X0, X0; + vpsubd X0, X13, X13; + vmovdqa X12, (STACK_VEC_X12)(%rsp); + vmovdqa X13, (STACK_VEC_X13)(%rsp); + + /* Load vectors */ + vpbroadcastd (0 * 4)(INPUT), X0; + vpbroadcastd (1 * 4)(INPUT), X1; + vpbroadcastd (2 * 4)(INPUT), X2; + vpbroadcastd (3 * 4)(INPUT), X3; + vpbroadcastd (4 * 4)(INPUT), X4; + vpbroadcastd (5 * 4)(INPUT), X5; + vpbroadcastd (6 * 4)(INPUT), X6; + vpbroadcastd (7 * 4)(INPUT), X7; + vpbroadcastd (8 * 4)(INPUT), X8; + vpbroadcastd (9 * 4)(INPUT), X9; + vpbroadcastd (10 * 4)(INPUT), X10; + vpbroadcastd (11 * 4)(INPUT), X11; + vpbroadcastd (14 * 4)(INPUT), X14; + vpbroadcastd (15 * 4)(INPUT), X15; + vmovdqa X15, (STACK_TMP)(%rsp); + +.Lround2: + QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X15,,,,) + vmovdqa (STACK_TMP)(%rsp), X15; + vmovdqa X8, (STACK_TMP)(%rsp); + QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,,,,) + QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,,,,) + vmovdqa (STACK_TMP)(%rsp), X8; + vmovdqa X15, (STACK_TMP)(%rsp); + QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X15,,,,) + sub $2, ROUND; + jnz .Lround2; + + vmovdqa X8, (STACK_TMP1)(%rsp); + + /* tmp := X15 */ + vpbroadcastd (0 * 4)(INPUT), X15; + PLUS(X0, X15); + vpbroadcastd (1 * 4)(INPUT), X15; + PLUS(X1, X15); + vpbroadcastd (2 * 4)(INPUT), X15; + PLUS(X2, X15); + vpbroadcastd (3 * 4)(INPUT), X15; + PLUS(X3, X15); + vpbroadcastd (4 * 4)(INPUT), X15; + PLUS(X4, X15); + vpbroadcastd (5 * 4)(INPUT), X15; + PLUS(X5, X15); + vpbroadcastd (6 * 4)(INPUT), X15; + PLUS(X6, X15); + vpbroadcastd (7 * 4)(INPUT), X15; + PLUS(X7, X15); + transpose_4x4(X0, X1, X2, X3, X8, X15); + transpose_4x4(X4, X5, X6, X7, X8, X15); + vmovdqa (STACK_TMP1)(%rsp), X8; + transpose_16byte_2x2(X0, X4, X15); + transpose_16byte_2x2(X1, X5, X15); + transpose_16byte_2x2(X2, X6, X15); + transpose_16byte_2x2(X3, X7, X15); + vmovdqa (STACK_TMP)(%rsp), X15; + xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1); + vpbroadcastd (8 * 4)(INPUT), X0; + PLUS(X8, X0); + vpbroadcastd (9 * 4)(INPUT), X0; + PLUS(X9, X0); + vpbroadcastd (10 * 4)(INPUT), X0; + PLUS(X10, X0); + vpbroadcastd (11 * 4)(INPUT), X0; + PLUS(X11, X0); + vmovdqa (STACK_VEC_X12)(%rsp), X0; + PLUS(X12, X0); + vmovdqa (STACK_VEC_X13)(%rsp), X0; + PLUS(X13, X0); + vpbroadcastd (14 * 4)(INPUT), X0; + PLUS(X14, X0); + vpbroadcastd (15 * 4)(INPUT), X0; + PLUS(X15, X0); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3); + + /* Update counter */ + addq $8, (12 * 4)(INPUT); + + transpose_4x4(X8, X9, X10, X11, X0, X1); + transpose_4x4(X12, X13, X14, X15, X0, X1); + xor_src_dst(DST, SRC, (64 * 4 + 16 * 0), X4); + xor_src_dst(DST, SRC, (64 * 5 + 16 * 0), X5); + transpose_16byte_2x2(X8, X12, X0); + transpose_16byte_2x2(X9, X13, X0); + transpose_16byte_2x2(X10, X14, X0); + transpose_16byte_2x2(X11, X15, X0); + xor_src_dst(DST, 
SRC, (64 * 6 + 16 * 0), X6); + xor_src_dst(DST, SRC, (64 * 7 + 16 * 0), X7); + xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11); + xor_src_dst(DST, SRC, (64 * 4 + 16 * 2), X12); + xor_src_dst(DST, SRC, (64 * 5 + 16 * 2), X13); + xor_src_dst(DST, SRC, (64 * 6 + 16 * 2), X14); + xor_src_dst(DST, SRC, (64 * 7 + 16 * 2), X15); + + sub $8, NBLKS; + lea (8 * 64)(DST), DST; + lea (8 * 64)(SRC), SRC; + jnz .Loop8; + + /* clear the used vector registers and stack */ + vpxor X0, X0, X0; + vmovdqa X0, (STACK_VEC_X12)(%rsp); + vmovdqa X0, (STACK_VEC_X13)(%rsp); + vmovdqa X0, (STACK_TMP)(%rsp); + vmovdqa X0, (STACK_TMP1)(%rsp); + vzeroall; + + /* eax zeroed by round loop. */ + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_amd64_avx2_blocks8, + .-_gcry_chacha20_amd64_avx2_blocks8;) + +/********************************************************************** + 8-way stitched chacha20-poly1305 + **********************************************************************/ + +#define _ /*_*/ + +.align 8 +.globl _gcry_chacha20_poly1305_amd64_avx2_blocks8 +ELF(.type _gcry_chacha20_poly1305_amd64_avx2_blocks8,@function;) + +_gcry_chacha20_poly1305_amd64_avx2_blocks8: + /* input: + * %rdi: input + * %rsi: dst + * %rdx: src + * %rcx: nblks (multiple of 8) + * %r9: poly1305-state + * %r8: poly1305-src + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + vzeroupper; + + subq $(9 * 8) + STACK_MAX + 32, %rsp; + andq $~31, %rsp; + + movq %rbx, (STACK_MAX + 0 * 8)(%rsp); + movq %r12, (STACK_MAX + 1 * 8)(%rsp); + movq %r13, (STACK_MAX + 2 * 8)(%rsp); + movq %r14, (STACK_MAX + 3 * 8)(%rsp); + movq %r15, (STACK_MAX + 4 * 8)(%rsp); + CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8); + CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8); + CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8); + CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8); + CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8); + + movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC + movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST + movq %rcx, (STACK_MAX + 7 * 8)(%rsp); # NBLKS + + /* Load state */ + POLY1305_LOAD_STATE(); + +.Loop_poly8: + + /* Construct counter vectors X12 and X13 */ + vpmovzxbd .Linc_counter rRIP, X0; + vpbroadcastd .Lunsigned_cmp rRIP, X2; + vpbroadcastd (12 * 4)(INPUT), X12; + vpbroadcastd (13 * 4)(INPUT), X13; + vpaddd X0, X12, X12; + vpxor X2, X0, X0; + vpxor X2, X12, X1; + vpcmpgtd X1, X0, X0; + vpsubd X0, X13, X13; + vmovdqa X12, (STACK_VEC_X12)(%rsp); + vmovdqa X13, (STACK_VEC_X13)(%rsp); + + /* Load vectors */ + vpbroadcastd (0 * 4)(INPUT), X0; + vpbroadcastd (1 * 4)(INPUT), X1; + vpbroadcastd (2 * 4)(INPUT), X2; + vpbroadcastd (3 * 4)(INPUT), X3; + vpbroadcastd (4 * 4)(INPUT), X4; + vpbroadcastd (5 * 4)(INPUT), X5; + vpbroadcastd (6 * 4)(INPUT), X6; + vpbroadcastd (7 * 4)(INPUT), X7; + vpbroadcastd (8 * 4)(INPUT), X8; + vpbroadcastd (9 * 4)(INPUT), X9; + vpbroadcastd (10 * 4)(INPUT), X10; + vpbroadcastd (11 * 4)(INPUT), X11; + vpbroadcastd (14 * 4)(INPUT), X14; + vpbroadcastd (15 * 4)(INPUT), X15; + vmovdqa X15, (STACK_TMP)(%rsp); + + /* Process eight ChaCha20 blocks and 32 Poly1305 blocks. 
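The counter-vector setup above must add 0..7 to a 64-bit block counter split across two 32-bit lanes, and AVX2 has no unsigned doubleword compare for the carry test. The code therefore biases both sides with .Lunsigned_cmp (0x80000000) and uses the signed vpcmpgtd; subtracting the resulting all-ones mask from the high word adds the carry. A scalar model of one lane (a sketch; assumes the usual two's-complement 32-bit int, as this code does elsewhere):

static void
ctr64_add_lane (u32 *ctr_lo, u32 *ctr_hi, u32 i)
{
  u32 lo = *ctr_lo + i;                       /* vpaddd                  */
  /* unsigned (i > lo) via a biased signed compare: vpxor + vpcmpgtd    */
  u32 mask = ((int)(i ^ 0x80000000u)
              > (int)(lo ^ 0x80000000u)) ? (u32)-1 : 0;
  *ctr_hi -= mask;                            /* vpsubd: hi - (-1) = +1  */
  *ctr_lo = lo;
}

The AArch64 version earlier needs no bias because NEON's cmhi provides the unsigned compare directly.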
*/ + + movl $20, (STACK_MAX + 8 * 8 + 4)(%rsp); +.Lround8_with_poly1305_outer: + movl $6, (STACK_MAX + 8 * 8)(%rsp); +.Lround8_with_poly1305_inner1: + /* rounds 0-5 & 10-15 */ + POLY1305_BLOCK_PART1(0 * 16) + QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X15, + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5()) + vmovdqa (STACK_TMP)(%rsp), X15; + vmovdqa X8, (STACK_TMP)(%rsp); + POLY1305_BLOCK_PART1(1 * 16) + QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8, + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5()) + POLY1305_BLOCK_PART1(2 * 16) + QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8, + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5()) + vmovdqa (STACK_TMP)(%rsp), X8; + vmovdqa X15, (STACK_TMP)(%rsp); + POLY1305_BLOCK_PART1(3 * 16) + lea (4 * 16)(POLY_RSRC), POLY_RSRC; + QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X15, + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5()) + + subl $2, (STACK_MAX + 8 * 8)(%rsp); + jnz .Lround8_with_poly1305_inner1; + + movl $4, (STACK_MAX + 8 * 8)(%rsp); +.Lround8_with_poly1305_inner2: + /* rounds 6-9 & 16-19 */ + POLY1305_BLOCK_PART1(0 * 16) + QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X15, + POLY1305_BLOCK_PART2(), + _, + POLY1305_BLOCK_PART3(), + _) + vmovdqa (STACK_TMP)(%rsp), X15; + vmovdqa X8, (STACK_TMP)(%rsp); + QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8, + _, + POLY1305_BLOCK_PART4(), + _, + POLY1305_BLOCK_PART5()) + POLY1305_BLOCK_PART1(1 * 16); + lea (2 * 16)(POLY_RSRC), POLY_RSRC; + QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8, + _, + POLY1305_BLOCK_PART2(), + _, + POLY1305_BLOCK_PART3()) + vmovdqa (STACK_TMP)(%rsp), X8; + vmovdqa X15, (STACK_TMP)(%rsp); + QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X15, + POLY1305_BLOCK_PART4(), + _, + POLY1305_BLOCK_PART5(), + _) + + subl $2, (STACK_MAX + 8 * 8)(%rsp); + jnz .Lround8_with_poly1305_inner2; + + subl $10, (STACK_MAX + 8 * 8 + 4)(%rsp); + jnz .Lround8_with_poly1305_outer; + + movq (STACK_MAX + 5 * 8)(%rsp), SRC; + movq (STACK_MAX + 6 * 8)(%rsp), DST; + + vmovdqa X8, (STACK_TMP1)(%rsp); + + /* tmp := X15 */ + vpbroadcastd (0 * 4)(INPUT), X15; + PLUS(X0, X15); + vpbroadcastd (1 * 4)(INPUT), X15; + PLUS(X1, X15); + vpbroadcastd (2 * 4)(INPUT), X15; + PLUS(X2, X15); + vpbroadcastd (3 * 4)(INPUT), X15; + PLUS(X3, X15); + vpbroadcastd (4 * 4)(INPUT), X15; + PLUS(X4, X15); + vpbroadcastd (5 * 4)(INPUT), X15; + PLUS(X5, X15); + vpbroadcastd (6 * 4)(INPUT), X15; + PLUS(X6, X15); + vpbroadcastd (7 * 4)(INPUT), X15; + PLUS(X7, X15); + transpose_4x4(X0, X1, X2, X3, X8, X15); + transpose_4x4(X4, X5, X6, X7, X8, X15); + vmovdqa (STACK_TMP1)(%rsp), X8; + transpose_16byte_2x2(X0, X4, X15); + transpose_16byte_2x2(X1, X5, X15); + transpose_16byte_2x2(X2, X6, X15); + transpose_16byte_2x2(X3, X7, X15); + vmovdqa (STACK_TMP)(%rsp), X15; + xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1); + vpbroadcastd (8 * 4)(INPUT), X0; + PLUS(X8, X0); + vpbroadcastd (9 * 4)(INPUT), X0; + PLUS(X9, X0); + vpbroadcastd (10 * 4)(INPUT), X0; + PLUS(X10, X0); + vpbroadcastd (11 * 4)(INPUT), X0; + PLUS(X11, X0); + vmovdqa (STACK_VEC_X12)(%rsp), X0; + PLUS(X12, X0); + vmovdqa (STACK_VEC_X13)(%rsp), X0; + PLUS(X13, X0); + vpbroadcastd (14 * 4)(INPUT), X0; + PLUS(X14, X0); + vpbroadcastd (15 * 4)(INPUT), 
X0; + PLUS(X15, X0); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3); + + /* Update counter */ + addq $8, (12 * 4)(INPUT); + + transpose_4x4(X8, X9, X10, X11, X0, X1); + transpose_4x4(X12, X13, X14, X15, X0, X1); + xor_src_dst(DST, SRC, (64 * 4 + 16 * 0), X4); + xor_src_dst(DST, SRC, (64 * 5 + 16 * 0), X5); + transpose_16byte_2x2(X8, X12, X0); + transpose_16byte_2x2(X9, X13, X0); + transpose_16byte_2x2(X10, X14, X0); + transpose_16byte_2x2(X11, X15, X0); + xor_src_dst(DST, SRC, (64 * 6 + 16 * 0), X6); + xor_src_dst(DST, SRC, (64 * 7 + 16 * 0), X7); + xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11); + xor_src_dst(DST, SRC, (64 * 4 + 16 * 2), X12); + xor_src_dst(DST, SRC, (64 * 5 + 16 * 2), X13); + xor_src_dst(DST, SRC, (64 * 6 + 16 * 2), X14); + xor_src_dst(DST, SRC, (64 * 7 + 16 * 2), X15); + + subq $8, (STACK_MAX + 7 * 8)(%rsp); # NBLKS + + lea (8 * 64)(DST), DST; + lea (8 * 64)(SRC), SRC; + movq SRC, (STACK_MAX + 5 * 8)(%rsp); + movq DST, (STACK_MAX + 6 * 8)(%rsp); + + jnz .Loop_poly8; + + /* Store state */ + POLY1305_STORE_STATE(); + + /* clear the used vector registers and stack */ + vpxor X0, X0, X0; + vmovdqa X0, (STACK_VEC_X12)(%rsp); + vmovdqa X0, (STACK_VEC_X13)(%rsp); + vmovdqa X0, (STACK_TMP)(%rsp); + vmovdqa X0, (STACK_TMP1)(%rsp); + vzeroall; + + movq (STACK_MAX + 0 * 8)(%rsp), %rbx; + movq (STACK_MAX + 1 * 8)(%rsp), %r12; + movq (STACK_MAX + 2 * 8)(%rsp), %r13; + movq (STACK_MAX + 3 * 8)(%rsp), %r14; + movq (STACK_MAX + 4 * 8)(%rsp), %r15; + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); + + xorl %eax, %eax; + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_poly1305_amd64_avx2_blocks8, + .-_gcry_chacha20_poly1305_amd64_avx2_blocks8;) + +#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/ +#endif /*__x86_64*/ diff --git a/comm/third_party/libgcrypt/cipher/chacha20-amd64-ssse3.S b/comm/third_party/libgcrypt/cipher/chacha20-amd64-ssse3.S new file mode 100644 index 0000000000..9cdb69ae6d --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/chacha20-amd64-ssse3.S @@ -0,0 +1,1012 @@ +/* chacha20-amd64-ssse3.S - SSSE3 implementation of ChaCha20 cipher + * + * Copyright (C) 2017-2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +/* + * Based on D. J. Bernstein reference implementation at + * http://cr.yp.to/chacha.html: + * + * chacha-regs.c version 20080118 + * D. J. Bernstein + * Public domain. 
+ */
+
+#ifdef __x86_64
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_SSSE3) && \
+   (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \
+    defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS))
+
+.text
+
+#include "asm-common-amd64.h"
+#include "asm-poly1305-amd64.h"
+
+/* register macros */
+#define INPUT %rdi
+#define DST %rsi
+#define SRC %rdx
+#define NBLKS %rcx
+#define ROUND %eax
+
+/* stack structure */
+#define STACK_VEC_X12 (16)
+#define STACK_VEC_X13 (16 + STACK_VEC_X12)
+#define STACK_TMP  (16 + STACK_VEC_X13)
+#define STACK_TMP1 (16 + STACK_TMP)
+#define STACK_TMP2 (16 + STACK_TMP1)
+
+#define STACK_MAX  (16 + STACK_TMP2)
+
+/* vector registers */
+#define X0 %xmm0
+#define X1 %xmm1
+#define X2 %xmm2
+#define X3 %xmm3
+#define X4 %xmm4
+#define X5 %xmm5
+#define X6 %xmm6
+#define X7 %xmm7
+#define X8 %xmm8
+#define X9 %xmm9
+#define X10 %xmm10
+#define X11 %xmm11
+#define X12 %xmm12
+#define X13 %xmm13
+#define X14 %xmm14
+#define X15 %xmm15
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4(x0, x1, x2, x3, t1, t2, t3) \
+	movdqa x0, t2; \
+	punpckhdq x1, t2; \
+	punpckldq x1, x0; \
+	\
+	movdqa x2, t1; \
+	punpckldq x3, t1; \
+	punpckhdq x3, x2; \
+	\
+	movdqa x0, x1; \
+	punpckhqdq t1, x1; \
+	punpcklqdq t1, x0; \
+	\
+	movdqa t2, x3; \
+	punpckhqdq x2, x3; \
+	punpcklqdq x2, t2; \
+	movdqa t2, x2;
+
+/* fill xmm register with 32-bit value from memory */
+#define pbroadcastd(mem32, xreg) \
+	movd mem32, xreg; \
+	pshufd $0, xreg, xreg;
+
+/* xor with unaligned memory operand */
+#define pxor_u(umem128, xreg, t) \
+	movdqu umem128, t; \
+	pxor t, xreg;
+
+/* xor register with unaligned src and save to unaligned dst */
+#define xor_src_dst(dst, src, offset, xreg, t) \
+	pxor_u(offset(src), xreg, t); \
+	movdqu xreg, offset(dst);
+
+#define clear(x) pxor x,x;
+
+/**********************************************************************
+  4-way chacha20
+ **********************************************************************/
+
+#define ROTATE2(v1,v2,c,tmp1,tmp2) \
+	movdqa v1, tmp1; \
+	movdqa v2, tmp2; \
+	psrld $(32 - (c)), v1; \
+	pslld $(c), tmp1; \
+	paddb tmp1, v1; \
+	psrld $(32 - (c)), v2; \
+	pslld $(c), tmp2; \
+	paddb tmp2, v2;
+
+#define ROTATE_SHUF_2(v1,v2,shuf) \
+	pshufb shuf, v1; \
+	pshufb shuf, v2;
+
+#define XOR(ds,s) \
+	pxor s, ds;
+
+#define PLUS(ds,s) \
+	paddd s, ds;
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,tmp2,\
+		      interleave_op1,interleave_op2) \
+	movdqa .Lshuf_rol16 rRIP, tmp1; \
+	interleave_op1; \
+	PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \
+	    ROTATE_SHUF_2(d1, d2, tmp1); \
+	PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \
+	    ROTATE2(b1, b2, 12, tmp1, tmp2); \
+	movdqa .Lshuf_rol8 rRIP, tmp1; \
+	interleave_op2; \
+	PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \
+	    ROTATE_SHUF_2(d1, d2, tmp1); \
+	PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \
+	    ROTATE2(b1, b2, 7, tmp1, tmp2);
+
+chacha20_data:
+.align 16
+.Lshuf_rol16:
+	.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
+.Lshuf_rol8:
+	.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
+.Lcounter1:
+	.long 1,0,0,0
+.Linc_counter:
+	.long 0,1,2,3
+.Lunsigned_cmp:
+	.long 0x80000000,0x80000000,0x80000000,0x80000000
+
+.align 8
+.globl _gcry_chacha20_amd64_ssse3_blocks4
+ELF(.type _gcry_chacha20_amd64_ssse3_blocks4,@function;)
+
+_gcry_chacha20_amd64_ssse3_blocks4:
+	/* input:
+	 *	%rdi: input
+	 *	%rsi: dst
+	 *	
%rdx: src + * %rcx: nblks (multiple of 4) + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + subq $STACK_MAX, %rsp; + andq $~15, %rsp; + +.Loop4: + mov $20, ROUND; + + /* Construct counter vectors X12 and X13 */ + movdqa .Linc_counter rRIP, X0; + movdqa .Lunsigned_cmp rRIP, X2; + pbroadcastd((12 * 4)(INPUT), X12); + pbroadcastd((13 * 4)(INPUT), X13); + paddd X0, X12; + movdqa X12, X1; + pxor X2, X0; + pxor X2, X1; + pcmpgtd X1, X0; + psubd X0, X13; + movdqa X12, (STACK_VEC_X12)(%rsp); + movdqa X13, (STACK_VEC_X13)(%rsp); + + /* Load vectors */ + pbroadcastd((0 * 4)(INPUT), X0); + pbroadcastd((1 * 4)(INPUT), X1); + pbroadcastd((2 * 4)(INPUT), X2); + pbroadcastd((3 * 4)(INPUT), X3); + pbroadcastd((4 * 4)(INPUT), X4); + pbroadcastd((5 * 4)(INPUT), X5); + pbroadcastd((6 * 4)(INPUT), X6); + pbroadcastd((7 * 4)(INPUT), X7); + pbroadcastd((8 * 4)(INPUT), X8); + pbroadcastd((9 * 4)(INPUT), X9); + pbroadcastd((10 * 4)(INPUT), X10); + pbroadcastd((11 * 4)(INPUT), X11); + pbroadcastd((14 * 4)(INPUT), X14); + pbroadcastd((15 * 4)(INPUT), X15); + movdqa X11, (STACK_TMP)(%rsp); + movdqa X15, (STACK_TMP1)(%rsp); + +.Lround2_4: + QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X11,X15,,) + movdqa (STACK_TMP)(%rsp), X11; + movdqa (STACK_TMP1)(%rsp), X15; + movdqa X8, (STACK_TMP)(%rsp); + movdqa X9, (STACK_TMP1)(%rsp); + QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,X9,,) + QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,X9,,) + movdqa (STACK_TMP)(%rsp), X8; + movdqa (STACK_TMP1)(%rsp), X9; + movdqa X11, (STACK_TMP)(%rsp); + movdqa X15, (STACK_TMP1)(%rsp); + QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X11,X15,,) + sub $2, ROUND; + jnz .Lround2_4; + + /* tmp := X15 */ + movdqa (STACK_TMP)(%rsp), X11; + pbroadcastd((0 * 4)(INPUT), X15); + PLUS(X0, X15); + pbroadcastd((1 * 4)(INPUT), X15); + PLUS(X1, X15); + pbroadcastd((2 * 4)(INPUT), X15); + PLUS(X2, X15); + pbroadcastd((3 * 4)(INPUT), X15); + PLUS(X3, X15); + pbroadcastd((4 * 4)(INPUT), X15); + PLUS(X4, X15); + pbroadcastd((5 * 4)(INPUT), X15); + PLUS(X5, X15); + pbroadcastd((6 * 4)(INPUT), X15); + PLUS(X6, X15); + pbroadcastd((7 * 4)(INPUT), X15); + PLUS(X7, X15); + pbroadcastd((8 * 4)(INPUT), X15); + PLUS(X8, X15); + pbroadcastd((9 * 4)(INPUT), X15); + PLUS(X9, X15); + pbroadcastd((10 * 4)(INPUT), X15); + PLUS(X10, X15); + pbroadcastd((11 * 4)(INPUT), X15); + PLUS(X11, X15); + movdqa (STACK_VEC_X12)(%rsp), X15; + PLUS(X12, X15); + movdqa (STACK_VEC_X13)(%rsp), X15; + PLUS(X13, X15); + movdqa X13, (STACK_TMP)(%rsp); + pbroadcastd((14 * 4)(INPUT), X15); + PLUS(X14, X15); + movdqa (STACK_TMP1)(%rsp), X15; + movdqa X14, (STACK_TMP1)(%rsp); + pbroadcastd((15 * 4)(INPUT), X13); + PLUS(X15, X13); + movdqa X15, (STACK_TMP2)(%rsp); + + /* Update counter */ + addq $4, (12 * 4)(INPUT); + + transpose_4x4(X0, X1, X2, X3, X13, X14, X15); + xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0, X15); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1, X15); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2, X15); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3, X15); + transpose_4x4(X4, X5, X6, X7, X0, X1, X2); + movdqa (STACK_TMP)(%rsp), X13; + movdqa (STACK_TMP1)(%rsp), X14; + movdqa (STACK_TMP2)(%rsp), X15; + xor_src_dst(DST, SRC, (64 * 0 + 16 * 1), X4, X0); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 1), X5, X0); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 1), X6, X0); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 1), X7, X0); + transpose_4x4(X8, X9, X10, X11, X0, X1, X2); + xor_src_dst(DST, SRC, (64 * 0 
+ 16 * 2), X8, X0); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9, X0); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10, X0); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11, X0); + transpose_4x4(X12, X13, X14, X15, X0, X1, X2); + xor_src_dst(DST, SRC, (64 * 0 + 16 * 3), X12, X0); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 3), X13, X0); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 3), X14, X0); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 3), X15, X0); + + sub $4, NBLKS; + lea (4 * 64)(DST), DST; + lea (4 * 64)(SRC), SRC; + jnz .Loop4; + + /* clear the used vector registers and stack */ + clear(X0); + movdqa X0, (STACK_VEC_X12)(%rsp); + movdqa X0, (STACK_VEC_X13)(%rsp); + movdqa X0, (STACK_TMP)(%rsp); + movdqa X0, (STACK_TMP1)(%rsp); + movdqa X0, (STACK_TMP2)(%rsp); + clear(X1); + clear(X2); + clear(X3); + clear(X4); + clear(X5); + clear(X6); + clear(X7); + clear(X8); + clear(X9); + clear(X10); + clear(X11); + clear(X12); + clear(X13); + clear(X14); + clear(X15); + + /* eax zeroed by round loop. */ + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_amd64_ssse3_blocks4, + .-_gcry_chacha20_amd64_ssse3_blocks4;) + +/********************************************************************** + 2-way && 1-way chacha20 + **********************************************************************/ + +#define ROTATE_SHUF(v1,shuf) \ + pshufb shuf, v1; + +#define ROTATE(v1,c,tmp1) \ + movdqa v1, tmp1; \ + psrld $(32 - (c)), v1; \ + pslld $(c), tmp1; \ + paddb tmp1, v1; + +#define WORD_SHUF(v1,shuf) \ + pshufd $shuf, v1, v1; + +#define QUARTERROUND4(x0,x1,x2,x3,shuf_rol8,shuf_rol16,tmp1,shuf_x1,\ + shuf_x2,shuf_x3) \ + PLUS(x0, x1); XOR(x3, x0); ROTATE_SHUF(x3, shuf_rol16); \ + PLUS(x2, x3); XOR(x1, x2); ROTATE(x1, 12, tmp1); \ + PLUS(x0, x1); XOR(x3, x0); ROTATE_SHUF(x3, shuf_rol8); \ + PLUS(x2, x3); \ + WORD_SHUF(x3, shuf_x3); \ + XOR(x1, x2); \ + WORD_SHUF(x2, shuf_x2); \ + ROTATE(x1, 7, tmp1); \ + WORD_SHUF(x1, shuf_x1); + +.align 8 +.globl _gcry_chacha20_amd64_ssse3_blocks1 +ELF(.type _gcry_chacha20_amd64_ssse3_blocks1,@function;) + +_gcry_chacha20_amd64_ssse3_blocks1: + /* input: + * %rdi: input + * %rsi: dst + * %rdx: src + * %rcx: nblks + */ + CFI_STARTPROC(); + + /* Load constants */ + movdqa .Lcounter1 rRIP, X4; + movdqa .Lshuf_rol8 rRIP, X5; + movdqa .Lshuf_rol16 rRIP, X6; + + /* Load state */ + movdqu (0 * 4)(INPUT), X10; + movdqu (4 * 4)(INPUT), X11; + movdqu (8 * 4)(INPUT), X12; + movdqu (12 * 4)(INPUT), X13; + + cmp $2, NBLKS; + jb .Loop1; + + mov $20, ROUND; + + movdqa X10, X0; + movdqa X11, X1; + movdqa X12, X2; + movdqa X13, X3; + + movdqa X10, X8; + movdqa X11, X9; + movdqa X12, X14; + movdqa X13, X15; + paddq X4, X15; + +.Lround2_2: + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93); + QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39); + QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39); + sub $2, ROUND; + jnz .Lround2_2; + + PLUS(X0, X10); + PLUS(X1, X11); + PLUS(X2, X12); + PLUS(X3, X13); + + /* Update counter */ + paddq X4, X13; + + PLUS(X8, X10); + PLUS(X9, X11); + PLUS(X14, X12); + PLUS(X15, X13); + + /* Update counter */ + paddq X4, X13; + + xor_src_dst(DST, SRC, 0 * 4, X0, X7); + xor_src_dst(DST, SRC, 4 * 4, X1, X7); + xor_src_dst(DST, SRC, 8 * 4, X2, X7); + xor_src_dst(DST, SRC, 12 * 4, X3, X7); + xor_src_dst(DST, SRC, 16 * 4, X8, X7); + xor_src_dst(DST, SRC, 20 * 4, X9, X7); + xor_src_dst(DST, SRC, 24 * 4, X14, X7); + xor_src_dst(DST, SRC, 28 * 4, X15, X7); + + lea (2 * 64)(DST), 
DST; + lea (2 * 64)(SRC), SRC; + + clear(X8); + clear(X9); + clear(X14); + clear(X15); + + sub $2, NBLKS; + jz .Ldone1; + +.Loop1: + mov $20, ROUND; + + movdqa X10, X0; + movdqa X11, X1; + movdqa X12, X2; + movdqa X13, X3; + +.Lround2_1: + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39); + sub $2, ROUND; + jnz .Lround2_1; + + PLUS(X0, X10); + PLUS(X1, X11); + PLUS(X2, X12); + PLUS(X3, X13); + + /* Update counter */ + paddq X4, X13; + + xor_src_dst(DST, SRC, 0 * 4, X0, X7); + xor_src_dst(DST, SRC, 4 * 4, X1, X7); + xor_src_dst(DST, SRC, 8 * 4, X2, X7); + xor_src_dst(DST, SRC, 12 * 4, X3, X7); + + lea (64)(DST), DST; + lea (64)(SRC), SRC; + + sub $1, NBLKS; + jnz .Loop1; + +.Ldone1: + /* Store counter */ + movdqu X13, (12 * 4)(INPUT); + + /* clear the used vector registers */ + clear(X0); + clear(X1); + clear(X2); + clear(X3); + clear(X4); + clear(X5); + clear(X6); + clear(X7); + clear(X10); + clear(X11); + clear(X12); + clear(X13); + + /* eax zeroed by round loop. */ + ret; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_amd64_ssse3_blocks1, + .-_gcry_chacha20_amd64_ssse3_blocks1;) + +/********************************************************************** + 4-way stitched chacha20-poly1305 + **********************************************************************/ + +#define _ /*_*/ + +.align 8 +.globl _gcry_chacha20_poly1305_amd64_ssse3_blocks4 +ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks4,@function;) + +_gcry_chacha20_poly1305_amd64_ssse3_blocks4: + /* input: + * %rdi: input + * %rsi: dst + * %rdx: src + * %rcx: nblks (multiple of 4) + * %r9: poly1305-state + * %r8: poly1305-src + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + subq $(9 * 8) + STACK_MAX + 16, %rsp; + andq $~15, %rsp; + + movq %rbx, (STACK_MAX + 0 * 8)(%rsp); + movq %r12, (STACK_MAX + 1 * 8)(%rsp); + movq %r13, (STACK_MAX + 2 * 8)(%rsp); + movq %r14, (STACK_MAX + 3 * 8)(%rsp); + movq %r15, (STACK_MAX + 4 * 8)(%rsp); + CFI_REG_ON_STACK(rbx, STACK_MAX + 0 * 8); + CFI_REG_ON_STACK(r12, STACK_MAX + 1 * 8); + CFI_REG_ON_STACK(r13, STACK_MAX + 2 * 8); + CFI_REG_ON_STACK(r14, STACK_MAX + 3 * 8); + CFI_REG_ON_STACK(r15, STACK_MAX + 4 * 8); + + movq %rdx, (STACK_MAX + 5 * 8)(%rsp); # SRC + movq %rsi, (STACK_MAX + 6 * 8)(%rsp); # DST + movq %rcx, (STACK_MAX + 7 * 8)(%rsp); # NBLKS + + /* Load state */ + POLY1305_LOAD_STATE(); + +.Loop_poly4: + + /* Construct counter vectors X12 and X13 */ + movdqa .Linc_counter rRIP, X0; + movdqa .Lunsigned_cmp rRIP, X2; + pbroadcastd((12 * 4)(INPUT), X12); + pbroadcastd((13 * 4)(INPUT), X13); + paddd X0, X12; + movdqa X12, X1; + pxor X2, X0; + pxor X2, X1; + pcmpgtd X1, X0; + psubd X0, X13; + movdqa X12, (STACK_VEC_X12)(%rsp); + movdqa X13, (STACK_VEC_X13)(%rsp); + + /* Load vectors */ + pbroadcastd((0 * 4)(INPUT), X0); + pbroadcastd((1 * 4)(INPUT), X1); + pbroadcastd((2 * 4)(INPUT), X2); + pbroadcastd((3 * 4)(INPUT), X3); + pbroadcastd((4 * 4)(INPUT), X4); + pbroadcastd((5 * 4)(INPUT), X5); + pbroadcastd((6 * 4)(INPUT), X6); + pbroadcastd((7 * 4)(INPUT), X7); + pbroadcastd((8 * 4)(INPUT), X8); + pbroadcastd((9 * 4)(INPUT), X9); + pbroadcastd((10 * 4)(INPUT), X10); + pbroadcastd((11 * 4)(INPUT), X11); + pbroadcastd((14 * 4)(INPUT), X14); + pbroadcastd((15 * 4)(INPUT), X15); + movdqa X11, (STACK_TMP)(%rsp); + movdqa X15, (STACK_TMP1)(%rsp); + + /* Process four ChaCha20 blocks and sixteen Poly1305 blocks. 
*/ + + movl $20, (STACK_MAX + 8 * 8 + 4)(%rsp); +.Lround4_with_poly1305_outer: + movl $6, (STACK_MAX + 8 * 8)(%rsp); +.Lround4_with_poly1305_inner1: + /* rounds 0-5 & 10-15 */ + POLY1305_BLOCK_PART1(0 * 16) + QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X11,X15, + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3()) + movdqa (STACK_TMP)(%rsp), X11; + movdqa (STACK_TMP1)(%rsp), X15; + movdqa X8, (STACK_TMP)(%rsp); + movdqa X9, (STACK_TMP1)(%rsp); + QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,X9, + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5()) + POLY1305_BLOCK_PART1(1 * 16) + lea (2 * 16)(POLY_RSRC), POLY_RSRC; + QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,X9, + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3()) + movdqa (STACK_TMP)(%rsp), X8; + movdqa (STACK_TMP1)(%rsp), X9; + movdqa X11, (STACK_TMP)(%rsp); + movdqa X15, (STACK_TMP1)(%rsp); + QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X11,X15, + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5()) + + subl $2, (STACK_MAX + 8 * 8)(%rsp); + jnz .Lround4_with_poly1305_inner1; + + movl $4, (STACK_MAX + 8 * 8)(%rsp); +.Lround4_with_poly1305_inner2: + /* rounds 6-9 & 16-19 */ + POLY1305_BLOCK_PART1(0 * 16) + lea (1 * 16)(POLY_RSRC), POLY_RSRC; + QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X11,X15, + POLY1305_BLOCK_PART2(), + _) + movdqa (STACK_TMP)(%rsp), X11; + movdqa (STACK_TMP1)(%rsp), X15; + movdqa X8, (STACK_TMP)(%rsp); + movdqa X9, (STACK_TMP1)(%rsp); + QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,X9, + POLY1305_BLOCK_PART3(), + _) + QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,X9, + POLY1305_BLOCK_PART4(), + _) + movdqa (STACK_TMP)(%rsp), X8; + movdqa (STACK_TMP1)(%rsp), X9; + movdqa X11, (STACK_TMP)(%rsp); + movdqa X15, (STACK_TMP1)(%rsp); + QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X11,X15, + POLY1305_BLOCK_PART5(), + _) + + subl $2, (STACK_MAX + 8 * 8)(%rsp); + jnz .Lround4_with_poly1305_inner2; + + subl $10, (STACK_MAX + 8 * 8 + 4)(%rsp); + jnz .Lround4_with_poly1305_outer; + + /* tmp := X15 */ + movdqa (STACK_TMP)(%rsp), X11; + pbroadcastd((0 * 4)(INPUT), X15); + PLUS(X0, X15); + pbroadcastd((1 * 4)(INPUT), X15); + PLUS(X1, X15); + pbroadcastd((2 * 4)(INPUT), X15); + PLUS(X2, X15); + pbroadcastd((3 * 4)(INPUT), X15); + PLUS(X3, X15); + pbroadcastd((4 * 4)(INPUT), X15); + PLUS(X4, X15); + pbroadcastd((5 * 4)(INPUT), X15); + PLUS(X5, X15); + pbroadcastd((6 * 4)(INPUT), X15); + PLUS(X6, X15); + pbroadcastd((7 * 4)(INPUT), X15); + PLUS(X7, X15); + pbroadcastd((8 * 4)(INPUT), X15); + PLUS(X8, X15); + pbroadcastd((9 * 4)(INPUT), X15); + PLUS(X9, X15); + pbroadcastd((10 * 4)(INPUT), X15); + PLUS(X10, X15); + pbroadcastd((11 * 4)(INPUT), X15); + PLUS(X11, X15); + movdqa (STACK_VEC_X12)(%rsp), X15; + PLUS(X12, X15); + movdqa (STACK_VEC_X13)(%rsp), X15; + PLUS(X13, X15); + movdqa X13, (STACK_TMP)(%rsp); + pbroadcastd((14 * 4)(INPUT), X15); + PLUS(X14, X15); + movdqa (STACK_TMP1)(%rsp), X15; + movdqa X14, (STACK_TMP1)(%rsp); + pbroadcastd((15 * 4)(INPUT), X13); + PLUS(X15, X13); + movdqa X15, (STACK_TMP2)(%rsp); + + /* Update counter */ + addq $4, (12 * 4)(INPUT); + + movq (STACK_MAX + 5 * 8)(%rsp), SRC; + movq (STACK_MAX + 6 * 8)(%rsp), DST; + + transpose_4x4(X0, X1, X2, X3, X13, X14, X15); + xor_src_dst(DST, SRC, (64 * 0 + 16 * 0), X0, X15); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 0), X1, X15); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 0), X2, X15); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 0), X3, X15); + transpose_4x4(X4, X5, X6, X7, X0, X1, X2); + 
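+	/* The finalized X13..X15 come back from STACK_TMP..STACK_TMP2
+	 * below: all sixteen XMM registers carry state words in this
+	 * path, so the words needed as transpose temporaries spill to
+	 * the stack. */
+	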
movdqa (STACK_TMP)(%rsp), X13; + movdqa (STACK_TMP1)(%rsp), X14; + movdqa (STACK_TMP2)(%rsp), X15; + xor_src_dst(DST, SRC, (64 * 0 + 16 * 1), X4, X0); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 1), X5, X0); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 1), X6, X0); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 1), X7, X0); + transpose_4x4(X8, X9, X10, X11, X0, X1, X2); + xor_src_dst(DST, SRC, (64 * 0 + 16 * 2), X8, X0); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 2), X9, X0); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 2), X10, X0); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 2), X11, X0); + transpose_4x4(X12, X13, X14, X15, X0, X1, X2); + xor_src_dst(DST, SRC, (64 * 0 + 16 * 3), X12, X0); + xor_src_dst(DST, SRC, (64 * 1 + 16 * 3), X13, X0); + xor_src_dst(DST, SRC, (64 * 2 + 16 * 3), X14, X0); + xor_src_dst(DST, SRC, (64 * 3 + 16 * 3), X15, X0); + + subq $4, (STACK_MAX + 7 * 8)(%rsp); # NBLKS + + lea (4 * 64)(DST), DST; + lea (4 * 64)(SRC), SRC; + movq SRC, (STACK_MAX + 5 * 8)(%rsp); + movq DST, (STACK_MAX + 6 * 8)(%rsp); + + jnz .Loop_poly4; + + /* Store state */ + POLY1305_STORE_STATE(); + + /* clear the used vector registers and stack */ + clear(X0); + movdqa X0, (STACK_VEC_X12)(%rsp); + movdqa X0, (STACK_VEC_X13)(%rsp); + movdqa X0, (STACK_TMP)(%rsp); + movdqa X0, (STACK_TMP1)(%rsp); + movdqa X0, (STACK_TMP2)(%rsp); + clear(X1); + clear(X2); + clear(X3); + clear(X4); + clear(X5); + clear(X6); + clear(X7); + clear(X8); + clear(X9); + clear(X10); + clear(X11); + clear(X12); + clear(X13); + clear(X14); + clear(X15); + + movq (STACK_MAX + 0 * 8)(%rsp), %rbx; + movq (STACK_MAX + 1 * 8)(%rsp), %r12; + movq (STACK_MAX + 2 * 8)(%rsp), %r13; + movq (STACK_MAX + 3 * 8)(%rsp), %r14; + movq (STACK_MAX + 4 * 8)(%rsp), %r15; + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); + + xorl %eax, %eax; + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks4, + .-_gcry_chacha20_poly1305_amd64_ssse3_blocks4;) + +/********************************************************************** + 2-way && 1-way stitched chacha20-poly1305 + **********************************************************************/ + +.align 8 +.globl _gcry_chacha20_poly1305_amd64_ssse3_blocks1 +ELF(.type _gcry_chacha20_poly1305_amd64_ssse3_blocks1,@function;) + +_gcry_chacha20_poly1305_amd64_ssse3_blocks1: + /* input: + * %rdi: chacha20-state + * %rsi: dst + * %rdx: src + * %rcx: nblks + * %r9: poly1305-state + * %r8: poly1305-src + */ + CFI_STARTPROC(); + + pushq %rbp; + CFI_PUSH(%rbp); + movq %rsp, %rbp; + CFI_DEF_CFA_REGISTER(%rbp); + + subq $(9 * 8), %rsp; + movq %rbx, (0 * 8)(%rsp); + movq %r12, (1 * 8)(%rsp); + movq %r13, (2 * 8)(%rsp); + movq %r14, (3 * 8)(%rsp); + movq %r15, (4 * 8)(%rsp); + CFI_REG_ON_STACK(rbx, 0 * 8); + CFI_REG_ON_STACK(r12, 1 * 8); + CFI_REG_ON_STACK(r13, 2 * 8); + CFI_REG_ON_STACK(r14, 3 * 8); + CFI_REG_ON_STACK(r15, 4 * 8); + + movq %rdx, (5 * 8)(%rsp); # SRC + movq %rsi, (6 * 8)(%rsp); # DST + movq %rcx, (7 * 8)(%rsp); # NBLKS + + /* Load constants */ + movdqa .Lcounter1 rRIP, X4; + movdqa .Lshuf_rol8 rRIP, X5; + movdqa .Lshuf_rol16 rRIP, X6; + + /* Load state */ + movdqu (0 * 4)(INPUT), X10; + movdqu (4 * 4)(INPUT), X11; + movdqu (8 * 4)(INPUT), X12; + movdqu (12 * 4)(INPUT), X13; + + POLY1305_LOAD_STATE(); + + cmpq $2, (7 * 8)(%rsp); #NBLKS + jb .Loop_poly1; + + movdqa X10, X0; + movdqa X11, X1; + movdqa X12, X2; + movdqa X13, X3; + + movdqa X10, X8; + movdqa X11, X9; + movdqa X12, X14; + movdqa X13, X15; + paddq X4, X15; + + /* Process two 
ChaCha20 blocks and eight Poly1305 blocks. */ + + movl $20, (8 * 8 + 4)(%rsp); +.Lround2_with_poly1305_outer: + movl $8, (8 * 8)(%rsp); +.Lround2_with_poly1305_inner: + POLY1305_BLOCK_PART1(0 * 16); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93); + lea (1 * 16)(POLY_RSRC), POLY_RSRC; + POLY1305_BLOCK_PART2(); + QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93); + POLY1305_BLOCK_PART3(); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39); + POLY1305_BLOCK_PART4(); + QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39); + POLY1305_BLOCK_PART5(); + + subl $2, (8 * 8)(%rsp); + jnz .Lround2_with_poly1305_inner; + + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93); + QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x39, 0x4e, 0x93); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39); + QUARTERROUND4(X8, X9, X14, X15, X5, X6, X7, 0x93, 0x4e, 0x39); + + subl $10, (8 * 8 + 4)(%rsp); + jnz .Lround2_with_poly1305_outer; + + movq (5 * 8)(%rsp), SRC; + movq (6 * 8)(%rsp), DST; + + PLUS(X0, X10); + PLUS(X1, X11); + PLUS(X2, X12); + PLUS(X3, X13); + + /* Update counter */ + paddq X4, X13; + + PLUS(X8, X10); + PLUS(X9, X11); + PLUS(X14, X12); + PLUS(X15, X13); + + /* Update counter */ + paddq X4, X13; + + xor_src_dst(DST, SRC, 0 * 4, X0, X7); + xor_src_dst(DST, SRC, 4 * 4, X1, X7); + xor_src_dst(DST, SRC, 8 * 4, X2, X7); + xor_src_dst(DST, SRC, 12 * 4, X3, X7); + xor_src_dst(DST, SRC, 16 * 4, X8, X7); + xor_src_dst(DST, SRC, 20 * 4, X9, X7); + xor_src_dst(DST, SRC, 24 * 4, X14, X7); + xor_src_dst(DST, SRC, 28 * 4, X15, X7); + + clear(X8); + clear(X9); + clear(X14); + clear(X15); + + subq $2, (7 * 8)(%rsp); # NBLKS + lea (2 * 64)(SRC), SRC; + lea (2 * 64)(DST), DST; + movq SRC, (5 * 8)(%rsp); + movq DST, (6 * 8)(%rsp); + jz .Ldone_poly1; + +.Loop_poly1: + movdqa X10, X0; + movdqa X11, X1; + movdqa X12, X2; + movdqa X13, X3; + + /* Process one ChaCha20 block and four Poly1305 blocks. 
*/ + + movl $20, (8 * 8 + 4)(%rsp); +.Lround1_with_poly1305_outer: + movl $8, (8 * 8)(%rsp); +.Lround1_with_poly1305_inner: + POLY1305_BLOCK_PART1(0 * 16); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93); + POLY1305_BLOCK_PART2(); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39); + lea (1 * 16)(POLY_RSRC), POLY_RSRC; + + POLY1305_BLOCK_PART3(); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93); + POLY1305_BLOCK_PART4(); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39); + POLY1305_BLOCK_PART5(); + + subl $4, (8 * 8)(%rsp); + jnz .Lround1_with_poly1305_inner; + + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x39, 0x4e, 0x93); + QUARTERROUND4(X0, X1, X2, X3, X5, X6, X7, 0x93, 0x4e, 0x39); + + subl $10, (8 * 8 + 4)(%rsp); + jnz .Lround1_with_poly1305_outer; + + movq (5 * 8)(%rsp), SRC; + movq (6 * 8)(%rsp), DST; + + PLUS(X0, X10); + PLUS(X1, X11); + PLUS(X2, X12); + PLUS(X3, X13); + + /* Update counter */ + paddq X4, X13; + + xor_src_dst(DST, SRC, 0 * 4, X0, X7); + xor_src_dst(DST, SRC, 4 * 4, X1, X7); + xor_src_dst(DST, SRC, 8 * 4, X2, X7); + xor_src_dst(DST, SRC, 12 * 4, X3, X7); + + subq $1, (7 * 8)(%rsp); # NBLKS + lea (64)(SRC), SRC; + lea (64)(DST), DST; + movq SRC, (5 * 8)(%rsp); + movq DST, (6 * 8)(%rsp); + + jnz .Loop_poly1; + +.Ldone_poly1: + /* Store state */ + POLY1305_STORE_STATE(); + + movdqu X13, (12 * 4)(INPUT); + + /* clear the used vector registers */ + clear(X0); + clear(X1); + clear(X2); + clear(X3); + clear(X4); + clear(X5); + clear(X6); + clear(X7); + clear(X10); + clear(X11); + clear(X12); + clear(X13); + + movq (0 * 8)(%rsp), %rbx; + movq (1 * 8)(%rsp), %r12; + movq (2 * 8)(%rsp), %r13; + movq (3 * 8)(%rsp), %r14; + movq (4 * 8)(%rsp), %r15; + CFI_RESTORE(%rbx); + CFI_RESTORE(%r12); + CFI_RESTORE(%r13); + CFI_RESTORE(%r14); + CFI_RESTORE(%r15); + + xorl %eax, %eax; + leave; + CFI_LEAVE(); + ret; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_poly1305_amd64_ssse3_blocks1, + .-_gcry_chacha20_poly1305_amd64_ssse3_blocks1;) + +#endif /*defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS)*/ +#endif /*__x86_64*/ diff --git a/comm/third_party/libgcrypt/cipher/chacha20-armv7-neon.S b/comm/third_party/libgcrypt/cipher/chacha20-armv7-neon.S new file mode 100644 index 0000000000..33a43df1f3 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/chacha20-armv7-neon.S @@ -0,0 +1,393 @@ +/* chacha20-armv7-neon.S - ARMv7 NEON implementation of ChaCha20 cipher + * + * Copyright (C) 2017,2018 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +/* + * Based on D. J. Bernstein reference implementation at + * http://cr.yp.to/chacha.html: + * + * chacha-regs.c version 20080118 + * D. J. Bernstein + * Public domain. 
+ */
+
+#include <config.h>
+
+#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \
+    defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_NEON)
+
+.syntax unified
+.fpu neon
+.arm
+
+.text
+
+#ifdef __PIC__
+# define GET_DATA_POINTER(reg, name, rtmp) \
+		ldr reg, 1f; \
+		ldr rtmp, 2f; \
+		b 3f; \
+	1:	.word _GLOBAL_OFFSET_TABLE_-(3f+8); \
+	2:	.word name(GOT); \
+	3:	add reg, pc, reg; \
+		ldr reg, [reg, rtmp];
+#else
+# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name
+#endif
+
+/* register macros */
+#define INPUT r0
+#define DST r1
+#define SRC r2
+#define NBLKS r3
+#define ROUND r4
+
+/* stack structure */
+#define STACK_VEC_X12 (16)
+#define STACK_VEC_X13 (STACK_VEC_X12 + 16)
+#define STACK_TMP (STACK_VEC_X13 + 16)
+#define STACK_TMP1 (16 + STACK_TMP)
+#define STACK_TMP2 (16 + STACK_TMP1)
+
+#define STACK_MAX (16 + STACK_TMP2)
+
+/* vector registers */
+#define X0 q0
+#define X1 q1
+#define X2 q2
+#define X3 q3
+#define X4 q4
+#define X5 q5
+#define X6 q6
+#define X7 q7
+#define X8 q8
+#define X9 q9
+#define X10 q10
+#define X11 q11
+#define X12 q12
+#define X13 q13
+#define X14 q14
+#define X15 q15
+
+#define X0l d0
+#define X1l d2
+#define X2l d4
+#define X3l d6
+#define X4l d8
+#define X5l d10
+#define X6l d12
+#define X7l d14
+#define X8l d16
+#define X9l d18
+#define X10l d20
+#define X11l d22
+#define X12l d24
+#define X13l d26
+#define X14l d28
+#define X15l d30
+
+#define X0h d1
+#define X1h d3
+#define X2h d5
+#define X3h d7
+#define X4h d9
+#define X5h d11
+#define X6h d13
+#define X7h d15
+#define X8h d17
+#define X9h d19
+#define X10h d21
+#define X11h d23
+#define X12h d25
+#define X13h d27
+#define X14h d29
+#define X15h d31
+
+/**********************************************************************
+  helper macros
+ **********************************************************************/
+
+/* 4x4 32-bit integer matrix transpose */
+#define transpose_4x4_part1(_q0, _q1, _q2, _q3) \
+	vtrn.32 _q0, _q1; \
+	vtrn.32 _q2, _q3;
+#define transpose_4x4_part2(_q0, _q1, _q2, _q3) \
+	vswp _q0##h, _q2##l; \
+	vswp _q1##h, _q3##l;
+
+#define clear(x) veor x,x,x;
+
+/**********************************************************************
+  4-way chacha20
+ **********************************************************************/
+
+#define ROTATE2(dst1,dst2,c,src1,src2) \
+	vshl.u32 dst1, src1, #(c); \
+	vshl.u32 dst2, src2, #(c); \
+	vsri.u32 dst1, src1, #(32 - (c)); \
+	vsri.u32 dst2, src2, #(32 - (c));
+
+#define ROTATE2_16(dst1,dst2,src1,src2) \
+	vrev32.16 dst1, src1; \
+	vrev32.16 dst2, src2;
+
+#define XOR(d,s1,s2) \
+	veor d, s2, s1;
+
+#define PLUS(ds,s) \
+	vadd.u32 ds, ds, s;
+
+#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2,ign,tmp1,tmp2) \
+	PLUS(a1,b1); PLUS(a2,b2); XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); \
+	    ROTATE2_16(d1, d2, tmp1, tmp2); \
+	PLUS(c1,d1); PLUS(c2,d2); XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); \
+	    ROTATE2(b1, b2, 12, tmp1, tmp2); \
+	PLUS(a1,b1); PLUS(a2,b2); XOR(tmp1,d1,a1); XOR(tmp2,d2,a2); \
+	    ROTATE2(d1, d2, 8, tmp1, tmp2); \
+	PLUS(c1,d1); PLUS(c2,d2); XOR(tmp1,b1,c1); XOR(tmp2,b2,c2); \
+	    ROTATE2(b1, b2, 7, tmp1, tmp2);
+
+chacha20_data:
+.align 4
+.Linc_counter:
+	.long 0,1,2,3
+
+.align 3
+.globl _gcry_chacha20_armv7_neon_blocks4
+.type _gcry_chacha20_armv7_neon_blocks4,%function;
+
+_gcry_chacha20_armv7_neon_blocks4:
+	/* input:
+	 *	r0: input
+	 *	r1: dst
+	 *	r2: src
+	 *	r3: nblks (multiple of 4)
+	 */
+
+	vpush {q4-q7};
+	push {r4-r12,lr};
+
+	mov r12, sp
+
+	mov r6, sp;
+	sub r6, r6, #(STACK_MAX);
+	and r6, r6, #(~15);
+	mov sp, r6;
+
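+	/* The .Loop4 body below gives lane i the 64-bit block counter
+	 * ctr+i: .Linc_counter {0,1,2,3} is added to the low counter
+	 * word and the vcgt.u32 mask propagates a 32-bit wrap into the
+	 * high word.  A C model of the carry handling -- illustration
+	 * only, not part of this file:
+	 *
+	 *   static void build_counters(uint32_t lo, uint32_t hi,
+	 *                              uint32_t out_lo[4], uint32_t out_hi[4])
+	 *   {
+	 *     unsigned int i;
+	 *     for (i = 0; i < 4; i++)
+	 *       {
+	 *         out_lo[i] = lo + i;
+	 *         out_hi[i] = hi + (out_lo[i] < lo); // +1 on 32-bit wrap
+	 *       }
+	 *   }
+	 */
+	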
GET_DATA_POINTER(r9, .Linc_counter, lr); + add lr, INPUT, #(12*4); + add r8, sp, #STACK_VEC_X12; + +.Loop4: + mov ROUND, #20; + + /* Construct counter vectors X12 and X13 */ + + vld1.8 {X15}, [lr]; + mov lr, INPUT; + vld1.8 {X8}, [r9]; + vdup.32 X12, X15l[0]; + vdup.32 X13, X15l[1]; + vld1.8 {X3}, [lr]!; + vadd.u32 X12, X12, X8; + vdup.32 X0, X3l[0]; + vdup.32 X1, X3l[1]; + vdup.32 X2, X3h[0]; + vcgt.u32 X8, X8, X12; + vdup.32 X3, X3h[1]; + vdup.32 X14, X15h[0]; + vdup.32 X15, X15h[1]; + vsub.u32 X13, X13, X8; + vld1.8 {X7}, [lr]!; + vld1.8 {X11}, [lr]; + vst1.8 {X12, X13}, [r8]; + vdup.32 X4, X7l[0]; + vdup.32 X5, X7l[1]; + vdup.32 X6, X7h[0]; + vdup.32 X7, X7h[1]; + vdup.32 X8, X11l[0]; + vdup.32 X9, X11l[1]; + vdup.32 X10, X11h[0]; + vdup.32 X11, X11h[1]; + + add r7, sp, #STACK_TMP2; + add r6, sp, #STACK_TMP1; + add r5, sp, #STACK_TMP; + vst1.8 {X15}, [r6]; + vst1.8 {X11}, [r5]; + + mov lr, INPUT; +.Lround2: + subs ROUND, ROUND, #2 + QUARTERROUND2(X0, X4, X8, X12, X1, X5, X9, X13, tmp:=,X11,X15) + vld1.8 {X11}, [r5]; + vld1.8 {X15}, [r6]; + vst1.8 {X8}, [r5]; + vst1.8 {X9}, [r6]; + QUARTERROUND2(X2, X6, X10, X14, X3, X7, X11, X15, tmp:=,X8,X9) + QUARTERROUND2(X0, X5, X10, X15, X1, X6, X11, X12, tmp:=,X8,X9) + vld1.8 {X8}, [r5]; + vld1.8 {X9}, [r6]; + vst1.8 {X11}, [r5]; + vst1.8 {X15}, [r6]; + QUARTERROUND2(X2, X7, X8, X13, X3, X4, X9, X14, tmp:=,X11,X15) + bne .Lround2; + + vld1.8 {X11}, [lr]!; + vst1.8 {X14}, [r7]; + + vdup.32 X14, X11l[0]; /* INPUT + 0 * 4 */ + vdup.32 X15, X11l[1]; /* INPUT + 1 * 4 */ + PLUS(X0, X14); + PLUS(X1, X15); + vdup.32 X14, X11h[0]; /* INPUT + 2 * 4 */ + vdup.32 X15, X11h[1]; /* INPUT + 3 * 4 */ + PLUS(X2, X14); + PLUS(X3, X15); + + vld1.8 {X11}, [r5]; + vld1.8 {X15}, [r6]; + vst1.8 {X0}, [r5]; + vld1.8 {X0}, [lr]!; + vst1.8 {X1}, [r6]; + + vdup.32 X14, X0l[0]; /* INPUT + 4 * 4 */ + vdup.32 X1, X0l[1]; /* INPUT + 5 * 4 */ + PLUS(X4, X14); + PLUS(X5, X1); + vdup.32 X14, X0h[0]; /* INPUT + 6 * 4 */ + vdup.32 X1, X0h[1]; /* INPUT + 7 * 4 */ + PLUS(X6, X14); + PLUS(X7, X1); + + vld1.8 {X0}, [lr]!; + + vdup.32 X14, X0l[0]; /* INPUT + 8 * 4 */ + vdup.32 X1, X0l[1]; /* INPUT + 9 * 4 */ + PLUS(X8, X14); + PLUS(X9, X1); + vdup.32 X14, X0h[0]; /* INPUT + 10 * 4 */ + vdup.32 X1, X0h[1]; /* INPUT + 11 * 4 */ + PLUS(X10, X14); + PLUS(X11, X1); + + vld1.8 {X0}, [lr]; + add lr, INPUT, #(12*4) + vld1.8 {X14}, [r7]; + + vdup.32 X1, X0h[0]; /* INPUT + 10 * 4 */ + ldm lr, {r10, r11}; /* Update counter */ + vdup.32 X0, X0h[1]; /* INPUT + 11 * 4 */ + PLUS(X14, X1); + PLUS(X15, X0); + adds r10, r10, #4; /* Update counter */ + vld1.8 {X0, X1}, [r8]; + + PLUS(X12, X0); + vld1.8 {X0}, [r5]; + PLUS(X13, X1); + adc r11, r11, #0; /* Update counter */ + + vld1.8 {X1}, [r6]; + stm lr, {r10, r11}; /* Update counter */ + transpose_4x4_part1(X0, X1, X2, X3); + transpose_4x4_part1(X4, X5, X6, X7); + transpose_4x4_part1(X8, X9, X10, X11); + transpose_4x4_part1(X12, X13, X14, X15); + transpose_4x4_part2(X0, X1, X2, X3); + transpose_4x4_part2(X4, X5, X6, X7); + transpose_4x4_part2(X8, X9, X10, X11); + transpose_4x4_part2(X12, X13, X14, X15); + + subs NBLKS, NBLKS, #4; + + vst1.8 {X10}, [r5]; + add lr, INPUT, #(12*4) + vst1.8 {X11}, [r6]; + vld1.8 {X10, X11}, [SRC]!; + veor X10, X0, X10; + vld1.8 {X0}, [SRC]!; + veor X11, X4, X11; + vld1.8 {X4}, [SRC]!; + vst1.8 {X10, X11}, [DST]!; + vld1.8 {X10, X11}, [SRC]!; + veor X0, X8, X0; + veor X4, X12, X4; + veor X10, X1, X10; + veor X11, X5, X11; + vst1.8 {X0}, [DST]!; + vld1.8 {X0, X1}, [SRC]!; + vst1.8 {X4}, [DST]!; + vld1.8 {X4, X5}, [SRC]!; + 
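+	/* (Loads, veors and stores of the transposed blocks stay
+	 *  interleaved through this tail so memory accesses overlap
+	 *  the remaining arithmetic.) */
+	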
vst1.8 {X10, X11}, [DST]!; + vld1.8 {X10}, [r5]; + vld1.8 {X11}, [r6]; + veor X0, X9, X0; + vld1.8 {X8, X9}, [SRC]!; + veor X1, X13, X1; + vld1.8 {X12, X13}, [SRC]!; + veor X4, X2, X4; + veor X5, X6, X5; + vst1.8 {X0, X1}, [DST]!; + vld1.8 {X0, X1}, [SRC]!; + vst1.8 {X4, X5}, [DST]!; + veor X8, X10, X8; + veor X9, X14, X9; + veor X12, X3, X12; + veor X13, X7, X13; + veor X0, X11, X0; + veor X1, X15, X1; + vst1.8 {X8, X9}, [DST]!; + vst1.8 {X12, X13}, [DST]!; + vst1.8 {X0, X1}, [DST]!; + + bne .Loop4; + + /* clear the used vector registers and stack */ + clear(X0); + vst1.8 {X0}, [r5]; + vst1.8 {X0}, [r6]; + vst1.8 {X0}, [r7]; + vst1.8 {X0}, [r8]!; + vst1.8 {X0}, [r8]; + + mov sp, r12 + clear(X1); + clear(X2); + clear(X3); + clear(X4); + clear(X5); + clear(X6); + clear(X7); + clear(X8); + clear(X9); + clear(X10); + clear(X11); + clear(X12); + clear(X13); + clear(X14); + clear(X15); + + pop {r4-r12,lr} + vpop {q4-q7} + eor r0, r0, r0 + bx lr +.size _gcry_chacha20_armv7_neon_blocks4, .-_gcry_chacha20_armv7_neon_blocks4; + +#endif diff --git a/comm/third_party/libgcrypt/cipher/chacha20-ppc.c b/comm/third_party/libgcrypt/cipher/chacha20-ppc.c new file mode 100644 index 0000000000..4a21b837d1 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/chacha20-ppc.c @@ -0,0 +1,646 @@ +/* chacha20-ppc.c - PowerPC vector implementation of ChaCha20 + * Copyright (C) 2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */
+
+#include <config.h>
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+    defined(USE_CHACHA20) && \
+    __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+#include "poly1305-internal.h"
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned int vector4x_u32;
+typedef vector unsigned long long vector2x_u64;
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+
+#ifdef WORDS_BIGENDIAN
+static const vector16x_u8 le_bswap_const =
+  { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
+#endif
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_rol_elems(vector4x_u32 v, unsigned int idx)
+{
+#ifndef WORDS_BIGENDIAN
+  return vec_sld (v, v, (16 - (4 * idx)) & 15);
+#else
+  return vec_sld (v, v, (4 * idx) & 15);
+#endif
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_load_le(unsigned long offset, const unsigned char *ptr)
+{
+  vector4x_u32 vec;
+  vec = vec_vsx_ld (offset, (const u32 *)ptr);
+#ifdef WORDS_BIGENDIAN
+  vec = (vector4x_u32)vec_perm((vector16x_u8)vec, (vector16x_u8)vec,
+			       le_bswap_const);
+#endif
+  return vec;
+}
+
+
+static ASM_FUNC_ATTR_INLINE void
+vec_store_le(vector4x_u32 vec, unsigned long offset, unsigned char *ptr)
+{
+#ifdef WORDS_BIGENDIAN
+  vec = (vector4x_u32)vec_perm((vector16x_u8)vec, (vector16x_u8)vec,
+			       le_bswap_const);
+#endif
+  vec_vsx_st (vec, offset, (u32 *)ptr);
+}
+
+
+static ASM_FUNC_ATTR_INLINE vector4x_u32
+vec_add_ctr_u64(vector4x_u32 v, vector4x_u32 a)
+{
+#ifdef WORDS_BIGENDIAN
+  static const vector16x_u8 swap32 =
+    { 4, 5, 6, 7, 0, 1, 2, 3, 12, 13, 14, 15, 8, 9, 10, 11 };
+  vector2x_u64 vec, add, sum;
+
+  vec = (vector2x_u64)vec_perm((vector16x_u8)v, (vector16x_u8)v, swap32);
+  add = (vector2x_u64)vec_perm((vector16x_u8)a, (vector16x_u8)a, swap32);
+  sum = vec + add;
+  return (vector4x_u32)vec_perm((vector16x_u8)sum, (vector16x_u8)sum, swap32);
+#else
+  return (vector4x_u32)((vector2x_u64)(v) + (vector2x_u64)(a));
+#endif
+}
+
+
+/**********************************************************************
+  2-way && 1-way chacha20
+ **********************************************************************/
+
+#define ROTATE(v1,rolv) \
+	__asm__ ("vrlw %0,%1,%2\n\t" : "=v" (v1) : "v" (v1), "v" (rolv))
+
+#define WORD_ROL(v1,c) \
+	((v1) = vec_rol_elems((v1), (c)))
+
+#define XOR(ds,s) \
+	((ds) ^= (s))
+
+#define PLUS(ds,s) \
+	((ds) += (s))
+
+#define QUARTERROUND4(x0,x1,x2,x3,rol_x1,rol_x2,rol_x3) \
+	PLUS(x0, x1); XOR(x3, x0); ROTATE(x3, rotate_16); \
+	PLUS(x2, x3); XOR(x1, x2); ROTATE(x1, rotate_12); \
+	PLUS(x0, x1); XOR(x3, x0); ROTATE(x3, rotate_8); \
+	PLUS(x2, x3); \
+	  WORD_ROL(x3, rol_x3); \
+		      XOR(x1, x2); \
+	  WORD_ROL(x2, rol_x2); \
+		      ROTATE(x1, rotate_7); \
+	  WORD_ROL(x1, rol_x1);
+
+#define ADD_U64(v,a) \
+	(v = vec_add_ctr_u64(v, a))
+
+unsigned int ASM_FUNC_ATTR
+_gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, const byte *src,
+			    size_t nblks)
+{
+  vector4x_u32 counter_1 = { 1, 0, 0, 0 };
+  vector4x_u32 rotate_16 = { 16, 16, 16, 16 };
+  vector4x_u32 rotate_12 = { 12, 12, 12, 12 };
+  vector4x_u32 rotate_8 = { 8, 8, 8, 8 };
+  vector4x_u32 rotate_7 = { 7, 7, 7, 7 };
+  vector4x_u32 state0, state1,
state2, state3; + vector4x_u32 v0, v1, v2, v3; + vector4x_u32 v4, v5, v6, v7; + int i; + + /* force preload of constants to vector registers */ + __asm__ ("": "+v" (counter_1) :: "memory"); + __asm__ ("": "+v" (rotate_16) :: "memory"); + __asm__ ("": "+v" (rotate_12) :: "memory"); + __asm__ ("": "+v" (rotate_8) :: "memory"); + __asm__ ("": "+v" (rotate_7) :: "memory"); + + state0 = vec_vsx_ld(0 * 16, state); + state1 = vec_vsx_ld(1 * 16, state); + state2 = vec_vsx_ld(2 * 16, state); + state3 = vec_vsx_ld(3 * 16, state); + + while (nblks >= 2) + { + v0 = state0; + v1 = state1; + v2 = state2; + v3 = state3; + + v4 = state0; + v5 = state1; + v6 = state2; + v7 = state3; + ADD_U64(v7, counter_1); + + for (i = 20; i > 0; i -= 2) + { + QUARTERROUND4(v0, v1, v2, v3, 1, 2, 3); + QUARTERROUND4(v4, v5, v6, v7, 1, 2, 3); + QUARTERROUND4(v0, v1, v2, v3, 3, 2, 1); + QUARTERROUND4(v4, v5, v6, v7, 3, 2, 1); + } + + v0 += state0; + v1 += state1; + v2 += state2; + v3 += state3; + ADD_U64(state3, counter_1); /* update counter */ + v4 += state0; + v5 += state1; + v6 += state2; + v7 += state3; + ADD_U64(state3, counter_1); /* update counter */ + + v0 ^= vec_load_le(0 * 16, src); + v1 ^= vec_load_le(1 * 16, src); + v2 ^= vec_load_le(2 * 16, src); + v3 ^= vec_load_le(3 * 16, src); + vec_store_le(v0, 0 * 16, dst); + vec_store_le(v1, 1 * 16, dst); + vec_store_le(v2, 2 * 16, dst); + vec_store_le(v3, 3 * 16, dst); + src += 64; + dst += 64; + v4 ^= vec_load_le(0 * 16, src); + v5 ^= vec_load_le(1 * 16, src); + v6 ^= vec_load_le(2 * 16, src); + v7 ^= vec_load_le(3 * 16, src); + vec_store_le(v4, 0 * 16, dst); + vec_store_le(v5, 1 * 16, dst); + vec_store_le(v6, 2 * 16, dst); + vec_store_le(v7, 3 * 16, dst); + src += 64; + dst += 64; + + nblks -= 2; + } + + while (nblks) + { + v0 = state0; + v1 = state1; + v2 = state2; + v3 = state3; + + for (i = 20; i > 0; i -= 2) + { + QUARTERROUND4(v0, v1, v2, v3, 1, 2, 3); + QUARTERROUND4(v0, v1, v2, v3, 3, 2, 1); + } + + v0 += state0; + v1 += state1; + v2 += state2; + v3 += state3; + ADD_U64(state3, counter_1); /* update counter */ + + v0 ^= vec_load_le(0 * 16, src); + v1 ^= vec_load_le(1 * 16, src); + v2 ^= vec_load_le(2 * 16, src); + v3 ^= vec_load_le(3 * 16, src); + vec_store_le(v0, 0 * 16, dst); + vec_store_le(v1, 1 * 16, dst); + vec_store_le(v2, 2 * 16, dst); + vec_store_le(v3, 3 * 16, dst); + src += 64; + dst += 64; + + nblks--; + } + + vec_vsx_st(state3, 3 * 16, state); /* store counter */ + + return 0; +} + + +/********************************************************************** + 4-way chacha20 + **********************************************************************/ + +/* 4x4 32-bit integer matrix transpose */ +#define transpose_4x4(x0, x1, x2, x3) ({ \ + vector4x_u32 t1 = vec_mergeh(x0, x2); \ + vector4x_u32 t2 = vec_mergel(x0, x2); \ + vector4x_u32 t3 = vec_mergeh(x1, x3); \ + x3 = vec_mergel(x1, x3); \ + x0 = vec_mergeh(t1, t3); \ + x1 = vec_mergel(t1, t3); \ + x2 = vec_mergeh(t2, x3); \ + x3 = vec_mergel(t2, x3); \ + }) + +#define QUARTERROUND2(a1,b1,c1,d1,a2,b2,c2,d2) \ + PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \ + ROTATE(d1, rotate_16); ROTATE(d2, rotate_16); \ + PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \ + ROTATE(b1, rotate_12); ROTATE(b2, rotate_12); \ + PLUS(a1,b1); PLUS(a2,b2); XOR(d1,a1); XOR(d2,a2); \ + ROTATE(d1, rotate_8); ROTATE(d2, rotate_8); \ + PLUS(c1,d1); PLUS(c2,d2); XOR(b1,c1); XOR(b2,c2); \ + ROTATE(b1, rotate_7); ROTATE(b2, rotate_7); + +unsigned int ASM_FUNC_ATTR +_gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, const byte 
*src, + size_t nblks) +{ + vector4x_u32 counters_0123 = { 0, 1, 2, 3 }; + vector4x_u32 counter_4 = { 4, 0, 0, 0 }; + vector4x_u32 rotate_16 = { 16, 16, 16, 16 }; + vector4x_u32 rotate_12 = { 12, 12, 12, 12 }; + vector4x_u32 rotate_8 = { 8, 8, 8, 8 }; + vector4x_u32 rotate_7 = { 7, 7, 7, 7 }; + vector4x_u32 state0, state1, state2, state3; + vector4x_u32 v0, v1, v2, v3, v4, v5, v6, v7; + vector4x_u32 v8, v9, v10, v11, v12, v13, v14, v15; + vector4x_u32 tmp; + int i; + + /* force preload of constants to vector registers */ + __asm__ ("": "+v" (counters_0123) :: "memory"); + __asm__ ("": "+v" (counter_4) :: "memory"); + __asm__ ("": "+v" (rotate_16) :: "memory"); + __asm__ ("": "+v" (rotate_12) :: "memory"); + __asm__ ("": "+v" (rotate_8) :: "memory"); + __asm__ ("": "+v" (rotate_7) :: "memory"); + + state0 = vec_vsx_ld(0 * 16, state); + state1 = vec_vsx_ld(1 * 16, state); + state2 = vec_vsx_ld(2 * 16, state); + state3 = vec_vsx_ld(3 * 16, state); + + do + { + v0 = vec_splat(state0, 0); + v1 = vec_splat(state0, 1); + v2 = vec_splat(state0, 2); + v3 = vec_splat(state0, 3); + v4 = vec_splat(state1, 0); + v5 = vec_splat(state1, 1); + v6 = vec_splat(state1, 2); + v7 = vec_splat(state1, 3); + v8 = vec_splat(state2, 0); + v9 = vec_splat(state2, 1); + v10 = vec_splat(state2, 2); + v11 = vec_splat(state2, 3); + v12 = vec_splat(state3, 0); + v13 = vec_splat(state3, 1); + v14 = vec_splat(state3, 2); + v15 = vec_splat(state3, 3); + + v12 += counters_0123; + v13 -= vec_cmplt(v12, counters_0123); + + for (i = 20; i > 0; i -= 2) + { + QUARTERROUND2(v0, v4, v8, v12, v1, v5, v9, v13) + QUARTERROUND2(v2, v6, v10, v14, v3, v7, v11, v15) + QUARTERROUND2(v0, v5, v10, v15, v1, v6, v11, v12) + QUARTERROUND2(v2, v7, v8, v13, v3, v4, v9, v14) + } + + v0 += vec_splat(state0, 0); + v1 += vec_splat(state0, 1); + v2 += vec_splat(state0, 2); + v3 += vec_splat(state0, 3); + v4 += vec_splat(state1, 0); + v5 += vec_splat(state1, 1); + v6 += vec_splat(state1, 2); + v7 += vec_splat(state1, 3); + v8 += vec_splat(state2, 0); + v9 += vec_splat(state2, 1); + v10 += vec_splat(state2, 2); + v11 += vec_splat(state2, 3); + tmp = vec_splat(state3, 0); + tmp += counters_0123; + v12 += tmp; + v13 += vec_splat(state3, 1) - vec_cmplt(tmp, counters_0123); + v14 += vec_splat(state3, 2); + v15 += vec_splat(state3, 3); + ADD_U64(state3, counter_4); /* update counter */ + + transpose_4x4(v0, v1, v2, v3); + transpose_4x4(v4, v5, v6, v7); + transpose_4x4(v8, v9, v10, v11); + transpose_4x4(v12, v13, v14, v15); + + v0 ^= vec_load_le((64 * 0 + 16 * 0), src); + v1 ^= vec_load_le((64 * 1 + 16 * 0), src); + v2 ^= vec_load_le((64 * 2 + 16 * 0), src); + v3 ^= vec_load_le((64 * 3 + 16 * 0), src); + + v4 ^= vec_load_le((64 * 0 + 16 * 1), src); + v5 ^= vec_load_le((64 * 1 + 16 * 1), src); + v6 ^= vec_load_le((64 * 2 + 16 * 1), src); + v7 ^= vec_load_le((64 * 3 + 16 * 1), src); + + v8 ^= vec_load_le((64 * 0 + 16 * 2), src); + v9 ^= vec_load_le((64 * 1 + 16 * 2), src); + v10 ^= vec_load_le((64 * 2 + 16 * 2), src); + v11 ^= vec_load_le((64 * 3 + 16 * 2), src); + + v12 ^= vec_load_le((64 * 0 + 16 * 3), src); + v13 ^= vec_load_le((64 * 1 + 16 * 3), src); + v14 ^= vec_load_le((64 * 2 + 16 * 3), src); + v15 ^= vec_load_le((64 * 3 + 16 * 3), src); + + vec_store_le(v0, (64 * 0 + 16 * 0), dst); + vec_store_le(v1, (64 * 1 + 16 * 0), dst); + vec_store_le(v2, (64 * 2 + 16 * 0), dst); + vec_store_le(v3, (64 * 3 + 16 * 0), dst); + + vec_store_le(v4, (64 * 0 + 16 * 1), dst); + vec_store_le(v5, (64 * 1 + 16 * 1), dst); + vec_store_le(v6, (64 * 2 + 16 * 1), dst); + 
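+
+      /* transpose_4x4 above turns lane-sliced state (vector i holding
+       * word i of four parallel blocks) into block-contiguous rows, so
+       * each 16-byte store lands at its block's offset in the output.
+       * A scalar C model of the permutation -- illustration only, not
+       * part of this file:
+       *
+       *   static void transpose_4x4_model(uint32_t m[4][4])
+       *   {
+       *     unsigned int i, j;
+       *     for (i = 0; i < 4; i++)
+       *       for (j = i + 1; j < 4; j++)
+       *         {
+       *           uint32_t t = m[i][j];
+       *           m[i][j] = m[j][i];
+       *           m[j][i] = t;
+       *         }
+       *   }
+       */
+  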
vec_store_le(v7, (64 * 3 + 16 * 1), dst); + + vec_store_le(v8, (64 * 0 + 16 * 2), dst); + vec_store_le(v9, (64 * 1 + 16 * 2), dst); + vec_store_le(v10, (64 * 2 + 16 * 2), dst); + vec_store_le(v11, (64 * 3 + 16 * 2), dst); + + vec_store_le(v12, (64 * 0 + 16 * 3), dst); + vec_store_le(v13, (64 * 1 + 16 * 3), dst); + vec_store_le(v14, (64 * 2 + 16 * 3), dst); + vec_store_le(v15, (64 * 3 + 16 * 3), dst); + + src += 4*64; + dst += 4*64; + + nblks -= 4; + } + while (nblks); + + vec_vsx_st(state3, 3 * 16, state); /* store counter */ + + return 0; +} + + +#if SIZEOF_UNSIGNED_LONG == 8 + +/********************************************************************** + 4-way stitched chacha20-poly1305 + **********************************************************************/ + +#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ + __asm__ ("addc %0, %3, %0\n" \ + "adde %1, %4, %1\n" \ + "adde %2, %5, %2\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2) \ + : "r" (B0), "r" (B1), "r" (B2) \ + : "cc" ) + +#define MUL_MOD_1305_64_PART1(H2, H1, H0, R1, R0, R1_MULT5) do { \ + /* x = a * r (partial mod 2^130-5) */ \ + umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ + umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ + \ + umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \ + } while (0) + +#define MUL_MOD_1305_64_PART2(H2, H1, H0, R1, R0, R1_MULT5) do { \ + add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \ + umul_ppmm(t1_hi, t1_lo, H1, R0); /* h1 * r0 */ \ + add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \ + \ + t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \ + t1_hi = H2 * R0; /* h2 * r0 */ \ + add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \ + \ + /* carry propagation */ \ + H2 = H0 & 3; \ + H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ + ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ + } while (0) + +#define POLY1305_BLOCK_PART1(in_pos) do { \ + m0 = buf_get_le64(poly1305_src + (in_pos) + 0); \ + m1 = buf_get_le64(poly1305_src + (in_pos) + 8); \ + /* a = h + m */ \ + ADD_1305_64(h2, h1, h0, m2, m1, m0); \ + /* h = a * r (partial mod 2^130-5) */ \ + MUL_MOD_1305_64_PART1(h2, h1, h0, r1, r0, r1_mult5); \ + } while (0) + +#define POLY1305_BLOCK_PART2(in_pos) do { \ + MUL_MOD_1305_64_PART2(h2, h1, h0, r1, r0, r1_mult5); \ + } while (0) + +unsigned int ASM_FUNC_ATTR +_gcry_chacha20_poly1305_ppc8_blocks4(u32 *state, byte *dst, const byte *src, + size_t nblks, POLY1305_STATE *st, + const byte *poly1305_src) +{ + vector4x_u32 counters_0123 = { 0, 1, 2, 3 }; + vector4x_u32 counter_4 = { 4, 0, 0, 0 }; + vector4x_u32 rotate_16 = { 16, 16, 16, 16 }; + vector4x_u32 rotate_12 = { 12, 12, 12, 12 }; + vector4x_u32 rotate_8 = { 8, 8, 8, 8 }; + vector4x_u32 rotate_7 = { 7, 7, 7, 7 }; + vector4x_u32 state0, state1, state2, state3; + vector4x_u32 v0, v1, v2, v3, v4, v5, v6, v7; + vector4x_u32 v8, v9, v10, v11, v12, v13, v14, v15; + vector4x_u32 tmp; + u64 r0, r1, r1_mult5; + u64 h0, h1, h2; + u64 m0, m1, m2; + u64 x0_lo, x0_hi, x1_lo, x1_hi; + u64 t0_lo, t0_hi, t1_lo, t1_hi; + unsigned int i, o; + + /* load poly1305 state */ + m2 = 1; + h0 = st->h[0] + ((u64)st->h[1] << 32); + h1 = st->h[2] + ((u64)st->h[3] << 32); + h2 = st->h[4]; + r0 = st->r[0] + ((u64)st->r[1] << 32); + r1 = st->r[2] + ((u64)st->r[3] << 32); + r1_mult5 = (r1 >> 2) + r1; + + /* force preload of constants to vector registers */ + __asm__ ("": "+v" (counters_0123) :: "memory"); + __asm__ ("": "+v" (counter_4) :: "memory"); + __asm__ ("": "+v" (rotate_16) :: "memory"); + __asm__ ("": "+v" (rotate_12) :: "memory"); + __asm__ ("": "+v" (rotate_8) :: 
"memory"); + __asm__ ("": "+v" (rotate_7) :: "memory"); + + state0 = vec_vsx_ld(0 * 16, state); + state1 = vec_vsx_ld(1 * 16, state); + state2 = vec_vsx_ld(2 * 16, state); + state3 = vec_vsx_ld(3 * 16, state); + + do + { + v0 = vec_splat(state0, 0); + v1 = vec_splat(state0, 1); + v2 = vec_splat(state0, 2); + v3 = vec_splat(state0, 3); + v4 = vec_splat(state1, 0); + v5 = vec_splat(state1, 1); + v6 = vec_splat(state1, 2); + v7 = vec_splat(state1, 3); + v8 = vec_splat(state2, 0); + v9 = vec_splat(state2, 1); + v10 = vec_splat(state2, 2); + v11 = vec_splat(state2, 3); + v12 = vec_splat(state3, 0); + v13 = vec_splat(state3, 1); + v14 = vec_splat(state3, 2); + v15 = vec_splat(state3, 3); + + v12 += counters_0123; + v13 -= vec_cmplt(v12, counters_0123); + + for (o = 20; o; o -= 10) + { + for (i = 8; i; i -= 2) + { + POLY1305_BLOCK_PART1(0 * 16); + QUARTERROUND2(v0, v4, v8, v12, v1, v5, v9, v13) + POLY1305_BLOCK_PART2(); + QUARTERROUND2(v2, v6, v10, v14, v3, v7, v11, v15) + POLY1305_BLOCK_PART1(1 * 16); + poly1305_src += 2 * 16; + QUARTERROUND2(v0, v5, v10, v15, v1, v6, v11, v12) + POLY1305_BLOCK_PART2(); + QUARTERROUND2(v2, v7, v8, v13, v3, v4, v9, v14) + } + + QUARTERROUND2(v0, v4, v8, v12, v1, v5, v9, v13) + QUARTERROUND2(v2, v6, v10, v14, v3, v7, v11, v15) + QUARTERROUND2(v0, v5, v10, v15, v1, v6, v11, v12) + QUARTERROUND2(v2, v7, v8, v13, v3, v4, v9, v14) + } + + v0 += vec_splat(state0, 0); + v1 += vec_splat(state0, 1); + v2 += vec_splat(state0, 2); + v3 += vec_splat(state0, 3); + v4 += vec_splat(state1, 0); + v5 += vec_splat(state1, 1); + v6 += vec_splat(state1, 2); + v7 += vec_splat(state1, 3); + v8 += vec_splat(state2, 0); + v9 += vec_splat(state2, 1); + v10 += vec_splat(state2, 2); + v11 += vec_splat(state2, 3); + tmp = vec_splat(state3, 0); + tmp += counters_0123; + v12 += tmp; + v13 += vec_splat(state3, 1) - vec_cmplt(tmp, counters_0123); + v14 += vec_splat(state3, 2); + v15 += vec_splat(state3, 3); + ADD_U64(state3, counter_4); /* update counter */ + + transpose_4x4(v0, v1, v2, v3); + transpose_4x4(v4, v5, v6, v7); + transpose_4x4(v8, v9, v10, v11); + transpose_4x4(v12, v13, v14, v15); + + v0 ^= vec_load_le((64 * 0 + 16 * 0), src); + v1 ^= vec_load_le((64 * 1 + 16 * 0), src); + v2 ^= vec_load_le((64 * 2 + 16 * 0), src); + v3 ^= vec_load_le((64 * 3 + 16 * 0), src); + + v4 ^= vec_load_le((64 * 0 + 16 * 1), src); + v5 ^= vec_load_le((64 * 1 + 16 * 1), src); + v6 ^= vec_load_le((64 * 2 + 16 * 1), src); + v7 ^= vec_load_le((64 * 3 + 16 * 1), src); + + v8 ^= vec_load_le((64 * 0 + 16 * 2), src); + v9 ^= vec_load_le((64 * 1 + 16 * 2), src); + v10 ^= vec_load_le((64 * 2 + 16 * 2), src); + v11 ^= vec_load_le((64 * 3 + 16 * 2), src); + + v12 ^= vec_load_le((64 * 0 + 16 * 3), src); + v13 ^= vec_load_le((64 * 1 + 16 * 3), src); + v14 ^= vec_load_le((64 * 2 + 16 * 3), src); + v15 ^= vec_load_le((64 * 3 + 16 * 3), src); + + vec_store_le(v0, (64 * 0 + 16 * 0), dst); + vec_store_le(v1, (64 * 1 + 16 * 0), dst); + vec_store_le(v2, (64 * 2 + 16 * 0), dst); + vec_store_le(v3, (64 * 3 + 16 * 0), dst); + + vec_store_le(v4, (64 * 0 + 16 * 1), dst); + vec_store_le(v5, (64 * 1 + 16 * 1), dst); + vec_store_le(v6, (64 * 2 + 16 * 1), dst); + vec_store_le(v7, (64 * 3 + 16 * 1), dst); + + vec_store_le(v8, (64 * 0 + 16 * 2), dst); + vec_store_le(v9, (64 * 1 + 16 * 2), dst); + vec_store_le(v10, (64 * 2 + 16 * 2), dst); + vec_store_le(v11, (64 * 3 + 16 * 2), dst); + + vec_store_le(v12, (64 * 0 + 16 * 3), dst); + vec_store_le(v13, (64 * 1 + 16 * 3), dst); + vec_store_le(v14, (64 * 2 + 16 * 3), dst); + vec_store_le(v15, 
(64 * 3 + 16 * 3), dst); + + src += 4*64; + dst += 4*64; + + nblks -= 4; + } + while (nblks); + + vec_vsx_st(state3, 3 * 16, state); /* store counter */ + + /* store poly1305 state */ + st->h[0] = h0; + st->h[1] = h0 >> 32; + st->h[2] = h1; + st->h[3] = h1 >> 32; + st->h[4] = h2; + + return 0; +} + +#endif /* SIZEOF_UNSIGNED_LONG == 8 */ + +#endif /* ENABLE_PPC_CRYPTO_SUPPORT */ diff --git a/comm/third_party/libgcrypt/cipher/chacha20-s390x.S b/comm/third_party/libgcrypt/cipher/chacha20-s390x.S new file mode 100644 index 0000000000..9b1d59c6ad --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/chacha20-s390x.S @@ -0,0 +1,1561 @@ +/* chacha20-s390x.S - zSeries implementation of ChaCha20 cipher + * + * Copyright (C) 2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9 +#include <config.h> +#if defined(HAVE_GCC_INLINE_ASM_S390X_VX) + +#include "asm-common-s390x.h" +#include "asm-poly1305-s390x.h" + +.machine "z13+vx" +.text + +.balign 16 +.Lconsts: +.Lwordswap: + .byte 12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3 +.Lbswap128: + .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.Lbswap32: + .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.Lone: + .long 0, 0, 0, 1 +.Ladd_counter_0123: + .long 0, 1, 2, 3 +.Ladd_counter_4567: + .long 4, 5, 6, 7 + +/* register macros */ +#define INPUT %r2 +#define DST %r3 +#define SRC %r4 +#define NBLKS %r0 +#define ROUND %r1 + +/* stack structure */ + +#define STACK_FRAME_STD (8 * 16 + 8 * 4) +#define STACK_FRAME_F8_F15 (8 * 8) +#define STACK_FRAME_Y0_Y15 (16 * 16) +#define STACK_FRAME_CTR (4 * 16) +#define STACK_FRAME_PARAMS (6 * 8) + +#define STACK_MAX (STACK_FRAME_STD + STACK_FRAME_F8_F15 + \ + STACK_FRAME_Y0_Y15 + STACK_FRAME_CTR + \ + STACK_FRAME_PARAMS) + +#define STACK_F8 (STACK_MAX - STACK_FRAME_F8_F15) +#define STACK_F9 (STACK_F8 + 8) +#define STACK_F10 (STACK_F9 + 8) +#define STACK_F11 (STACK_F10 + 8) +#define STACK_F12 (STACK_F11 + 8) +#define STACK_F13 (STACK_F12 + 8) +#define STACK_F14 (STACK_F13 + 8) +#define STACK_F15 (STACK_F14 + 8) +#define STACK_Y0_Y15 (STACK_F8 - STACK_FRAME_Y0_Y15) +#define STACK_CTR (STACK_Y0_Y15 - STACK_FRAME_CTR) +#define STACK_INPUT (STACK_CTR - STACK_FRAME_PARAMS) +#define STACK_DST (STACK_INPUT + 8) +#define STACK_SRC (STACK_DST + 8) +#define STACK_NBLKS (STACK_SRC + 8) +#define STACK_POCTX (STACK_NBLKS + 8) +#define STACK_POSRC (STACK_POCTX + 8) +#define STACK_G0_H3 STACK_Y0_Y15 + +/* vector registers */ +#define A0 %v0 +#define A1 %v1 +#define A2 %v2 +#define A3 %v3 + +#define B0 %v4 +#define B1 %v5 +#define B2 %v6 +#define B3 %v7 + +#define C0 %v8 +#define C1 %v9 +#define C2 %v10 +#define C3 %v11 + +#define D0 %v12 +#define D1 %v13 +#define D2 %v14 +#define D3 %v15 + +#define E0 %v16 +#define E1 %v17 +#define E2 %v18 +#define E3 %v19 + +#define F0 %v20 +#define F1 %v21 +#define F2 %v22
+#define F3 %v23 + +#define G0 %v24 +#define G1 %v25 +#define G2 %v26 +#define G3 %v27 + +#define H0 %v28 +#define H1 %v29 +#define H2 %v30 +#define H3 %v31 + +#define IO0 E0 +#define IO1 E1 +#define IO2 E2 +#define IO3 E3 +#define IO4 F0 +#define IO5 F1 +#define IO6 F2 +#define IO7 F3 + +#define S0 G0 +#define S1 G1 +#define S2 G2 +#define S3 G3 + +#define TMP0 H0 +#define TMP1 H1 +#define TMP2 H2 +#define TMP3 H3 + +#define X0 A0 +#define X1 A1 +#define X2 A2 +#define X3 A3 +#define X4 B0 +#define X5 B1 +#define X6 B2 +#define X7 B3 +#define X8 C0 +#define X9 C1 +#define X10 C2 +#define X11 C3 +#define X12 D0 +#define X13 D1 +#define X14 D2 +#define X15 D3 + +#define Y0 E0 +#define Y1 E1 +#define Y2 E2 +#define Y3 E3 +#define Y4 F0 +#define Y5 F1 +#define Y6 F2 +#define Y7 F3 +#define Y8 G0 +#define Y9 G1 +#define Y10 G2 +#define Y11 G3 +#define Y12 H0 +#define Y13 H1 +#define Y14 H2 +#define Y15 H3 + +/********************************************************************** + helper macros + **********************************************************************/ + +#define _ /*_*/ + +#define CLEAR(x,...) vzero x; + +#define START_STACK(last_r) \ + lgr %r0, %r15; \ + lghi %r1, ~15; \ + stmg %r6, last_r, 6 * 8(%r15); \ + aghi %r0, -STACK_MAX; \ + ngr %r0, %r1; \ + lgr %r1, %r15; \ + CFI_DEF_CFA_REGISTER(1); \ + lgr %r15, %r0; \ + stg %r1, 0(%r15); \ + CFI_CFA_ON_STACK(0, 0); \ + std %f8, STACK_F8(%r15); \ + std %f9, STACK_F9(%r15); \ + std %f10, STACK_F10(%r15); \ + std %f11, STACK_F11(%r15); \ + std %f12, STACK_F12(%r15); \ + std %f13, STACK_F13(%r15); \ + std %f14, STACK_F14(%r15); \ + std %f15, STACK_F15(%r15); + +#define END_STACK(last_r) \ + lg %r1, 0(%r15); \ + ld %f8, STACK_F8(%r15); \ + ld %f9, STACK_F9(%r15); \ + ld %f10, STACK_F10(%r15); \ + ld %f11, STACK_F11(%r15); \ + ld %f12, STACK_F12(%r15); \ + ld %f13, STACK_F13(%r15); \ + ld %f14, STACK_F14(%r15); \ + ld %f15, STACK_F15(%r15); \ + lmg %r6, last_r, 6 * 8(%r1); \ + lgr %r15, %r1; \ + CFI_DEF_CFA_REGISTER(DW_REGNO_SP); + +#define PLUS(dst,src) \ + vaf dst, dst, src; + +#define XOR(dst,src) \ + vx dst, dst, src; + +#define ROTATE(v1,c) \ + verllf v1, v1, (c)(0); + +#define WORD_ROTATE(v1,s) \ + vsldb v1, v1, v1, ((s) * 4); + +#define DST_1(OPER, I, J) \ + OPER(A##I, J); + +#define DST_2(OPER, I, J) \ + OPER(A##I, J); OPER(B##I, J); + +#define DST_4(OPER, I, J) \ + OPER(A##I, J); OPER(B##I, J); OPER(C##I, J); OPER(D##I, J); + +#define DST_8(OPER, I, J) \ + OPER(A##I, J); OPER(B##I, J); OPER(C##I, J); OPER(D##I, J); \ + OPER(E##I, J); OPER(F##I, J); OPER(G##I, J); OPER(H##I, J); + +#define DST_SRC_1(OPER, I, J) \ + OPER(A##I, A##J); + +#define DST_SRC_2(OPER, I, J) \ + OPER(A##I, A##J); OPER(B##I, B##J); + +#define DST_SRC_4(OPER, I, J) \ + OPER(A##I, A##J); OPER(B##I, B##J); OPER(C##I, C##J); \ + OPER(D##I, D##J); + +#define DST_SRC_8(OPER, I, J) \ + OPER(A##I, A##J); OPER(B##I, B##J); OPER(C##I, C##J); \ + OPER(D##I, D##J); OPER(E##I, E##J); OPER(F##I, F##J); \ + OPER(G##I, G##J); OPER(H##I, H##J); + +/********************************************************************** + round macros + **********************************************************************/ + +#define QUARTERROUND4_POLY(wrot_1,wrot_2,wrot_3,op1,op2) \ + op1; DST_SRC_1(PLUS, 0, 1); DST_SRC_1(XOR, 3, 0); DST_1(ROTATE, 3, 16); \ + DST_SRC_1(PLUS, 2, 3); DST_SRC_1(XOR, 1, 2); DST_1(ROTATE, 1, 12); \ + DST_SRC_1(PLUS, 0, 1); DST_SRC_1(XOR, 3, 0); DST_1(ROTATE, 3, 8); \ + op2; DST_SRC_1(PLUS, 2, 3); DST_SRC_1(XOR, 1, 2); DST_1(ROTATE, 1, 7); \ + DST_1(WORD_ROTATE, 
3, wrot_3); \ + DST_1(WORD_ROTATE, 2, wrot_2); \ + DST_1(WORD_ROTATE, 1, wrot_1); + +#define QUARTERROUND4(wrot_1,wrot_2,wrot_3) \ + QUARTERROUND4_POLY(wrot_1,wrot_2,wrot_3,,) + +#define QUARTERROUND4_2_POLY(wrot_1,wrot_2,wrot_3,op1,op2,op3,op4) \ + op1; DST_SRC_2(PLUS, 0, 1); DST_SRC_2(XOR, 3, 0); DST_2(ROTATE, 3, 16); \ + DST_SRC_2(PLUS, 2, 3); op2; DST_SRC_2(XOR, 1, 2); DST_2(ROTATE, 1, 12); \ + DST_SRC_2(PLUS, 0, 1); DST_SRC_2(XOR, 3, 0); op3; DST_2(ROTATE, 3, 8); \ + DST_SRC_2(PLUS, 2, 3); DST_SRC_2(XOR, 1, 2); DST_2(ROTATE, 1, 7); op4; \ + DST_2(WORD_ROTATE, 3, wrot_3); \ + DST_2(WORD_ROTATE, 2, wrot_2); \ + DST_2(WORD_ROTATE, 1, wrot_1); + +#define QUARTERROUND4_2(wrot_1,wrot_2,wrot_3) \ + QUARTERROUND4_2_POLY(wrot_1,wrot_2,wrot_3,,,,) + +#define QUARTERROUND4_4_POLY(wrot_1,wrot_2,wrot_3,op1,op2,op3,op4,op5,op6) \ + DST_SRC_4(PLUS, 0, 1); DST_SRC_4(XOR, 3, 0); op1; DST_4(ROTATE, 3, 16); \ + DST_SRC_4(PLUS, 2, 3); op2; DST_SRC_4(XOR, 1, 2); DST_4(ROTATE, 1, 12); \ + op3; DST_SRC_4(PLUS, 0, 1); DST_SRC_4(XOR, 3, 0); op4; DST_4(ROTATE, 3, 8); \ + DST_SRC_4(PLUS, 2, 3); op5; DST_SRC_4(XOR, 1, 2); DST_4(ROTATE, 1, 7); \ + op6; \ + DST_4(WORD_ROTATE, 3, wrot_3); \ + DST_4(WORD_ROTATE, 2, wrot_2); \ + DST_4(WORD_ROTATE, 1, wrot_1); + +#define QUARTERROUND4_4(wrot_1,wrot_2,wrot_3) \ + QUARTERROUND4_4_POLY(wrot_1,wrot_2,wrot_3,,,,,,) + +/********************************************************************** + 4-way && 2-way && 1-way chacha20 ("horizontal") + **********************************************************************/ + +.balign 8 +.globl _gcry_chacha20_s390x_vx_blocks4_2_1 +ELF(.type _gcry_chacha20_s390x_vx_blocks4_2_1,@function;) + +_gcry_chacha20_s390x_vx_blocks4_2_1: + /* input: + * %r2: input + * %r3: dst + * %r4: src + * %r5: nblks + */ + CFI_STARTPROC(); + + START_STACK(%r7); + lgr NBLKS, %r5; + + /* Load constants. */ + larl %r7, .Lconsts; + vl TMP0, (.Lwordswap - .Lconsts)(%r7); + vl TMP1, (.Lone - .Lconsts)(%r7); + vl TMP2, (.Lbswap128 - .Lconsts)(%r7); + + /* Load state. */ + vlm S0, S3, 0(INPUT); + vperm S0, S0, S0, TMP0; + vperm S1, S1, S1, TMP0; + vperm S2, S2, S2, TMP0; + vperm S3, S3, S3, TMP0; + + clgijl NBLKS, 4, .Lloop2; + +.balign 4 +.Lloop4: + /* Process four chacha20 blocks. */ + vlr TMP3, S3; + lghi ROUND, (20 / 2); + vlr A0, S0; + vlr A1, S1; + vlr A2, S2; + vlr A3, TMP3; + vag TMP3, TMP3, TMP1; + vlr B0, S0; + vlr B1, S1; + vlr B2, S2; + vlr B3, TMP3; + vag TMP3, TMP3, TMP1; + vlr C0, S0; + vlr C1, S1; + vlr C2, S2; + vlr C3, TMP3; + vlr D0, S0; + vlr D1, S1; + vlr D2, S2; + vag D3, TMP3, TMP1; + + slgfi NBLKS, 4; + +.balign 4 +.Lround2_4: + QUARTERROUND4_4(3, 2, 1); + QUARTERROUND4_4(1, 2, 3); + brctg ROUND, .Lround2_4; + + vlm IO0, IO7, 0(SRC); + + PLUS(A0, S0); + PLUS(A1, S1); + PLUS(A2, S2); + PLUS(A3, S3); + vag S3, S3, TMP1; /* Update counter. */ + PLUS(B0, S0); + PLUS(B1, S1); + PLUS(B2, S2); + PLUS(B3, S3); + vag S3, S3, TMP1; /* Update counter. */ + vperm A0, A0, A0, TMP2; + vperm A1, A1, A1, TMP2; + vperm A2, A2, A2, TMP2; + vperm A3, A3, A3, TMP2; + vperm B0, B0, B0, TMP2; + vperm B1, B1, B1, TMP2; + vperm B2, B2, B2, TMP2; + vperm B3, B3, B3, TMP2; + PLUS(C0, S0); + PLUS(C1, S1); + PLUS(C2, S2); + PLUS(C3, S3); + vag S3, S3, TMP1; /* Update counter. */ + PLUS(D0, S0); + PLUS(D1, S1); + PLUS(D2, S2); + PLUS(D3, S3); + vag S3, S3, TMP1; /* Update counter. 
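A note on these counter updates: because the state rows were permuted through .Lwordswap when loaded, words 12 and 13 sit together as one big-endian doubleword lane, so the single 64-bit vag with .Lone appears to step the whole 64-bit block counter, carry included. A hedged C model of what each update computes:

    // illustrative only: the block counter lives in state words 12/13
    uint64_t ctr = ((uint64_t) input[13] << 32) | input[12];
    ctr++;
    input[12] = (uint32_t) ctr;
    input[13] = (uint32_t) (ctr >> 32);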
*/ + vperm C0, C0, C0, TMP2; + vperm C1, C1, C1, TMP2; + vperm C2, C2, C2, TMP2; + vperm C3, C3, C3, TMP2; + vperm D0, D0, D0, TMP2; + vperm D1, D1, D1, TMP2; + vperm D2, D2, D2, TMP2; + vperm D3, D3, D3, TMP2; + + XOR(IO0, A0); + XOR(IO1, A1); + XOR(IO2, A2); + XOR(IO3, A3); + XOR(IO4, B0); + XOR(IO5, B1); + XOR(IO6, B2); + XOR(IO7, B3); + vlm A0, B3, 128(SRC); + vstm IO0, IO7, 0(DST); + XOR(A0, C0); + XOR(A1, C1); + XOR(A2, C2); + XOR(A3, C3); + XOR(B0, D0); + XOR(B1, D1); + XOR(B2, D2); + XOR(B3, D3); + vstm A0, B3, 128(DST); + + aghi SRC, 256; + aghi DST, 256; + + clgijhe NBLKS, 4, .Lloop4; + + CLEAR(C0); + CLEAR(C1); + CLEAR(C2); + CLEAR(C3); + CLEAR(D0); + CLEAR(D1); + CLEAR(D2); + CLEAR(D3); + +.balign 4 +.Lloop2: + clgijl NBLKS, 2, .Lloop1; + + /* Process two chacha20 blocks. */ + lghi ROUND, (20 / 2); + vlr A0, S0; + vlr A1, S1; + vlr A2, S2; + vlr A3, S3; + vlr B0, S0; + vlr B1, S1; + vlr B2, S2; + vag B3, S3, TMP1; + + slgfi NBLKS, 2; + +.balign 4 +.Lround2_2: + QUARTERROUND4_2(3, 2, 1); + QUARTERROUND4_2(1, 2, 3); + brctg ROUND, .Lround2_2; + + vlm IO0, IO7, 0(SRC); + + PLUS(A0, S0); + PLUS(A1, S1); + PLUS(A2, S2); + PLUS(A3, S3); + vag S3, S3, TMP1; /* Update counter. */ + PLUS(B0, S0); + PLUS(B1, S1); + PLUS(B2, S2); + PLUS(B3, S3); + vag S3, S3, TMP1; /* Update counter. */ + vperm A0, A0, A0, TMP2; + vperm A1, A1, A1, TMP2; + vperm A2, A2, A2, TMP2; + vperm A3, A3, A3, TMP2; + vperm B0, B0, B0, TMP2; + vperm B1, B1, B1, TMP2; + vperm B2, B2, B2, TMP2; + vperm B3, B3, B3, TMP2; + + XOR(IO0, A0); + XOR(IO1, A1); + XOR(IO2, A2); + XOR(IO3, A3); + XOR(IO4, B0); + XOR(IO5, B1); + XOR(IO6, B2); + XOR(IO7, B3); + vstm IO0, IO7, 0(DST); + + aghi SRC, 128; + aghi DST, 128; + + clgijhe NBLKS, 2, .Lloop2; + + CLEAR(B0); + CLEAR(B1); + CLEAR(B2); + CLEAR(B3); + +.balign 4 +.Lloop1: + clgijl NBLKS, 1, .Ldone; + + /* Process one chacha20 block.*/ + lghi ROUND, (20 / 2); + vlr A0, S0; + vlr A1, S1; + vlr A2, S2; + vlr A3, S3; + + slgfi NBLKS, 1; + +.balign 4 +.Lround2_1: + QUARTERROUND4(3, 2, 1); + QUARTERROUND4(1, 2, 3); + brct ROUND, .Lround2_1; + + vlm IO0, IO3, 0(SRC); + + PLUS(A0, S0); + PLUS(A1, S1); + PLUS(A2, S2); + PLUS(A3, S3); + vag S3, S3, TMP1; /* Update counter. */ + + vperm A0, A0, A0, TMP2; + vperm A1, A1, A1, TMP2; + vperm A2, A2, A2, TMP2; + vperm A3, A3, A3, TMP2; + XOR(IO0, A0); + XOR(IO1, A1); + XOR(IO2, A2); + XOR(IO3, A3); + vstm IO0, IO3, 0(DST); + + aghi SRC, 64; + aghi DST, 64; + + clgijhe NBLKS, 1, .Lloop1; + +.balign 4 +.Ldone: + /* Store counter. */ + vperm S3, S3, S3, TMP0; + vst S3, (48)(INPUT); + + /* Clear the used vector registers. 
*/ + CLEAR(A0); + CLEAR(A1); + CLEAR(A2); + CLEAR(A3); + CLEAR(IO0); + CLEAR(IO1); + CLEAR(IO2); + CLEAR(IO3); + CLEAR(IO4); + CLEAR(IO5); + CLEAR(IO6); + CLEAR(IO7); + CLEAR(TMP0); + CLEAR(TMP1); + CLEAR(TMP2); + + END_STACK(%r7); + xgr %r2, %r2; + br %r14; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_s390x_vx_blocks4_2_1, + .-_gcry_chacha20_s390x_vx_blocks4_2_1;) + +/********************************************************************** + 4-way && 2-way && 1-way stitched chacha20-poly1305 ("horizontal") + **********************************************************************/ + +.balign 8 +.globl _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1 +ELF(.type _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1,@function;) + +_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1: + /* input: + * %r2: input + * %r3: dst + * %r4: src + * %r5: nblks + * %r6: poly1305 state + * 160(%r15): poly1305 src + */ + CFI_STARTPROC(); + + START_STACK(%r14); + lgr NBLKS, %r5; + + /* Load constants. */ + larl %r8, .Lconsts; + vl TMP0, (.Lwordswap - .Lconsts)(%r8); + vl TMP1, (.Lone - .Lconsts)(%r8); + vl TMP2, (.Lbswap128 - .Lconsts)(%r8); + + /* Load state. */ + vlm S0, S3, 0(INPUT); + vperm S0, S0, S0, TMP0; + vperm S1, S1, S1, TMP0; + vperm S2, S2, S2, TMP0; + vperm S3, S3, S3, TMP0; + + /* Store parameters to stack. */ + stmg %r2, %r6, STACK_INPUT(%r15); + + lgr POLY_RSTATE, %r6; + lgr NBLKS, %r5; + + lg POLY_RSRC, 0(%r15); + lg POLY_RSRC, 160(POLY_RSRC); + stg POLY_RSRC, STACK_POSRC(%r15); + + /* Load poly1305 state */ + POLY1305_LOAD_STATE(); + + clgijl NBLKS, 4, .Lloop2_poly; + +.balign 4 +.Lloop4_poly: + /* Process four chacha20 blocks and 16 poly1305 blocks. */ + vlr TMP3, S3; + lghi ROUND, (20 / 4); + vlr A0, S0; + vlr A1, S1; + vlr A2, S2; + vlr A3, TMP3; + vag TMP3, TMP3, TMP1; + vlr B0, S0; + vlr B1, S1; + vlr B2, S2; + vlr B3, TMP3; + vag TMP3, TMP3, TMP1; + vlr C0, S0; + vlr C1, S1; + vlr C2, S2; + vlr C3, TMP3; + vlr D0, S0; + vlr D1, S1; + vlr D2, S2; + vag D3, TMP3, TMP1; + + slgfi NBLKS, 4; + +.balign 4 +.Lround4_4_poly: + /* Total 15 poly1305 blocks processed by this loop. */ + QUARTERROUND4_4_POLY(3, 2, 1, + POLY1305_BLOCK_PART1(0 * 16), + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6()); + QUARTERROUND4_4_POLY(1, 2, 3, + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8(), + POLY1305_BLOCK_PART1(1 * 16), + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4()); + QUARTERROUND4_4_POLY(3, 2, 1, + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6(), + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8(), + POLY1305_BLOCK_PART1(2 * 16); + INC_POLY1305_SRC(3 * 16), + POLY1305_BLOCK_PART2()); + QUARTERROUND4_4_POLY(1, 2, 3, + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6(), + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8()); + brctg ROUND, .Lround4_4_poly; + + POLY1305_BLOCK_PART1(0 * 16); + INC_POLY1305_SRC(1 * 16); + stg POLY_RSRC, STACK_POSRC(%r15); + + lg %r14, STACK_SRC(%r15); + vlm IO0, IO7, 0(%r14); + + PLUS(A0, S0); + PLUS(A1, S1); + PLUS(A2, S2); + PLUS(A3, S3); + vag S3, S3, TMP1; /* Update counter. */ + POLY1305_BLOCK_PART2(); + PLUS(B0, S0); + PLUS(B1, S1); + PLUS(B2, S2); + PLUS(B3, S3); + vag S3, S3, TMP1; /* Update counter. 
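Before the stitched loop begins, note what START_STACK(%r14) above prepared: the macro rounds the new stack pointer down to a 16-byte boundary (the aghi/ngr pair with ~15) so whole-vector spills via vstm/vlm stay aligned, and it saves the callee-saved FPRs %f8-%f15 as the s390x ELF ABI requires; the stmg of %r2-%r6 then parks the parameters in the STACK_* slots because %r14 is about to be repurposed as a scratch pointer. A rough C model of just the alignment step, under those assumptions:

    #include <stdint.h>

    // sketch of "aghi %r0, -STACK_MAX; ngr %r0, %r1" with %r1 = ~15
    static uintptr_t
    carve_frame (uintptr_t old_sp, uintptr_t stack_max)
    {
      return (old_sp - stack_max) & ~(uintptr_t) 15;  // align down to 16
    }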
*/ + POLY1305_BLOCK_PART3(); + vperm A0, A0, A0, TMP2; + vperm A1, A1, A1, TMP2; + vperm A2, A2, A2, TMP2; + vperm A3, A3, A3, TMP2; + vperm B0, B0, B0, TMP2; + vperm B1, B1, B1, TMP2; + vperm B2, B2, B2, TMP2; + vperm B3, B3, B3, TMP2; + POLY1305_BLOCK_PART4(); + PLUS(C0, S0); + PLUS(C1, S1); + PLUS(C2, S2); + PLUS(C3, S3); + vag S3, S3, TMP1; /* Update counter. */ + PLUS(D0, S0); + PLUS(D1, S1); + PLUS(D2, S2); + PLUS(D3, S3); + vag S3, S3, TMP1; /* Update counter. */ + POLY1305_BLOCK_PART5(); + vperm C0, C0, C0, TMP2; + vperm C1, C1, C1, TMP2; + vperm C2, C2, C2, TMP2; + vperm C3, C3, C3, TMP2; + vperm D0, D0, D0, TMP2; + vperm D1, D1, D1, TMP2; + vperm D2, D2, D2, TMP2; + vperm D3, D3, D3, TMP2; + + POLY1305_BLOCK_PART6(); + XOR(IO0, A0); + XOR(IO1, A1); + XOR(IO2, A2); + XOR(IO3, A3); + XOR(IO4, B0); + XOR(IO5, B1); + XOR(IO6, B2); + XOR(IO7, B3); + vlm A0, B3, 128(%r14); + aghi %r14, 256; + stg %r14, STACK_SRC(%r15); + + lg %r14, STACK_DST(%r15); + POLY1305_BLOCK_PART7(); + vstm IO0, IO7, 0(%r14); + XOR(A0, C0); + XOR(A1, C1); + XOR(A2, C2); + XOR(A3, C3); + XOR(B0, D0); + XOR(B1, D1); + XOR(B2, D2); + XOR(B3, D3); + POLY1305_BLOCK_PART8(); + vstm A0, B3, 128(%r14); + aghi %r14, 256; + stg %r14, STACK_DST(%r15); + + lg POLY_RSRC, STACK_POSRC(%r15); + + clgijhe NBLKS, 4, .Lloop4_poly; + + CLEAR(C0); + CLEAR(C1); + CLEAR(C2); + CLEAR(C3); + CLEAR(D0); + CLEAR(D1); + CLEAR(D2); + CLEAR(D3); + +.balign 4 +.Lloop2_poly: + clgijl NBLKS, 2, .Lloop1_poly; + + /* Process two chacha20 and eight poly1305 blocks. */ + lghi ROUND, ((20 - 4) / 2); + vlr A0, S0; + vlr A1, S1; + vlr A2, S2; + vlr A3, S3; + vlr B0, S0; + vlr B1, S1; + vlr B2, S2; + vag B3, S3, TMP1; + + slgfi NBLKS, 2; + +.balign 4 +.Lround4_2_poly: + /* Total eight poly1305 blocks processed by this loop. */ + QUARTERROUND4_2_POLY(3, 2, 1, + POLY1305_BLOCK_PART1(0 * 16), + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4()); + INC_POLY1305_SRC(1 * 16); + QUARTERROUND4_2_POLY(1, 2, 3, + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6(), + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8()); + brctg ROUND, .Lround4_2_poly; + + stg POLY_RSRC, STACK_POSRC(%r15); + lg %r14, STACK_SRC(%r15); + + QUARTERROUND4_2(3, 2, 1); + QUARTERROUND4_2(1, 2, 3); + QUARTERROUND4_2(3, 2, 1); + QUARTERROUND4_2(1, 2, 3); + + vlm IO0, IO7, 0(%r14); + aghi %r14, 128; + stg %r14, STACK_SRC(%r15); + + PLUS(A0, S0); + PLUS(A1, S1); + PLUS(A2, S2); + PLUS(A3, S3); + vag S3, S3, TMP1; /* Update counter. */ + PLUS(B0, S0); + PLUS(B1, S1); + PLUS(B2, S2); + PLUS(B3, S3); + vag S3, S3, TMP1; /* Update counter. */ + vperm A0, A0, A0, TMP2; + vperm A1, A1, A1, TMP2; + vperm A2, A2, A2, TMP2; + vperm A3, A3, A3, TMP2; + vperm B0, B0, B0, TMP2; + vperm B1, B1, B1, TMP2; + vperm B2, B2, B2, TMP2; + vperm B3, B3, B3, TMP2; + + lg %r14, STACK_DST(%r15); + XOR(IO0, A0); + XOR(IO1, A1); + XOR(IO2, A2); + XOR(IO3, A3); + XOR(IO4, B0); + XOR(IO5, B1); + XOR(IO6, B2); + XOR(IO7, B3); + vstm IO0, IO7, 0(%r14); + aghi %r14, 128; + stg %r14, STACK_DST(%r15); + + lg POLY_RSRC, STACK_POSRC(%r15); + + clgijhe NBLKS, 2, .Lloop2_poly; + + CLEAR(B0); + CLEAR(B1); + CLEAR(B2); + CLEAR(B3); + +.balign 4 +.Lloop1_poly: + clgijl NBLKS, 1, .Ldone_poly; + + /* Process one chacha20 block and four poly1305 blocks.*/ + lghi ROUND, ((20 - 4) / 4); + vlr A0, S0; + vlr A1, S1; + vlr A2, S2; + vlr A3, S3; + + slgfi NBLKS, 1; + +.balign 4 +.Lround4_1_poly: + /* Total four poly1305 blocks processed by this loop. 
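This interleaving is the point of the "stitched" variants: the POLY1305_BLOCK_PART1..PART8 pieces run on the scalar/general-purpose side while the quarterrounds occupy the vector unit, so both pipelines stay busy instead of the two primitives running back to back. A schematic C-level sketch of that schedule, with hypothetical stand-in functions (the real macros split poly1305 much finer):

    #include <stdint.h>

    // hypothetical stand-ins for the real assembler macros
    static void poly1305_part_a (const uint8_t *src) { (void) src; }  // load + multiply
    static void poly1305_part_b (void) { }                            // carry/reduce
    static void chacha_column_round (uint32_t st[16]) { (void) st; }
    static void chacha_diagonal_round (uint32_t st[16]) { (void) st; }

    static void
    stitched_rounds (uint32_t st[16], const uint8_t *poly_src, int double_rounds)
    {
      for (int r = 0; r < double_rounds; r++)
        {
          poly1305_part_a (poly_src);    // scalar pipe
          chacha_column_round (st);      // vector pipe
          poly1305_part_b ();            // scalar pipe
          chacha_diagonal_round (st);    // vector pipe
          poly_src += 16;                // one 16-byte poly1305 block
        }
    }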
*/ + QUARTERROUND4_POLY(3, 2, 1, + POLY1305_BLOCK_PART1(0 * 16), + POLY1305_BLOCK_PART2()); + INC_POLY1305_SRC(1 * 16); + QUARTERROUND4_POLY(1, 2, 3, + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4()); + QUARTERROUND4_POLY(3, 2, 1, + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6()); + QUARTERROUND4_POLY(1, 2, 3, + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8()); + brct ROUND, .Lround4_1_poly; + + stg POLY_RSRC, STACK_POSRC(%r15); + lg %r14, STACK_SRC(%r15); + + QUARTERROUND4(3, 2, 1); + QUARTERROUND4(1, 2, 3); + QUARTERROUND4(3, 2, 1); + QUARTERROUND4(1, 2, 3); + + vlm IO0, IO3, 0(%r14); + aghi %r14, 64; + stg %r14, STACK_SRC(%r15); + + PLUS(A0, S0); + PLUS(A1, S1); + PLUS(A2, S2); + PLUS(A3, S3); + vag S3, S3, TMP1; /* Update counter. */ + + lg %r14, STACK_DST(%r15); + vperm A0, A0, A0, TMP2; + vperm A1, A1, A1, TMP2; + vperm A2, A2, A2, TMP2; + vperm A3, A3, A3, TMP2; + XOR(IO0, A0); + XOR(IO1, A1); + XOR(IO2, A2); + XOR(IO3, A3); + vstm IO0, IO3, 0(%r14); + aghi %r14, 64; + stg %r14, STACK_DST(%r15); + + lg POLY_RSRC, STACK_POSRC(%r15); + + clgijhe NBLKS, 1, .Lloop1_poly; + +.balign 4 +.Ldone_poly: + /* Store poly1305 state */ + lg POLY_RSTATE, STACK_POCTX(%r15); + POLY1305_STORE_STATE(); + + /* Store counter. */ + lg INPUT, STACK_INPUT(%r15); + vperm S3, S3, S3, TMP0; + vst S3, (48)(INPUT); + + /* Clear the used vector registers. */ + CLEAR(A0); + CLEAR(A1); + CLEAR(A2); + CLEAR(A3); + CLEAR(IO0); + CLEAR(IO1); + CLEAR(IO2); + CLEAR(IO3); + CLEAR(IO4); + CLEAR(IO5); + CLEAR(IO6); + CLEAR(IO7); + CLEAR(TMP0); + CLEAR(TMP1); + CLEAR(TMP2); + + END_STACK(%r14); + xgr %r2, %r2; + br %r14; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1, + .-_gcry_chacha20_poly1305_s390x_vx_blocks4_2_1;) + +/********************************************************************** + 8-way chacha20 ("vertical") + **********************************************************************/ + +#define QUARTERROUND4_V8_POLY(x0,x1,x2,x3,x4,x5,x6,x7,\ + x8,x9,x10,x11,x12,x13,x14,x15,\ + y0,y1,y2,y3,y4,y5,y6,y7,\ + y8,y9,y10,y11,y12,y13,y14,y15,\ + op1,op2,op3,op4,op5,op6,op7,op8,\ + op9,op10,op11,op12) \ + op1; \ + PLUS(x0, x1); PLUS(x4, x5); \ + PLUS(x8, x9); PLUS(x12, x13); \ + PLUS(y0, y1); PLUS(y4, y5); \ + PLUS(y8, y9); PLUS(y12, y13); \ + op2; \ + XOR(x3, x0); XOR(x7, x4); \ + XOR(x11, x8); XOR(x15, x12); \ + XOR(y3, y0); XOR(y7, y4); \ + XOR(y11, y8); XOR(y15, y12); \ + op3; \ + ROTATE(x3, 16); ROTATE(x7, 16); \ + ROTATE(x11, 16); ROTATE(x15, 16); \ + ROTATE(y3, 16); ROTATE(y7, 16); \ + ROTATE(y11, 16); ROTATE(y15, 16); \ + op4; \ + PLUS(x2, x3); PLUS(x6, x7); \ + PLUS(x10, x11); PLUS(x14, x15); \ + PLUS(y2, y3); PLUS(y6, y7); \ + PLUS(y10, y11); PLUS(y14, y15); \ + op5; \ + XOR(x1, x2); XOR(x5, x6); \ + XOR(x9, x10); XOR(x13, x14); \ + XOR(y1, y2); XOR(y5, y6); \ + XOR(y9, y10); XOR(y13, y14); \ + op6; \ + ROTATE(x1,12); ROTATE(x5,12); \ + ROTATE(x9,12); ROTATE(x13,12); \ + ROTATE(y1,12); ROTATE(y5,12); \ + ROTATE(y9,12); ROTATE(y13,12); \ + op7; \ + PLUS(x0, x1); PLUS(x4, x5); \ + PLUS(x8, x9); PLUS(x12, x13); \ + PLUS(y0, y1); PLUS(y4, y5); \ + PLUS(y8, y9); PLUS(y12, y13); \ + op8; \ + XOR(x3, x0); XOR(x7, x4); \ + XOR(x11, x8); XOR(x15, x12); \ + XOR(y3, y0); XOR(y7, y4); \ + XOR(y11, y8); XOR(y15, y12); \ + op9; \ + ROTATE(x3,8); ROTATE(x7,8); \ + ROTATE(x11,8); ROTATE(x15,8); \ + ROTATE(y3,8); ROTATE(y7,8); \ + ROTATE(y11,8); ROTATE(y15,8); \ + op10; \ + PLUS(x2, x3); PLUS(x6, x7); \ + PLUS(x10, x11); PLUS(x14, x15); \ + PLUS(y2, y3); PLUS(y6, y7); \ + PLUS(y10, y11); PLUS(y14, 
y15); \ + op11; \ + XOR(x1, x2); XOR(x5, x6); \ + XOR(x9, x10); XOR(x13, x14); \ + XOR(y1, y2); XOR(y5, y6); \ + XOR(y9, y10); XOR(y13, y14); \ + op12; \ + ROTATE(x1,7); ROTATE(x5,7); \ + ROTATE(x9,7); ROTATE(x13,7); \ + ROTATE(y1,7); ROTATE(y5,7); \ + ROTATE(y9,7); ROTATE(y13,7); + +#define QUARTERROUND4_V8(x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15,\ + y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15) \ + QUARTERROUND4_V8_POLY(x0,x1,x2,x3,x4,x5,x6,x7,\ + x8,x9,x10,x11,x12,x13,x14,x15,\ + y0,y1,y2,y3,y4,y5,y6,y7,\ + y8,y9,y10,y11,y12,y13,y14,y15,\ + ,,,,,,,,,,,) + +#define TRANSPOSE_4X4_2(v0,v1,v2,v3,va,vb,vc,vd,tmp0,tmp1,tmp2,tmpa,tmpb,tmpc) \ + vmrhf tmp0, v0, v1; \ + vmrhf tmp1, v2, v3; \ + vmrlf tmp2, v0, v1; \ + vmrlf v3, v2, v3; \ + vmrhf tmpa, va, vb; \ + vmrhf tmpb, vc, vd; \ + vmrlf tmpc, va, vb; \ + vmrlf vd, vc, vd; \ + vpdi v0, tmp0, tmp1, 0; \ + vpdi v1, tmp0, tmp1, 5; \ + vpdi v2, tmp2, v3, 0; \ + vpdi v3, tmp2, v3, 5; \ + vpdi va, tmpa, tmpb, 0; \ + vpdi vb, tmpa, tmpb, 5; \ + vpdi vc, tmpc, vd, 0; \ + vpdi vd, tmpc, vd, 5; + +.balign 8 +.globl _gcry_chacha20_s390x_vx_blocks8 +ELF(.type _gcry_chacha20_s390x_vx_blocks8,@function;) + +_gcry_chacha20_s390x_vx_blocks8: + /* input: + * %r2: input + * %r3: dst + * %r4: src + * %r5: nblks (multiple of 8) + */ + CFI_STARTPROC(); + + START_STACK(%r8); + lgr NBLKS, %r5; + + larl %r7, .Lconsts; + + /* Load counter. */ + lg %r8, (12 * 4)(INPUT); + rllg %r8, %r8, 32; + +.balign 4 + /* Process eight chacha20 blocks per loop. */ +.Lloop8: + vlm Y0, Y3, 0(INPUT); + + slgfi NBLKS, 8; + lghi ROUND, (20 / 2); + + /* Construct counter vectors X12/X13 & Y12/Y13. */ + vl X4, (.Ladd_counter_0123 - .Lconsts)(%r7); + vl Y4, (.Ladd_counter_4567 - .Lconsts)(%r7); + vrepf Y12, Y3, 0; + vrepf Y13, Y3, 1; + vaccf X5, Y12, X4; + vaccf Y5, Y12, Y4; + vaf X12, Y12, X4; + vaf Y12, Y12, Y4; + vaf X13, Y13, X5; + vaf Y13, Y13, Y5; + + vrepf X0, Y0, 0; + vrepf X1, Y0, 1; + vrepf X2, Y0, 2; + vrepf X3, Y0, 3; + vrepf X4, Y1, 0; + vrepf X5, Y1, 1; + vrepf X6, Y1, 2; + vrepf X7, Y1, 3; + vrepf X8, Y2, 0; + vrepf X9, Y2, 1; + vrepf X10, Y2, 2; + vrepf X11, Y2, 3; + vrepf X14, Y3, 2; + vrepf X15, Y3, 3; + + /* Store counters for blocks 0-7. */ + vstm X12, X13, (STACK_CTR + 0 * 16)(%r15); + vstm Y12, Y13, (STACK_CTR + 2 * 16)(%r15); + + vlr Y0, X0; + vlr Y1, X1; + vlr Y2, X2; + vlr Y3, X3; + vlr Y4, X4; + vlr Y5, X5; + vlr Y6, X6; + vlr Y7, X7; + vlr Y8, X8; + vlr Y9, X9; + vlr Y10, X10; + vlr Y11, X11; + vlr Y14, X14; + vlr Y15, X15; + + /* Update and store counter. */ + agfi %r8, 8; + rllg %r5, %r8, 32; + stg %r5, (12 * 4)(INPUT); + +.balign 4 +.Lround2_8: + QUARTERROUND4_V8(X0, X4, X8, X12, X1, X5, X9, X13, + X2, X6, X10, X14, X3, X7, X11, X15, + Y0, Y4, Y8, Y12, Y1, Y5, Y9, Y13, + Y2, Y6, Y10, Y14, Y3, Y7, Y11, Y15); + QUARTERROUND4_V8(X0, X5, X10, X15, X1, X6, X11, X12, + X2, X7, X8, X13, X3, X4, X9, X14, + Y0, Y5, Y10, Y15, Y1, Y6, Y11, Y12, + Y2, Y7, Y8, Y13, Y3, Y4, Y9, Y14); + brctg ROUND, .Lround2_8; + + /* Store blocks 4-7. */ + vstm Y0, Y15, STACK_Y0_Y15(%r15); + + /* Load counters for blocks 0-3. */ + vlm Y0, Y1, (STACK_CTR + 0 * 16)(%r15); + + lghi ROUND, 1; + j .Lfirst_output_4blks_8; + +.balign 4 +.Lsecond_output_4blks_8: + /* Load blocks 4-7. */ + vlm X0, X15, STACK_Y0_Y15(%r15); + + /* Load counters for blocks 4-7. */ + vlm Y0, Y1, (STACK_CTR + 2 * 16)(%r15); + + lghi ROUND, 0; + +.balign 4 + /* Output four chacha20 blocks per loop. 
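TRANSPOSE_4X4_2 above exists because the vertical kernels keep the same state word from several blocks in one vector register; before output, each 4x4 matrix of 32-bit words must be transposed so every block becomes contiguous again, and the vmrhf/vmrlf merges plus vpdi doubleword permutes implement exactly that transpose, two matrices at a time. The plain C equivalent for a single matrix:

    #include <stdint.h>

    // transpose a 4x4 matrix of 32-bit words in place
    static void
    transpose_4x4_words (uint32_t m[4][4])
    {
      for (int i = 0; i < 4; i++)
        for (int j = i + 1; j < 4; j++)
          {
            uint32_t t = m[i][j];
            m[i][j] = m[j][i];
            m[j][i] = t;
          }
    }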
*/ +.Lfirst_output_4blks_8: + vlm Y12, Y15, 0(INPUT); + PLUS(X12, Y0); + PLUS(X13, Y1); + vrepf Y0, Y12, 0; + vrepf Y1, Y12, 1; + vrepf Y2, Y12, 2; + vrepf Y3, Y12, 3; + vrepf Y4, Y13, 0; + vrepf Y5, Y13, 1; + vrepf Y6, Y13, 2; + vrepf Y7, Y13, 3; + vrepf Y8, Y14, 0; + vrepf Y9, Y14, 1; + vrepf Y10, Y14, 2; + vrepf Y11, Y14, 3; + vrepf Y14, Y15, 2; + vrepf Y15, Y15, 3; + PLUS(X0, Y0); + PLUS(X1, Y1); + PLUS(X2, Y2); + PLUS(X3, Y3); + PLUS(X4, Y4); + PLUS(X5, Y5); + PLUS(X6, Y6); + PLUS(X7, Y7); + PLUS(X8, Y8); + PLUS(X9, Y9); + PLUS(X10, Y10); + PLUS(X11, Y11); + PLUS(X14, Y14); + PLUS(X15, Y15); + + vl Y15, (.Lbswap32 - .Lconsts)(%r7); + TRANSPOSE_4X4_2(X0, X1, X2, X3, X4, X5, X6, X7, + Y9, Y10, Y11, Y12, Y13, Y14); + TRANSPOSE_4X4_2(X8, X9, X10, X11, X12, X13, X14, X15, + Y9, Y10, Y11, Y12, Y13, Y14); + + vlm Y0, Y14, 0(SRC); + vperm X0, X0, X0, Y15; + vperm X1, X1, X1, Y15; + vperm X2, X2, X2, Y15; + vperm X3, X3, X3, Y15; + vperm X4, X4, X4, Y15; + vperm X5, X5, X5, Y15; + vperm X6, X6, X6, Y15; + vperm X7, X7, X7, Y15; + vperm X8, X8, X8, Y15; + vperm X9, X9, X9, Y15; + vperm X10, X10, X10, Y15; + vperm X11, X11, X11, Y15; + vperm X12, X12, X12, Y15; + vperm X13, X13, X13, Y15; + vperm X14, X14, X14, Y15; + vperm X15, X15, X15, Y15; + vl Y15, (15 * 16)(SRC); + + XOR(Y0, X0); + XOR(Y1, X4); + XOR(Y2, X8); + XOR(Y3, X12); + XOR(Y4, X1); + XOR(Y5, X5); + XOR(Y6, X9); + XOR(Y7, X13); + XOR(Y8, X2); + XOR(Y9, X6); + XOR(Y10, X10); + XOR(Y11, X14); + XOR(Y12, X3); + XOR(Y13, X7); + XOR(Y14, X11); + XOR(Y15, X15); + vstm Y0, Y15, 0(DST); + + aghi SRC, 256; + aghi DST, 256; + + clgije ROUND, 1, .Lsecond_output_4blks_8; + + clgijhe NBLKS, 8, .Lloop8; + + /* Clear the used vector registers. */ + DST_8(CLEAR, 0, _); + DST_8(CLEAR, 1, _); + DST_8(CLEAR, 2, _); + DST_8(CLEAR, 3, _); + + /* Clear sensitive data in stack. */ + vlm Y0, Y15, STACK_Y0_Y15(%r15); + vlm Y0, Y3, STACK_CTR(%r15); + + END_STACK(%r8); + xgr %r2, %r2; + br %r14; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_s390x_vx_blocks8, + .-_gcry_chacha20_s390x_vx_blocks8;) + +/********************************************************************** + 8-way stitched chacha20-poly1305 ("vertical") + **********************************************************************/ + +.balign 8 +.globl _gcry_chacha20_poly1305_s390x_vx_blocks8 +ELF(.type _gcry_chacha20_poly1305_s390x_vx_blocks8,@function;) + +_gcry_chacha20_poly1305_s390x_vx_blocks8: + /* input: + * %r2: input + * %r3: dst + * %r4: src + * %r5: nblks (multiple of 8) + * %r6: poly1305 state + * 160(%r15): poly1305 src + */ + CFI_STARTPROC(); + + START_STACK(%r14); + + /* Store parameters to stack. */ + stmg %r2, %r6, STACK_INPUT(%r15); + + lgr POLY_RSTATE, %r6; + lgr NBLKS, %r5; + + lg POLY_RSRC, 0(%r15); + lg POLY_RSRC, 160(POLY_RSRC); + stg POLY_RSRC, STACK_POSRC(%r15); + + /* Load poly1305 state */ + POLY1305_LOAD_STATE(); + +.balign 4 + /* Process eight chacha20 blocks and 32 poly1305 blocks per loop. */ +.Lloop8_poly: + lg INPUT, STACK_INPUT(%r15); + larl %r8, .Lconsts; + + vlm Y0, Y3, 0(INPUT); + + slgfi NBLKS, 8; + lghi ROUND, (20 / 2); + + /* Construct counter vectors X12/X13 & Y12/Y13. */ + vl X4, (.Ladd_counter_0123 - .Lconsts)(%r8); + vl Y4, (.Ladd_counter_4567 - .Lconsts)(%r8); + lg %r8, (12 * 4)(INPUT); /* Update counter. 
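The counter-vector construction above (vaf plus vaccf) gives each of the eight blocks its own 64-bit counter without leaving the vector unit: vaf adds the lane offsets {0..3}/{4..7} to the low counter word, vaccf records each lane's carry bit, and a second vaf folds the carries into the high words. An illustrative scalar model for one set of four lanes:

    #include <stdint.h>

    static void
    make_lane_counters (uint32_t ctr_lo, uint32_t ctr_hi,
                        uint32_t lo[4], uint32_t hi[4])
    {
      for (uint32_t i = 0; i < 4; i++)
        {
          lo[i] = ctr_lo + i;                 // vaf: 32-bit lane add
          hi[i] = ctr_hi + (lo[i] < ctr_lo);  // vaccf carry, vaf fold-in
        }
    }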
*/ + vrepf Y12, Y3, 0; + vrepf Y13, Y3, 1; + vaccf X5, Y12, X4; + vaccf Y5, Y12, Y4; + vaf X12, Y12, X4; + vaf Y12, Y12, Y4; + vaf X13, Y13, X5; + vaf Y13, Y13, Y5; + rllg %r8, %r8, 32; + + vrepf X0, Y0, 0; + vrepf X1, Y0, 1; + vrepf X2, Y0, 2; + vrepf X3, Y0, 3; + vrepf X4, Y1, 0; + vrepf X5, Y1, 1; + vrepf X6, Y1, 2; + vrepf X7, Y1, 3; + vrepf X8, Y2, 0; + vrepf X9, Y2, 1; + vrepf X10, Y2, 2; + vrepf X11, Y2, 3; + vrepf X14, Y3, 2; + vrepf X15, Y3, 3; + agfi %r8, 8; + + /* Store counters for blocks 0-7. */ + vstm X12, X13, (STACK_CTR + 0 * 16)(%r15); + vstm Y12, Y13, (STACK_CTR + 2 * 16)(%r15); + rllg %r8, %r8, 32; + + vlr Y0, X0; + vlr Y1, X1; + vlr Y2, X2; + vlr Y3, X3; + vlr Y4, X4; + vlr Y5, X5; + vlr Y6, X6; + vlr Y7, X7; + vlr Y8, X8; + vlr Y9, X9; + vlr Y10, X10; + vlr Y11, X11; + vlr Y14, X14; + vlr Y15, X15; + stg %r8, (12 * 4)(INPUT); + +.balign 4 +.Lround2_8_poly: + /* Total 30 poly1305 blocks processed by this loop. */ + QUARTERROUND4_V8_POLY(X0, X4, X8, X12, X1, X5, X9, X13, + X2, X6, X10, X14, X3, X7, X11, X15, + Y0, Y4, Y8, Y12, Y1, Y5, Y9, Y13, + Y2, Y6, Y10, Y14, Y3, Y7, Y11, Y15, + POLY1305_BLOCK_PART1(0 * 16), + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6(), + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8(), + POLY1305_BLOCK_PART1(1 * 16), + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4()); + QUARTERROUND4_V8_POLY(X0, X5, X10, X15, X1, X6, X11, X12, + X2, X7, X8, X13, X3, X4, X9, X14, + Y0, Y5, Y10, Y15, Y1, Y6, Y11, Y12, + Y2, Y7, Y8, Y13, Y3, Y4, Y9, Y14, + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6(), + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8(), + POLY1305_BLOCK_PART1(2 * 16); + INC_POLY1305_SRC(3 * 16), + POLY1305_BLOCK_PART2(), + POLY1305_BLOCK_PART3(), + POLY1305_BLOCK_PART4(), + POLY1305_BLOCK_PART5(), + POLY1305_BLOCK_PART6(), + POLY1305_BLOCK_PART7(), + POLY1305_BLOCK_PART8()); + brctg ROUND, .Lround2_8_poly; + + POLY1305_BLOCK_PART1(0 * 16); + + /* Store blocks 4-7. */ + vstm Y0, Y15, STACK_Y0_Y15(%r15); + + /* Load counters for blocks 0-3. */ + vlm Y0, Y1, (STACK_CTR + 0 * 16)(%r15); + + stg POLY_RSRC, STACK_POSRC(%r15); /* %r14 used for INPUT/SRC/DST pointer. */ + + lghi ROUND, 1; + j .Lfirst_output_4blks_8_poly; + +.balign 4 +.Lsecond_output_4blks_8_poly: + + POLY1305_BLOCK_PART1(1 * 16); + + /* Load blocks 4-7. */ + vlm X0, X15, STACK_Y0_Y15(%r15); + + /* Load counters for blocks 4-7. */ + vlm Y0, Y1, (STACK_CTR + 2 * 16)(%r15); + + INC_POLY1305_SRC(2 * 16); + stg POLY_RSRC, STACK_POSRC(%r15); /* %r14 used for INPUT/SRC/DST pointer. */ + + lghi ROUND, 0; + +.balign 4 + /* Output four chacha20 blocks and one poly1305 block per loop. 
*/ +.Lfirst_output_4blks_8_poly: + lg %r14, STACK_INPUT(%r15); + vlm Y12, Y15, 0(%r14); + POLY1305_BLOCK_PART2(); + PLUS(X12, Y0); + PLUS(X13, Y1); + vrepf Y0, Y12, 0; + vrepf Y1, Y12, 1; + vrepf Y2, Y12, 2; + vrepf Y3, Y12, 3; + vrepf Y4, Y13, 0; + vrepf Y5, Y13, 1; + vrepf Y6, Y13, 2; + vrepf Y7, Y13, 3; + vrepf Y8, Y14, 0; + vrepf Y9, Y14, 1; + vrepf Y10, Y14, 2; + vrepf Y11, Y14, 3; + vrepf Y14, Y15, 2; + vrepf Y15, Y15, 3; + POLY1305_BLOCK_PART3(); + PLUS(X0, Y0); + PLUS(X1, Y1); + PLUS(X2, Y2); + PLUS(X3, Y3); + PLUS(X4, Y4); + PLUS(X5, Y5); + PLUS(X6, Y6); + PLUS(X7, Y7); + PLUS(X8, Y8); + PLUS(X9, Y9); + PLUS(X10, Y10); + PLUS(X11, Y11); + PLUS(X14, Y14); + PLUS(X15, Y15); + POLY1305_BLOCK_PART4(); + + larl %r14, .Lconsts; + vl Y15, (.Lbswap32 - .Lconsts)(%r14); + TRANSPOSE_4X4_2(X0, X1, X2, X3, X4, X5, X6, X7, + Y9, Y10, Y11, Y12, Y13, Y14); + lg %r14, STACK_SRC(%r15); + POLY1305_BLOCK_PART5(); + TRANSPOSE_4X4_2(X8, X9, X10, X11, X12, X13, X14, X15, + Y9, Y10, Y11, Y12, Y13, Y14); + + vlm Y0, Y14, 0(%r14); + POLY1305_BLOCK_PART6(); + vperm X0, X0, X0, Y15; + vperm X1, X1, X1, Y15; + vperm X2, X2, X2, Y15; + vperm X3, X3, X3, Y15; + vperm X4, X4, X4, Y15; + vperm X5, X5, X5, Y15; + vperm X6, X6, X6, Y15; + vperm X7, X7, X7, Y15; + vperm X8, X8, X8, Y15; + vperm X9, X9, X9, Y15; + vperm X10, X10, X10, Y15; + vperm X11, X11, X11, Y15; + vperm X12, X12, X12, Y15; + vperm X13, X13, X13, Y15; + vperm X14, X14, X14, Y15; + vperm X15, X15, X15, Y15; + vl Y15, (15 * 16)(%r14); + POLY1305_BLOCK_PART7(); + + aghi %r14, 256; + stg %r14, STACK_SRC(%r15); + lg %r14, STACK_DST(%r15); + + XOR(Y0, X0); + XOR(Y1, X4); + XOR(Y2, X8); + XOR(Y3, X12); + XOR(Y4, X1); + XOR(Y5, X5); + XOR(Y6, X9); + XOR(Y7, X13); + XOR(Y8, X2); + XOR(Y9, X6); + XOR(Y10, X10); + XOR(Y11, X14); + XOR(Y12, X3); + XOR(Y13, X7); + XOR(Y14, X11); + XOR(Y15, X15); + POLY1305_BLOCK_PART8(); + vstm Y0, Y15, 0(%r14); + + aghi %r14, 256; + stg %r14, STACK_DST(%r15); + + lg POLY_RSRC, STACK_POSRC(%r15); + + clgije ROUND, 1, .Lsecond_output_4blks_8_poly; + + clgijhe NBLKS, 8, .Lloop8_poly; + + /* Store poly1305 state */ + lg POLY_RSTATE, STACK_POCTX(%r15); + POLY1305_STORE_STATE(); + + /* Clear the used vector registers */ + DST_8(CLEAR, 0, _); + DST_8(CLEAR, 1, _); + DST_8(CLEAR, 2, _); + DST_8(CLEAR, 3, _); + + /* Clear sensitive data in stack. */ + vlm Y0, Y15, STACK_Y0_Y15(%r15); + vlm Y0, Y3, STACK_CTR(%r15); + + END_STACK(%r14); + xgr %r2, %r2; + br %r14; + CFI_ENDPROC(); +ELF(.size _gcry_chacha20_poly1305_s390x_vx_blocks8, + .-_gcry_chacha20_poly1305_s390x_vx_blocks8;) + +#endif /*HAVE_GCC_INLINE_ASM_S390X_VX*/ +#endif /*__s390x__*/ diff --git a/comm/third_party/libgcrypt/cipher/chacha20.c b/comm/third_party/libgcrypt/cipher/chacha20.c new file mode 100644 index 0000000000..497594a0bb --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/chacha20.c @@ -0,0 +1,1306 @@ +/* chacha20.c - Bernstein's ChaCha20 cipher + * Copyright (C) 2014,2017-2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * For a description of the algorithm, see: + * http://cr.yp.to/chacha.html + */ + +/* + * Based on D. J. Bernstein reference implementation at + * http://cr.yp.to/chacha.html: + * + * chacha-regs.c version 20080118 + * D. J. Bernstein + * Public domain. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "cipher-internal.h" +#include "bufhelp.h" + + +#define CHACHA20_MIN_KEY_SIZE 16 /* Bytes. */ +#define CHACHA20_MAX_KEY_SIZE 32 /* Bytes. */ +#define CHACHA20_BLOCK_SIZE 64 /* Bytes. */ +#define CHACHA20_MIN_IV_SIZE 8 /* Bytes. */ +#define CHACHA20_MAX_IV_SIZE 12 /* Bytes. */ +#define CHACHA20_CTR_SIZE 16 /* Bytes. */ + + +/* USE_SSSE3 indicates whether to compile with Intel SSSE3 code. */ +#undef USE_SSSE3 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_SSSE3) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_SSSE3 1 +#endif + +/* USE_AVX2 indicates whether to compile with Intel AVX2 code. */ +#undef USE_AVX2 +#if defined(__x86_64__) && defined(HAVE_GCC_INLINE_ASM_AVX2) && \ + (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AVX2 1 +#endif + +/* USE_ARMV7_NEON indicates whether to enable ARMv7 NEON assembly code. */ +#undef USE_ARMV7_NEON +#ifdef ENABLE_NEON_SUPPORT +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_NEON) +# define USE_ARMV7_NEON 1 +# endif +#endif + +/* USE_AARCH64_SIMD indicates whether to enable ARMv8 SIMD assembly + * code. */ +#undef USE_AARCH64_SIMD +#ifdef ENABLE_NEON_SUPPORT +# if defined(__AARCH64EL__) \ + && defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_AARCH64_NEON) +# define USE_AARCH64_SIMD 1 +# endif +#endif + +/* USE_PPC_VEC indicates whether to enable PowerPC vector + * accelerated code. */ +#undef USE_PPC_VEC +#ifdef ENABLE_PPC_CRYPTO_SUPPORT +# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \ + defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) +# if __GNUC__ >= 4 +# define USE_PPC_VEC 1 +# endif +# endif +#endif + +/* USE_S390X_VX indicates whether to enable zSeries code. */ +#undef USE_S390X_VX +#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9 +# if defined(HAVE_GCC_INLINE_ASM_S390X_VX) +# define USE_S390X_VX 1 +# endif /* USE_S390X_VX */ +#endif + +/* Assembly implementations use SystemV ABI, ABI conversion and additional + * stack to store XMM6-XMM15 needed on Win64. */ +#undef ASM_FUNC_ABI +#undef ASM_EXTRA_STACK +#if defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS) +# define ASM_FUNC_ABI __attribute__((sysv_abi)) +#else +# define ASM_FUNC_ABI +#endif + + +typedef struct CHACHA20_context_s +{ + u32 input[16]; + unsigned char pad[CHACHA20_BLOCK_SIZE]; + unsigned int unused; /* bytes in the pad.
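More precisely, pad holds the tail of the most recently generated keystream block, and unused counts how many of its bytes have not yet been XORed into output; consumers read from the end, at pad + CHACHA20_BLOCK_SIZE - unused, as the stream functions further below do. A minimal sketch of that consumption, assuming this struct and the bufhelp helpers:

    // drain leftover keystream before generating fresh blocks
    size_t n = ctx->unused < length ? ctx->unused : length;
    buf_xor (outbuf, inbuf, ctx->pad + CHACHA20_BLOCK_SIZE - ctx->unused, n);
    outbuf += n; inbuf += n; length -= n;
    ctx->unused -= n;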
*/ + unsigned int use_ssse3:1; + unsigned int use_avx2:1; + unsigned int use_neon:1; + unsigned int use_ppc:1; + unsigned int use_s390x:1; +} CHACHA20_context_t; + + +#ifdef USE_SSSE3 + +unsigned int _gcry_chacha20_amd64_ssse3_blocks4(u32 *state, byte *dst, + const byte *src, + size_t nblks) ASM_FUNC_ABI; + +unsigned int _gcry_chacha20_amd64_ssse3_blocks1(u32 *state, byte *dst, + const byte *src, + size_t nblks) ASM_FUNC_ABI; + +unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks4( + u32 *state, byte *dst, const byte *src, size_t nblks, + void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; + +unsigned int _gcry_chacha20_poly1305_amd64_ssse3_blocks1( + u32 *state, byte *dst, const byte *src, size_t nblks, + void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; + +#endif /* USE_SSSE3 */ + +#ifdef USE_AVX2 + +unsigned int _gcry_chacha20_amd64_avx2_blocks8(u32 *state, byte *dst, + const byte *src, + size_t nblks) ASM_FUNC_ABI; + +unsigned int _gcry_chacha20_poly1305_amd64_avx2_blocks8( + u32 *state, byte *dst, const byte *src, size_t nblks, + void *poly1305_state, const byte *poly1305_src) ASM_FUNC_ABI; + +#endif /* USE_AVX2 */ + +#ifdef USE_PPC_VEC + +unsigned int _gcry_chacha20_ppc8_blocks4(u32 *state, byte *dst, + const byte *src, + size_t nblks); + +unsigned int _gcry_chacha20_ppc8_blocks1(u32 *state, byte *dst, + const byte *src, + size_t nblks); + +#undef USE_PPC_VEC_POLY1305 +#if SIZEOF_UNSIGNED_LONG == 8 +#define USE_PPC_VEC_POLY1305 1 +unsigned int _gcry_chacha20_poly1305_ppc8_blocks4( + u32 *state, byte *dst, const byte *src, size_t nblks, + POLY1305_STATE *st, const byte *poly1305_src); +#endif /* SIZEOF_UNSIGNED_LONG == 8 */ + +#endif /* USE_PPC_VEC */ + +#ifdef USE_S390X_VX + +unsigned int _gcry_chacha20_s390x_vx_blocks8(u32 *state, byte *dst, + const byte *src, size_t nblks); + +unsigned int _gcry_chacha20_s390x_vx_blocks4_2_1(u32 *state, byte *dst, + const byte *src, size_t nblks); + +#undef USE_S390X_VX_POLY1305 +#if SIZEOF_UNSIGNED_LONG == 8 +#define USE_S390X_VX_POLY1305 1 +unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks8( + u32 *state, byte *dst, const byte *src, size_t nblks, + POLY1305_STATE *st, const byte *poly1305_src); + +unsigned int _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( + u32 *state, byte *dst, const byte *src, size_t nblks, + POLY1305_STATE *st, const byte *poly1305_src); +#endif /* SIZEOF_UNSIGNED_LONG == 8 */ + +#endif /* USE_S390X_VX */ + +#ifdef USE_ARMV7_NEON + +unsigned int _gcry_chacha20_armv7_neon_blocks4(u32 *state, byte *dst, + const byte *src, + size_t nblks); + +#endif /* USE_ARMV7_NEON */ + +#ifdef USE_AARCH64_SIMD + +unsigned int _gcry_chacha20_aarch64_blocks4(u32 *state, byte *dst, + const byte *src, size_t nblks); + +unsigned int _gcry_chacha20_poly1305_aarch64_blocks4( + u32 *state, byte *dst, const byte *src, size_t nblks, + void *poly1305_state, const byte *poly1305_src); + +#endif /* USE_AARCH64_SIMD */ + + +static const char *selftest (void); + + +#define ROTATE(v,c) (rol(v,c)) +#define XOR(v,w) ((v) ^ (w)) +#define PLUS(v,w) ((u32)((v) + (w))) +#define PLUSONE(v) (PLUS((v),1)) + +#define QUARTERROUND(a,b,c,d) \ + a = PLUS(a,b); d = ROTATE(XOR(d,a),16); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c),12); \ + a = PLUS(a,b); d = ROTATE(XOR(d,a), 8); \ + c = PLUS(c,d); b = ROTATE(XOR(b,c), 7); + +#define BUF_XOR_LE32(dst, src, offset, x) \ + buf_put_le32((dst) + (offset), buf_get_le32((src) + (offset)) ^ (x)) + +static unsigned int +do_chacha20_blocks (u32 *input, byte *dst, const byte *src, size_t nblks) +{ + u32 x0, x1, 
x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15; + unsigned int i; + + while (nblks) + { + x0 = input[0]; + x1 = input[1]; + x2 = input[2]; + x3 = input[3]; + x4 = input[4]; + x5 = input[5]; + x6 = input[6]; + x7 = input[7]; + x8 = input[8]; + x9 = input[9]; + x10 = input[10]; + x11 = input[11]; + x12 = input[12]; + x13 = input[13]; + x14 = input[14]; + x15 = input[15]; + + for (i = 20; i > 0; i -= 2) + { + QUARTERROUND(x0, x4, x8, x12) + QUARTERROUND(x1, x5, x9, x13) + QUARTERROUND(x2, x6, x10, x14) + QUARTERROUND(x3, x7, x11, x15) + QUARTERROUND(x0, x5, x10, x15) + QUARTERROUND(x1, x6, x11, x12) + QUARTERROUND(x2, x7, x8, x13) + QUARTERROUND(x3, x4, x9, x14) + } + + x0 = PLUS(x0, input[0]); + x1 = PLUS(x1, input[1]); + x2 = PLUS(x2, input[2]); + x3 = PLUS(x3, input[3]); + x4 = PLUS(x4, input[4]); + x5 = PLUS(x5, input[5]); + x6 = PLUS(x6, input[6]); + x7 = PLUS(x7, input[7]); + x8 = PLUS(x8, input[8]); + x9 = PLUS(x9, input[9]); + x10 = PLUS(x10, input[10]); + x11 = PLUS(x11, input[11]); + x12 = PLUS(x12, input[12]); + x13 = PLUS(x13, input[13]); + x14 = PLUS(x14, input[14]); + x15 = PLUS(x15, input[15]); + + input[12] = PLUSONE(input[12]); + input[13] = PLUS(input[13], !input[12]); + + BUF_XOR_LE32(dst, src, 0, x0); + BUF_XOR_LE32(dst, src, 4, x1); + BUF_XOR_LE32(dst, src, 8, x2); + BUF_XOR_LE32(dst, src, 12, x3); + BUF_XOR_LE32(dst, src, 16, x4); + BUF_XOR_LE32(dst, src, 20, x5); + BUF_XOR_LE32(dst, src, 24, x6); + BUF_XOR_LE32(dst, src, 28, x7); + BUF_XOR_LE32(dst, src, 32, x8); + BUF_XOR_LE32(dst, src, 36, x9); + BUF_XOR_LE32(dst, src, 40, x10); + BUF_XOR_LE32(dst, src, 44, x11); + BUF_XOR_LE32(dst, src, 48, x12); + BUF_XOR_LE32(dst, src, 52, x13); + BUF_XOR_LE32(dst, src, 56, x14); + BUF_XOR_LE32(dst, src, 60, x15); + + src += CHACHA20_BLOCK_SIZE; + dst += CHACHA20_BLOCK_SIZE; + nblks--; + } + + /* burn_stack */ + return (17 * sizeof(u32) + 6 * sizeof(void *)); +} + + +static unsigned int +chacha20_blocks (CHACHA20_context_t *ctx, byte *dst, const byte *src, + size_t nblks) +{ +#ifdef USE_SSSE3 + if (ctx->use_ssse3) + { + return _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, dst, src, nblks); + } +#endif + +#ifdef USE_PPC_VEC + if (ctx->use_ppc) + { + return _gcry_chacha20_ppc8_blocks1(ctx->input, dst, src, nblks); + } +#endif + +#ifdef USE_S390X_VX + if (ctx->use_s390x) + { + return _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, dst, src, nblks); + } +#endif + + return do_chacha20_blocks (ctx->input, dst, src, nblks); +} + + +static void +chacha20_keysetup (CHACHA20_context_t *ctx, const byte *key, + unsigned int keylen) +{ + static const char sigma[16] = "expand 32-byte k"; + static const char tau[16] = "expand 16-byte k"; + const char *constants; + + ctx->input[4] = buf_get_le32(key + 0); + ctx->input[5] = buf_get_le32(key + 4); + ctx->input[6] = buf_get_le32(key + 8); + ctx->input[7] = buf_get_le32(key + 12); + if (keylen == CHACHA20_MAX_KEY_SIZE) /* 256 bits */ + { + key += 16; + constants = sigma; + } + else /* 128 bits */ + { + constants = tau; + } + ctx->input[8] = buf_get_le32(key + 0); + ctx->input[9] = buf_get_le32(key + 4); + ctx->input[10] = buf_get_le32(key + 8); + ctx->input[11] = buf_get_le32(key + 12); + ctx->input[0] = buf_get_le32(constants + 0); + ctx->input[1] = buf_get_le32(constants + 4); + ctx->input[2] = buf_get_le32(constants + 8); + ctx->input[3] = buf_get_le32(constants + 12); +} + + +static void +chacha20_ivsetup (CHACHA20_context_t * ctx, const byte *iv, size_t ivlen) +{ + if (ivlen == CHACHA20_CTR_SIZE) + { + ctx->input[12] = buf_get_le32 
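Taken together, chacha20_keysetup and chacha20_ivsetup (whose cases continue just below) fill the 16-word state in the usual ChaCha20 layout, summarized here as a comment since the functions themselves are the authoritative construction:

    // words  0-3 : "expand 32-byte k" / "expand 16-byte k" constant
    // words  4-11: key (a 128-bit key is loaded twice; tau marks that case)
    // words 12-15: counter and nonce; the split depends on the IV form
    //              (64-bit nonce -> 64-bit counter in 12-13, nonce in 14-15;
    //               96-bit nonce -> 32-bit counter in 12, nonce in 13-15)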
(iv + 0); + ctx->input[13] = buf_get_le32 (iv + 4); + ctx->input[14] = buf_get_le32 (iv + 8); + ctx->input[15] = buf_get_le32 (iv + 12); + } + else if (ivlen == CHACHA20_MAX_IV_SIZE) + { + ctx->input[12] = 0; + ctx->input[13] = buf_get_le32 (iv + 0); + ctx->input[14] = buf_get_le32 (iv + 4); + ctx->input[15] = buf_get_le32 (iv + 8); + } + else if (ivlen == CHACHA20_MIN_IV_SIZE) + { + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = buf_get_le32 (iv + 0); + ctx->input[15] = buf_get_le32 (iv + 4); + } + else + { + ctx->input[12] = 0; + ctx->input[13] = 0; + ctx->input[14] = 0; + ctx->input[15] = 0; + } +} + + +static void +chacha20_setiv (void *context, const byte *iv, size_t ivlen) +{ + CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; + + /* draft-nir-cfrg-chacha20-poly1305-02 defines 96-bit and 64-bit nonce. */ + if (iv && ivlen != CHACHA20_MAX_IV_SIZE && ivlen != CHACHA20_MIN_IV_SIZE + && ivlen != CHACHA20_CTR_SIZE) + log_info ("WARNING: chacha20_setiv: bad ivlen=%u\n", (u32) ivlen); + + if (iv && (ivlen == CHACHA20_MAX_IV_SIZE || ivlen == CHACHA20_MIN_IV_SIZE + || ivlen == CHACHA20_CTR_SIZE)) + chacha20_ivsetup (ctx, iv, ivlen); + else + chacha20_ivsetup (ctx, NULL, 0); + + /* Reset the unused pad bytes counter. */ + ctx->unused = 0; +} + + +static gcry_err_code_t +chacha20_do_setkey (CHACHA20_context_t *ctx, + const byte *key, unsigned int keylen) +{ + static int initialized; + static const char *selftest_failed; + unsigned int features = _gcry_get_hw_features (); + + if (!initialized) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("CHACHA20 selftest failed (%s)\n", selftest_failed); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + if (keylen != CHACHA20_MAX_KEY_SIZE && keylen != CHACHA20_MIN_KEY_SIZE) + return GPG_ERR_INV_KEYLEN; + +#ifdef USE_SSSE3 + ctx->use_ssse3 = (features & HWF_INTEL_SSSE3) != 0; +#endif +#ifdef USE_AVX2 + ctx->use_avx2 = (features & HWF_INTEL_AVX2) != 0; +#endif +#ifdef USE_ARMV7_NEON + ctx->use_neon = (features & HWF_ARM_NEON) != 0; +#endif +#ifdef USE_AARCH64_SIMD + ctx->use_neon = (features & HWF_ARM_NEON) != 0; +#endif +#ifdef USE_PPC_VEC + ctx->use_ppc = (features & HWF_PPC_ARCH_2_07) != 0; +#endif +#ifdef USE_S390X_VX + ctx->use_s390x = (features & HWF_S390X_VX) != 0; +#endif + + (void)features; + + chacha20_keysetup (ctx, key, keylen); + + /* We default to a zero nonce. */ + chacha20_setiv (ctx, NULL, 0); + + return 0; +} + + +static gcry_err_code_t +chacha20_setkey (void *context, const byte *key, unsigned int keylen, + cipher_bulk_ops_t *bulk_ops) +{ + CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; + gcry_err_code_t rc = chacha20_do_setkey (ctx, key, keylen); + (void)bulk_ops; + _gcry_burn_stack (4 + sizeof (void *) + 4 * sizeof (void *)); + return rc; +} + + +static unsigned int +do_chacha20_encrypt_stream_tail (CHACHA20_context_t *ctx, byte *outbuf, + const byte *inbuf, size_t length) +{ + static const unsigned char zero_pad[CHACHA20_BLOCK_SIZE] = { 0, }; + unsigned int nburn, burn = 0; + +#ifdef USE_AVX2 + if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 8; + nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, + nblocks); + burn = nburn > burn ? 
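The setkey/setiv pair above is reached from the public API through the generic cipher interface; a minimal usage sketch (error handling omitted; 96-bit IETF nonce assumed):

    #include <gcrypt.h>

    gcry_cipher_hd_t hd;
    unsigned char key[32] = { 0 };    // example key material
    unsigned char nonce[12] = { 0 };  // 96-bit nonce; 32-bit counter starts at 0

    gcry_cipher_open (&hd, GCRY_CIPHER_CHACHA20, GCRY_CIPHER_MODE_STREAM, 0);
    gcry_cipher_setkey (hd, key, sizeof key);
    gcry_cipher_setiv (hd, nonce, sizeof nonce);
    // gcry_cipher_encrypt (hd, out, outlen, in, inlen);
    gcry_cipher_close (hd);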
nburn : burn; + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_SSSE3 + if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, + nblocks); + burn = nburn > burn ? nburn : burn; + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_ARMV7_NEON + if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + nburn = _gcry_chacha20_armv7_neon_blocks4(ctx->input, outbuf, inbuf, + nblocks); + burn = nburn > burn ? nburn : burn; + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_AARCH64_SIMD + if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, + nblocks); + burn = nburn > burn ? nburn : burn; + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_PPC_VEC + if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, nblocks); + burn = nburn > burn ? nburn : burn; + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_S390X_VX + if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 8) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 8; + nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, + nblocks); + burn = nburn > burn ? nburn : burn; + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + + if (length >= CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nburn = chacha20_blocks(ctx, outbuf, inbuf, nblocks); + burn = nburn > burn ? nburn : burn; + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } + + if (length > 0) + { + nburn = chacha20_blocks(ctx, ctx->pad, zero_pad, 1); + burn = nburn > burn ? nburn : burn; + + buf_xor (outbuf, inbuf, ctx->pad, length); + ctx->unused = CHACHA20_BLOCK_SIZE - length; + } + + if (burn) + burn += 5 * sizeof(void *); + + return burn; +} + + +static void +chacha20_encrypt_stream (void *context, byte *outbuf, const byte *inbuf, + size_t length) +{ + CHACHA20_context_t *ctx = (CHACHA20_context_t *) context; + unsigned int nburn, burn = 0; + + if (!length) + return; + + if (ctx->unused) + { + unsigned char *p = ctx->pad; + size_t n; + + gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); + + n = ctx->unused; + if (n > length) + n = length; + + buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); + length -= n; + outbuf += n; + inbuf += n; + ctx->unused -= n; + + if (!length) + return; + gcry_assert (!ctx->unused); + } + + nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, length); + burn = nburn > burn ? 
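Note the shape of do_chacha20_encrypt_stream_tail above: the widest enabled kernel goes first with the block count rounded down to its multiple, the remainder falls through to narrower kernels and then to the generic whole-block path, and any sub-block tail is generated into ctx->pad and buffered. Reduced to a hedged C skeleton with stand-in kernels:

    #include <stddef.h>

    // stand-ins; the real kernels also report a stack-burn depth
    static void run8 (size_t nblks) { (void) nblks; }
    static void run4 (size_t nblks) { (void) nblks; }
    static void run1 (size_t nblks) { (void) nblks; }

    static void
    tail_dispatch (size_t nblocks)
    {
      if (nblocks >= 8) { run8 (nblocks - nblocks % 8); nblocks %= 8; }
      if (nblocks >= 4) { run4 (nblocks - nblocks % 4); nblocks %= 4; }
      if (nblocks)      run1 (nblocks);
      // a remaining partial block is served from the buffered pad
    }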
nburn : burn; + + if (burn) + _gcry_burn_stack (burn); +} + + +gcry_err_code_t +_gcry_chacha20_poly1305_encrypt(gcry_cipher_hd_t c, byte *outbuf, + const byte *inbuf, size_t length) +{ + CHACHA20_context_t *ctx = (void *) &c->context.c; + unsigned int nburn, burn = 0; + byte *authptr = NULL; + + if (!length) + return 0; + + if (ctx->unused) + { + unsigned char *p = ctx->pad; + size_t n; + + gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); + + n = ctx->unused; + if (n > length) + n = length; + + buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); + nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, n); + burn = nburn > burn ? nburn : burn; + length -= n; + outbuf += n; + inbuf += n; + ctx->unused -= n; + + if (!length) + { + if (burn) + _gcry_burn_stack (burn); + + return 0; + } + gcry_assert (!ctx->unused); + } + + gcry_assert (c->u_mode.poly1305.ctx.leftover == 0); + + if (0) + { } +#ifdef USE_AVX2 + else if (ctx->use_avx2 && length >= CHACHA20_BLOCK_SIZE * 8) + { + nburn = _gcry_chacha20_amd64_avx2_blocks8(ctx->input, outbuf, inbuf, 8); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 8 * CHACHA20_BLOCK_SIZE; + outbuf += 8 * CHACHA20_BLOCK_SIZE; + inbuf += 8 * CHACHA20_BLOCK_SIZE; + } +#endif +#ifdef USE_SSSE3 + else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 4) + { + nburn = _gcry_chacha20_amd64_ssse3_blocks4(ctx->input, outbuf, inbuf, 4); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 4 * CHACHA20_BLOCK_SIZE; + outbuf += 4 * CHACHA20_BLOCK_SIZE; + inbuf += 4 * CHACHA20_BLOCK_SIZE; + } + else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE * 2) + { + nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 2); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 2 * CHACHA20_BLOCK_SIZE; + outbuf += 2 * CHACHA20_BLOCK_SIZE; + inbuf += 2 * CHACHA20_BLOCK_SIZE; + } + else if (ctx->use_ssse3 && length >= CHACHA20_BLOCK_SIZE) + { + nburn = _gcry_chacha20_amd64_ssse3_blocks1(ctx->input, outbuf, inbuf, 1); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 1 * CHACHA20_BLOCK_SIZE; + outbuf += 1 * CHACHA20_BLOCK_SIZE; + inbuf += 1 * CHACHA20_BLOCK_SIZE; + } +#endif +#ifdef USE_AARCH64_SIMD + else if (ctx->use_neon && length >= CHACHA20_BLOCK_SIZE * 4) + { + nburn = _gcry_chacha20_aarch64_blocks4(ctx->input, outbuf, inbuf, 4); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 4 * CHACHA20_BLOCK_SIZE; + outbuf += 4 * CHACHA20_BLOCK_SIZE; + inbuf += 4 * CHACHA20_BLOCK_SIZE; + } +#endif +#ifdef USE_PPC_VEC_POLY1305 + else if (ctx->use_ppc && length >= CHACHA20_BLOCK_SIZE * 4) + { + nburn = _gcry_chacha20_ppc8_blocks4(ctx->input, outbuf, inbuf, 4); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 4 * CHACHA20_BLOCK_SIZE; + outbuf += 4 * CHACHA20_BLOCK_SIZE; + inbuf += 4 * CHACHA20_BLOCK_SIZE; + } +#endif +#ifdef USE_S390X_VX_POLY1305 + else if (ctx->use_s390x && length >= 2 * CHACHA20_BLOCK_SIZE * 8) + { + nburn = _gcry_chacha20_s390x_vx_blocks8(ctx->input, outbuf, inbuf, 8); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 8 * CHACHA20_BLOCK_SIZE; + outbuf += 8 * CHACHA20_BLOCK_SIZE; + inbuf += 8 * CHACHA20_BLOCK_SIZE; + } + else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 4) + { + nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 4); + burn = nburn > burn ? 
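This unauthenticated first pass is what creates the MAC lag: for encryption Poly1305 runs over ciphertext, so by encrypting a few blocks ahead the stitched kernels can authenticate block N while producing block N+k. While stitching, the working invariant is that the MAC position trails the write position by exactly authoffset bytes (the source asserts authptr == outbuf once the lag is flushed); stated illustratively:

    // illustrative invariant while the stitched pass runs
    gcry_assert (authptr + authoffset == outbuf);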
nburn : burn; + + authptr = outbuf; + length -= 4 * CHACHA20_BLOCK_SIZE; + outbuf += 4 * CHACHA20_BLOCK_SIZE; + inbuf += 4 * CHACHA20_BLOCK_SIZE; + } + else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE * 2) + { + nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 2); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 2 * CHACHA20_BLOCK_SIZE; + outbuf += 2 * CHACHA20_BLOCK_SIZE; + inbuf += 2 * CHACHA20_BLOCK_SIZE; + } + else if (ctx->use_s390x && length >= CHACHA20_BLOCK_SIZE) + { + nburn = _gcry_chacha20_s390x_vx_blocks4_2_1(ctx->input, outbuf, inbuf, 1); + burn = nburn > burn ? nburn : burn; + + authptr = outbuf; + length -= 1 * CHACHA20_BLOCK_SIZE; + outbuf += 1 * CHACHA20_BLOCK_SIZE; + inbuf += 1 * CHACHA20_BLOCK_SIZE; + } +#endif + + if (authptr) + { + size_t authoffset = outbuf - authptr; + +#ifdef USE_AVX2 + if (ctx->use_avx2 && + length >= 8 * CHACHA20_BLOCK_SIZE && + authoffset >= 8 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 8; + + nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, authptr); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + authptr += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_SSSE3 + if (ctx->use_ssse3) + { + if (length >= 4 * CHACHA20_BLOCK_SIZE && + authoffset >= 4 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + + nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, authptr); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + authptr += nblocks * CHACHA20_BLOCK_SIZE; + } + + if (length >= CHACHA20_BLOCK_SIZE && + authoffset >= CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + + nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, authptr); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + authptr += nblocks * CHACHA20_BLOCK_SIZE; + } + } +#endif + +#ifdef USE_AARCH64_SIMD + if (ctx->use_neon && + length >= 4 * CHACHA20_BLOCK_SIZE && + authoffset >= 4 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + + nburn = _gcry_chacha20_poly1305_aarch64_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, authptr); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + authptr += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_PPC_VEC_POLY1305 + if (ctx->use_ppc && + length >= 4 * CHACHA20_BLOCK_SIZE && + authoffset >= 4 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + + nburn = _gcry_chacha20_poly1305_ppc8_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, authptr); + burn = nburn > burn ? 
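+/* [Editor's note] Shape of the stitched loop: the unrolled branches
+ * earlier encrypt one batch with a plain ChaCha20 kernel and leave
+ * AUTHPTR pointing at that still-unauthenticated ciphertext.  Each
+ * stitched kernel here then, in effect, performs per batch i:
+ *
+ *   out[i] = in[i] ^ keystream(i)   // encrypt batch i
+ *   poly1305_update (out[i-1])      // authenticate batch i-1
+ *
+ * so authentication lags encryption by exactly one batch, and the
+ * "authoffset >= N * CHACHA20_BLOCK_SIZE" guards check that enough
+ * finished ciphertext is pending to feed the kernel's Poly1305 half.
+ * The plain _gcry_poly1305_update call after these branches pays off
+ * the remaining lag so that AUTHPTR catches up with OUTBUF. */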
nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + authptr += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_S390X_VX_POLY1305 + if (ctx->use_s390x) + { + if (length >= 8 * CHACHA20_BLOCK_SIZE && + authoffset >= 8 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 8; + + nburn = _gcry_chacha20_poly1305_s390x_vx_blocks8( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, authptr); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + authptr += nblocks * CHACHA20_BLOCK_SIZE; + } + + if (length >= CHACHA20_BLOCK_SIZE && + authoffset >= CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + + nburn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, authptr); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + authptr += nblocks * CHACHA20_BLOCK_SIZE; + } + } +#endif + + if (authoffset > 0) + { + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, authptr, authoffset); + authptr += authoffset; + authoffset = 0; + } + + gcry_assert(authptr == outbuf); + } + + while (length) + { + size_t currlen = length; + + /* Since checksumming is done after encryption, process input in 24KiB + * chunks to keep data loaded in L1 cache for checksumming. */ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen); + burn = nburn > burn ? nburn : burn; + + nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, outbuf, + currlen); + burn = nburn > burn ? nburn : burn; + + outbuf += currlen; + inbuf += currlen; + length -= currlen; + } + + if (burn) + _gcry_burn_stack (burn); + + return 0; +} + + +gcry_err_code_t +_gcry_chacha20_poly1305_decrypt(gcry_cipher_hd_t c, byte *outbuf, + const byte *inbuf, size_t length) +{ + CHACHA20_context_t *ctx = (void *) &c->context.c; + unsigned int nburn, burn = 0; + + if (!length) + return 0; + + if (ctx->unused) + { + unsigned char *p = ctx->pad; + size_t n; + + gcry_assert (ctx->unused < CHACHA20_BLOCK_SIZE); + + n = ctx->unused; + if (n > length) + n = length; + + nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, n); + burn = nburn > burn ? nburn : burn; + buf_xor (outbuf, inbuf, p + CHACHA20_BLOCK_SIZE - ctx->unused, n); + length -= n; + outbuf += n; + inbuf += n; + ctx->unused -= n; + + if (!length) + { + if (burn) + _gcry_burn_stack (burn); + + return 0; + } + gcry_assert (!ctx->unused); + } + + gcry_assert (c->u_mode.poly1305.ctx.leftover == 0); + +#ifdef USE_AVX2 + if (ctx->use_avx2 && length >= 8 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 8; + + nburn = _gcry_chacha20_poly1305_amd64_avx2_blocks8( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); + burn = nburn > burn ?
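+/* [Editor's note] The 24 KiB chunking in the encrypt loop above (and in
+ * the decrypt loop further down) is cache blocking: each chunk is
+ * touched twice, once by ChaCha20 and once by Poly1305, and 24 KiB plus
+ * working state fits comfortably in a typical 32 KiB L1 data cache, so
+ * the second pass reads cache-hot data.  The pattern in isolation
+ * (a sketch, not the exact code):
+ *
+ *   while (len)
+ *     {
+ *       size_t n = len > 24 * 1024 ? 24 * 1024 : len;
+ *       encrypt (out, in, n);   // pass 1 writes n bytes
+ *       mac (out, n);           // pass 2 re-reads the same n bytes
+ *       out += n; in += n; len -= n;
+ *     }
+ */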
nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_SSSE3 + if (ctx->use_ssse3) + { + if (length >= 4 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + + nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } + + if (length >= CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + + nburn = _gcry_chacha20_poly1305_amd64_ssse3_blocks1( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } + } +#endif + +#ifdef USE_AARCH64_SIMD + if (ctx->use_neon && length >= 4 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + + nburn = _gcry_chacha20_poly1305_aarch64_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_PPC_VEC_POLY1305 + if (ctx->use_ppc && length >= 4 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 4; + + nburn = _gcry_chacha20_poly1305_ppc8_blocks4( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } +#endif + +#ifdef USE_S390X_VX_POLY1305 + if (ctx->use_s390x) + { + if (length >= 8 * CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + nblocks -= nblocks % 8; + + nburn = _gcry_chacha20_poly1305_s390x_vx_blocks8( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } + + if (length >= CHACHA20_BLOCK_SIZE) + { + size_t nblocks = length / CHACHA20_BLOCK_SIZE; + + nburn = _gcry_chacha20_poly1305_s390x_vx_blocks4_2_1( + ctx->input, outbuf, inbuf, nblocks, + &c->u_mode.poly1305.ctx.state, inbuf); + burn = nburn > burn ? nburn : burn; + + length -= nblocks * CHACHA20_BLOCK_SIZE; + outbuf += nblocks * CHACHA20_BLOCK_SIZE; + inbuf += nblocks * CHACHA20_BLOCK_SIZE; + } + } +#endif + + while (length) + { + size_t currlen = length; + + /* Since checksumming is done before decryption, process input in 24KiB + * chunks to keep data loaded in L1 cache for decryption. */ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + nburn = _gcry_poly1305_update_burn (&c->u_mode.poly1305.ctx, inbuf, + currlen); + burn = nburn > burn ? nburn : burn; + + nburn = do_chacha20_encrypt_stream_tail (ctx, outbuf, inbuf, currlen); + burn = nburn > burn ? 
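+/* [Editor's note] Note the mirrored pass order relative to the encrypt
+ * path:
+ *
+ *   encrypt:  C = ChaCha20(P);      Poly1305_update(C);
+ *   decrypt:  Poly1305_update(C);   P = ChaCha20(C);
+ *
+ * Both directions MAC the ciphertext, as the RFC 8439 AEAD construction
+ * requires; only the cache-friendly ordering of the two passes over each
+ * 24 KiB chunk differs. */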
nburn : burn; + + outbuf += currlen; + inbuf += currlen; + length -= currlen; + } + + if (burn) + _gcry_burn_stack (burn); + + return 0; +} + + +static const char * +selftest (void) +{ + byte ctxbuf[sizeof(CHACHA20_context_t) + 15]; + CHACHA20_context_t *ctx; + byte scratch[127 + 1]; + byte buf[512 + 64 + 4]; + int i; + + /* From draft-strombergson-chacha-test-vectors */ + static byte key_1[] = { + 0xc4, 0x6e, 0xc1, 0xb1, 0x8c, 0xe8, 0xa8, 0x78, + 0x72, 0x5a, 0x37, 0xe7, 0x80, 0xdf, 0xb7, 0x35, + 0x1f, 0x68, 0xed, 0x2e, 0x19, 0x4c, 0x79, 0xfb, + 0xc6, 0xae, 0xbe, 0xe1, 0xa6, 0x67, 0x97, 0x5d + }; + static const byte nonce_1[] = + { 0x1a, 0xda, 0x31, 0xd5, 0xcf, 0x68, 0x82, 0x21 }; + static const byte plaintext_1[127] = { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + static const byte ciphertext_1[127] = { + 0xf6, 0x3a, 0x89, 0xb7, 0x5c, 0x22, 0x71, 0xf9, + 0x36, 0x88, 0x16, 0x54, 0x2b, 0xa5, 0x2f, 0x06, + 0xed, 0x49, 0x24, 0x17, 0x92, 0x30, 0x2b, 0x00, + 0xb5, 0xe8, 0xf8, 0x0a, 0xe9, 0xa4, 0x73, 0xaf, + 0xc2, 0x5b, 0x21, 0x8f, 0x51, 0x9a, 0xf0, 0xfd, + 0xd4, 0x06, 0x36, 0x2e, 0x8d, 0x69, 0xde, 0x7f, + 0x54, 0xc6, 0x04, 0xa6, 0xe0, 0x0f, 0x35, 0x3f, + 0x11, 0x0f, 0x77, 0x1b, 0xdc, 0xa8, 0xab, 0x92, + 0xe5, 0xfb, 0xc3, 0x4e, 0x60, 0xa1, 0xd9, 0xa9, + 0xdb, 0x17, 0x34, 0x5b, 0x0a, 0x40, 0x27, 0x36, + 0x85, 0x3b, 0xf9, 0x10, 0xb0, 0x60, 0xbd, 0xf1, + 0xf8, 0x97, 0xb6, 0x29, 0x0f, 0x01, 0xd1, 0x38, + 0xae, 0x2c, 0x4c, 0x90, 0x22, 0x5b, 0xa9, 0xea, + 0x14, 0xd5, 0x18, 0xf5, 0x59, 0x29, 0xde, 0xa0, + 0x98, 0xca, 0x7a, 0x6c, 0xcf, 0xe6, 0x12, 0x27, + 0x05, 0x3c, 0x84, 0xe4, 0x9a, 0x4a, 0x33 + }; + + /* 16-byte alignment required for amd64 implementation. 
*/ + ctx = (CHACHA20_context_t *)((uintptr_t)(ctxbuf + 15) & ~(uintptr_t)15); + + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + scratch[sizeof (scratch) - 1] = 0; + chacha20_encrypt_stream (ctx, scratch, plaintext_1, sizeof plaintext_1); + if (memcmp (scratch, ciphertext_1, sizeof ciphertext_1)) + return "ChaCha20 encryption test 1 failed."; + if (scratch[sizeof (scratch) - 1]) + return "ChaCha20 wrote too much."; + chacha20_setkey (ctx, key_1, sizeof (key_1), NULL); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, scratch, scratch, sizeof plaintext_1); + if (memcmp (scratch, plaintext_1, sizeof plaintext_1)) + return "ChaCha20 decryption test 1 failed."; + + for (i = 0; i < sizeof buf; i++) + buf[i] = i; + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + /*encrypt */ + chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); + /*decrypt */ + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, buf, buf, 1); + chacha20_encrypt_stream (ctx, buf + 1, buf + 1, (sizeof buf) - 1 - 1); + chacha20_encrypt_stream (ctx, buf + (sizeof buf) - 1, + buf + (sizeof buf) - 1, 1); + for (i = 0; i < sizeof buf; i++) + if (buf[i] != (byte) i) + return "ChaCha20 encryption test 2 failed."; + + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + /* encrypt */ + for (i = 0; i < sizeof buf; i++) + chacha20_encrypt_stream (ctx, &buf[i], &buf[i], 1); + /* decrypt */ + chacha20_setkey (ctx, key_1, sizeof key_1, NULL); + chacha20_setiv (ctx, nonce_1, sizeof nonce_1); + chacha20_encrypt_stream (ctx, buf, buf, sizeof buf); + for (i = 0; i < sizeof buf; i++) + if (buf[i] != (byte) i) + return "ChaCha20 encryption test 3 failed."; + + return NULL; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_chacha20 = { + GCRY_CIPHER_CHACHA20, + {0, 0}, /* flags */ + "CHACHA20", /* name */ + NULL, /* aliases */ + NULL, /* oids */ + 1, /* blocksize in bytes. */ + CHACHA20_MAX_KEY_SIZE * 8, /* standard key length in bits. */ + sizeof (CHACHA20_context_t), + chacha20_setkey, + NULL, + NULL, + chacha20_encrypt_stream, + chacha20_encrypt_stream, + NULL, + NULL, + chacha20_setiv +}; diff --git a/comm/third_party/libgcrypt/cipher/cipher-aeswrap.c b/comm/third_party/libgcrypt/cipher/cipher-aeswrap.c new file mode 100644 index 0000000000..c182657e1f --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-aeswrap.c @@ -0,0 +1,209 @@ +/* cipher-aeswrap.c - Generic AESWRAP mode implementation + * Copyright (C) 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +/* Perform the AES-Wrap algorithm as specified by RFC3394. We + implement this as a mode usable with any cipher algorithm of + blocksize 128. */ +gcry_err_code_t +_gcry_cipher_aeswrap_encrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen ) +{ + int j, x; + size_t n, i; + unsigned char *r, *a, *b; + unsigned char t[8]; + unsigned int burn, nburn; + +#if MAX_BLOCKSIZE < 8 +#error Invalid block size +#endif + /* We require a cipher with a 128 bit block length. */ + if (c->spec->blocksize != 16) + return GPG_ERR_INV_LENGTH; + + /* The output buffer must be able to hold the input data plus one + additional block. */ + if (outbuflen < inbuflen + 8) + return GPG_ERR_BUFFER_TOO_SHORT; + /* Input data must be multiple of 64 bits. */ + if (inbuflen % 8) + return GPG_ERR_INV_ARG; + + n = inbuflen / 8; + + /* We need at least two 64 bit blocks. */ + if (n < 2) + return GPG_ERR_INV_ARG; + + burn = 0; + + r = outbuf; + a = outbuf; /* We store A directly in OUTBUF. */ + b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ + + /* Copy the inbuf to the outbuf. */ + memmove (r+8, inbuf, inbuflen); + + /* If an IV has been set we use that IV as the Alternative Initial + Value; if it has not been set we use the standard value. */ + if (c->marks.iv) + memcpy (a, c->u_iv.iv, 8); + else + memset (a, 0xa6, 8); + + memset (t, 0, sizeof t); /* t := 0. */ + + for (j = 0; j <= 5; j++) + { + for (i = 1; i <= n; i++) + { + /* B := AES_k( A | R[i] ) */ + memcpy (b, a, 8); + memcpy (b+8, r+i*8, 8); + nburn = c->spec->encrypt (&c->context.c, b, b); + burn = nburn > burn ? nburn : burn; + /* t := t + 1 */ + for (x = 7; x >= 0; x--) + { + t[x]++; + if (t[x]) + break; + } + /* A := MSB_64(B) ^ t */ + cipher_block_xor(a, b, t, 8); + /* R[i] := LSB_64(B) */ + memcpy (r+i*8, b+8, 8); + } + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + +/* Perform the AES-Unwrap algorithm as specified by RFC3394. We + implement this as a mode usable with any cipher algorithm of + blocksize 128. */ +gcry_err_code_t +_gcry_cipher_aeswrap_decrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + int j, x; + size_t n, i; + unsigned char *r, *a, *b; + unsigned char t[8]; + unsigned int burn, nburn; + +#if MAX_BLOCKSIZE < 8 +#error Invalid block size +#endif + /* We require a cipher with a 128 bit block length. */ + if (c->spec->blocksize != 16) + return GPG_ERR_INV_LENGTH; + + /* The output buffer must be able to hold the input data minus one + additional block. Fixme: The caller has more restrictive checks + - we may want to fix them for this mode. */ + if (outbuflen + 8 < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + /* Input data must be multiple of 64 bits. */ + if (inbuflen % 8) + return GPG_ERR_INV_ARG; + + n = inbuflen / 8; + + /* We need at least three 64 bit blocks. */ + if (n < 3) + return GPG_ERR_INV_ARG; + + burn = 0; + + r = outbuf; + a = c->lastiv; /* We use c->LASTIV as buffer for A. */ + b = c->u_ctr.ctr; /* B is also used to concatenate stuff. */ + + /* Copy the inbuf to the outbuf and save A. */ + memcpy (a, inbuf, 8); + memmove (r, inbuf+8, inbuflen-8); + n--; /* Reduce to actual number of data blocks. */ + + /* t := 6 * n */ + i = n * 6; /* The range is valid because: n = inbuflen / 8 - 1. 
*/ + for (x=0; x < 8 && x < sizeof (i); x++) + t[7-x] = i >> (8*x); + for (; x < 8; x++) + t[7-x] = 0; + + for (j = 5; j >= 0; j--) + { + for (i = n; i >= 1; i--) + { + /* B := AES_k^1( (A ^ t)| R[i] ) */ + cipher_block_xor(b, a, t, 8); + memcpy (b+8, r+(i-1)*8, 8); + nburn = c->spec->decrypt (&c->context.c, b, b); + burn = nburn > burn ? nburn : burn; + /* t := t - 1 */ + for (x = 7; x >= 0; x--) + { + t[x]--; + if (t[x] != 0xff) + break; + } + /* A := MSB_64(B) */ + memcpy (a, b, 8); + /* R[i] := LSB_64(B) */ + memcpy (r+(i-1)*8, b+8, 8); + } + } + + /* If an IV has been set we compare against this Alternative Initial + Value; if it has not been set we compare against the standard IV. */ + if (c->marks.iv) + j = memcmp (a, c->u_iv.iv, 8); + else + { + for (j=0, x=0; x < 8; x++) + if (a[x] != 0xa6) + { + j=1; + break; + } + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return j? GPG_ERR_CHECKSUM : 0; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-cbc.c b/comm/third_party/libgcrypt/cipher/cipher-cbc.c new file mode 100644 index 0000000000..d4df1e72aa --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-cbc.c @@ -0,0 +1,292 @@ +/* cipher-cbc.c - Generic CBC mode implementation + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "./cipher-internal.h" +#include "bufhelp.h" + + + +static inline unsigned int +cbc_encrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf, + const unsigned char *inbuf, size_t nblocks, size_t blocksize, + int is_cbc_cmac) +{ + + unsigned int burn, nburn; + size_t n; + + burn = 0; + + if (c->bulk.cbc_enc) + { + c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks, + is_cbc_cmac); + } + else + { + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + unsigned char *ivp; + + ivp = c->u_iv.iv; + + for (n=0; n < nblocks; n++ ) + { + cipher_block_xor (outbuf, inbuf, ivp, blocksize); + nburn = enc_fn ( &c->context.c, outbuf, outbuf ); + burn = nburn > burn ? nburn : burn; + ivp = outbuf; + inbuf += blocksize; + if (!is_cbc_cmac) + outbuf += blocksize; + } + + if (ivp != c->u_iv.iv) + cipher_block_cpy (c->u_iv.iv, ivp, blocksize); + } + + return burn; +} + + +gcry_err_code_t +_gcry_cipher_cbc_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + size_t blocksize_mask = blocksize - 1; + size_t nblocks = inbuflen >> blocksize_shift; + int is_cbc_cmac = !!(c->flags & GCRY_CIPHER_CBC_MAC); + unsigned int burn; + + if (outbuflen < (is_cbc_cmac ? 
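+/* [Editor's note] cbc_encrypt_inner above is the textbook CBC
+ * recurrence
+ *
+ *   C_0 = IV,   C_i = E_K(P_i ^ C_{i-1})
+ *
+ * with one twist: when IS_CBC_CMAC is set, OUTBUF is not advanced, so
+ * every block overwrites the previous one and only the final C_n
+ * survives, which is precisely the CBC-MAC value.  That is why the
+ * surrounding output-size check demands only a single block of space in
+ * that mode. */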
blocksize : inbuflen)) + return GPG_ERR_BUFFER_TOO_SHORT; + + if (inbuflen & blocksize_mask) + return GPG_ERR_INV_LENGTH; + + burn = cbc_encrypt_inner(c, outbuf, inbuf, nblocks, blocksize, is_cbc_cmac); + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cbc_cts_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + size_t blocksize_mask = blocksize - 1; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t nblocks = inbuflen >> blocksize_shift; + unsigned int burn, nburn; + unsigned char *ivp; + int i; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if ((inbuflen & blocksize_mask) && !(inbuflen > blocksize)) + return GPG_ERR_INV_LENGTH; + + burn = 0; + + if (inbuflen > blocksize) + { + if ((inbuflen & blocksize_mask) == 0) + nblocks--; + } + + burn = cbc_encrypt_inner(c, outbuf, inbuf, nblocks, blocksize, 0); + inbuf += nblocks << blocksize_shift; + outbuf += nblocks << blocksize_shift; + + if (inbuflen > blocksize) + { + /* We have to be careful here, since outbuf might be equal to + inbuf. */ + size_t restbytes; + unsigned char b; + + if ((inbuflen & blocksize_mask) == 0) + restbytes = blocksize; + else + restbytes = inbuflen & blocksize_mask; + + outbuf -= blocksize; + for (ivp = c->u_iv.iv, i = 0; i < restbytes; i++) + { + b = inbuf[i]; + outbuf[blocksize + i] = outbuf[i]; + outbuf[i] = b ^ *ivp++; + } + for (; i < blocksize; i++) + outbuf[i] = 0 ^ *ivp++; + + nburn = enc_fn (&c->context.c, outbuf, outbuf); + burn = nburn > burn ? nburn : burn; + cipher_block_cpy (c->u_iv.iv, outbuf, blocksize); + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +static inline unsigned int +cbc_decrypt_inner(gcry_cipher_hd_t c, unsigned char *outbuf, + const unsigned char *inbuf, size_t nblocks, size_t blocksize) +{ + unsigned int burn, nburn; + size_t n; + + burn = 0; + + if (c->bulk.cbc_dec) + { + c->bulk.cbc_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + } + else + { + gcry_cipher_decrypt_t dec_fn = c->spec->decrypt; + + for (n = 0; n < nblocks; n++) + { + /* Because outbuf and inbuf might be the same, we must not overwrite + the original ciphertext block. We use LASTIV as intermediate + storage here because it is not used otherwise. */ + nburn = dec_fn ( &c->context.c, c->lastiv, inbuf ); + burn = nburn > burn ? 
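+/* [Editor's note] The *_cts_* routines in this file implement CBC with
+ * ciphertext stealing, which avoids padding when the input is not a
+ * multiple of the block size.  For plaintext P_1 ... P_{n-1}, P_n with a
+ * final partial block of r bytes, the encrypt tail above computes
+ *
+ *   E_{n-1} = E_K(P_{n-1} ^ C_{n-2})         // last full block
+ *   C_n     = leftmost r bytes of E_{n-1}    // the "stolen" ciphertext
+ *   C_{n-1} = E_K((P_n || 0...0) ^ E_{n-1})  // zero-padded last block
+ *
+ * and emits C_{n-1} before C_n, which is why that code steps OUTBUF back
+ * one block and copies bytes forward before the final encryption; the
+ * CTS decrypt routine below inverts the construction, using LASTIV as
+ * scratch space. */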
nburn : burn; + cipher_block_xor_n_copy_2 (outbuf, c->lastiv, c->u_iv.iv, inbuf, + blocksize); + inbuf += blocksize; + outbuf += blocksize; + } + } + + return burn; +} + + +gcry_err_code_t +_gcry_cipher_cbc_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + size_t blocksize_mask = blocksize - 1; + size_t nblocks = inbuflen >> blocksize_shift; + unsigned int burn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if (inbuflen & blocksize_mask) + return GPG_ERR_INV_LENGTH; + + burn = cbc_decrypt_inner(c, outbuf, inbuf, nblocks, blocksize); + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cbc_cts_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + size_t blocksize_mask = blocksize - 1; + gcry_cipher_decrypt_t dec_fn = c->spec->decrypt; + size_t nblocks = inbuflen >> blocksize_shift; + unsigned int burn, nburn; + int i; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if ((inbuflen & blocksize_mask) && !(inbuflen > blocksize)) + return GPG_ERR_INV_LENGTH; + + burn = 0; + + if (inbuflen > blocksize) + { + nblocks--; + if ((inbuflen & blocksize_mask) == 0) + nblocks--; + cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize); + } + + burn = cbc_decrypt_inner(c, outbuf, inbuf, nblocks, blocksize); + inbuf += nblocks << blocksize_shift; + outbuf += nblocks << blocksize_shift; + + if (inbuflen > blocksize) + { + size_t restbytes; + + if ((inbuflen & blocksize_mask) == 0) + restbytes = blocksize; + else + restbytes = inbuflen & blocksize_mask; + + cipher_block_cpy (c->lastiv, c->u_iv.iv, blocksize ); /* Save Cn-2. */ + buf_cpy (c->u_iv.iv, inbuf + blocksize, restbytes ); /* Save Cn. */ + + nburn = dec_fn ( &c->context.c, outbuf, inbuf ); + burn = nburn > burn ? nburn : burn; + buf_xor(outbuf, outbuf, c->u_iv.iv, restbytes); + + buf_cpy (outbuf + blocksize, outbuf, restbytes); + for(i=restbytes; i < blocksize; i++) + c->u_iv.iv[i] = outbuf[i]; + nburn = dec_fn (&c->context.c, outbuf, c->u_iv.iv); + burn = nburn > burn ? nburn : burn; + cipher_block_xor(outbuf, outbuf, c->lastiv, blocksize); + /* c->lastiv is now really lastlastiv, does this matter? */ + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-ccm.c b/comm/third_party/libgcrypt/cipher/cipher-ccm.c new file mode 100644 index 0000000000..dcb268d084 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-ccm.c @@ -0,0 +1,415 @@ +/* cipher-ccm.c - CTR mode with CBC-MAC mode implementation + * Copyright (C) 2013 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +#define set_burn(burn, nburn) do { \ + unsigned int __nburn = (nburn); \ + (burn) = (burn) > __nburn ? (burn) : __nburn; } while (0) + + +static unsigned int +do_cbc_mac (gcry_cipher_hd_t c, const unsigned char *inbuf, size_t inlen, + int do_padding) +{ + const unsigned int blocksize = 16; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + unsigned char tmp[blocksize]; + unsigned int burn = 0; + unsigned int unused = c->u_mode.ccm.mac_unused; + size_t nblocks; + size_t n; + + if (inlen == 0 && (unused == 0 || !do_padding)) + return 0; + + do + { + if (inlen + unused < blocksize || unused > 0) + { + n = (inlen > blocksize - unused) ? blocksize - unused : inlen; + + buf_cpy (&c->u_mode.ccm.macbuf[unused], inbuf, n); + unused += n; + inlen -= n; + inbuf += n; + } + if (!inlen) + { + if (!do_padding) + break; + + n = blocksize - unused; + if (n > 0) + { + memset (&c->u_mode.ccm.macbuf[unused], 0, n); + unused = blocksize; + } + } + + if (unused > 0) + { + /* Process one block from macbuf. */ + cipher_block_xor(c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.macbuf, + blocksize); + set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv )); + + unused = 0; + } + + if (c->bulk.cbc_enc) + { + nblocks = inlen / blocksize; + c->bulk.cbc_enc (&c->context.c, c->u_iv.iv, tmp, inbuf, nblocks, 1); + inbuf += nblocks * blocksize; + inlen -= nblocks * blocksize; + + wipememory (tmp, sizeof(tmp)); + } + else + { + while (inlen >= blocksize) + { + cipher_block_xor(c->u_iv.iv, c->u_iv.iv, inbuf, blocksize); + + set_burn (burn, enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv )); + + inlen -= blocksize; + inbuf += blocksize; + } + } + } + while (inlen > 0); + + c->u_mode.ccm.mac_unused = unused; + + if (burn) + burn += 4 * sizeof(void *); + + return burn; +} + + +gcry_err_code_t +_gcry_cipher_ccm_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce, + size_t noncelen) +{ + unsigned int marks_key; + size_t L = 15 - noncelen; + size_t L_; + + L_ = L - 1; + + if (!nonce) + return GPG_ERR_INV_ARG; + /* Length field must be 2, 3, ..., or 8. */ + if (L < 2 || L > 8) + return GPG_ERR_INV_LENGTH; + + /* Reset state */ + marks_key = c->marks.key; + memset (&c->u_mode, 0, sizeof(c->u_mode)); + memset (&c->marks, 0, sizeof(c->marks)); + memset (&c->u_iv, 0, sizeof(c->u_iv)); + memset (&c->u_ctr, 0, sizeof(c->u_ctr)); + memset (c->lastiv, 0, sizeof(c->lastiv)); + c->unused = 0; + c->marks.key = marks_key; + + /* Setup CTR */ + c->u_ctr.ctr[0] = L_; + memcpy (&c->u_ctr.ctr[1], nonce, noncelen); + memset (&c->u_ctr.ctr[1 + noncelen], 0, L); + + /* Setup IV */ + c->u_iv.iv[0] = L_; + memcpy (&c->u_iv.iv[1], nonce, noncelen); + /* Add (8 * M_ + 64 * flags) to iv[0] and set iv[noncelen + 1 ... 15] later + in set_aad. */ + memset (&c->u_iv.iv[1 + noncelen], 0, L); + + c->u_mode.ccm.nonce = 1; + + return GPG_ERR_NO_ERROR; +} + + +gcry_err_code_t +_gcry_cipher_ccm_set_lengths (gcry_cipher_hd_t c, u64 encryptlen, u64 aadlen, + u64 taglen) +{ + unsigned int burn = 0; + unsigned char b0[16]; + size_t noncelen = 15 - (c->u_iv.iv[0] + 1); + u64 M = taglen; + u64 M_; + int i; + + M_ = (M - 2) / 2; + + /* Authentication field must be 4, 6, 8, 10, 12, 14 or 16. 
*/ + if ((M_ * 2 + 2) != M || M < 4 || M > 16) + return GPG_ERR_INV_LENGTH; + if (!c->u_mode.ccm.nonce || c->marks.tag) + return GPG_ERR_INV_STATE; + if (c->u_mode.ccm.lengths) + return GPG_ERR_INV_STATE; + + c->u_mode.ccm.authlen = taglen; + c->u_mode.ccm.encryptlen = encryptlen; + c->u_mode.ccm.aadlen = aadlen; + + /* Complete IV setup. */ + c->u_iv.iv[0] += (aadlen > 0) * 64 + M_ * 8; + for (i = 16 - 1; i >= 1 + noncelen; i--) + { + c->u_iv.iv[i] = encryptlen & 0xff; + encryptlen >>= 8; + } + + memcpy (b0, c->u_iv.iv, 16); + memset (c->u_iv.iv, 0, 16); + + set_burn (burn, do_cbc_mac (c, b0, 16, 0)); + + if (aadlen == 0) + { + /* Do nothing. */ + } + else if (aadlen > 0 && aadlen <= (unsigned int)0xfeff) + { + b0[0] = (aadlen >> 8) & 0xff; + b0[1] = aadlen & 0xff; + set_burn (burn, do_cbc_mac (c, b0, 2, 0)); + } + else if (aadlen > 0xfeff && aadlen <= (unsigned int)0xffffffff) + { + b0[0] = 0xff; + b0[1] = 0xfe; + buf_put_be32(&b0[2], aadlen); + set_burn (burn, do_cbc_mac (c, b0, 6, 0)); + } + else if (aadlen > (unsigned int)0xffffffff) + { + b0[0] = 0xff; + b0[1] = 0xff; + buf_put_be64(&b0[2], aadlen); + set_burn (burn, do_cbc_mac (c, b0, 10, 0)); + } + + /* Generate S_0 and increase counter. */ + set_burn (burn, c->spec->encrypt ( &c->context.c, c->u_mode.ccm.s0, + c->u_ctr.ctr )); + c->u_ctr.ctr[15]++; + + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + + c->u_mode.ccm.lengths = 1; + + return GPG_ERR_NO_ERROR; +} + + +gcry_err_code_t +_gcry_cipher_ccm_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf, + size_t abuflen) +{ + unsigned int burn; + + if (abuflen > 0 && !abuf) + return GPG_ERR_INV_ARG; + if (!c->u_mode.ccm.nonce || !c->u_mode.ccm.lengths || c->marks.tag) + return GPG_ERR_INV_STATE; + if (abuflen > c->u_mode.ccm.aadlen) + return GPG_ERR_INV_LENGTH; + + c->u_mode.ccm.aadlen -= abuflen; + burn = do_cbc_mac (c, abuf, abuflen, c->u_mode.ccm.aadlen == 0); + + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + + return GPG_ERR_NO_ERROR; +} + + +gcry_err_code_t +_gcry_cipher_ccm_tag (gcry_cipher_hd_t c, unsigned char *outbuf, + size_t outbuflen, int check) +{ + unsigned int burn; + + if (!outbuf || outbuflen == 0) + return GPG_ERR_INV_ARG; + /* Tag length must be same as initial authlen. */ + if (c->u_mode.ccm.authlen != outbuflen) + return GPG_ERR_INV_LENGTH; + if (!c->u_mode.ccm.nonce || !c->u_mode.ccm.lengths || c->u_mode.ccm.aadlen > 0) + return GPG_ERR_INV_STATE; + /* Initial encrypt length must match with length of actual data processed. */ + if (c->u_mode.ccm.encryptlen > 0) + return GPG_ERR_UNFINISHED; + + if (!c->marks.tag) + { + burn = do_cbc_mac (c, NULL, 0, 1); /* Perform final padding. */ + + /* Add S_0 */ + cipher_block_xor (c->u_iv.iv, c->u_iv.iv, c->u_mode.ccm.s0, 16); + + wipememory (c->u_ctr.ctr, 16); + wipememory (c->u_mode.ccm.s0, 16); + wipememory (c->u_mode.ccm.macbuf, 16); + + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + + c->marks.tag = 1; + } + + if (!check) + { + memcpy (outbuf, c->u_iv.iv, outbuflen); + return GPG_ERR_NO_ERROR; + } + else + { + return buf_eq_const(outbuf, c->u_iv.iv, outbuflen) ? 
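+/* [Editor's note] buf_eq_const (bufhelp.h) compares the tags in
+ * constant time; a plain memcmp would return at the first mismatching
+ * byte and leak, via timing, how much of a forged tag was correct.  The
+ * standard technique, for reference (a sketch of the idea, not the
+ * actual helper):
+ *
+ *   static int ct_eq (const unsigned char *a, const unsigned char *b,
+ *                     size_t n)
+ *   {
+ *     unsigned char diff = 0;
+ *     size_t i;
+ *     for (i = 0; i < n; i++)
+ *       diff |= a[i] ^ b[i];   // branch-free accumulation
+ *     return diff == 0;        // 1 iff all N bytes matched
+ *   }
+ */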
+ GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM; + } +} + + +gcry_err_code_t +_gcry_cipher_ccm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen) +{ + return _gcry_cipher_ccm_tag (c, outtag, taglen, 0); +} + + +gcry_err_code_t +_gcry_cipher_ccm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen) +{ + return _gcry_cipher_ccm_tag (c, (unsigned char *)intag, taglen, 1); +} + + +gcry_err_code_t +_gcry_cipher_ccm_encrypt (gcry_cipher_hd_t c, unsigned char *outbuf, + size_t outbuflen, const unsigned char *inbuf, + size_t inbuflen) +{ + gcry_err_code_t err = 0; + unsigned int burn = 0; + unsigned int nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (!c->u_mode.ccm.nonce || c->marks.tag || !c->u_mode.ccm.lengths || + c->u_mode.ccm.aadlen > 0) + return GPG_ERR_INV_STATE; + if (inbuflen > c->u_mode.ccm.encryptlen) + return GPG_ERR_INV_LENGTH; + + while (inbuflen) + { + size_t currlen = inbuflen; + + /* Since checksumming is done before encryption, process input in 24KiB + * chunks to keep data loaded in L1 cache for encryption. */ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + c->u_mode.ccm.encryptlen -= currlen; + nburn = do_cbc_mac (c, inbuf, currlen, 0); + burn = nburn > burn ? nburn : burn; + + err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen); + if (err) + break; + + outbuf += currlen; + inbuf += currlen; + outbuflen -= currlen; + inbuflen -= currlen; + } + + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + return err; +} + + +gcry_err_code_t +_gcry_cipher_ccm_decrypt (gcry_cipher_hd_t c, unsigned char *outbuf, + size_t outbuflen, const unsigned char *inbuf, + size_t inbuflen) +{ + gcry_err_code_t err = 0; + unsigned int burn = 0; + unsigned int nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (!c->u_mode.ccm.nonce || c->marks.tag || !c->u_mode.ccm.lengths || + c->u_mode.ccm.aadlen > 0) + return GPG_ERR_INV_STATE; + if (inbuflen > c->u_mode.ccm.encryptlen) + return GPG_ERR_INV_LENGTH; + + while (inbuflen) + { + size_t currlen = inbuflen; + + /* Since checksumming is done after decryption, process input in 24KiB + * chunks to keep data loaded in L1 cache for checksumming. */ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen); + if (err) + break; + + c->u_mode.ccm.encryptlen -= currlen; + nburn = do_cbc_mac (c, outbuf, currlen, 0); + burn = nburn > burn ? nburn : burn; + + outbuf += currlen; + inbuf += currlen; + outbuflen -= currlen; + inbuflen -= currlen; + } + + if (burn) + _gcry_burn_stack (burn + sizeof(void *) * 5); + return err; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-cfb.c b/comm/third_party/libgcrypt/cipher/cipher-cfb.c new file mode 100644 index 0000000000..012c6c13c3 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-cfb.c @@ -0,0 +1,317 @@ +/* cipher-cfb.c - Generic CFB mode implementation + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +gcry_err_code_t +_gcry_cipher_cfb_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + unsigned char *ivp; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + size_t blocksize_x_2 = blocksize + blocksize; + unsigned int burn, nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if ( inbuflen <= c->unused ) + { + /* Short enough to be encoded by the remaining XOR mask. */ + /* XOR the input with the IV and store input into IV. */ + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor_2dst(outbuf, ivp, inbuf, inbuflen); + c->unused -= inbuflen; + return 0; + } + + burn = 0; + + if ( c->unused ) + { + /* XOR the input with the IV and store input into IV */ + inbuflen -= c->unused; + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor_2dst(outbuf, ivp, inbuf, c->unused); + outbuf += c->unused; + inbuf += c->unused; + c->unused = 0; + } + + /* Now we can process complete blocks. We use a loop as long as we + have at least 2 blocks and use conditions for the rest. This + also allows to use a bulk encryption function if available. */ + if (inbuflen >= blocksize_x_2 && c->bulk.cfb_enc) + { + size_t nblocks = inbuflen >> blocksize_shift; + c->bulk.cfb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + outbuf += nblocks << blocksize_shift; + inbuf += nblocks << blocksize_shift; + inbuflen -= nblocks << blocksize_shift; + } + else + { + while ( inbuflen >= blocksize_x_2 ) + { + /* Encrypt the IV. */ + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + /* XOR the input with the IV and store input into IV. */ + cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + } + + if ( inbuflen >= blocksize ) + { + /* Save the current IV and then encrypt the IV. */ + cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize ); + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + /* XOR the input with the IV and store input into IV */ + cipher_block_xor_2dst(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + if ( inbuflen ) + { + /* Save the current IV and then encrypt the IV. */ + cipher_block_cpy( c->lastiv, c->u_iv.iv, blocksize ); + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + c->unused = blocksize; + /* Apply the XOR. 
*/ + c->unused -= inbuflen; + buf_xor_2dst(outbuf, c->u_iv.iv, inbuf, inbuflen); + outbuf += inbuflen; + inbuf += inbuflen; + inbuflen = 0; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cfb_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + unsigned char *ivp; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + size_t blocksize_x_2 = blocksize + blocksize; + unsigned int burn, nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if (inbuflen <= c->unused) + { + /* Short enough to be encoded by the remaining XOR mask. */ + /* XOR the input with the IV and store input into IV. */ + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor_n_copy(outbuf, ivp, inbuf, inbuflen); + c->unused -= inbuflen; + return 0; + } + + burn = 0; + + if (c->unused) + { + /* XOR the input with the IV and store input into IV. */ + inbuflen -= c->unused; + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor_n_copy(outbuf, ivp, inbuf, c->unused); + outbuf += c->unused; + inbuf += c->unused; + c->unused = 0; + } + + /* Now we can process complete blocks. We use a loop as long as we + have at least 2 blocks and use conditions for the rest. This + also allows to use a bulk encryption function if available. */ + if (inbuflen >= blocksize_x_2 && c->bulk.cfb_dec) + { + size_t nblocks = inbuflen >> blocksize_shift; + c->bulk.cfb_dec (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + outbuf += nblocks << blocksize_shift; + inbuf += nblocks << blocksize_shift; + inbuflen -= nblocks << blocksize_shift; + } + else + { + while (inbuflen >= blocksize_x_2 ) + { + /* Encrypt the IV. */ + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + /* XOR the input with the IV and store input into IV. */ + cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + } + + if (inbuflen >= blocksize ) + { + /* Save the current IV and then encrypt the IV. */ + cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize); + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + /* XOR the input with the IV and store input into IV */ + cipher_block_xor_n_copy(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + + if (inbuflen) + { + /* Save the current IV and then encrypt the IV. */ + cipher_block_cpy ( c->lastiv, c->u_iv.iv, blocksize ); + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + c->unused = blocksize; + /* Apply the XOR. */ + c->unused -= inbuflen; + buf_xor_n_copy(outbuf, c->u_iv.iv, inbuf, inbuflen); + outbuf += inbuflen; + inbuf += inbuflen; + inbuflen = 0; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cfb8_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize = c->spec->blocksize; + unsigned int burn, nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + burn = 0; + + while ( inbuflen > 0) + { + int i; + + /* Encrypt the IV. 
*/ + nburn = enc_fn ( &c->context.c, c->lastiv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + + outbuf[0] = c->lastiv[0] ^ inbuf[0]; + + /* Bitshift iv by 8 bit to the left */ + for (i = 0; i < blocksize-1; i++) + c->u_iv.iv[i] = c->u_iv.iv[i+1]; + + /* append cipher text to iv */ + c->u_iv.iv[blocksize-1] = outbuf[0]; + + outbuf += 1; + inbuf += 1; + inbuflen -= 1; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_cfb8_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize = c->spec->blocksize; + unsigned int burn, nburn; + unsigned char appendee; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + burn = 0; + + while (inbuflen > 0) + { + int i; + + /* Encrypt the IV. */ + nburn = enc_fn ( &c->context.c, c->lastiv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + + /* inbuf might == outbuf, make sure we keep the value + so we can append it later */ + appendee = inbuf[0]; + + outbuf[0] = inbuf[0] ^ c->lastiv[0]; + + /* Bitshift iv by 8 bit to the left */ + for (i = 0; i < blocksize-1; i++) + c->u_iv.iv[i] = c->u_iv.iv[i+1]; + + c->u_iv.iv[blocksize-1] = appendee; + + outbuf += 1; + inbuf += 1; + inbuflen -= 1; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-cmac.c b/comm/third_party/libgcrypt/cipher/cipher-cmac.c new file mode 100644 index 0000000000..4efd1e19b4 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-cmac.c @@ -0,0 +1,292 @@ +/* cmac.c - CMAC, Cipher-based MAC. + * Copyright (C) 2013,2018 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "cipher-internal.h" +#include "bufhelp.h" + + +#define set_burn(burn, nburn) do { \ + unsigned int __nburn = (nburn); \ + (burn) = (burn) > __nburn ? (burn) : __nburn; } while (0) + + +gcry_err_code_t +_gcry_cmac_write (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx, + const byte * inbuf, size_t inlen) +{ + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + byte outbuf[MAX_BLOCKSIZE]; + unsigned int burn = 0; + unsigned int nblocks; + size_t n; + + if (ctx->tag) + return GPG_ERR_INV_STATE; + + if (!inbuf) + return GPG_ERR_INV_ARG; + + if (inlen == 0) + return 0; + + /* Last block is needed for cmac_final. 
*/ + if (ctx->mac_unused + inlen <= blocksize) + { + buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, inlen); + ctx->mac_unused += inlen; + inbuf += inlen; + inlen -= inlen; + + return 0; + } + + if (ctx->mac_unused) + { + n = inlen; + if (n > blocksize - ctx->mac_unused) + n = blocksize - ctx->mac_unused; + + buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, n); + ctx->mac_unused += n; + inbuf += n; + inlen -= n; + + cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize); + set_burn (burn, enc_fn (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv)); + + ctx->mac_unused = 0; + } + + if (c->bulk.cbc_enc && inlen > blocksize) + { + nblocks = inlen >> blocksize_shift; + nblocks -= ((nblocks << blocksize_shift) == inlen); + + c->bulk.cbc_enc (&c->context.c, ctx->u_iv.iv, outbuf, inbuf, nblocks, 1); + inbuf += nblocks << blocksize_shift; + inlen -= nblocks << blocksize_shift; + + wipememory (outbuf, sizeof (outbuf)); + } + else + while (inlen > blocksize) + { + cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, inbuf, blocksize); + set_burn (burn, enc_fn (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv)); + inlen -= blocksize; + inbuf += blocksize; + } + + /* Make sure that last block is passed to cmac_final. */ + if (inlen == 0) + BUG (); + + n = inlen; + if (n > blocksize - ctx->mac_unused) + n = blocksize - ctx->mac_unused; + + buf_cpy (&ctx->macbuf[ctx->mac_unused], inbuf, n); + ctx->mac_unused += n; + inbuf += n; + inlen -= n; + + if (burn) + _gcry_burn_stack (burn + 4 * sizeof (void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cmac_generate_subkeys (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx) +{ + const unsigned int blocksize = c->spec->blocksize; + byte rb, carry, t, bi; + unsigned int burn; + int i, j; + union + { + size_t _aligned; + byte buf[MAX_BLOCKSIZE]; + } u; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. */ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return GPG_ERR_INV_CIPHER_MODE; + + if (MAX_BLOCKSIZE < blocksize) + BUG (); + + /* encrypt zero block */ + memset (u.buf, 0, blocksize); + burn = c->spec->encrypt (&c->context.c, u.buf, u.buf); + + /* Currently supported blocksizes are 16 and 8. */ + rb = blocksize == 16 ? 0x87 : 0x1B /* blocksize == 8 */ ; + + for (j = 0; j < 2; j++) + { + /* Generate subkeys K1 and K2 */ + carry = 0; + for (i = blocksize - 1; i >= 0; i--) + { + bi = u.buf[i]; + t = carry | (bi << 1); + carry = bi >> 7; + u.buf[i] = t & 0xff; + ctx->subkeys[j][i] = u.buf[i]; + } + u.buf[blocksize - 1] ^= carry ? rb : 0; + ctx->subkeys[j][blocksize - 1] = u.buf[blocksize - 1]; + } + + wipememory (&u, sizeof (u)); + if (burn) + _gcry_burn_stack (burn + 4 * sizeof (void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cmac_final (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx) +{ + const unsigned int blocksize = c->spec->blocksize; + unsigned int count = ctx->mac_unused; + unsigned int burn; + byte *subkey; + + /* Tell compiler that we require a cipher with a 64bit or 128 bit block + * length, to allow better optimization of this function. 
*/ + if (blocksize > 16 || blocksize < 8 || blocksize & (8 - 1)) + return GPG_ERR_INV_CIPHER_MODE; + + if (count == blocksize) + subkey = ctx->subkeys[0]; /* K1 */ + else + { + subkey = ctx->subkeys[1]; /* K2 */ + ctx->macbuf[count++] = 0x80; + while (count < blocksize) + ctx->macbuf[count++] = 0; + } + + cipher_block_xor (ctx->macbuf, ctx->macbuf, subkey, blocksize); + + cipher_block_xor (ctx->u_iv.iv, ctx->u_iv.iv, ctx->macbuf, blocksize); + burn = c->spec->encrypt (&c->context.c, ctx->u_iv.iv, ctx->u_iv.iv); + if (burn) + _gcry_burn_stack (burn + 4 * sizeof (void *)); + + ctx->mac_unused = 0; + + return 0; +} + + +static gcry_err_code_t +cmac_tag (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx, + unsigned char *tag, size_t taglen, int check) +{ + gcry_err_code_t ret; + + if (!tag || taglen == 0 || taglen > c->spec->blocksize) + return GPG_ERR_INV_ARG; + + if (!ctx->tag) + { + ret = _gcry_cmac_final (c, ctx); + if (ret != 0) + return ret; + + ctx->tag = 1; + } + + if (!check) + { + memcpy (tag, ctx->u_iv.iv, taglen); + return GPG_ERR_NO_ERROR; + } + else + { + return buf_eq_const (tag, ctx->u_iv.iv, taglen) ? + GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM; + } +} + + +void +_gcry_cmac_reset (gcry_cmac_context_t *ctx) +{ + char tmp_buf[sizeof(ctx->subkeys)]; + + /* Only keep subkeys when reseting context. */ + + buf_cpy (tmp_buf, ctx->subkeys, sizeof(ctx->subkeys)); + memset (ctx, 0, sizeof(*ctx)); + buf_cpy (ctx->subkeys, tmp_buf, sizeof(ctx->subkeys)); + wipememory (tmp_buf, sizeof(tmp_buf)); +} + + +gcry_err_code_t +_gcry_cipher_cmac_authenticate (gcry_cipher_hd_t c, + const unsigned char *abuf, size_t abuflen) +{ + if (abuflen > 0 && !abuf) + return GPG_ERR_INV_ARG; + /* To support new blocksize, update cmac_generate_subkeys() then add new + blocksize here. */ + if (c->spec->blocksize != 16 && c->spec->blocksize != 8) + return GPG_ERR_INV_CIPHER_MODE; + + return _gcry_cmac_write (c, &c->u_mode.cmac, abuf, abuflen); +} + + +gcry_err_code_t +_gcry_cipher_cmac_get_tag (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen) +{ + return cmac_tag (c, &c->u_mode.cmac, outtag, taglen, 0); +} + + +gcry_err_code_t +_gcry_cipher_cmac_check_tag (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen) +{ + return cmac_tag (c, &c->u_mode.cmac, (unsigned char *) intag, taglen, 1); +} + +gcry_err_code_t +_gcry_cipher_cmac_set_subkeys (gcry_cipher_hd_t c) +{ + return _gcry_cmac_generate_subkeys (c, &c->u_mode.cmac); +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-ctr.c b/comm/third_party/libgcrypt/cipher/cipher-ctr.c new file mode 100644 index 0000000000..5f0afc2f88 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-ctr.c @@ -0,0 +1,120 @@ +/* cipher-ctr.c - Generic CTR mode implementation + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +gcry_err_code_t +_gcry_cipher_ctr_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + size_t n; + int i; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + size_t nblocks; + unsigned int burn, nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + burn = 0; + + /* First process a left over encrypted counter. */ + if (c->unused) + { + gcry_assert (c->unused < blocksize); + i = blocksize - c->unused; + n = c->unused > inbuflen ? inbuflen : c->unused; + buf_xor(outbuf, inbuf, &c->lastiv[i], n); + c->unused -= n; + inbuf += n; + outbuf += n; + inbuflen -= n; + } + + /* Use a bulk method if available. */ + nblocks = inbuflen >> blocksize_shift; + if (nblocks && c->bulk.ctr_enc) + { + c->bulk.ctr_enc (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks); + inbuf += nblocks << blocksize_shift; + outbuf += nblocks << blocksize_shift; + inbuflen -= nblocks << blocksize_shift; + } + + /* If we don't have a bulk method use the standard method. We also + use this method for the a remaining partial block. */ + if (inbuflen) + { + unsigned char tmp[MAX_BLOCKSIZE]; + + n = blocksize; + do + { + nburn = enc_fn (&c->context.c, tmp, c->u_ctr.ctr); + burn = nburn > burn ? nburn : burn; + + cipher_block_add(c->u_ctr.ctr, 1, blocksize); + + if (inbuflen < blocksize) + break; + cipher_block_xor(outbuf, inbuf, tmp, blocksize); + + inbuflen -= n; + outbuf += n; + inbuf += n; + } + while (inbuflen); + + if (inbuflen) + { + n = inbuflen; + buf_xor(outbuf, inbuf, tmp, inbuflen); + + inbuflen -= n; + outbuf += n; + inbuf += n; + } + + /* Save the unused bytes of the counter. */ + c->unused = blocksize - n; + if (c->unused) + buf_cpy (c->lastiv+n, tmp+n, c->unused); + + wipememory (tmp, sizeof tmp); + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-eax.c b/comm/third_party/libgcrypt/cipher/cipher-eax.c new file mode 100644 index 0000000000..08f815a9e4 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-eax.c @@ -0,0 +1,289 @@ +/* cipher-eax.c - EAX implementation + * Copyright (C) 2018 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +gcry_err_code_t +_gcry_cipher_eax_encrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t err; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->marks.tag) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + { + err = _gcry_cipher_eax_set_nonce (c, NULL, 0); + if (err != 0) + return err; + } + + while (inbuflen) + { + size_t currlen = inbuflen; + + /* Since checksumming is done after encryption, process input in 24KiB + * chunks to keep data loaded in L1 cache for checksumming. */ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen); + if (err != 0) + return err; + + err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, outbuf, + currlen); + if (err != 0) + return err; + + outbuf += currlen; + inbuf += currlen; + outbuflen -= currlen; + inbuflen -= currlen; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_eax_decrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t err; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->marks.tag) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + { + err = _gcry_cipher_eax_set_nonce (c, NULL, 0); + if (err != 0) + return err; + } + + while (inbuflen) + { + size_t currlen = inbuflen; + + /* Since checksumming is done before decryption, process input in 24KiB + * chunks to keep data loaded in L1 cache for decryption. */ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, inbuf, + currlen); + if (err != 0) + return err; + + err = _gcry_cipher_ctr_encrypt (c, outbuf, outbuflen, inbuf, currlen); + if (err != 0) + return err; + + outbuf += currlen; + inbuf += currlen; + outbuflen -= currlen; + inbuflen -= currlen; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_eax_authenticate (gcry_cipher_hd_t c, + const byte * aadbuf, size_t aadbuflen) +{ + gcry_err_code_t err; + + if (c->marks.tag) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + { + err = _gcry_cipher_eax_set_nonce (c, NULL, 0); + if (err != 0) + return err; + } + + return _gcry_cmac_write (c, &c->u_mode.eax.cmac_header, aadbuf, aadbuflen); +} + + +gcry_err_code_t +_gcry_cipher_eax_setkey (gcry_cipher_hd_t c) +{ + gcry_err_code_t err; + + err = _gcry_cmac_generate_subkeys (c, &c->u_mode.eax.cmac_header); + if (err != 0) + return err; + + buf_cpy (c->u_mode.eax.cmac_ciphertext.subkeys, + c->u_mode.eax.cmac_header.subkeys, + sizeof(c->u_mode.eax.cmac_header.subkeys)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_eax_set_nonce (gcry_cipher_hd_t c, const byte *nonce, + size_t noncelen) +{ + gcry_cmac_context_t nonce_cmac; + unsigned char initbuf[MAX_BLOCKSIZE]; + gcry_err_code_t err; + + c->marks.iv = 0; + c->marks.tag = 0; + + _gcry_cmac_reset (&c->u_mode.eax.cmac_header); + _gcry_cmac_reset (&c->u_mode.eax.cmac_ciphertext); + + /* Calculate nonce CMAC */ + + memset(&nonce_cmac, 0, sizeof(nonce_cmac)); + memset(&initbuf, 0, sizeof(initbuf)); + + buf_cpy (&nonce_cmac.subkeys, c->u_mode.eax.cmac_header.subkeys, + sizeof(c->u_mode.eax.cmac_header.subkeys)); + + err = _gcry_cmac_write (c, &nonce_cmac, initbuf, c->spec->blocksize); + if (err != 0) + return err; + + if (noncelen != 0) + { + err = _gcry_cmac_write (c, 
&nonce_cmac, nonce, noncelen); + if (err != 0) + return err; + } + + err = _gcry_cmac_final (c, &nonce_cmac); + if (err != 0) + return err; + + cipher_block_cpy (c->u_iv.iv, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE); + cipher_block_cpy (c->u_ctr.ctr, nonce_cmac.u_iv.iv, MAX_BLOCKSIZE); + + wipememory (&nonce_cmac, sizeof(nonce_cmac)); + + /* Prepare header CMAC */ + + initbuf[c->spec->blocksize - 1] = 1; + err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_header, initbuf, + c->spec->blocksize); + if (err != 0) + return err; + + /* Prepare ciphertext CMAC */ + + initbuf[c->spec->blocksize - 1] = 2; + err = _gcry_cmac_write (c, &c->u_mode.eax.cmac_ciphertext, initbuf, + c->spec->blocksize); + if (err != 0) + return err; + + c->marks.iv = 1; + c->marks.tag = 0; + + return 0; +} + + +static gcry_err_code_t +_gcry_cipher_eax_tag (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, int check) +{ + gcry_err_code_t err; + + if (!c->marks.tag) + { + err = _gcry_cmac_final (c, &c->u_mode.eax.cmac_header); + if (err != 0) + return err; + + err = _gcry_cmac_final (c, &c->u_mode.eax.cmac_ciphertext); + if (err != 0) + return err; + + cipher_block_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_header.u_iv.iv, + MAX_BLOCKSIZE); + cipher_block_xor_1 (c->u_iv.iv, c->u_mode.eax.cmac_ciphertext.u_iv.iv, + MAX_BLOCKSIZE); + + _gcry_cmac_reset (&c->u_mode.eax.cmac_header); + _gcry_cmac_reset (&c->u_mode.eax.cmac_ciphertext); + + c->marks.tag = 1; + } + + if (!check) + { + if (outbuflen > c->spec->blocksize) + outbuflen = c->spec->blocksize; + + /* NB: We already checked that OUTBUF is large enough to hold + * the result or has valid truncated length. */ + memcpy (outbuf, c->u_iv.iv, outbuflen); + } + else + { + /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF + * and thus we need to compare its length first. */ + if (!(outbuflen <= c->spec->blocksize) + || !buf_eq_const (outbuf, c->u_iv.iv, outbuflen)) + return GPG_ERR_CHECKSUM; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_eax_get_tag (gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen) +{ + return _gcry_cipher_eax_tag (c, outtag, taglen, 0); +} + +gcry_err_code_t +_gcry_cipher_eax_check_tag (gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen) +{ + return _gcry_cipher_eax_tag (c, (unsigned char *) intag, taglen, 1); +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm-armv7-neon.S b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv7-neon.S new file mode 100644 index 0000000000..a801a5e57b --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv7-neon.S @@ -0,0 +1,341 @@ +/* cipher-gcm-armv7-neon.S - ARM/NEON accelerated GHASH + * Copyright (C) 2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#include + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) + +.syntax unified +.fpu neon +.arm + +.text + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +/* Constants */ + +.align 4 +gcry_gcm_reduction_constant: +.Lrconst64: + .quad 0xc200000000000000 + +/* Register macros */ + +#define rhash q0 +#define rhash_l d0 +#define rhash_h d1 + +#define rh1 q1 +#define rh1_l d2 +#define rh1_h d3 + +#define rbuf q2 +#define rbuf_l d4 +#define rbuf_h d5 + +#define rbuf1 q3 +#define rbuf1_l d6 +#define rbuf1_h d7 + +#define t0q q4 +#define t0l d8 +#define t0h d9 + +#define t1q q5 +#define t1l d10 +#define t1h d11 + +#define t2q q6 +#define t2l d12 +#define t2h d13 + +#define t3q q7 +#define t3l d14 +#define t3h d15 + +/* q8 */ +#define k16 d16 +#define k32 d17 + +/* q9 */ +#define k48 d18 + +#define k0 q10 + +#define rr0 q11 +#define rr0_l d22 +#define rr0_h d23 + +#define rr1 q12 +#define rr1_l d24 +#define rr1_h d25 + +#define rt0 q13 +#define rt0_l d26 +#define rt0_h d27 + +#define rt1 q14 +#define rt1_l d28 +#define rt1_h d29 + +#define rrconst q15 +#define rrconst_l d30 +#define rrconst_h d31 + +/* Macro for 64x64=>128 carry-less multiplication using vmull.p8 instruction. + * + * From "Câmara, D.; Gouvêa, C. P. L.; López, J. & Dahab, R. Fast Software + * Polynomial Multiplication on ARM Processors using the NEON Engine. The + * Second International Workshop on Modern Cryptography and Security + * Engineering — MoCrySEn, 2013". */ + +#define vmull_p64(rq, rl, rh, ad, bd) \ + vext.8 t0l, ad, ad, $1; \ + vmull.p8 t0q, t0l, bd; \ + vext.8 rl, bd, bd, $1; \ + vmull.p8 rq, ad, rl; \ + vext.8 t1l, ad, ad, $2; \ + vmull.p8 t1q, t1l, bd; \ + vext.8 t3l, bd, bd, $2; \ + vmull.p8 t3q, ad, t3l; \ + vext.8 t2l, ad, ad, $3; \ + vmull.p8 t2q, t2l, bd; \ + veor t0q, t0q, rq; \ + vext.8 rl, bd, bd, $3; \ + vmull.p8 rq, ad, rl; \ + veor t1q, t1q, t3q; \ + vext.8 t3l, bd, bd, $4; \ + vmull.p8 t3q, ad, t3l; \ + veor t0l, t0l, t0h; \ + vand t0h, t0h, k48; \ + veor t1l, t1l, t1h; \ + vand t1h, t1h, k32; \ + veor t2q, t2q, rq; \ + veor t0l, t0l, t0h; \ + veor t1l, t1l, t1h; \ + veor t2l, t2l, t2h; \ + vand t2h, t2h, k16; \ + veor t3l, t3l, t3h; \ + vmov.i64 t3h, $0; \ + vext.8 t0q, t0q, t0q, $15; \ + veor t2l, t2l, t2h; \ + vext.8 t1q, t1q, t1q, $14; \ + vmull.p8 rq, ad, bd; \ + vext.8 t2q, t2q, t2q, $13; \ + vext.8 t3q, t3q, t3q, $12; \ + veor t0q, t0q, t1q; \ + veor t2q, t2q, t3q; \ + veor rq, rq, t0q; \ + veor rq, rq, t2q; + +/* GHASH macros. + * + * See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in + * Cryptology — CT-RSA 2015" for details. + */ + +/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) + * Note: 'r1' may be 'a' or 'b', 'r0' must not be either 'a' or 'b'. + */ +#define PMUL_128x128(r0, r1, a, b, t1, t2, interleave_op) \ + veor t1##_h, b##_l, b##_h; \ + veor t1##_l, a##_l, a##_h; \ + vmull_p64( r0, r0##_l, r0##_h, a##_l, b##_l ); \ + vmull_p64( r1, r1##_l, r1##_h, a##_h, b##_h ); \ + vmull_p64( t2, t2##_h, t2##_l, t1##_h, t1##_l ); \ + interleave_op; \ + veor t2, r0; \ + veor t2, r1; \ + veor r0##_h, t2##_l; \ + veor r1##_l, t2##_h; + +/* Reduction using Xor and Shift. 
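+ * This is the shift-and-XOR form of the GHASH reduction: the 256-bit
+ * product is folded back modulo the GCM polynomial using only shifts
+ * and XORs, avoiding a second polynomial multiplication.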
+ * Input: 'r0:r1', Output: 'a'
+ *
+ * See "Shay Gueron, Michael E. Kounavis. Intel Carry-Less Multiplication
+ * Instruction and its Usage for Computing the GCM Mode" for details.
+ */
+#define REDUCTION(a, r0, r1, t, interleave_op) \
+        vshl.u32 t0q, r0, #31; \
+        vshl.u32 t1q, r0, #30; \
+        vshl.u32 t2q, r0, #25; \
+        veor t0q, t0q, t1q; \
+        veor t0q, t0q, t2q; \
+        vext.8 t, t0q, k0, #4; \
+        vext.8 t0q, k0, t0q, #(16-12); \
+        veor r0, r0, t0q; \
+        interleave_op; \
+        vshr.u32 t0q, r0, #1; \
+        vshr.u32 t1q, r0, #2; \
+        vshr.u32 t2q, r0, #7; \
+        veor t0q, t0q, t1q; \
+        veor t0q, t0q, t2q; \
+        veor t0q, t0q, t; \
+        veor r0, r0, t0q; \
+        veor a, r0, r1;
+
+#define _(...) __VA_ARGS__
+#define __ _()
+
+/* Other functional macros */
+
+#define CLEAR_REG(reg) veor reg, reg;
+
+
+/*
+ * unsigned int _gcry_ghash_armv7_neon (void *gcm_key, byte *result,
+ *                                      const byte *buf, size_t nblocks);
+ */
+.align 3
+.globl _gcry_ghash_armv7_neon
+.type _gcry_ghash_armv7_neon,%function;
+_gcry_ghash_armv7_neon:
+  /* input:
+   *    r0: gcm_key
+   *    r1: result/hash
+   *    r2: buf
+   *    r3: nblocks
+   */
+  push {r4-r6, lr}
+
+  cmp r3, #0
+  beq .Ldo_nothing
+
+  vpush {q4-q7}
+
+  vld1.64 {rhash}, [r1]
+  vld1.64 {rh1}, [r0]
+
+  vrev64.8 rhash, rhash /* byte-swap */
+
+  vmov.i64 k0, #0x0
+  vmov.i64 k16, #0xffff
+  vmov.i64 k32, #0xffffffff
+  vmov.i64 k48, #0xffffffffffff
+
+  vext.8 rhash, rhash, rhash, #8
+
+  /* Handle remaining blocks. */
+
+  vld1.64 {rbuf}, [r2]!
+  subs r3, r3, #1
+
+  vrev64.8 rbuf, rbuf /* byte-swap */
+  vext.8 rbuf, rbuf, rbuf, #8
+
+  veor rhash, rhash, rbuf
+
+  beq .Lend
+
+.Loop:
+  vld1.64 {rbuf}, [r2]!
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, rt1, _(vrev64.8 rbuf, rbuf))
+  REDUCTION(rhash, rr0, rr1, rt0, _(vext.8 rbuf, rbuf, rbuf, #8))
+  subs r3, r3, #1
+  veor rhash, rhash, rbuf
+
+  bne .Loop
+
+.Lend:
+  PMUL_128x128(rr0, rr1, rhash, rh1, rt0, rt1, _(CLEAR_REG(rbuf)))
+  REDUCTION(rhash, rr0, rr1, rt0, _(CLEAR_REG(rh1)))
+
+.Ldone:
+  CLEAR_REG(rr1)
+  vrev64.8 rhash, rhash /* byte-swap */
+  CLEAR_REG(rt0)
+  CLEAR_REG(rr0)
+  vext.8 rhash, rhash, rhash, #8
+  CLEAR_REG(rt1)
+  CLEAR_REG(t0q)
+  CLEAR_REG(t1q)
+  CLEAR_REG(t2q)
+  CLEAR_REG(t3q)
+  vst1.64 {rhash}, [r1]
+  CLEAR_REG(rhash)
+
+  vpop {q4-q7}
+
+.Ldo_nothing:
+  mov r0, #0
+  pop {r4-r6, pc}
+.size _gcry_ghash_armv7_neon,.-_gcry_ghash_armv7_neon;
+
+
+/*
+ * void _gcry_ghash_setup_armv7_neon (void *gcm_key);
+ */
+.align 3
+.globl _gcry_ghash_setup_armv7_neon
+.type _gcry_ghash_setup_armv7_neon,%function;
+_gcry_ghash_setup_armv7_neon:
+  /* input:
+   *    r0: gcm_key
+   */
+
+  vpush {q4-q7}
+
+  GET_DATA_POINTER(r2, .Lrconst64, r3)
+
+  vld1.64 {rrconst_h}, [r2]
+
+#define GCM_LSH_1(r_out, ia, ib, const_d, oa, ob, ma) \
+        /* H <<< 1 */ \
+        vshr.s64 ma, ib, #63; \
+        vshr.u64 oa, ib, #63; \
+        vshr.u64 ob, ia, #63; \
+        vand ma, const_d; \
+        vshl.u64 ib, ib, #1; \
+        vshl.u64 ia, ia, #1; \
+        vorr ob, ib; \
+        vorr oa, ia; \
+        veor ob, ma; \
+        vst1.64 {oa, ob}, [r_out]
+
+  vld1.64 {rhash}, [r0]
+  vrev64.8 rhash, rhash /* byte-swap */
+  vext.8 rhash, rhash, rhash, #8
+
+  vmov rbuf1, rhash
+  GCM_LSH_1(r0, rhash_l, rhash_h, rrconst_h, rh1_l, rh1_h, rt1_l) /* H<<<1 */
+
+  CLEAR_REG(rh1)
+  CLEAR_REG(rhash)
+  CLEAR_REG(rbuf1)
+  CLEAR_REG(rrconst)
+  vpop {q4-q7}
+  bx lr
+.size _gcry_ghash_setup_armv7_neon,.-_gcry_ghash_setup_armv7_neon;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S
new file mode 100644
index 0000000000..1de66a1626
--- /dev/null
+++ 
b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch32-ce.S @@ -0,0 +1,433 @@ +/* cipher-gcm-armv8-aarch32-ce.S - ARM/CE accelerated GHASH + * Copyright (C) 2016 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) + +.syntax unified +.arch armv8-a +.fpu crypto-neon-fp-armv8 +.arm + +.text + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +/* Constants */ + +.align 4 +gcry_gcm_reduction_constant: +.Lrconst64: + .quad 0xc200000000000000 + + +/* Register macros */ + +#define rhash q0 +#define rhash_l d0 +#define rhash_h d1 + +#define rh1 q1 +#define rh1_l d2 +#define rh1_h d3 + +#define rbuf q2 +#define rbuf_l d4 +#define rbuf_h d5 + +#define rbuf1 q3 +#define rbuf1_l d6 +#define rbuf1_h d7 + +#define rbuf2 q4 +#define rbuf2_l d8 +#define rbuf2_h d9 + +#define rbuf3 q5 +#define rbuf3_l d10 +#define rbuf3_h d11 + +#define rh2 q6 +#define rh2_l d12 +#define rh2_h d13 + +#define rh3 q7 +#define rh3_l d14 +#define rh3_h d15 + +#define rh4 q8 +#define rh4_l d16 +#define rh4_h d17 + +#define rr2 q9 +#define rr2_l d18 +#define rr2_h d19 + +#define rr3 q10 +#define rr3_l d20 +#define rr3_h d21 + +#define rr0 q11 +#define rr0_l d22 +#define rr0_h d23 + +#define rr1 q12 +#define rr1_l d24 +#define rr1_h d25 + +#define rt0 q13 +#define rt0_l d26 +#define rt0_h d27 + +#define rt1 q14 +#define rt1_l d28 +#define rt1_h d29 + +#define rrconst q15 +#define rrconst_l d30 +#define rrconst_h d31 + +/* GHASH macros */ + +/* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in + * Cryptology — CT-RSA 2015" for details. + */ + +/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) + * Note: 'r1' may be 'a' or 'b', 'r0' must not be either 'a' or 'b'. + */ +#define PMUL_128x128(r0, r1, a, b, t, interleave_op) \ + veor t##_h, b##_l, b##_h; \ + veor t##_l, a##_l, a##_h; \ + vmull.p64 r0, a##_l, b##_l; \ + vmull.p64 r1, a##_h, b##_h; \ + vmull.p64 t, t##_h, t##_l; \ + interleave_op; \ + veor t, r0; \ + veor t, r1; \ + veor r0##_h, t##_l; \ + veor r1##_l, t##_h; + +/* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A) + * Note: 'r1A' may be 'aA' or 'bA', 'r0A' must not be either 'aA' or 'bA'. + * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B) + * Note: 'r1B' may be 'aB' or 'bB', 'r0B' must not be either 'aB' or 'bB'. 
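+ * Both products use the same Karatsuba decomposition as PMUL_128x128,
+ * with the two vmull.p64 sequences interleaved to hide multiplier latency.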
+ */ +#define PMUL_128x128_2(r0A, r1A, aA, bA, r0B, r1B, aB, bB, tA, tB, interleave_op) \ + veor tA##_h, bA##_l, bA##_h; \ + veor tA##_l, aA##_l, aA##_h; \ + veor tB##_h, bB##_l, bB##_h; \ + veor tB##_l, aB##_l, aB##_h; \ + vmull.p64 r0A, aA##_l, bA##_l; \ + vmull.p64 r1A, aA##_h, bA##_h; \ + vmull.p64 tA, tA##_h, tA##_l; \ + vmull.p64 r0B, aB##_l, bB##_l; \ + vmull.p64 r1B, aB##_h, bB##_h; \ + vmull.p64 tB, tB##_h, tB##_l; \ + interleave_op; \ + veor tA, r0A; \ + veor tA, r1A; \ + veor tB, r0B; \ + veor tB, r1B; \ + veor r0A##_h, tA##_l; \ + veor r1A##_l, tA##_h; \ + veor r0B##_h, tB##_l; \ + veor r1B##_l, tB##_h; \ + +/* Input: 'r0:r1', Output: 'a' */ +#define REDUCTION(a, r0, r1, rconst, t, interleave_op) \ + vmull.p64 t, r0##_l, rconst; \ + veor r0##_h, t##_l; \ + veor r1##_l, t##_h; \ + interleave_op; \ + vmull.p64 t, r0##_h, rconst; \ + veor r1, t; \ + veor a, r0, r1; + +#define _(...) __VA_ARGS__ +#define __ _() + +/* Other functional macros */ + +#define CLEAR_REG(reg) veor reg, reg; + + +/* + * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result, + * const byte *buf, size_t nblocks, + * void *gcm_table); + */ +.align 3 +.globl _gcry_ghash_armv8_ce_pmull +.type _gcry_ghash_armv8_ce_pmull,%function; +_gcry_ghash_armv8_ce_pmull: + /* input: + * r0: gcm_key + * r1: result/hash + * r2: buf + * r3: nblocks + * %st+0: gcm_table + */ + push {r4-r6, lr} + + cmp r3, #0 + beq .Ldo_nothing + + GET_DATA_POINTER(r4, .Lrconst64, lr) + + vld1.64 {rhash}, [r1] + vld1.64 {rh1}, [r0] + + vrev64.8 rhash, rhash /* byte-swap */ + vld1.64 {rrconst_h}, [r4] + vext.8 rhash, rhash, rhash, #8 + + cmp r3, #4 + blo .Less_than_4 + + /* Bulk processing of 4 blocks per loop iteration. */ + + ldr r5, [sp, #(4*4)]; + add r6, r5, #32 + + vpush {q4-q7} + + vld1.64 {rh2-rh3}, [r5] + vld1.64 {rh4}, [r6] + + vld1.64 {rbuf-rbuf1}, [r2]! + sub r3, r3, #4 + vld1.64 {rbuf2-rbuf3}, [r2]! + + cmp r3, #4 + vrev64.8 rbuf, rbuf /* byte-swap */ + vrev64.8 rbuf1, rbuf1 /* byte-swap */ + vrev64.8 rbuf2, rbuf2 /* byte-swap */ + vrev64.8 rbuf3, rbuf3 /* byte-swap */ + + vext.8 rbuf, rbuf, rbuf, #8 + vext.8 rbuf1, rbuf1, rbuf1, #8 + vext.8 rbuf2, rbuf2, rbuf2, #8 + vext.8 rbuf3, rbuf3, rbuf3, #8 + veor rhash, rhash, rbuf /* in0 ^ hash */ + + blo .Lend_4 + +.Loop_4: + /* (in0 ^ hash) * H⁴ => rr2:rr3 */ + /* (in1) * H³ => rr0:rr1 */ + PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __) + + vld1.64 {rbuf-rbuf1}, [r2]! + sub r3, r3, #4 + veor rr0, rr0, rr2 + veor rr1, rr1, rr3 + + /* (in2) * H² => rr2:rr3 */ + /* (in3) * H¹ => rhash:rbuf3 */ + PMUL_128x128_2(rr2, rr3, rbuf2, rh2, rhash, rbuf3, rbuf3, rh1, rt0, rt1, + _(vrev64.8 rbuf, rbuf)) + + vld1.64 {rbuf2}, [r2]! + + vrev64.8 rbuf1, rbuf1 + veor rr0, rr0, rr2 + veor rr1, rr1, rr3 + + cmp r3, #4 + vext.8 rbuf, rbuf, rbuf, #8 + vext.8 rbuf1, rbuf1, rbuf1, #8 + + veor rr0, rr0, rhash + veor rr1, rr1, rbuf3 + + vld1.64 {rbuf3}, [r2]! 
+ + REDUCTION(rhash, rr0, rr1, rrconst_h, rt1, + _(vrev64.8 rbuf2, rbuf2; + vrev64.8 rbuf3, rbuf3)) + + vext.8 rbuf2, rbuf2, rbuf2, #8 + vext.8 rbuf3, rbuf3, rbuf3, #8 + veor rhash, rhash, rbuf /* in0 ^ hash */ + + bhs .Loop_4 + +.Lend_4: + /* (in0 ^ hash) * H⁴ => rr2:rr3 */ + /* (in1) * H³ => rr0:rr1 */ + PMUL_128x128_2(rr0, rr1, rbuf1, rh3, rr2, rr3, rhash, rh4, rt1, rt0, __) + + /* (in2) * H² => rhash:rbuf */ + /* (in3) * H¹ => rbuf1:rbuf2 */ + PMUL_128x128_2(rhash, rbuf, rbuf2, rh2, rbuf1, rbuf2, rbuf3, rh1, rt0, rt1, + _(veor rr0, rr0, rr2; + veor rr1, rr1, rr3)) + + veor rr0, rr0, rhash + veor rr1, rr1, rbuf + + veor rr0, rr0, rbuf1 + veor rr1, rr1, rbuf2 + + REDUCTION(rhash, rr0, rr1, rrconst_h, rt1, + _(CLEAR_REG(rr2); + CLEAR_REG(rr3); + CLEAR_REG(rbuf1); + CLEAR_REG(rbuf2); + CLEAR_REG(rbuf3); + CLEAR_REG(rh2); + CLEAR_REG(rh3); + CLEAR_REG(rh4))) + + vpop {q4-q7} + + cmp r3, #0 + beq .Ldone + +.Less_than_4: + /* Handle remaining blocks. */ + + vld1.64 {rbuf}, [r2]! + subs r3, r3, #1 + + vrev64.8 rbuf, rbuf /* byte-swap */ + vext.8 rbuf, rbuf, rbuf, #8 + + veor rhash, rhash, rbuf + + beq .Lend + +.Loop: + vld1.64 {rbuf}, [r2]! + subs r3, r3, #1 + PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(vrev64.8 rbuf, rbuf)) + REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(vext.8 rbuf, rbuf, rbuf, #8)) + veor rhash, rhash, rbuf + + bne .Loop + +.Lend: + PMUL_128x128(rr0, rr1, rhash, rh1, rt0, _(CLEAR_REG(rbuf))) + REDUCTION(rhash, rr0, rr1, rrconst_h, rt0, _(CLEAR_REG(rh1))) + +.Ldone: + CLEAR_REG(rr1) + vrev64.8 rhash, rhash /* byte-swap */ + CLEAR_REG(rt0) + CLEAR_REG(rr0) + vext.8 rhash, rhash, rhash, #8 + CLEAR_REG(rt1) + vst1.64 {rhash}, [r1] + CLEAR_REG(rhash) + +.Ldo_nothing: + mov r0, #0 + pop {r4-r6, pc} +.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull; + + +/* + * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table); + */ +.align 3 +.globl _gcry_ghash_setup_armv8_ce_pmull +.type _gcry_ghash_setup_armv8_ce_pmull,%function; +_gcry_ghash_setup_armv8_ce_pmull: + /* input: + * r0: gcm_key + * r1: gcm_table + */ + + vpush {q4-q7} + + GET_DATA_POINTER(r2, .Lrconst64, r3) + + vld1.64 {rrconst_h}, [r2] + +#define GCM_LSH_1(r_out, ia, ib, const_d, oa, ob, ma) \ + /* H <<< 1 */ \ + vshr.s64 ma, ib, #63; \ + vshr.u64 oa, ib, #63; \ + vshr.u64 ob, ia, #63; \ + vand ma, const_d; \ + vshl.u64 ib, ib, #1; \ + vshl.u64 ia, ia, #1; \ + vorr ob, ib; \ + vorr oa, ia; \ + veor ob, ma; \ + vst1.64 {oa, ob}, [r_out] + + vld1.64 {rhash}, [r0] + vrev64.8 rhash, rhash /* byte-swap */ + vext.8 rhash, rhash, rhash, #8 + + vmov rbuf1, rhash + GCM_LSH_1(r0, rhash_l, rhash_h, rrconst_h, rh1_l, rh1_h, rt1_l) /* H<<<1 */ + + /* H² */ + PMUL_128x128(rr0, rr1, rbuf1, rh1, rt0, __) + REDUCTION(rh2, rr0, rr1, rrconst_h, rt0, __) + vmov rhash, rh2 + GCM_LSH_1(r1, rh2_l, rh2_h, rrconst_h, rbuf1_l, rbuf1_h, rt1_l) /* H²<<<1 */ + add r1, r1, #16 + + /* H³ */ + PMUL_128x128(rr0, rr1, rhash, rh1, rt1, __) + REDUCTION(rh3, rr0, rr1, rrconst_h, rt1, __) + + /* H⁴ */ + PMUL_128x128(rr0, rr1, rhash, rbuf1, rt0, __) + REDUCTION(rh4, rr0, rr1, rrconst_h, rt0, __) + + GCM_LSH_1(r1, rh3_l, rh3_h, rrconst_h, rt0_l, rt0_h, rt1_l) /* H³<<<1 */ + add r1, r1, #16 + GCM_LSH_1(r1, rh4_l, rh4_h, rrconst_h, rt0_l, rt0_h, rt1_l) /* H⁴<<<1 */ + + CLEAR_REG(rt0) + CLEAR_REG(rt1) + CLEAR_REG(rr1) + CLEAR_REG(rr0) + CLEAR_REG(rh1) + CLEAR_REG(rh2) + CLEAR_REG(rh3) + CLEAR_REG(rh4) + CLEAR_REG(rhash) + CLEAR_REG(rbuf1) + CLEAR_REG(rrconst) + vpop {q4-q7} + bx lr +.size 
_gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull; + +#endif diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S new file mode 100644 index 0000000000..877207d3e5 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-gcm-armv8-aarch64-ce.S @@ -0,0 +1,424 @@ +/* cipher-gcm-armv8-aarch64-ce.S - ARM/CE accelerated GHASH + * Copyright (C) 2016 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include "asm-common-aarch64.h" + +#if defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) + +.cpu generic+simd+crypto + +.text + + +/* Constants */ + +.align 4 +gcry_gcm_reduction_constant: +.Lrconst: + .quad 0x87 + + +/* Register macros */ + +#define rhash v0 +#define rr0 v1 +#define rr1 v2 +#define rbuf v3 +#define rbuf1 v4 +#define rbuf2 v5 +#define rbuf3 v6 +#define rbuf4 v7 +#define rbuf5 v8 +#define rr2 v9 +#define rr3 v10 +#define rr4 v11 +#define rr5 v12 +#define rr6 v13 +#define rr7 v14 +#define rr8 v15 +#define rr9 v16 + +#define rrconst v18 +#define rh1 v19 +#define rh2 v20 +#define rh3 v21 +#define rh4 v22 +#define rh5 v23 +#define rh6 v24 +#define t0 v25 +#define t1 v26 +#define t2 v27 +#define t3 v28 +#define t4 v29 +#define t5 v30 +#define vZZ v31 + +/* GHASH macros */ + +/* See "Gouvêa, C. P. L. & López, J. Implementing GCM on ARMv8. Topics in + * Cryptology — CT-RSA 2015" for details. 
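+ *
+ * The bulk path below consumes six blocks per iteration against the
+ * pre-computed key powers H¹ through H⁶ held in gcm_table.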
+ */ + +/* Input: 'a' and 'b', Output: 'r0:r1' (low 128-bits in r0, high in r1) */ +#define PMUL_128x128(r0, r1, a, b, T0, T1, interleave_op) \ + ext T0.16b, b.16b, b.16b, #8; \ + pmull r0.1q, a.1d, b.1d; \ + pmull2 r1.1q, a.2d, b.2d; \ + pmull T1.1q, a.1d, T0.1d; \ + pmull2 T0.1q, a.2d, T0.2d; \ + interleave_op; \ + eor T0.16b, T0.16b, T1.16b; \ + ext T1.16b, vZZ.16b, T0.16b, #8; \ + ext T0.16b, T0.16b, vZZ.16b, #8; \ + eor r0.16b, r0.16b, T1.16b; \ + eor r1.16b, r1.16b, T0.16b; + +/* Input: 'aA' and 'bA', Output: 'r0A:r1A' (low 128-bits in r0A, high in r1A) + * Input: 'aB' and 'bB', Output: 'r0B:r1B' (low 128-bits in r0B, high in r1B) + * Input: 'aC' and 'bC', Output: 'r0C:r1C' (low 128-bits in r0C, high in r1C) + */ +#define PMUL_128x128_3(r0A, r1A, aA, bA, t0A, t1A, \ + r0B, r1B, aB, bB, t0B, t1B, \ + r0C, r1C, aC, bC, t0C, t1C, interleave_op) \ + ext t0A.16b, bA.16b, bA.16b, #8; \ + pmull r0A.1q, aA.1d, bA.1d; \ + pmull2 r1A.1q, aA.2d, bA.2d; \ + ext t0B.16b, bB.16b, bB.16b, #8; \ + pmull r0B.1q, aB.1d, bB.1d; \ + pmull2 r1B.1q, aB.2d, bB.2d; \ + ext t0C.16b, bC.16b, bC.16b, #8; \ + pmull r0C.1q, aC.1d, bC.1d; \ + pmull2 r1C.1q, aC.2d, bC.2d; \ + pmull t1A.1q, aA.1d, t0A.1d; \ + pmull2 t0A.1q, aA.2d, t0A.2d; \ + pmull t1B.1q, aB.1d, t0B.1d; \ + pmull2 t0B.1q, aB.2d, t0B.2d; \ + pmull t1C.1q, aC.1d, t0C.1d; \ + pmull2 t0C.1q, aC.2d, t0C.2d; \ + eor t0A.16b, t0A.16b, t1A.16b; \ + eor t0B.16b, t0B.16b, t1B.16b; \ + eor t0C.16b, t0C.16b, t1C.16b; \ + interleave_op; \ + ext t1A.16b, vZZ.16b, t0A.16b, #8; \ + ext t0A.16b, t0A.16b, vZZ.16b, #8; \ + ext t1B.16b, vZZ.16b, t0B.16b, #8; \ + ext t0B.16b, t0B.16b, vZZ.16b, #8; \ + ext t1C.16b, vZZ.16b, t0C.16b, #8; \ + ext t0C.16b, t0C.16b, vZZ.16b, #8; \ + eor r0A.16b, r0A.16b, t1A.16b; \ + eor r1A.16b, r1A.16b, t0A.16b; \ + eor r0B.16b, r0B.16b, t1B.16b; \ + eor r1B.16b, r1B.16b, t0B.16b; \ + eor r0C.16b, r0C.16b, t1C.16b; \ + eor r1C.16b, r1C.16b, t0C.16b; \ + +/* Input: 'r0:r1', Output: 'a' */ +#define REDUCTION(a, r0, r1, rconst, T0, T1, interleave_op1, interleave_op2, \ + interleave_op3) \ + pmull2 T0.1q, r1.2d, rconst.2d; \ + interleave_op1; \ + ext T1.16b, T0.16b, vZZ.16b, #8; \ + ext T0.16b, vZZ.16b, T0.16b, #8; \ + interleave_op2; \ + eor r1.16b, r1.16b, T1.16b; \ + eor r0.16b, r0.16b, T0.16b; \ + pmull T0.1q, r1.1d, rconst.1d; \ + interleave_op3; \ + eor a.16b, r0.16b, T0.16b; + +/* Other functional macros */ + +#define _(...) 
__VA_ARGS__ +#define __ _() + +#define CLEAR_REG(reg) eor reg.16b, reg.16b, reg.16b; + +#define VPUSH_ABI \ + stp d8, d9, [sp, #-16]!; \ + CFI_ADJUST_CFA_OFFSET(16); \ + stp d10, d11, [sp, #-16]!; \ + CFI_ADJUST_CFA_OFFSET(16); \ + stp d12, d13, [sp, #-16]!; \ + CFI_ADJUST_CFA_OFFSET(16); \ + stp d14, d15, [sp, #-16]!; \ + CFI_ADJUST_CFA_OFFSET(16); + +#define VPOP_ABI \ + ldp d14, d15, [sp], #16; \ + CFI_ADJUST_CFA_OFFSET(-16); \ + ldp d12, d13, [sp], #16; \ + CFI_ADJUST_CFA_OFFSET(-16); \ + ldp d10, d11, [sp], #16; \ + CFI_ADJUST_CFA_OFFSET(-16); \ + ldp d8, d9, [sp], #16; \ + CFI_ADJUST_CFA_OFFSET(-16); + +/* + * unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result, + * const byte *buf, size_t nblocks, + * void *gcm_table); + */ +.align 3 +.globl _gcry_ghash_armv8_ce_pmull +ELF(.type _gcry_ghash_armv8_ce_pmull,%function;) +_gcry_ghash_armv8_ce_pmull: + /* input: + * x0: gcm_key + * x1: result/hash + * x2: buf + * x3: nblocks + * x4: gcm_table + */ + CFI_STARTPROC(); + + cbz x3, .Ldo_nothing; + + GET_DATA_POINTER(x5, .Lrconst) + + eor vZZ.16b, vZZ.16b, vZZ.16b + ld1 {rhash.16b}, [x1] + ld1 {rh1.16b}, [x0] + + rbit rhash.16b, rhash.16b /* bit-swap */ + ld1r {rrconst.2d}, [x5] + + cmp x3, #6 + b.lo .Less_than_6 + + add x6, x4, #64 + VPUSH_ABI + + ld1 {rh2.16b-rh5.16b}, [x4] + ld1 {rh6.16b}, [x6] + + sub x3, x3, #6 + + ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16) + ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16) + + rbit rbuf.16b, rbuf.16b /* bit-swap */ + rbit rbuf1.16b, rbuf1.16b /* bit-swap */ + rbit rbuf2.16b, rbuf2.16b /* bit-swap */ + rbit rbuf3.16b, rbuf3.16b /* bit-swap */ + rbit rbuf4.16b, rbuf4.16b /* bit-swap */ + rbit rbuf5.16b, rbuf5.16b /* bit-swap */ + eor rhash.16b, rhash.16b, rbuf.16b + + cmp x3, #6 + b.lo .Lend_6 + +.Loop_6: + + /* (in1) * H⁵ => rr0:rr1 */ + /* (in2) * H⁴ => rr2:rr3 */ + /* (in0 ^ hash) * H⁶ => rr4:rr5 */ + PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1, + rr2, rr3, rbuf2, rh4, t2, t3, + rr4, rr5, rhash, rh6, t4, t5, + _(sub x3, x3, #6)) + + ld1 {rbuf.16b-rbuf2.16b}, [x2], #(3*16) + cmp x3, #6 + + eor rr0.16b, rr0.16b, rr2.16b + eor rr1.16b, rr1.16b, rr3.16b + + /* (in3) * H³ => rr2:rr3 */ + /* (in4) * H² => rr6:rr7 */ + /* (in5) * H¹ => rr8:rr9 */ + PMUL_128x128_3(rr2, rr3, rbuf3, rh3, t0, t1, + rr6, rr7, rbuf4, rh2, t2, t3, + rr8, rr9, rbuf5, rh1, t4, t5, + _(eor rr0.16b, rr0.16b, rr4.16b; + eor rr1.16b, rr1.16b, rr5.16b)) + + eor rr0.16b, rr0.16b, rr2.16b + eor rr1.16b, rr1.16b, rr3.16b + rbit rbuf.16b, rbuf.16b + eor rr0.16b, rr0.16b, rr6.16b + eor rr1.16b, rr1.16b, rr7.16b + rbit rbuf1.16b, rbuf1.16b + eor rr0.16b, rr0.16b, rr8.16b + eor rr1.16b, rr1.16b, rr9.16b + ld1 {rbuf3.16b-rbuf5.16b}, [x2], #(3*16) + + REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, + _(rbit rbuf2.16b, rbuf2.16b), + _(rbit rbuf3.16b, rbuf3.16b), + _(rbit rbuf4.16b, rbuf4.16b)) + + rbit rbuf5.16b, rbuf5.16b + eor rhash.16b, rhash.16b, rbuf.16b + + b.hs .Loop_6 + +.Lend_6: + + /* (in1) * H⁵ => rr0:rr1 */ + /* (in0 ^ hash) * H⁶ => rr2:rr3 */ + /* (in2) * H⁴ => rr4:rr5 */ + PMUL_128x128_3(rr0, rr1, rbuf1, rh5, t0, t1, + rr2, rr3, rhash, rh6, t2, t3, + rr4, rr5, rbuf2, rh4, t4, t5, + __) + eor rr0.16b, rr0.16b, rr2.16b + eor rr1.16b, rr1.16b, rr3.16b + eor rr0.16b, rr0.16b, rr4.16b + eor rr1.16b, rr1.16b, rr5.16b + + /* (in3) * H³ => rhash:rbuf */ + /* (in4) * H² => rr6:rr7 */ + /* (in5) * H¹ => rr8:rr9 */ + PMUL_128x128_3(rhash, rbuf, rbuf3, rh3, t0, t1, + rr6, rr7, rbuf4, rh2, t2, t3, + rr8, rr9, rbuf5, rh1, t4, t5, + _(CLEAR_REG(rh4); + CLEAR_REG(rh5); + CLEAR_REG(rh6))) + eor 
rr0.16b, rr0.16b, rhash.16b + eor rr1.16b, rr1.16b, rbuf.16b + eor rr0.16b, rr0.16b, rr6.16b + eor rr1.16b, rr1.16b, rr7.16b + eor rr0.16b, rr0.16b, rr8.16b + eor rr1.16b, rr1.16b, rr9.16b + + REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, + _(CLEAR_REG(rh2); + CLEAR_REG(rh3); + CLEAR_REG(rr2); + CLEAR_REG(rbuf2); + CLEAR_REG(rbuf3)), + _(CLEAR_REG(rr3); + CLEAR_REG(rr4); + CLEAR_REG(rr5); + CLEAR_REG(rr6); + CLEAR_REG(rr7)), + _(CLEAR_REG(rr8); + CLEAR_REG(rr9); + CLEAR_REG(rbuf1); + CLEAR_REG(rbuf2))) + + CLEAR_REG(rbuf4) + CLEAR_REG(rbuf5) + CLEAR_REG(t2) + CLEAR_REG(t3) + CLEAR_REG(t4) + CLEAR_REG(t5) + + VPOP_ABI + + cbz x3, .Ldone + +.Less_than_6: + /* Handle remaining blocks. */ + + ld1 {rbuf.16b}, [x2], #16 + sub x3, x3, #1 + + rbit rbuf.16b, rbuf.16b /* bit-swap */ + + eor rhash.16b, rhash.16b, rbuf.16b + + cbz x3, .Lend + +.Loop: + PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(ld1 {rbuf.16b}, [x2], #16)) + REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, + _(sub x3, x3, #1), + _(rbit rbuf.16b, rbuf.16b), + __) + eor rhash.16b, rhash.16b, rbuf.16b + + cbnz x3, .Loop + +.Lend: + PMUL_128x128(rr0, rr1, rh1, rhash, t0, t1, _(CLEAR_REG(rbuf))) + REDUCTION(rhash, rr0, rr1, rrconst, t0, t1, __, _(CLEAR_REG(rh1)), __) + +.Ldone: + CLEAR_REG(rr1) + CLEAR_REG(rr0) + rbit rhash.16b, rhash.16b /* bit-swap */ + CLEAR_REG(t0) + CLEAR_REG(t1) + + st1 {rhash.2d}, [x1] + CLEAR_REG(rhash) + +.Ldo_nothing: + mov x0, #0 + ret + CFI_ENDPROC() +ELF(.size _gcry_ghash_armv8_ce_pmull,.-_gcry_ghash_armv8_ce_pmull;) + + +/* + * void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table); + */ +.align 3 +.globl _gcry_ghash_setup_armv8_ce_pmull +ELF(.type _gcry_ghash_setup_armv8_ce_pmull,%function;) +_gcry_ghash_setup_armv8_ce_pmull: + /* input: + * x0: gcm_key + * x1: gcm_table + */ + CFI_STARTPROC() + + GET_DATA_POINTER(x2, .Lrconst) + + eor vZZ.16b, vZZ.16b, vZZ.16b + + /* H¹ */ + ld1 {rh1.16b}, [x0] + rbit rh1.16b, rh1.16b + st1 {rh1.16b}, [x0] + + ld1r {rrconst.2d}, [x2] + + /* H² */ + PMUL_128x128(rr0, rr1, rh1, rh1, t0, t1, __) + REDUCTION(rh2, rr0, rr1, rrconst, t0, t1, __, __, __) + + /* H³ */ + PMUL_128x128(rr0, rr1, rh2, rh1, t0, t1, __) + REDUCTION(rh3, rr0, rr1, rrconst, t0, t1, __, __, __) + + /* H⁴ */ + PMUL_128x128(rr0, rr1, rh2, rh2, t0, t1, __) + REDUCTION(rh4, rr0, rr1, rrconst, t0, t1, __, __, __) + + /* H⁵ */ + PMUL_128x128(rr0, rr1, rh2, rh3, t0, t1, __) + REDUCTION(rh5, rr0, rr1, rrconst, t0, t1, __, __, __) + + /* H⁶ */ + PMUL_128x128(rr0, rr1, rh3, rh3, t0, t1, __) + REDUCTION(rh6, rr0, rr1, rrconst, t0, t1, __, __, __) + + st1 {rh2.16b-rh4.16b}, [x1], #(3*16) + st1 {rh5.16b-rh6.16b}, [x1] + + ret + CFI_ENDPROC() +ELF(.size _gcry_ghash_setup_armv8_ce_pmull,.-_gcry_ghash_setup_armv8_ce_pmull;) + +#endif diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm-intel-pclmul.c b/comm/third_party/libgcrypt/cipher/cipher-gcm-intel-pclmul.c new file mode 100644 index 0000000000..28165c653f --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-gcm-intel-pclmul.c @@ -0,0 +1,712 @@ +/* cipher-gcm-intel-pclmul.c - Intel PCLMUL accelerated Galois Counter Mode + * implementation + * Copyright (C) 2013-2014,2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +#ifdef GCM_USE_INTEL_PCLMUL + + +#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */ +/* Prevent compiler from issuing SSE instructions between asm blocks. */ +# pragma GCC target("no-sse") +#endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif + + +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION +#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE + + +/* + Intel PCLMUL ghash based on white paper: + "Intel® Carry-Less Multiplication Instruction and its Usage for Computing the + GCM Mode - Rev 2.01"; Shay Gueron, Michael E. Kounavis. + */ +static ASM_FUNC_ATTR_INLINE void reduction(void) +{ + /* input: */ + + asm volatile (/* first phase of the reduction */ + "movdqa %%xmm3, %%xmm6\n\t" + "movdqa %%xmm3, %%xmm5\n\t" + "psllq $1, %%xmm6\n\t" /* packed right shifting << 63 */ + "pxor %%xmm3, %%xmm6\n\t" + "psllq $57, %%xmm5\n\t" /* packed right shifting << 57 */ + "psllq $62, %%xmm6\n\t" /* packed right shifting << 62 */ + "pxor %%xmm5, %%xmm6\n\t" /* xor the shifted versions */ + "pshufd $0x6a, %%xmm6, %%xmm5\n\t" + "pshufd $0xae, %%xmm6, %%xmm6\n\t" + "pxor %%xmm5, %%xmm3\n\t" /* first phase of the reduction + complete */ + + /* second phase of the reduction */ + "pxor %%xmm3, %%xmm1\n\t" /* xor the shifted versions */ + "psrlq $1, %%xmm3\n\t" /* packed left shifting >> 1 */ + "pxor %%xmm3, %%xmm6\n\t" + "psrlq $1, %%xmm3\n\t" /* packed left shifting >> 2 */ + "pxor %%xmm3, %%xmm1\n\t" + "psrlq $5, %%xmm3\n\t" /* packed left shifting >> 7 */ + "pxor %%xmm3, %%xmm6\n\t" + "pxor %%xmm6, %%xmm1\n\t" /* the result is in xmm1 */ + ::: "memory" ); +} + +static ASM_FUNC_ATTR_INLINE void gfmul_pclmul(void) +{ + /* Input: XMM0 and XMM1, Output: XMM1. Input XMM0 stays unmodified. + Input must be converted to little-endian. + */ + asm volatile (/* gfmul, xmm0 has operator a and xmm1 has operator b. 
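+                 Karatsuba trick: three pclmulqdq invocations (a0*b0, a1*b1
+                 and (a0+a1)*(b0+b1)) replace four 64x64 carry-less multiplies.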
*/ + "pshufd $78, %%xmm0, %%xmm2\n\t" + "pshufd $78, %%xmm1, %%xmm4\n\t" + "pxor %%xmm0, %%xmm2\n\t" /* xmm2 holds a0+a1 */ + "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds b0+b1 */ + + "movdqa %%xmm0, %%xmm3\n\t" + "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds a0*b0 */ + "pclmulqdq $17, %%xmm0, %%xmm1\n\t" /* xmm6 holds a1*b1 */ + "movdqa %%xmm3, %%xmm5\n\t" + "pclmulqdq $0, %%xmm2, %%xmm4\n\t" /* xmm4 holds (a0+a1)*(b0+b1) */ + + "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ + "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ + "movdqa %%xmm4, %%xmm5\n\t" + "psrldq $8, %%xmm4\n\t" + "pslldq $8, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm4, %%xmm1\n\t" /* holds the result of the + carry-less multiplication of xmm0 + by xmm1 */ + ::: "memory" ); + + reduction(); +} + +static ASM_FUNC_ATTR_INLINE void +gfmul_pclmul_aggr4(const void *buf, const void *h_1, const void *h_table, + const unsigned char *be_mask) +{ + /* Input: + Hash: XMM1 + Output: + Hash: XMM1 + */ + asm volatile (/* perform clmul and merge results... */ + "movdqu 2*16(%[h_table]), %%xmm2\n\t" /* Load H4 */ + "movdqu 0*16(%[buf]), %%xmm5\n\t" + "pshufb %[be_mask], %%xmm5\n\t" /* be => le */ + "pxor %%xmm5, %%xmm1\n\t" + + "pshufd $78, %%xmm2, %%xmm5\n\t" + "pshufd $78, %%xmm1, %%xmm4\n\t" + "pxor %%xmm2, %%xmm5\n\t" /* xmm5 holds 4:a0+a1 */ + "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds 4:b0+b1 */ + "movdqa %%xmm2, %%xmm3\n\t" + "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds 4:a0*b0 */ + "pclmulqdq $17, %%xmm2, %%xmm1\n\t" /* xmm1 holds 4:a1*b1 */ + "pclmulqdq $0, %%xmm5, %%xmm4\n\t" /* xmm4 holds 4:(a0+a1)*(b0+b1) */ + + "movdqu 1*16(%[h_table]), %%xmm5\n\t" /* Load H3 */ + "movdqu 1*16(%[buf]), %%xmm2\n\t" + "pshufb %[be_mask], %%xmm2\n\t" /* be => le */ + + "pshufd $78, %%xmm5, %%xmm0\n\t" + "pshufd $78, %%xmm2, %%xmm7\n\t" + "pxor %%xmm5, %%xmm0\n\t" /* xmm0 holds 3:a0+a1 */ + "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */ + "movdqa %%xmm5, %%xmm6\n\t" + "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */ + "pclmulqdq $17, %%xmm5, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */ + "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */ + + "movdqu 2*16(%[buf]), %%xmm5\n\t" + "pshufb %[be_mask], %%xmm5\n\t" /* be => le */ + + "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 3+4:a0*b0 */ + "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 3+4:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 3+4:(a0+a1)*(b0+b1) */ + + "movdqu 0*16(%[h_table]), %%xmm2\n\t" /* Load H2 */ + + "pshufd $78, %%xmm2, %%xmm0\n\t" + "pshufd $78, %%xmm5, %%xmm7\n\t" + "pxor %%xmm2, %%xmm0\n\t" /* xmm0 holds 2:a0+a1 */ + "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 2:b0+b1 */ + "movdqa %%xmm2, %%xmm6\n\t" + "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 2:a0*b0 */ + "pclmulqdq $17, %%xmm2, %%xmm5\n\t" /* xmm5 holds 2:a1*b1 */ + "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 2:(a0+a1)*(b0+b1) */ + + "movdqu 3*16(%[buf]), %%xmm2\n\t" + "pshufb %[be_mask], %%xmm2\n\t" /* be => le */ + : + : [buf] "r" (buf), + [h_table] "r" (h_table), + [be_mask] "m" (*be_mask) + : "memory" ); + + asm volatile ("pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 2+3+4:a0*b0 */ + "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 2+3+4:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 2+3+4:(a0+a1)*(b0+b1) */ + + "movdqu %[h_1], %%xmm5\n\t" /* Load H1 */ + + "pshufd $78, %%xmm5, %%xmm0\n\t" + "pshufd $78, %%xmm2, %%xmm7\n\t" + "pxor %%xmm5, %%xmm0\n\t" /* xmm0 holds 1:a0+a1 */ + "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 1:b0+b1 */ + "movdqa %%xmm5, %%xmm6\n\t" + 
"pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 1:a0*b0 */ + "pclmulqdq $17, %%xmm5, %%xmm2\n\t" /* xmm2 holds 1:a1*b1 */ + "pclmulqdq $0, %%xmm0, %%xmm7\n\t" /* xmm7 holds 1:(a0+a1)*(b0+b1) */ + + "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 1+2+3+4:a0*b0 */ + "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 1+2+3+4:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 1+2+3+4:(a0+a1)*(b0+b1) */ + + /* aggregated reduction... */ + "movdqa %%xmm3, %%xmm5\n\t" + "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ + "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ + "movdqa %%xmm4, %%xmm5\n\t" + "psrldq $8, %%xmm4\n\t" + "pslldq $8, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm4, %%xmm1\n\t" /* holds the result of the + carry-less multiplication of xmm0 + by xmm1 */ + : + : [h_1] "m" (*(const unsigned char *)h_1) + : "memory" ); + + reduction(); +} + +#ifdef __x86_64__ +static ASM_FUNC_ATTR_INLINE void +gfmul_pclmul_aggr8(const void *buf, const void *h_table) +{ + /* Input: + H¹: XMM0 + bemask: XMM15 + Hash: XMM1 + Output: + Hash: XMM1 + Inputs XMM0 and XMM15 stays unmodified. + */ + asm volatile (/* Load H6, H7, H8. */ + "movdqu 6*16(%[h_table]), %%xmm10\n\t" + "movdqu 5*16(%[h_table]), %%xmm9\n\t" + "movdqu 4*16(%[h_table]), %%xmm8\n\t" + + /* perform clmul and merge results... */ + "movdqu 0*16(%[buf]), %%xmm5\n\t" + "movdqu 1*16(%[buf]), %%xmm2\n\t" + "pshufb %%xmm15, %%xmm5\n\t" /* be => le */ + "pshufb %%xmm15, %%xmm2\n\t" /* be => le */ + "pxor %%xmm5, %%xmm1\n\t" + + "pshufd $78, %%xmm10, %%xmm5\n\t" + "pshufd $78, %%xmm1, %%xmm4\n\t" + "pxor %%xmm10, %%xmm5\n\t" /* xmm5 holds 8:a0+a1 */ + "pxor %%xmm1, %%xmm4\n\t" /* xmm4 holds 8:b0+b1 */ + "movdqa %%xmm10, %%xmm3\n\t" + "pclmulqdq $0, %%xmm1, %%xmm3\n\t" /* xmm3 holds 8:a0*b0 */ + "pclmulqdq $17, %%xmm10, %%xmm1\n\t" /* xmm1 holds 8:a1*b1 */ + "pclmulqdq $0, %%xmm5, %%xmm4\n\t" /* xmm4 holds 8:(a0+a1)*(b0+b1) */ + + "pshufd $78, %%xmm9, %%xmm11\n\t" + "pshufd $78, %%xmm2, %%xmm7\n\t" + "pxor %%xmm9, %%xmm11\n\t" /* xmm11 holds 7:a0+a1 */ + "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 7:b0+b1 */ + "movdqa %%xmm9, %%xmm6\n\t" + "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 7:a0*b0 */ + "pclmulqdq $17, %%xmm9, %%xmm2\n\t" /* xmm2 holds 7:a1*b1 */ + "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 7:(a0+a1)*(b0+b1) */ + + "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 7+8:a0*b0 */ + "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 7+8:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 7+8:(a0+a1)*(b0+b1) */ + + "movdqu 2*16(%[buf]), %%xmm5\n\t" + "movdqu 3*16(%[buf]), %%xmm2\n\t" + "pshufb %%xmm15, %%xmm5\n\t" /* be => le */ + "pshufb %%xmm15, %%xmm2\n\t" /* be => le */ + + "pshufd $78, %%xmm8, %%xmm11\n\t" + "pshufd $78, %%xmm5, %%xmm7\n\t" + "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 6:a0+a1 */ + "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 6:b0+b1 */ + "movdqa %%xmm8, %%xmm6\n\t" + "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 6:a0*b0 */ + "pclmulqdq $17, %%xmm8, %%xmm5\n\t" /* xmm5 holds 6:a1*b1 */ + "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 6:(a0+a1)*(b0+b1) */ + + /* Load H3, H4, H5. 
*/ + "movdqu 3*16(%[h_table]), %%xmm10\n\t" + "movdqu 2*16(%[h_table]), %%xmm9\n\t" + "movdqu 1*16(%[h_table]), %%xmm8\n\t" + + "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 6+7+8:a0*b0 */ + "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 6+7+8:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 6+7+8:(a0+a1)*(b0+b1) */ + + "pshufd $78, %%xmm10, %%xmm11\n\t" + "pshufd $78, %%xmm2, %%xmm7\n\t" + "pxor %%xmm10, %%xmm11\n\t" /* xmm11 holds 5:a0+a1 */ + "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 5:b0+b1 */ + "movdqa %%xmm10, %%xmm6\n\t" + "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 5:a0*b0 */ + "pclmulqdq $17, %%xmm10, %%xmm2\n\t" /* xmm2 holds 5:a1*b1 */ + "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 5:(a0+a1)*(b0+b1) */ + + "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 5+6+7+8:a0*b0 */ + "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 5+6+7+8:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 5+6+7+8:(a0+a1)*(b0+b1) */ + + "movdqu 4*16(%[buf]), %%xmm5\n\t" + "movdqu 5*16(%[buf]), %%xmm2\n\t" + "pshufb %%xmm15, %%xmm5\n\t" /* be => le */ + "pshufb %%xmm15, %%xmm2\n\t" /* be => le */ + + "pshufd $78, %%xmm9, %%xmm11\n\t" + "pshufd $78, %%xmm5, %%xmm7\n\t" + "pxor %%xmm9, %%xmm11\n\t" /* xmm11 holds 4:a0+a1 */ + "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 4:b0+b1 */ + "movdqa %%xmm9, %%xmm6\n\t" + "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 4:a0*b0 */ + "pclmulqdq $17, %%xmm9, %%xmm5\n\t" /* xmm5 holds 4:a1*b1 */ + "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 4:(a0+a1)*(b0+b1) */ + + "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 4+5+6+7+8:a0*b0 */ + "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 4+5+6+7+8:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 4+5+6+7+8:(a0+a1)*(b0+b1) */ + + "pshufd $78, %%xmm8, %%xmm11\n\t" + "pshufd $78, %%xmm2, %%xmm7\n\t" + "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 3:a0+a1 */ + "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */ + "movdqa %%xmm8, %%xmm6\n\t" + "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */ + "pclmulqdq $17, %%xmm8, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */ + "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */ + + "movdqu 0*16(%[h_table]), %%xmm8\n\t" /* Load H2 */ + + "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 3+4+5+6+7+8:a0*b0 */ + "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 3+4+5+6+7+8:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 3+4+5+6+7+8:(a0+a1)*(b0+b1) */ + + "movdqu 6*16(%[buf]), %%xmm5\n\t" + "movdqu 7*16(%[buf]), %%xmm2\n\t" + "pshufb %%xmm15, %%xmm5\n\t" /* be => le */ + "pshufb %%xmm15, %%xmm2\n\t" /* be => le */ + + "pshufd $78, %%xmm8, %%xmm11\n\t" + "pshufd $78, %%xmm5, %%xmm7\n\t" + "pxor %%xmm8, %%xmm11\n\t" /* xmm11 holds 4:a0+a1 */ + "pxor %%xmm5, %%xmm7\n\t" /* xmm7 holds 4:b0+b1 */ + "movdqa %%xmm8, %%xmm6\n\t" + "pclmulqdq $0, %%xmm5, %%xmm6\n\t" /* xmm6 holds 4:a0*b0 */ + "pclmulqdq $17, %%xmm8, %%xmm5\n\t" /* xmm5 holds 4:a1*b1 */ + "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 4:(a0+a1)*(b0+b1) */ + + "pxor %%xmm6, %%xmm3\n\t" /* xmm3 holds 2+3+4+5+6+7+8:a0*b0 */ + "pxor %%xmm5, %%xmm1\n\t" /* xmm1 holds 2+3+4+5+6+7+8:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 2+3+4+5+6+7+8:(a0+a1)*(b0+b1) */ + + "pshufd $78, %%xmm0, %%xmm11\n\t" + "pshufd $78, %%xmm2, %%xmm7\n\t" + "pxor %%xmm0, %%xmm11\n\t" /* xmm11 holds 3:a0+a1 */ + "pxor %%xmm2, %%xmm7\n\t" /* xmm7 holds 3:b0+b1 */ + "movdqa %%xmm0, %%xmm6\n\t" + "pclmulqdq $0, %%xmm2, %%xmm6\n\t" /* xmm6 holds 3:a0*b0 */ + "pclmulqdq $17, %%xmm0, %%xmm2\n\t" /* xmm2 holds 3:a1*b1 */ + "pclmulqdq $0, %%xmm11, %%xmm7\n\t" /* xmm7 holds 3:(a0+a1)*(b0+b1) */ + + "pxor %%xmm6, 
%%xmm3\n\t" /* xmm3 holds 1+2+3+3+4+5+6+7+8:a0*b0 */ + "pxor %%xmm2, %%xmm1\n\t" /* xmm1 holds 1+2+3+3+4+5+6+7+8:a1*b1 */ + "pxor %%xmm7, %%xmm4\n\t" /* xmm4 holds 1+2+3+3+4+5+6+7+8:(a0+a1)*(b0+b1) */ + + /* aggregated reduction... */ + "movdqa %%xmm3, %%xmm5\n\t" + "pxor %%xmm1, %%xmm5\n\t" /* xmm5 holds a0*b0+a1*b1 */ + "pxor %%xmm5, %%xmm4\n\t" /* xmm4 holds a0*b0+a1*b1+(a0+a1)*(b0+b1) */ + "movdqa %%xmm4, %%xmm5\n\t" + "psrldq $8, %%xmm4\n\t" + "pslldq $8, %%xmm5\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm4, %%xmm1\n\t" /* holds the result of the + carry-less multiplication of xmm0 + by xmm1 */ + : + : [buf] "r" (buf), + [h_table] "r" (h_table) + : "memory" ); + + reduction(); +} +#endif + +static ASM_FUNC_ATTR_INLINE void gcm_lsh(void *h, unsigned int hoffs) +{ + static const u64 pconst[2] __attribute__ ((aligned (16))) = + { U64_C(0x0000000000000001), U64_C(0xc200000000000000) }; + + asm volatile ("movdqu (%[h]), %%xmm2\n\t" + "pshufd $0xff, %%xmm2, %%xmm3\n\t" + "movdqa %%xmm2, %%xmm4\n\t" + "psrad $31, %%xmm3\n\t" + "pslldq $8, %%xmm4\n\t" + "pand %[pconst], %%xmm3\n\t" + "paddq %%xmm2, %%xmm2\n\t" + "psrlq $63, %%xmm4\n\t" + "pxor %%xmm3, %%xmm2\n\t" + "pxor %%xmm4, %%xmm2\n\t" + "movdqu %%xmm2, (%[h])\n\t" + : + : [pconst] "m" (pconst), + [h] "r" ((byte *)h + hoffs) + : "memory" ); +} + +void ASM_FUNC_ATTR +_gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c) +{ + static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; +#if defined(__x86_64__) && defined(__WIN64__) + char win64tmp[10 * 16]; + + /* XMM6-XMM15 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + "movdqu %%xmm8, 2*16(%0)\n\t" + "movdqu %%xmm9, 3*16(%0)\n\t" + "movdqu %%xmm10, 4*16(%0)\n\t" + "movdqu %%xmm11, 5*16(%0)\n\t" + "movdqu %%xmm12, 6*16(%0)\n\t" + "movdqu %%xmm13, 7*16(%0)\n\t" + "movdqu %%xmm14, 8*16(%0)\n\t" + "movdqu %%xmm15, 9*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory" ); +#endif + + /* Swap endianness of hsub. 
*/ + asm volatile ("movdqu (%[key]), %%xmm0\n\t" + "pshufb %[be_mask], %%xmm0\n\t" + "movdqu %%xmm0, (%[key])\n\t" + : + : [key] "r" (c->u_mode.gcm.u_ghash_key.key), + [be_mask] "m" (*be_mask) + : "memory"); + + gcm_lsh(c->u_mode.gcm.u_ghash_key.key, 0); /* H <<< 1 */ + + asm volatile ("movdqa %%xmm0, %%xmm1\n\t" + "movdqu (%[key]), %%xmm0\n\t" /* load H <<< 1 */ + : + : [key] "r" (c->u_mode.gcm.u_ghash_key.key) + : "memory"); + + gfmul_pclmul (); /* H<<<1•H => H² */ + + asm volatile ("movdqu %%xmm1, 0*16(%[h_table])\n\t" + "movdqa %%xmm1, %%xmm7\n\t" + : + : [h_table] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + + gcm_lsh(c->u_mode.gcm.gcm_table, 0 * 16); /* H² <<< 1 */ + gfmul_pclmul (); /* H<<<1•H² => H³ */ + + asm volatile ("movdqa %%xmm7, %%xmm0\n\t" + "movdqu %%xmm1, 1*16(%[h_table])\n\t" + "movdqu 0*16(%[h_table]), %%xmm1\n\t" /* load H² <<< 1 */ + : + : [h_table] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + + gfmul_pclmul (); /* H²<<<1•H² => H⁴ */ + + asm volatile ("movdqu %%xmm1, 2*16(%[h_table])\n\t" + "movdqa %%xmm1, %%xmm0\n\t" + "movdqu (%[key]), %%xmm1\n\t" /* load H <<< 1 */ + : + : [h_table] "r" (c->u_mode.gcm.gcm_table), + [key] "r" (c->u_mode.gcm.u_ghash_key.key) + : "memory"); + + gcm_lsh(c->u_mode.gcm.gcm_table, 1 * 16); /* H³ <<< 1 */ + gcm_lsh(c->u_mode.gcm.gcm_table, 2 * 16); /* H⁴ <<< 1 */ + +#ifdef __x86_64__ + gfmul_pclmul (); /* H<<<1•H⁴ => H⁵ */ + + asm volatile ("movdqu %%xmm1, 3*16(%[h_table])\n\t" + "movdqu 0*16(%[h_table]), %%xmm1\n\t" /* load H² <<< 1 */ + : + : [h_table] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + + gfmul_pclmul (); /* H²<<<1•H⁴ => H⁶ */ + + asm volatile ("movdqu %%xmm1, 4*16(%[h_table])\n\t" + "movdqu 1*16(%[h_table]), %%xmm1\n\t" /* load H³ <<< 1 */ + : + : [h_table] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + + gfmul_pclmul (); /* H³<<<1•H⁴ => H⁷ */ + + asm volatile ("movdqu %%xmm1, 5*16(%[h_table])\n\t" + "movdqu 2*16(%[h_table]), %%xmm1\n\t" /* load H⁴ <<< 1 */ + : + : [h_table] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + + gfmul_pclmul (); /* H³<<<1•H⁴ => H⁸ */ + + asm volatile ("movdqu %%xmm1, 6*16(%[h_table])\n\t" + : + : [h_table] "r" (c->u_mode.gcm.gcm_table) + : "memory"); + + gcm_lsh(c->u_mode.gcm.gcm_table, 3 * 16); /* H⁵ <<< 1 */ + gcm_lsh(c->u_mode.gcm.gcm_table, 4 * 16); /* H⁶ <<< 1 */ + gcm_lsh(c->u_mode.gcm.gcm_table, 5 * 16); /* H⁷ <<< 1 */ + gcm_lsh(c->u_mode.gcm.gcm_table, 6 * 16); /* H⁸ <<< 1 */ + +#ifdef __WIN64__ + /* Clear/restore used registers. */ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + "movdqu 2*16(%0), %%xmm8\n\t" + "movdqu 3*16(%0), %%xmm9\n\t" + "movdqu 4*16(%0), %%xmm10\n\t" + "movdqu 5*16(%0), %%xmm11\n\t" + "movdqu 6*16(%0), %%xmm12\n\t" + "movdqu 7*16(%0), %%xmm13\n\t" + "movdqu 8*16(%0), %%xmm14\n\t" + "movdqu 9*16(%0), %%xmm15\n\t" + : + : "r" (win64tmp) + : "memory" ); +#else + /* Clear used registers. 
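+     No key-derived material may remain in XMM state on return.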
*/ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "pxor %%xmm6, %%xmm6\n\t" + "pxor %%xmm7, %%xmm7\n\t" + "pxor %%xmm8, %%xmm8\n\t" + "pxor %%xmm9, %%xmm9\n\t" + "pxor %%xmm10, %%xmm10\n\t" + "pxor %%xmm11, %%xmm11\n\t" + "pxor %%xmm12, %%xmm12\n\t" + "pxor %%xmm13, %%xmm13\n\t" + "pxor %%xmm14, %%xmm14\n\t" + "pxor %%xmm15, %%xmm15\n\t" + ::: "memory" ); +#endif +#endif +} + + +unsigned int ASM_FUNC_ATTR +_gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result, const byte *buf, + size_t nblocks) +{ + static const unsigned char be_mask[16] __attribute__ ((aligned (16))) = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; +#if defined(__x86_64__) && defined(__WIN64__) + char win64tmp[10 * 16]; +#endif + + if (nblocks == 0) + return 0; + +#if defined(__x86_64__) && defined(__WIN64__) + /* XMM6-XMM15 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + "movdqu %%xmm8, 2*16(%0)\n\t" + "movdqu %%xmm9, 3*16(%0)\n\t" + "movdqu %%xmm10, 4*16(%0)\n\t" + "movdqu %%xmm11, 5*16(%0)\n\t" + "movdqu %%xmm12, 6*16(%0)\n\t" + "movdqu %%xmm13, 7*16(%0)\n\t" + "movdqu %%xmm14, 8*16(%0)\n\t" + "movdqu %%xmm15, 9*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory" ); +#endif + + /* Preload hash. */ + asm volatile ("movdqa %[be_mask], %%xmm7\n\t" + "movdqu %[hash], %%xmm1\n\t" + "pshufb %%xmm7, %%xmm1\n\t" /* be => le */ + : + : [hash] "m" (*result), + [be_mask] "m" (*be_mask) + : "memory" ); + +#ifdef __x86_64__ + if (nblocks >= 8) + { + /* Preload H1. */ + asm volatile ("movdqa %%xmm7, %%xmm15\n\t" + "movdqa %[h_1], %%xmm0\n\t" + : + : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key) + : "memory" ); + + while (nblocks >= 8) + { + gfmul_pclmul_aggr8 (buf, c->u_mode.gcm.gcm_table); + + buf += 8 * blocksize; + nblocks -= 8; + } +#ifndef __WIN64__ + /* Clear used x86-64/XMM registers. */ + asm volatile( "pxor %%xmm8, %%xmm8\n\t" + "pxor %%xmm9, %%xmm9\n\t" + "pxor %%xmm10, %%xmm10\n\t" + "pxor %%xmm11, %%xmm11\n\t" + "pxor %%xmm12, %%xmm12\n\t" + "pxor %%xmm13, %%xmm13\n\t" + "pxor %%xmm14, %%xmm14\n\t" + "pxor %%xmm15, %%xmm15\n\t" + ::: "memory" ); +#endif + } +#endif + + while (nblocks >= 4) + { + gfmul_pclmul_aggr4 (buf, c->u_mode.gcm.u_ghash_key.key, + c->u_mode.gcm.gcm_table, be_mask); + + buf += 4 * blocksize; + nblocks -= 4; + } + + if (nblocks) + { + /* Preload H1. */ + asm volatile ("movdqa %[h_1], %%xmm0\n\t" + : + : [h_1] "m" (*c->u_mode.gcm.u_ghash_key.key) + : "memory" ); + + while (nblocks) + { + asm volatile ("movdqu %[buf], %%xmm2\n\t" + "pshufb %[be_mask], %%xmm2\n\t" /* be => le */ + "pxor %%xmm2, %%xmm1\n\t" + : + : [buf] "m" (*buf), [be_mask] "m" (*be_mask) + : "memory" ); + + gfmul_pclmul (); + + buf += blocksize; + nblocks--; + } + } + + /* Store hash. */ + asm volatile ("pshufb %[be_mask], %%xmm1\n\t" /* be => le */ + "movdqu %%xmm1, %[hash]\n\t" + : [hash] "=m" (*result) + : [be_mask] "m" (*be_mask) + : "memory" ); + +#if defined(__x86_64__) && defined(__WIN64__) + /* Clear/restore used registers. 
*/ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + "movdqu 2*16(%0), %%xmm8\n\t" + "movdqu 3*16(%0), %%xmm9\n\t" + "movdqu 4*16(%0), %%xmm10\n\t" + "movdqu 5*16(%0), %%xmm11\n\t" + "movdqu 6*16(%0), %%xmm12\n\t" + "movdqu 7*16(%0), %%xmm13\n\t" + "movdqu 8*16(%0), %%xmm14\n\t" + "movdqu 9*16(%0), %%xmm15\n\t" + : + : "r" (win64tmp) + : "memory" ); +#else + /* Clear used registers. */ + asm volatile( "pxor %%xmm0, %%xmm0\n\t" + "pxor %%xmm1, %%xmm1\n\t" + "pxor %%xmm2, %%xmm2\n\t" + "pxor %%xmm3, %%xmm3\n\t" + "pxor %%xmm4, %%xmm4\n\t" + "pxor %%xmm5, %%xmm5\n\t" + "pxor %%xmm6, %%xmm6\n\t" + "pxor %%xmm7, %%xmm7\n\t" + ::: "memory" ); +#endif + + return 0; +} + +#if __clang__ +# pragma clang attribute pop +#endif + +#endif /* GCM_USE_INTEL_PCLMUL */ diff --git a/comm/third_party/libgcrypt/cipher/cipher-gcm.c b/comm/third_party/libgcrypt/cipher/cipher-gcm.c new file mode 100644 index 0000000000..7aad12776f --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-gcm.c @@ -0,0 +1,1207 @@ +/* cipher-gcm.c - Generic Galois Counter Mode implementation + * Copyright (C) 2013 Dmitry Eremin-Solenikov + * Copyright (C) 2013, 2018-2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +/* Helper macro to force alignment to 16 or 64 bytes. 
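+   Used to keep the pre-computed GHASH table on its own cache lines.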
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define ATTR_ALIGNED_64 __attribute__ ((aligned (64)))
+#else
+# define ATTR_ALIGNED_64
+#endif
+
+
+#ifdef GCM_USE_INTEL_PCLMUL
+extern void _gcry_ghash_setup_intel_pclmul (gcry_cipher_hd_t c);
+
+extern unsigned int _gcry_ghash_intel_pclmul (gcry_cipher_hd_t c, byte *result,
+                                              const byte *buf, size_t nblocks);
+#endif
+
+#ifdef GCM_USE_ARM_PMULL
+extern void _gcry_ghash_setup_armv8_ce_pmull (void *gcm_key, void *gcm_table);
+
+extern unsigned int _gcry_ghash_armv8_ce_pmull (void *gcm_key, byte *result,
+                                                const byte *buf, size_t nblocks,
+                                                void *gcm_table);
+
+static void
+ghash_setup_armv8_ce_pmull (gcry_cipher_hd_t c)
+{
+  _gcry_ghash_setup_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key,
+                                   c->u_mode.gcm.gcm_table);
+}
+
+static unsigned int
+ghash_armv8_ce_pmull (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                      size_t nblocks)
+{
+  return _gcry_ghash_armv8_ce_pmull(c->u_mode.gcm.u_ghash_key.key, result, buf,
+                                    nblocks, c->u_mode.gcm.gcm_table);
+}
+#endif /* GCM_USE_ARM_PMULL */
+
+#ifdef GCM_USE_ARM_NEON
+extern void _gcry_ghash_setup_armv7_neon (void *gcm_key);
+
+extern unsigned int _gcry_ghash_armv7_neon (void *gcm_key, byte *result,
+                                            const byte *buf, size_t nblocks);
+
+static void
+ghash_setup_armv7_neon (gcry_cipher_hd_t c)
+{
+  _gcry_ghash_setup_armv7_neon(c->u_mode.gcm.u_ghash_key.key);
+}
+
+static unsigned int
+ghash_armv7_neon (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                  size_t nblocks)
+{
+  return _gcry_ghash_armv7_neon(c->u_mode.gcm.u_ghash_key.key, result, buf,
+                                nblocks);
+}
+#endif /* GCM_USE_ARM_NEON */
+
+#ifdef GCM_USE_S390X_CRYPTO
+#include "asm-inline-s390x.h"
+
+static unsigned int
+ghash_s390x_kimd (gcry_cipher_hd_t c, byte *result, const byte *buf,
+                  size_t nblocks)
+{
+  u128_t params[2];
+
+  memcpy (&params[0], result, 16);
+  memcpy (&params[1], c->u_mode.gcm.u_ghash_key.key, 16);
+
+  kimd_execute (KMID_FUNCTION_GHASH, &params, buf, nblocks * 16);
+
+  memcpy (result, &params[0], 16);
+  wipememory (params, sizeof(params));
+  return 0;
+}
+#endif /* GCM_USE_S390X_CRYPTO*/
+
+
+#ifdef GCM_USE_TABLES
+static struct
+{
+  volatile u32 counter_head;
+  u32 cacheline_align[64 / 4 - 1];
+  u16 R[256];
+  volatile u32 counter_tail;
+} gcm_table ATTR_ALIGNED_64 =
+  {
+    0,
+    { 0, },
+    {
+      0x0000, 0x01c2, 0x0384, 0x0246, 0x0708, 0x06ca, 0x048c, 0x054e,
+      0x0e10, 0x0fd2, 0x0d94, 0x0c56, 0x0918, 0x08da, 0x0a9c, 0x0b5e,
+      0x1c20, 0x1de2, 0x1fa4, 0x1e66, 0x1b28, 0x1aea, 0x18ac, 0x196e,
+      0x1230, 0x13f2, 0x11b4, 0x1076, 0x1538, 0x14fa, 0x16bc, 0x177e,
+      0x3840, 0x3982, 0x3bc4, 0x3a06, 0x3f48, 0x3e8a, 0x3ccc, 0x3d0e,
+      0x3650, 0x3792, 0x35d4, 0x3416, 0x3158, 0x309a, 0x32dc, 0x331e,
+      0x2460, 0x25a2, 0x27e4, 0x2626, 0x2368, 0x22aa, 0x20ec, 0x212e,
+      0x2a70, 0x2bb2, 0x29f4, 0x2836, 0x2d78, 0x2cba, 0x2efc, 0x2f3e,
+      0x7080, 0x7142, 0x7304, 0x72c6, 0x7788, 0x764a, 0x740c, 0x75ce,
+      0x7e90, 0x7f52, 0x7d14, 0x7cd6, 0x7998, 0x785a, 0x7a1c, 0x7bde,
+      0x6ca0, 0x6d62, 0x6f24, 0x6ee6, 0x6ba8, 0x6a6a, 0x682c, 0x69ee,
+      0x62b0, 0x6372, 0x6134, 0x60f6, 0x65b8, 0x647a, 0x663c, 0x67fe,
+      0x48c0, 0x4902, 0x4b44, 0x4a86, 0x4fc8, 0x4e0a, 0x4c4c, 0x4d8e,
+      0x46d0, 0x4712, 0x4554, 0x4496, 0x41d8, 0x401a, 0x425c, 0x439e,
+      0x54e0, 0x5522, 0x5764, 0x56a6, 0x53e8, 0x522a, 0x506c, 0x51ae,
+      0x5af0, 0x5b32, 0x5974, 0x58b6, 0x5df8, 0x5c3a, 0x5e7c, 0x5fbe,
+      0xe100, 0xe0c2, 0xe284, 0xe346, 0xe608, 0xe7ca, 0xe58c, 0xe44e,
+      0xef10, 0xeed2, 0xec94, 0xed56, 0xe818, 0xe9da, 0xeb9c, 0xea5e,
+      0xfd20, 0xfce2, 0xfea4, 0xff66, 0xfa28, 0xfbea, 0xf9ac, 0xf86e,
+      0xf330, 0xf2f2,
0xf0b4, 0xf176, 0xf438, 0xf5fa, 0xf7bc, 0xf67e, + 0xd940, 0xd882, 0xdac4, 0xdb06, 0xde48, 0xdf8a, 0xddcc, 0xdc0e, + 0xd750, 0xd692, 0xd4d4, 0xd516, 0xd058, 0xd19a, 0xd3dc, 0xd21e, + 0xc560, 0xc4a2, 0xc6e4, 0xc726, 0xc268, 0xc3aa, 0xc1ec, 0xc02e, + 0xcb70, 0xcab2, 0xc8f4, 0xc936, 0xcc78, 0xcdba, 0xcffc, 0xce3e, + 0x9180, 0x9042, 0x9204, 0x93c6, 0x9688, 0x974a, 0x950c, 0x94ce, + 0x9f90, 0x9e52, 0x9c14, 0x9dd6, 0x9898, 0x995a, 0x9b1c, 0x9ade, + 0x8da0, 0x8c62, 0x8e24, 0x8fe6, 0x8aa8, 0x8b6a, 0x892c, 0x88ee, + 0x83b0, 0x8272, 0x8034, 0x81f6, 0x84b8, 0x857a, 0x873c, 0x86fe, + 0xa9c0, 0xa802, 0xaa44, 0xab86, 0xaec8, 0xaf0a, 0xad4c, 0xac8e, + 0xa7d0, 0xa612, 0xa454, 0xa596, 0xa0d8, 0xa11a, 0xa35c, 0xa29e, + 0xb5e0, 0xb422, 0xb664, 0xb7a6, 0xb2e8, 0xb32a, 0xb16c, 0xb0ae, + 0xbbf0, 0xba32, 0xb874, 0xb9b6, 0xbcf8, 0xbd3a, 0xbf7c, 0xbebe, + }, + 0 + }; + +#define gcmR gcm_table.R + +static inline +void prefetch_table(const void *tab, size_t len) +{ + const volatile byte *vtab = tab; + size_t i; + + for (i = 0; len - i >= 8 * 32; i += 8 * 32) + { + (void)vtab[i + 0 * 32]; + (void)vtab[i + 1 * 32]; + (void)vtab[i + 2 * 32]; + (void)vtab[i + 3 * 32]; + (void)vtab[i + 4 * 32]; + (void)vtab[i + 5 * 32]; + (void)vtab[i + 6 * 32]; + (void)vtab[i + 7 * 32]; + } + for (; i < len; i += 32) + { + (void)vtab[i]; + } + + (void)vtab[len - 1]; +} + +static inline void +do_prefetch_tables (const void *gcmM, size_t gcmM_size) +{ + /* Modify counters to trigger copy-on-write and unsharing if physical pages + * of look-up table are shared between processes. Modifying counters also + * causes checksums for pages to change and hint same-page merging algorithm + * that these pages are frequently changing. */ + gcm_table.counter_head++; + gcm_table.counter_tail++; + + /* Prefetch look-up tables to cache. */ + prefetch_table(gcmM, gcmM_size); + prefetch_table(&gcm_table, sizeof(gcm_table)); +} + +#ifdef GCM_TABLES_USE_U64 +static void +bshift (u64 * b0, u64 * b1) +{ + u64 t[2], mask; + + t[0] = *b0; + t[1] = *b1; + mask = -(t[1] & 1) & 0xe1; + mask <<= 56; + + *b1 = (t[1] >> 1) ^ (t[0] << 63); + *b0 = (t[0] >> 1) ^ mask; +} + +static void +do_fillM (unsigned char *h, u64 *M) +{ + int i, j; + + M[0 + 0] = 0; + M[0 + 16] = 0; + + M[8 + 0] = buf_get_be64 (h + 0); + M[8 + 16] = buf_get_be64 (h + 8); + + for (i = 4; i > 0; i /= 2) + { + M[i + 0] = M[2 * i + 0]; + M[i + 16] = M[2 * i + 16]; + + bshift (&M[i], &M[i + 16]); + } + + for (i = 2; i < 16; i *= 2) + for (j = 1; j < i; j++) + { + M[(i + j) + 0] = M[i + 0] ^ M[j + 0]; + M[(i + j) + 16] = M[i + 16] ^ M[j + 16]; + } + + for (i = 0; i < 16; i++) + { + M[i + 32] = (M[i + 0] >> 4) ^ ((u64) gcmR[(M[i + 16] & 0xf) << 4] << 48); + M[i + 48] = (M[i + 16] >> 4) ^ (M[i + 0] << 60); + } +} + +static inline unsigned int +do_ghash (unsigned char *result, const unsigned char *buf, const u64 *gcmM) +{ + u64 V[2]; + u64 tmp[2]; + const u64 *M; + u64 T; + u32 A; + int i; + + cipher_block_xor (V, result, buf, 16); + V[0] = be_bswap64 (V[0]); + V[1] = be_bswap64 (V[1]); + + /* First round can be manually tweaked based on fact that 'tmp' is zero. 
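The R table initialized above is the byte-wide reduction helper for the table-driven code below: when the accumulator is shifted right, the bits that fall off the low end are multiples of the reduction polynomial, and gcmR supplies the 16-bit value to fold back in. The table is linear over GF(2), so every entry is a combination of the single-bit entries; a small editorial self-check of that structure (not patch code):

#include <stdint.h>

/* Recompute one gcm_table.R entry from first principles: the XOR of
 * 0xE1 (byte form of x^7+x^2+x+1 with the implicit x^128 bit) shifted
 * by one plus each set bit position.  gcmR_entry(1) == 0x01c2 and
 * gcmR_entry(0x80) == 0xe100, matching the table above. */
static uint16_t
gcmR_entry (unsigned int i)
{
  uint16_t r = 0;
  unsigned int j;

  for (j = 0; j < 8; j++)
    if (i & (1u << j))
      r ^= (uint16_t)(0xE1u << (j + 1));
  return r;
}

The volatile counter_head/counter_tail fields are equally deliberate: do_prefetch_tables below dirties the first and last cache line of this shared table, so the kernel keeps a private, frequently changing copy per process instead of page-merging it, which blunts cross-process cache-timing attacks on the look-ups.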
*/ + M = &gcmM[(V[1] & 0xf) + 32]; + V[1] >>= 4; + tmp[0] = M[0]; + tmp[1] = M[16]; + tmp[0] ^= gcmM[(V[1] & 0xf) + 0]; + tmp[1] ^= gcmM[(V[1] & 0xf) + 16]; + V[1] >>= 4; + + i = 6; + while (1) + { + M = &gcmM[(V[1] & 0xf) + 32]; + V[1] >>= 4; + + A = tmp[1] & 0xff; + T = tmp[0]; + tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[1] & 0xf) + 0]; + tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[1] & 0xf) + 16]; + + tmp[0] ^= M[0]; + tmp[1] ^= M[16]; + + if (i == 0) + break; + + V[1] >>= 4; + --i; + } + + i = 7; + while (1) + { + M = &gcmM[(V[0] & 0xf) + 32]; + V[0] >>= 4; + + A = tmp[1] & 0xff; + T = tmp[0]; + tmp[0] = (T >> 8) ^ ((u64) gcmR[A] << 48) ^ gcmM[(V[0] & 0xf) + 0]; + tmp[1] = (T << 56) ^ (tmp[1] >> 8) ^ gcmM[(V[0] & 0xf) + 16]; + + tmp[0] ^= M[0]; + tmp[1] ^= M[16]; + + if (i == 0) + break; + + V[0] >>= 4; + --i; + } + + buf_put_be64 (result + 0, tmp[0]); + buf_put_be64 (result + 8, tmp[1]); + + return (sizeof(V) + sizeof(T) + sizeof(tmp) + + sizeof(int)*2 + sizeof(void*)*5); +} + +#else /*!GCM_TABLES_USE_U64*/ + +static void +bshift (u32 * M, int i) +{ + u32 t[4], mask; + + t[0] = M[i * 4 + 0]; + t[1] = M[i * 4 + 1]; + t[2] = M[i * 4 + 2]; + t[3] = M[i * 4 + 3]; + mask = -(t[3] & 1) & 0xe1; + + M[i * 4 + 3] = (t[3] >> 1) ^ (t[2] << 31); + M[i * 4 + 2] = (t[2] >> 1) ^ (t[1] << 31); + M[i * 4 + 1] = (t[1] >> 1) ^ (t[0] << 31); + M[i * 4 + 0] = (t[0] >> 1) ^ (mask << 24); +} + +static void +do_fillM (unsigned char *h, u32 *M) +{ + int i, j; + + M[0 * 4 + 0] = 0; + M[0 * 4 + 1] = 0; + M[0 * 4 + 2] = 0; + M[0 * 4 + 3] = 0; + + M[8 * 4 + 0] = buf_get_be32 (h + 0); + M[8 * 4 + 1] = buf_get_be32 (h + 4); + M[8 * 4 + 2] = buf_get_be32 (h + 8); + M[8 * 4 + 3] = buf_get_be32 (h + 12); + + for (i = 4; i > 0; i /= 2) + { + M[i * 4 + 0] = M[2 * i * 4 + 0]; + M[i * 4 + 1] = M[2 * i * 4 + 1]; + M[i * 4 + 2] = M[2 * i * 4 + 2]; + M[i * 4 + 3] = M[2 * i * 4 + 3]; + + bshift (M, i); + } + + for (i = 2; i < 16; i *= 2) + for (j = 1; j < i; j++) + { + M[(i + j) * 4 + 0] = M[i * 4 + 0] ^ M[j * 4 + 0]; + M[(i + j) * 4 + 1] = M[i * 4 + 1] ^ M[j * 4 + 1]; + M[(i + j) * 4 + 2] = M[i * 4 + 2] ^ M[j * 4 + 2]; + M[(i + j) * 4 + 3] = M[i * 4 + 3] ^ M[j * 4 + 3]; + } + + for (i = 0; i < 4 * 16; i += 4) + { + M[i + 0 + 64] = (M[i + 0] >> 4) + ^ ((u64) gcmR[(M[i + 3] << 4) & 0xf0] << 16); + M[i + 1 + 64] = (M[i + 1] >> 4) ^ (M[i + 0] << 28); + M[i + 2 + 64] = (M[i + 2] >> 4) ^ (M[i + 1] << 28); + M[i + 3 + 64] = (M[i + 3] >> 4) ^ (M[i + 2] << 28); + } +} + +static inline unsigned int +do_ghash (unsigned char *result, const unsigned char *buf, const u32 *gcmM) +{ + byte V[16]; + u32 tmp[4]; + u32 v; + const u32 *M, *m; + u32 T[3]; + int i; + + cipher_block_xor (V, result, buf, 16); /* V is big-endian */ + + /* First round can be manually tweaked based on fact that 'tmp' is zero. 
*/ + i = 15; + + v = V[i]; + M = &gcmM[(v & 0xf) * 4 + 64]; + v = (v & 0xf0) >> 4; + m = &gcmM[v * 4]; + v = V[--i]; + + tmp[0] = M[0] ^ m[0]; + tmp[1] = M[1] ^ m[1]; + tmp[2] = M[2] ^ m[2]; + tmp[3] = M[3] ^ m[3]; + + while (1) + { + M = &gcmM[(v & 0xf) * 4 + 64]; + v = (v & 0xf0) >> 4; + m = &gcmM[v * 4]; + + T[0] = tmp[0]; + T[1] = tmp[1]; + T[2] = tmp[2]; + tmp[0] = (T[0] >> 8) ^ ((u32) gcmR[tmp[3] & 0xff] << 16) ^ m[0]; + tmp[1] = (T[0] << 24) ^ (tmp[1] >> 8) ^ m[1]; + tmp[2] = (T[1] << 24) ^ (tmp[2] >> 8) ^ m[2]; + tmp[3] = (T[2] << 24) ^ (tmp[3] >> 8) ^ m[3]; + + tmp[0] ^= M[0]; + tmp[1] ^= M[1]; + tmp[2] ^= M[2]; + tmp[3] ^= M[3]; + + if (i == 0) + break; + + v = V[--i]; + } + + buf_put_be32 (result + 0, tmp[0]); + buf_put_be32 (result + 4, tmp[1]); + buf_put_be32 (result + 8, tmp[2]); + buf_put_be32 (result + 12, tmp[3]); + + return (sizeof(V) + sizeof(T) + sizeof(tmp) + + sizeof(int)*2 + sizeof(void*)*6); +} +#endif /*!GCM_TABLES_USE_U64*/ + +#define fillM(c) \ + do_fillM (c->u_mode.gcm.u_ghash_key.key, c->u_mode.gcm.gcm_table) +#define GHASH(c, result, buf) do_ghash (result, buf, c->u_mode.gcm.gcm_table) +#define prefetch_tables(c) \ + do_prefetch_tables(c->u_mode.gcm.gcm_table, sizeof(c->u_mode.gcm.gcm_table)) + +#else + +static unsigned long +bshift (unsigned long *b) +{ + unsigned long c; + int i; + c = b[3] & 1; + for (i = 3; i > 0; i--) + { + b[i] = (b[i] >> 1) | (b[i - 1] << 31); + } + b[i] >>= 1; + return c; +} + +static unsigned int +do_ghash (unsigned char *hsub, unsigned char *result, const unsigned char *buf) +{ + unsigned long V[4]; + int i, j; + byte *p; + +#ifdef WORDS_BIGENDIAN + p = result; +#else + unsigned long T[4]; + + cipher_block_xor (V, result, buf, 16); + for (i = 0; i < 4; i++) + { + V[i] = (V[i] & 0x00ff00ff) << 8 | (V[i] & 0xff00ff00) >> 8; + V[i] = (V[i] & 0x0000ffff) << 16 | (V[i] & 0xffff0000) >> 16; + } + p = (byte *) T; +#endif + + memset (p, 0, 16); + + for (i = 0; i < 16; i++) + { + for (j = 0x80; j; j >>= 1) + { + if (hsub[i] & j) + cipher_block_xor (p, p, V, 16); + if (bshift (V)) + V[0] ^= 0xe1000000; + } + } +#ifndef WORDS_BIGENDIAN + for (i = 0, p = (byte *) T; i < 16; i += 4, p += 4) + { + result[i + 0] = p[3]; + result[i + 1] = p[2]; + result[i + 2] = p[1]; + result[i + 3] = p[0]; + } +#endif + + return (sizeof(V) + sizeof(T) + sizeof(int)*2 + sizeof(void*)*5); +} + +#define fillM(c) do { } while (0) +#define GHASH(c, result, buf) do_ghash (c->u_mode.gcm.u_ghash_key.key, result, buf) +#define prefetch_tables(c) do {} while (0) + +#endif /* !GCM_USE_TABLES */ + + +static unsigned int +ghash_internal (gcry_cipher_hd_t c, byte *result, const byte *buf, + size_t nblocks) +{ + const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; + unsigned int burn = 0; + + prefetch_tables (c); + + while (nblocks) + { + burn = GHASH (c, result, buf); + buf += blocksize; + nblocks--; + } + + return burn + (burn ? 
5*sizeof(void*) : 0); +} + + +static void +setupM (gcry_cipher_hd_t c) +{ +#if defined(GCM_USE_INTEL_PCLMUL) || defined(GCM_USE_ARM_PMULL) || \ + defined(GCM_USE_S390X_CRYPTO) + unsigned int features = _gcry_get_hw_features (); +#endif + + c->u_mode.gcm.ghash_fn = NULL; + + if (0) + ; +#ifdef GCM_USE_INTEL_PCLMUL + else if (features & HWF_INTEL_PCLMUL) + { + c->u_mode.gcm.ghash_fn = _gcry_ghash_intel_pclmul; + _gcry_ghash_setup_intel_pclmul (c); + } +#endif +#ifdef GCM_USE_ARM_PMULL + else if (features & HWF_ARM_PMULL) + { + c->u_mode.gcm.ghash_fn = ghash_armv8_ce_pmull; + ghash_setup_armv8_ce_pmull (c); + } +#endif +#ifdef GCM_USE_ARM_NEON + else if (features & HWF_ARM_NEON) + { + c->u_mode.gcm.ghash_fn = ghash_armv7_neon; + ghash_setup_armv7_neon (c); + } +#endif +#ifdef GCM_USE_S390X_CRYPTO + else if (features & HWF_S390X_MSA) + { + if (kimd_query () & km_function_to_mask (KMID_FUNCTION_GHASH)) + { + c->u_mode.gcm.ghash_fn = ghash_s390x_kimd; + } + } +#endif + + if (c->u_mode.gcm.ghash_fn == NULL) + { + c->u_mode.gcm.ghash_fn = ghash_internal; + fillM (c); + } +} + + +static inline void +gcm_bytecounter_add (u32 ctr[2], size_t add) +{ + if (sizeof(add) > sizeof(u32)) + { + u32 high_add = ((add >> 31) >> 1) & 0xffffffff; + ctr[1] += high_add; + } + + ctr[0] += add; + if (ctr[0] >= add) + return; + ++ctr[1]; +} + + +static inline u32 +gcm_add32_be128 (byte *ctr, unsigned int add) +{ + /* 'ctr' must be aligned to four bytes. */ + const unsigned int blocksize = GCRY_GCM_BLOCK_LEN; + u32 *pval = (u32 *)(void *)(ctr + blocksize - sizeof(u32)); + u32 val; + + val = be_bswap32(*pval) + add; + *pval = be_bswap32(val); + + return val; /* return result as host-endian value */ +} + + +static inline int +gcm_check_datalen (u32 ctr[2]) +{ + /* len(plaintext) <= 2^39-256 bits == 2^36-32 bytes == 2^32-2 blocks */ + if (ctr[1] > 0xfU) + return 0; + if (ctr[1] < 0xfU) + return 1; + + if (ctr[0] <= 0xffffffe0U) + return 1; + + return 0; +} + + +static inline int +gcm_check_aadlen_or_ivlen (u32 ctr[2]) +{ + /* len(aad/iv) <= 2^64-1 bits ~= 2^61-1 bytes */ + if (ctr[1] > 0x1fffffffU) + return 0; + if (ctr[1] < 0x1fffffffU) + return 1; + + if (ctr[0] <= 0xffffffffU) + return 1; + + return 0; +} + + +static void +do_ghash_buf(gcry_cipher_hd_t c, byte *hash, const byte *buf, + size_t buflen, int do_padding) +{ + unsigned int blocksize = GCRY_GCM_BLOCK_LEN; + unsigned int unused = c->u_mode.gcm.mac_unused; + ghash_fn_t ghash_fn = c->u_mode.gcm.ghash_fn; + size_t nblocks, n; + unsigned int burn = 0; + + if (buflen == 0 && (unused == 0 || !do_padding)) + return; + + do + { + if (buflen > 0 && (buflen + unused < blocksize || unused > 0)) + { + n = blocksize - unused; + n = n < buflen ? n : buflen; + + buf_cpy (&c->u_mode.gcm.macbuf[unused], buf, n); + + unused += n; + buf += n; + buflen -= n; + } + if (!buflen) + { + if (!do_padding && unused < blocksize) + { + break; + } + + n = blocksize - unused; + if (n > 0) + { + memset (&c->u_mode.gcm.macbuf[unused], 0, n); + unused = blocksize; + } + } + + if (unused > 0) + { + gcry_assert (unused == blocksize); + + /* Process one block from macbuf. 
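The constants in gcm_check_datalen and gcm_check_aadlen_or_ivlen above are the SP 800-38D limits, merely split across the two 32-bit words of the byte counter:

\[
2^{36}-32 = \mathtt{0xF}\cdot 2^{32} + \mathtt{0xFFFFFFE0},
\qquad
2^{61}-1 = \mathtt{0x1FFFFFFF}\cdot 2^{32} + \mathtt{0xFFFFFFFF}
\]

That is, the plaintext limit of 2^39 - 256 bits equals 2^36 - 32 bytes: the high word may be at most 0xf, and when it is exactly 0xf the low word may not exceed 0xffffffe0. AAD and IV are capped at 2^64 - 1 bits, roughly 2^61 - 1 bytes.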
 */
+          burn = ghash_fn (c, hash, c->u_mode.gcm.macbuf, 1);
+          unused = 0;
+        }
+
+      nblocks = buflen / blocksize;
+
+      if (nblocks)
+        {
+          burn = ghash_fn (c, hash, buf, nblocks);
+          buf += blocksize * nblocks;
+          buflen -= blocksize * nblocks;
+        }
+    }
+  while (buflen > 0);
+
+  c->u_mode.gcm.mac_unused = unused;
+
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+static gcry_err_code_t
+gcm_ctr_encrypt (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                 const byte *inbuf, size_t inbuflen)
+{
+  gcry_err_code_t err = 0;
+
+  while (inbuflen)
+    {
+      u32 nblocks_to_overflow;
+      u32 num_ctr_increments;
+      u32 curr_ctr_low;
+      size_t currlen = inbuflen;
+      byte ctr_copy[GCRY_GCM_BLOCK_LEN];
+      int fix_ctr = 0;
+
+      /* GCM CTR increments only the least significant 32 bits, without
+       * carry into the upper 96 bits of the counter.  Using the generic
+       * CTR implementation directly would carry a 32-bit overflow into
+       * the upper 96 bits.  Detect if the input length is long enough to
+       * cause an overflow, and limit the input length so that the CTR
+       * overflow happens but the updated CTR value is not used to
+       * encrypt further input.  After the overflow, the upper 96 bits of
+       * the CTR are restored to cancel out the modification done by the
+       * generic CTR encryption. */
+
+      if (inbuflen > c->unused)
+        {
+          curr_ctr_low = gcm_add32_be128 (c->u_ctr.ctr, 0);
+
+          /* Number of CTR increments this inbuflen would cause. */
+          num_ctr_increments = (inbuflen - c->unused) / GCRY_GCM_BLOCK_LEN +
+                               !!((inbuflen - c->unused) % GCRY_GCM_BLOCK_LEN);
+
+          if ((u32)(num_ctr_increments + curr_ctr_low) < curr_ctr_low)
+            {
+              nblocks_to_overflow = 0xffffffffU - curr_ctr_low + 1;
+              currlen = nblocks_to_overflow * GCRY_GCM_BLOCK_LEN + c->unused;
+              if (currlen > inbuflen)
+                {
+                  currlen = inbuflen;
+                }
+
+              fix_ctr = 1;
+              cipher_block_cpy(ctr_copy, c->u_ctr.ctr, GCRY_GCM_BLOCK_LEN);
+            }
+        }
+
+      err = _gcry_cipher_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen);
+      if (err != 0)
+        return err;
+
+      if (fix_ctr)
+        {
+          /* Lower 32-bits of CTR should now be zero. */
+          gcry_assert(gcm_add32_be128 (c->u_ctr.ctr, 0) == 0);
+
+          /* Restore upper part of CTR. */
+          buf_cpy(c->u_ctr.ctr, ctr_copy, GCRY_GCM_BLOCK_LEN - sizeof(u32));
+
+          wipememory(ctr_copy, sizeof(ctr_copy));
+        }
+
+      inbuflen -= currlen;
+      inbuf += currlen;
+      outbuflen -= currlen;
+      outbuf += currlen;
+    }
+
+  return err;
+}
+
+
+static gcry_err_code_t
+gcm_crypt_inner (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen,
+                 const byte *inbuf, size_t inbuflen, int encrypt)
+{
+  gcry_err_code_t err;
+
+  while (inbuflen)
+    {
+      size_t currlen = inbuflen;
+
+      /* Use a bulk method if available. */
+      if (c->bulk.gcm_crypt)
+        {
+          /* Bulk method requires that there is no cached data. */
+          if (inbuflen >= GCRY_GCM_BLOCK_LEN && c->u_mode.gcm.mac_unused == 0)
+            {
+              size_t nblks = inbuflen / GCRY_GCM_BLOCK_LEN;
+              size_t nleft;
+              size_t ndone;
+
+              nleft = c->bulk.gcm_crypt (c, outbuf, inbuf, nblks, encrypt);
+              ndone = nblks - nleft;
+
+              inbuf += ndone * GCRY_GCM_BLOCK_LEN;
+              outbuf += ndone * GCRY_GCM_BLOCK_LEN;
+              inbuflen -= ndone * GCRY_GCM_BLOCK_LEN;
+              outbuflen -= ndone * GCRY_GCM_BLOCK_LEN;
+
+              if (inbuflen == 0)
+                break;
+
+              currlen = inbuflen;
+            }
+          else if (c->u_mode.gcm.mac_unused > 0
+                   && inbuflen >= GCRY_GCM_BLOCK_LEN
+                                  + (16 - c->u_mode.gcm.mac_unused))
+            {
+              /* Handle just enough data so that cache is depleted, and on
+               * next loop iteration use bulk method.
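The wrap-around handling in gcm_ctr_encrypt exists because GCM's counter increment, inc32, touches only the last 32 bits of the counter block, while the generic CTR helper carries across all 128 bits. A byte-wise editorial sketch of inc32 (gcm_add32_be128 above is the word-wise equivalent):

#include <stdint.h>

/* inc32: bump only the big-endian low 32 bits of the counter block,
 * wrapping modulo 2^32 with no carry into the upper 96 bits. */
static void
inc32 (uint8_t ctr[16])
{
  uint32_t v = ((uint32_t)ctr[12] << 24) | ((uint32_t)ctr[13] << 16)
               | ((uint32_t)ctr[14] << 8) | (uint32_t)ctr[15];

  v += 1;
  ctr[12] = (uint8_t)(v >> 24);
  ctr[13] = (uint8_t)(v >> 16);
  ctr[14] = (uint8_t)(v >> 8);
  ctr[15] = (uint8_t)v;
}

gcm_ctr_encrypt therefore stops the generic routine exactly at the 2^32 block boundary and repairs the upper 96 bits from ctr_copy afterwards.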
*/ + currlen = 16 - c->u_mode.gcm.mac_unused; + + gcry_assert(currlen); + } + } + + /* Since checksumming is done after/before encryption/decryption, + * process input in 24KiB chunks to keep data loaded in L1 cache for + * checksumming/decryption. */ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + if (!encrypt) + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, inbuf, currlen, 0); + + err = gcm_ctr_encrypt(c, outbuf, outbuflen, inbuf, currlen); + if (err != 0) + return err; + + if (encrypt) + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, outbuf, currlen, 0); + + outbuf += currlen; + inbuf += currlen; + outbuflen -= currlen; + inbuflen -= currlen; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_gcm_encrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + static const unsigned char zerobuf[MAX_BLOCKSIZE]; + + if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->u_mode.gcm.datalen_over_limits) + return GPG_ERR_INV_LENGTH; + if (c->marks.tag + || c->u_mode.gcm.ghash_data_finalized + || !c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN); + + if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode) + return GPG_ERR_INV_STATE; + + if (!c->u_mode.gcm.ghash_aad_finalized) + { + /* Start of encryption marks end of AAD stream. */ + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1); + c->u_mode.gcm.ghash_aad_finalized = 1; + } + + gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen); + if (!gcm_check_datalen(c->u_mode.gcm.datalen)) + { + c->u_mode.gcm.datalen_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + return gcm_crypt_inner (c, outbuf, outbuflen, inbuf, inbuflen, 1); +} + + +gcry_err_code_t +_gcry_cipher_gcm_decrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + static const unsigned char zerobuf[MAX_BLOCKSIZE]; + + if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->u_mode.gcm.datalen_over_limits) + return GPG_ERR_INV_LENGTH; + if (c->marks.tag + || c->u_mode.gcm.ghash_data_finalized + || !c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN); + + if (!c->u_mode.gcm.ghash_aad_finalized) + { + /* Start of decryption marks end of AAD stream. 
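For orientation, this is how the entry points in this file are reached from the outside. A hedged usage sketch through the documented public API, with placeholder key, IV and AAD and abbreviated error handling; passing a NULL input to gcry_cipher_encrypt requests in-place encryption:

#include <gcrypt.h>

/* Editorial sketch, not patch code: seal BUF with AES-128-GCM. */
static gcry_error_t
gcm_seal (const unsigned char key[16], const unsigned char iv[12],
          const unsigned char *aad, size_t aadlen,
          unsigned char *buf, size_t buflen, unsigned char tag[16])
{
  gcry_cipher_hd_t hd;
  gcry_error_t err;

  err = gcry_cipher_open (&hd, GCRY_CIPHER_AES128, GCRY_CIPHER_MODE_GCM, 0);
  if (err)
    return err;

  if (!(err = gcry_cipher_setkey (hd, key, 16))
      && !(err = gcry_cipher_setiv (hd, iv, 12))
      && !(err = gcry_cipher_authenticate (hd, aad, aadlen))
      /* NULL input: encrypt BUF in place.  */
      && !(err = gcry_cipher_encrypt (hd, buf, buflen, NULL, 0)))
    err = gcry_cipher_gettag (hd, tag, 16);

  gcry_cipher_close (hd);
  return err;
}

The order of the calls is exactly the order the state checks in _gcry_cipher_gcm_encrypt enforce: key, then IV, then AAD, then data, then tag.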
*/ + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1); + c->u_mode.gcm.ghash_aad_finalized = 1; + } + + gcm_bytecounter_add(c->u_mode.gcm.datalen, inbuflen); + if (!gcm_check_datalen(c->u_mode.gcm.datalen)) + { + c->u_mode.gcm.datalen_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + return gcm_crypt_inner (c, outbuf, outbuflen, inbuf, inbuflen, 0); +} + + +gcry_err_code_t +_gcry_cipher_gcm_authenticate (gcry_cipher_hd_t c, + const byte * aadbuf, size_t aadbuflen) +{ + static const unsigned char zerobuf[MAX_BLOCKSIZE]; + + if (c->spec->blocksize != GCRY_GCM_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (c->u_mode.gcm.datalen_over_limits) + return GPG_ERR_INV_LENGTH; + if (c->marks.tag + || c->u_mode.gcm.ghash_aad_finalized + || c->u_mode.gcm.ghash_data_finalized + || !c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + _gcry_cipher_gcm_setiv (c, zerobuf, GCRY_GCM_BLOCK_LEN); + + gcm_bytecounter_add(c->u_mode.gcm.aadlen, aadbuflen); + if (!gcm_check_aadlen_or_ivlen(c->u_mode.gcm.aadlen)) + { + c->u_mode.gcm.datalen_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, aadbuf, aadbuflen, 0); + + return 0; +} + + +void +_gcry_cipher_gcm_setkey (gcry_cipher_hd_t c) +{ + memset (c->u_mode.gcm.u_ghash_key.key, 0, GCRY_GCM_BLOCK_LEN); + + c->spec->encrypt (&c->context.c, c->u_mode.gcm.u_ghash_key.key, + c->u_mode.gcm.u_ghash_key.key); + setupM (c); +} + + +static gcry_err_code_t +_gcry_cipher_gcm_initiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) +{ + memset (c->u_mode.gcm.aadlen, 0, sizeof(c->u_mode.gcm.aadlen)); + memset (c->u_mode.gcm.datalen, 0, sizeof(c->u_mode.gcm.datalen)); + memset (c->u_mode.gcm.u_tag.tag, 0, GCRY_GCM_BLOCK_LEN); + c->u_mode.gcm.datalen_over_limits = 0; + c->u_mode.gcm.ghash_data_finalized = 0; + c->u_mode.gcm.ghash_aad_finalized = 0; + + if (ivlen == 0) + return GPG_ERR_INV_LENGTH; + + if (ivlen != GCRY_GCM_BLOCK_LEN - 4) + { + u32 iv_bytes[2] = {0, 0}; + u32 bitlengths[2][2]; + + if (!c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + memset(c->u_ctr.ctr, 0, GCRY_GCM_BLOCK_LEN); + + gcm_bytecounter_add(iv_bytes, ivlen); + if (!gcm_check_aadlen_or_ivlen(iv_bytes)) + { + c->u_mode.gcm.datalen_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + do_ghash_buf(c, c->u_ctr.ctr, iv, ivlen, 1); + + /* iv length, 64-bit */ + bitlengths[1][1] = be_bswap32(iv_bytes[0] << 3); + bitlengths[1][0] = be_bswap32((iv_bytes[0] >> 29) | + (iv_bytes[1] << 3)); + /* zeros, 64-bit */ + bitlengths[0][1] = 0; + bitlengths[0][0] = 0; + + do_ghash_buf(c, c->u_ctr.ctr, (byte*)bitlengths, GCRY_GCM_BLOCK_LEN, 1); + + wipememory (iv_bytes, sizeof iv_bytes); + wipememory (bitlengths, sizeof bitlengths); + } + else + { + /* 96-bit IV is handled differently. */ + memcpy (c->u_ctr.ctr, iv, ivlen); + c->u_ctr.ctr[12] = c->u_ctr.ctr[13] = c->u_ctr.ctr[14] = 0; + c->u_ctr.ctr[15] = 1; + } + + c->spec->encrypt (&c->context.c, c->u_mode.gcm.tagiv, c->u_ctr.ctr); + + gcm_add32_be128 (c->u_ctr.ctr, 1); + + c->unused = 0; + c->marks.iv = 1; + c->marks.tag = 0; + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_gcm_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) +{ + c->marks.iv = 0; + c->marks.tag = 0; + c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0; + + if (fips_mode ()) + { + /* Direct invocation of GCM setiv in FIPS mode disables encryption. 
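_gcry_cipher_gcm_initiv above is the J_0 derivation of SP 800-38D written out by hand. In the standard's notation:

\[
J_0 =
\begin{cases}
\mathit{IV} \,\|\, 0^{31} \,\|\, 1, & \mathrm{len}(\mathit{IV}) = 96\ \text{bits}\\[2pt]
\mathrm{GHASH}_H\!\big(\mathit{IV} \,\|\, 0^{s} \,\|\, 0^{64} \,\|\, [\mathrm{len}(\mathit{IV})]_{64}\big), & \text{otherwise,}
\end{cases}
\]

where 0^s pads the IV to a full 128-bit block. The bitlengths array in the code is exactly the trailing 0^64 || [len(IV)]_64 block of the second case, and the block cipher call at the end caches E_K(J_0) in tagiv for the final tag computation.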
*/ + c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 1; + } + + return _gcry_cipher_gcm_initiv (c, iv, ivlen); +} + + +#if 0 && TODO +void +_gcry_cipher_gcm_geniv (gcry_cipher_hd_t c, + byte *ivout, size_t ivoutlen, const byte *nonce, + size_t noncelen) +{ + /* nonce: user provided part (might be null) */ + /* noncelen: check if proper length (if nonce not null) */ + /* ivout: iv used to initialize gcm, output to user */ + /* ivoutlen: check correct size */ + byte iv[IVLEN]; + + if (!ivout) + return GPG_ERR_INV_ARG; + if (ivoutlen != IVLEN) + return GPG_ERR_INV_LENGTH; + if (nonce != NULL && !is_nonce_ok_len(noncelen)) + return GPG_ERR_INV_ARG; + + gcm_generate_iv(iv, nonce, noncelen); + + c->marks.iv = 0; + c->marks.tag = 0; + c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode = 0; + + _gcry_cipher_gcm_initiv (c, iv, IVLEN); + + buf_cpy(ivout, iv, IVLEN); + wipememory(iv, sizeof(iv)); +} +#endif + + +static int +is_tag_length_valid(size_t taglen) +{ + switch (taglen) + { + /* Allowed tag lengths from NIST SP 800-38D. */ + case 128 / 8: /* GCRY_GCM_BLOCK_LEN */ + case 120 / 8: + case 112 / 8: + case 104 / 8: + case 96 / 8: + case 64 / 8: + case 32 / 8: + return 1; + + default: + return 0; + } +} + +static gcry_err_code_t +_gcry_cipher_gcm_tag (gcry_cipher_hd_t c, + byte * outbuf, size_t outbuflen, int check) +{ + if (!(is_tag_length_valid (outbuflen) || outbuflen >= GCRY_GCM_BLOCK_LEN)) + return GPG_ERR_INV_LENGTH; + if (c->u_mode.gcm.datalen_over_limits) + return GPG_ERR_INV_LENGTH; + + if (!c->marks.tag) + { + u32 bitlengths[2][2]; + + if (!c->u_mode.gcm.ghash_fn) + return GPG_ERR_INV_STATE; + + /* aad length */ + bitlengths[0][1] = be_bswap32(c->u_mode.gcm.aadlen[0] << 3); + bitlengths[0][0] = be_bswap32((c->u_mode.gcm.aadlen[0] >> 29) | + (c->u_mode.gcm.aadlen[1] << 3)); + /* data length */ + bitlengths[1][1] = be_bswap32(c->u_mode.gcm.datalen[0] << 3); + bitlengths[1][0] = be_bswap32((c->u_mode.gcm.datalen[0] >> 29) | + (c->u_mode.gcm.datalen[1] << 3)); + + /* Finalize data-stream. */ + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, NULL, 0, 1); + c->u_mode.gcm.ghash_aad_finalized = 1; + c->u_mode.gcm.ghash_data_finalized = 1; + + /* Add bitlengths to tag. */ + do_ghash_buf(c, c->u_mode.gcm.u_tag.tag, (byte*)bitlengths, + GCRY_GCM_BLOCK_LEN, 1); + cipher_block_xor (c->u_mode.gcm.u_tag.tag, c->u_mode.gcm.tagiv, + c->u_mode.gcm.u_tag.tag, GCRY_GCM_BLOCK_LEN); + c->marks.tag = 1; + + wipememory (bitlengths, sizeof (bitlengths)); + wipememory (c->u_mode.gcm.macbuf, GCRY_GCM_BLOCK_LEN); + wipememory (c->u_mode.gcm.tagiv, GCRY_GCM_BLOCK_LEN); + wipememory (c->u_mode.gcm.aadlen, sizeof (c->u_mode.gcm.aadlen)); + wipememory (c->u_mode.gcm.datalen, sizeof (c->u_mode.gcm.datalen)); + } + + if (!check) + { + if (outbuflen > GCRY_GCM_BLOCK_LEN) + outbuflen = GCRY_GCM_BLOCK_LEN; + + /* NB: We already checked that OUTBUF is large enough to hold + * the result or has valid truncated length. */ + memcpy (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen); + } + else + { + /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF + * and thus we need to compare its length first. */ + if (!is_tag_length_valid (outbuflen) + || !buf_eq_const (outbuf, c->u_mode.gcm.u_tag.tag, outbuflen)) + return GPG_ERR_CHECKSUM; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_gcm_get_tag (gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen) +{ + /* Outputting authentication tag is part of encryption. 
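Put back into SP 800-38D notation, the finalization in _gcry_cipher_gcm_tag computes:

\[
S = \mathrm{GHASH}_H\big(A \,\|\, 0^{u} \,\|\, C \,\|\, 0^{v} \,\|\, [\mathrm{len}(A)]_{64} \,\|\, [\mathrm{len}(C)]_{64}\big),
\qquad
T = \mathrm{MSB}_{t}\big(S \oplus E_K(J_0)\big)
\]

with E_K(J_0) being the tagiv block saved during IV setup. Note that the check path compares with buf_eq_const, so verification of (possibly truncated) tags stays constant-time, and only the NIST-approved truncations listed in is_tag_length_valid are accepted.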
 */
+  if (c->u_mode.gcm.disallow_encryption_because_of_setiv_in_fips_mode)
+    return GPG_ERR_INV_STATE;
+
+  return _gcry_cipher_gcm_tag (c, outtag, taglen, 0);
+}
+
+gcry_err_code_t
+_gcry_cipher_gcm_check_tag (gcry_cipher_hd_t c, const unsigned char *intag,
+                            size_t taglen)
+{
+  return _gcry_cipher_gcm_tag (c, (unsigned char *) intag, taglen, 1);
+}
diff --git a/comm/third_party/libgcrypt/cipher/cipher-internal.h b/comm/third_party/libgcrypt/cipher/cipher-internal.h
new file mode 100644
index 0000000000..59b36ce78b
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-internal.h
@@ -0,0 +1,809 @@
+/* cipher-internal.h - Internal defs for cipher.c
+ * Copyright (C) 2011 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef G10_CIPHER_INTERNAL_H
+#define G10_CIPHER_INTERNAL_H
+
+#include "./poly1305-internal.h"
+
+
+/* The maximum supported size of a block in bytes. */
+#define MAX_BLOCKSIZE 16
+
+/* The length for an OCB block.  Although OCB supports any block
+   length it does not make sense to use a 64 bit blocklen (and cipher)
+   because this reduces the security margin to an unacceptable state.
+   Thus we require a cipher with 128 bit blocklength. */
+#define OCB_BLOCK_LEN  (128/8)
+
+/* The size of the pre-computed L table for OCB.  This takes the same
+   size as the table used for GCM and thus we don't save anything by
+   not using such a table. */
+#define OCB_L_TABLE_SIZE 16
+
+
+/* Check the above constants. */
+#if OCB_BLOCK_LEN > MAX_BLOCKSIZE
+# error OCB_BLOCKLEN > MAX_BLOCKSIZE
+#endif
+
+
+
+/* Magic values for the context structure. */
+#define CTX_MAGIC_NORMAL 0x24091964
+#define CTX_MAGIC_SECURE 0x46919042
+
+/* Try to use 16 byte aligned cipher context for better performance.
+   We use the aligned attribute, thus it is only possible to implement
+   this with gcc. */
+#undef NEED_16BYTE_ALIGNED_CONTEXT
+#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED
+# define NEED_16BYTE_ALIGNED_CONTEXT 1
+#endif
+
+/* Undef this symbol to trade GCM speed for 256 bytes of memory per context */
+#define GCM_USE_TABLES 1
+
+
+/* GCM_USE_INTEL_PCLMUL indicates whether to compile GCM with Intel PCLMUL
+   code. */
+#undef GCM_USE_INTEL_PCLMUL
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(GCM_USE_TABLES)
+# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+#  if __GNUC__ >= 4
+#   define GCM_USE_INTEL_PCLMUL 1
+#  endif
+# endif
+#endif /* GCM_USE_INTEL_PCLMUL */
+
+/* GCM_USE_ARM_PMULL indicates whether to compile GCM with ARMv8 PMULL code.
*/ +#undef GCM_USE_ARM_PMULL +#if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(GCM_USE_TABLES) +# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \ + && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \ + && defined(HAVE_GCC_INLINE_ASM_AARCH32_CRYPTO) +# define GCM_USE_ARM_PMULL 1 +# elif defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) +# define GCM_USE_ARM_PMULL 1 +# endif +#endif /* GCM_USE_ARM_PMULL */ + +/* GCM_USE_ARM_NEON indicates whether to compile GCM with ARMv7 NEON code. */ +#undef GCM_USE_ARM_NEON +#if defined(GCM_USE_TABLES) +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) +# define GCM_USE_ARM_NEON 1 +#endif +#endif /* GCM_USE_ARM_NEON */ + +/* GCM_USE_S390X_CRYPTO indicates whether to enable zSeries code. */ +#undef GCM_USE_S390X_CRYPTO +#if defined(HAVE_GCC_INLINE_ASM_S390X) +# define GCM_USE_S390X_CRYPTO 1 +#endif /* GCM_USE_S390X_CRYPTO */ + +typedef unsigned int (*ghash_fn_t) (gcry_cipher_hd_t c, byte *result, + const byte *buf, size_t nblocks); + + +/* A structure with function pointers for mode operations. */ +typedef struct cipher_mode_ops +{ + gcry_err_code_t (*encrypt)(gcry_cipher_hd_t c, unsigned char *outbuf, + size_t outbuflen, const unsigned char *inbuf, + size_t inbuflen); + gcry_err_code_t (*decrypt)(gcry_cipher_hd_t c, unsigned char *outbuf, + size_t outbuflen, const unsigned char *inbuf, + size_t inbuflen); + gcry_err_code_t (*setiv)(gcry_cipher_hd_t c, const unsigned char *iv, + size_t ivlen); + + gcry_err_code_t (*authenticate)(gcry_cipher_hd_t c, + const unsigned char *abuf, size_t abuflen); + gcry_err_code_t (*get_tag)(gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen); + gcry_err_code_t (*check_tag)(gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen); +} cipher_mode_ops_t; + + +/* A structure with function pointers for bulk operations. The cipher + algorithm setkey function initializes them when bulk operations are + available and the actual encryption routines use them if they are + not NULL. */ +typedef struct cipher_bulk_ops +{ + void (*cfb_enc)(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks); + void (*cfb_dec)(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks); + void (*cbc_enc)(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int cbc_mac); + void (*cbc_dec)(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks); + void (*ofb_enc)(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks); + void (*ctr_enc)(void *context, unsigned char *iv, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks); + size_t (*ocb_crypt)(gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt); + size_t (*ocb_auth)(gcry_cipher_hd_t c, const void *abuf_arg, size_t nblocks); + void (*xts_crypt)(void *context, unsigned char *tweak, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt); + size_t (*gcm_crypt)(gcry_cipher_hd_t c, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks, int encrypt); +} cipher_bulk_ops_t; + + +/* A VIA processor with the Padlock engine as well as the Intel AES_NI + instructions require an alignment of most data on a 16 byte + boundary. 
Because we trick out the compiler while allocating the
+   context, the align attribute as used in rijndael.c does not work on
+   its own.  Thus we need to make sure that the entire context
+   structure is aligned on that boundary.  We achieve this by
+   defining a new type and using that instead of our usual alignment
+   type. */
+typedef union
+{
+  PROPERLY_ALIGNED_TYPE foo;
+#ifdef NEED_16BYTE_ALIGNED_CONTEXT
+  char bar[16] __attribute__ ((aligned (16)));
+#endif
+  char c[1];
+} cipher_context_alignment_t;
+
+
+/* Storage structure for CMAC, for CMAC and EAX modes. */
+typedef struct {
+  /* The initialization vector.  Also contains tag after finalization. */
+  union {
+    cipher_context_alignment_t iv_align;
+    unsigned char iv[MAX_BLOCKSIZE];
+  } u_iv;
+
+  /* Subkeys for tag creation, not cleared by gcry_cipher_reset. */
+  unsigned char subkeys[2][MAX_BLOCKSIZE];
+
+  /* Space to save partial input lengths for MAC. */
+  unsigned char macbuf[MAX_BLOCKSIZE];
+
+  int mac_unused;       /* Number of unprocessed bytes in MACBUF. */
+  unsigned int tag:1;   /* Set to 1 if tag has been finalized. */
+} gcry_cmac_context_t;
+
+
+/* The handle structure. */
+struct gcry_cipher_handle
+{
+  int magic;
+  size_t actual_handle_size;     /* Allocated size of this handle. */
+  size_t handle_offset;          /* Offset to the malloced block. */
+  gcry_cipher_spec_t *spec;
+
+  /* The algorithm id.  This is a hack required because the module
+     interface does not easily allow retrieving this value. */
+  int algo;
+
+  /* A structure with function pointers for mode operations. */
+  cipher_mode_ops_t mode_ops;
+
+  /* A structure with function pointers for bulk operations.  Due to
+     limitations of the module system (we don't want to change the
+     API) we need to keep these function pointers here. */
+  cipher_bulk_ops_t bulk;
+
+  int mode;
+  unsigned int flags;
+
+  struct {
+    unsigned int key:1;            /* Set to 1 if a key has been set. */
+    unsigned int iv:1;             /* Set to 1 if an IV has been set. */
+    unsigned int tag:1;            /* Set to 1 if a tag is finalized. */
+    unsigned int finalize:1;       /* Next encrypt/decrypt has the final data. */
+    unsigned int allow_weak_key:1; /* Set to 1 if weak keys are allowed. */
+  } marks;
+
+  /* The initialization vector.  For best performance we make sure
+     that it is properly aligned.  In particular some implementations
+     of bulk operations expect a 16 byte aligned IV.  IV is also used
+     to store CBC-MAC in CCM mode; counter IV is stored in U_CTR.  For
+     OCB mode it is used for the offset value. */
+  union {
+    cipher_context_alignment_t iv_align;
+    unsigned char iv[MAX_BLOCKSIZE];
+  } u_iv;
+
+  /* The counter for CTR mode.  This field is also used by AESWRAP and
+     thus we can't use the U_IV union.  For OCB mode it is used for
+     the checksum. */
+  union {
+    cipher_context_alignment_t iv_align;
+    unsigned char ctr[MAX_BLOCKSIZE];
+  } u_ctr;
+
+  /* Space to save an IV or CTR for chaining operations. */
+  unsigned char lastiv[MAX_BLOCKSIZE];
+  int unused;  /* Number of unused bytes in LASTIV. */
+
+  union {
+    /* Mode specific storage for CCM mode. */
+    struct {
+      u64 encryptlen;
+      u64 aadlen;
+      unsigned int authlen;
+
+      /* Space to save partial input lengths for MAC. */
+      unsigned char macbuf[GCRY_CCM_BLOCK_LEN];
+      int mac_unused;    /* Number of unprocessed bytes in MACBUF. */
+
+      unsigned char s0[GCRY_CCM_BLOCK_LEN];
+
+      unsigned int nonce:1;   /* Set to 1 if nonce has been set. */
+      unsigned int lengths:1; /* Set to 1 if CCM length parameters have been
+                                 processed. */
+    } ccm;
+
+    /* Mode specific storage for Poly1305 mode.
*/ + struct { + /* byte counter for AAD. */ + u32 aadcount[2]; + + /* byte counter for data. */ + u32 datacount[2]; + + unsigned int aad_finalized:1; + unsigned int bytecount_over_limits:1; + + poly1305_context_t ctx; + } poly1305; + + /* Mode specific storage for CMAC mode. */ + gcry_cmac_context_t cmac; + + /* Mode specific storage for EAX mode. */ + struct { + /* CMAC for header (AAD). */ + gcry_cmac_context_t cmac_header; + + /* CMAC for ciphertext. */ + gcry_cmac_context_t cmac_ciphertext; + } eax; + + /* Mode specific storage for GCM mode. */ + struct { + /* The interim tag for GCM mode. */ + union { + cipher_context_alignment_t iv_align; + unsigned char tag[MAX_BLOCKSIZE]; + } u_tag; + + /* Space to save partial input lengths for MAC. */ + unsigned char macbuf[GCRY_CCM_BLOCK_LEN]; + int mac_unused; /* Number of unprocessed bytes in MACBUF. */ + + /* byte counters for GCM */ + u32 aadlen[2]; + u32 datalen[2]; + + /* encrypted tag counter */ + unsigned char tagiv[MAX_BLOCKSIZE]; + + unsigned int ghash_data_finalized:1; + unsigned int ghash_aad_finalized:1; + + unsigned int datalen_over_limits:1; + unsigned int disallow_encryption_because_of_setiv_in_fips_mode:1; + + /* --- Following members are not cleared in gcry_cipher_reset --- */ + + /* GHASH multiplier from key. */ + union { + cipher_context_alignment_t iv_align; + unsigned char key[MAX_BLOCKSIZE]; + } u_ghash_key; + + /* GHASH implementation in use. */ + ghash_fn_t ghash_fn; + + /* Pre-calculated table for GCM. */ +#ifdef GCM_USE_TABLES + #if (SIZEOF_UNSIGNED_LONG == 8 || defined(__x86_64__)) + #define GCM_TABLES_USE_U64 1 + u64 gcm_table[4 * 16]; + #else + #undef GCM_TABLES_USE_U64 + u32 gcm_table[8 * 16]; + #endif +#endif + } gcm; + + /* Mode specific storage for OCB mode. */ + struct { + /* --- Following members are not cleared in gcry_cipher_reset --- */ + + /* Helper variables and pre-computed table of L values. */ + unsigned char L_star[OCB_BLOCK_LEN]; + unsigned char L_dollar[OCB_BLOCK_LEN]; + unsigned char L0L1[OCB_BLOCK_LEN]; + unsigned char L[OCB_L_TABLE_SIZE][OCB_BLOCK_LEN]; + + /* --- Following members are cleared in gcry_cipher_reset --- */ + + /* The tag is valid if marks.tag has been set. */ + unsigned char tag[OCB_BLOCK_LEN]; + + /* A buffer to hold the offset for the AAD processing. */ + unsigned char aad_offset[OCB_BLOCK_LEN]; + + /* A buffer to hold the current sum of AAD processing. We can't + use tag here because tag may already hold the preprocessed + checksum of the data. */ + unsigned char aad_sum[OCB_BLOCK_LEN]; + + /* A buffer to store AAD data not yet processed. */ + unsigned char aad_leftover[OCB_BLOCK_LEN]; + + /* Number of data/aad blocks processed so far. */ + u64 data_nblocks; + u64 aad_nblocks; + + /* Number of valid bytes in AAD_LEFTOVER. */ + unsigned char aad_nleftover; + + /* Length of the tag. Fixed for now but may eventually be + specified using a set of gcry_cipher_flags. */ + unsigned char taglen; + + /* Flags indicating that the final data/aad block has been + processed. */ + unsigned int data_finalized:1; + unsigned int aad_finalized:1; + } ocb; + + /* Mode specific storage for XTS mode. */ + struct { + /* Pointer to tweak cipher context, allocated after actual + * cipher context. */ + char *tweak_context; + } xts; + } u_mode; + + /* What follows are two contexts of the cipher in use. The first + one needs to be aligned well enough for the cipher operation + whereas the second one is a copy created by cipher_setkey and + used by cipher_reset. 
That second copy has no need for proper
+     alignment because it is only accessed by memcpy. */
+  cipher_context_alignment_t context;
+};
+
+
+/*-- cipher-cbc.c --*/
+gcry_err_code_t _gcry_cipher_cbc_encrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_decrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_cts_encrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cbc_cts_decrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+
+/*-- cipher-cfb.c --*/
+gcry_err_code_t _gcry_cipher_cfb_encrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb_decrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb8_encrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_cfb8_decrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+
+
+/*-- cipher-ofb.c --*/
+gcry_err_code_t _gcry_cipher_ofb_encrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+
+/*-- cipher-ctr.c --*/
+gcry_err_code_t _gcry_cipher_ctr_encrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+
+
+/*-- cipher-aeswrap.c --*/
+gcry_err_code_t _gcry_cipher_aeswrap_encrypt
+/* */ (gcry_cipher_hd_t c,
+       byte *outbuf, size_t outbuflen,
+       const byte *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_aeswrap_decrypt
+/* */ (gcry_cipher_hd_t c,
+       byte *outbuf, size_t outbuflen,
+       const byte *inbuf, size_t inbuflen);
+
+
+/*-- cipher-ccm.c --*/
+gcry_err_code_t _gcry_cipher_ccm_encrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ccm_decrypt
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outbuf, size_t outbuflen,
+       const unsigned char *inbuf, size_t inbuflen);
+gcry_err_code_t _gcry_cipher_ccm_set_nonce
+/* */ (gcry_cipher_hd_t c, const unsigned char *nonce,
+       size_t noncelen);
+gcry_err_code_t _gcry_cipher_ccm_authenticate
+/* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen);
+gcry_err_code_t _gcry_cipher_ccm_set_lengths
+/* */ (gcry_cipher_hd_t c, u64 encryptedlen, u64 aadlen, u64 taglen);
+gcry_err_code_t _gcry_cipher_ccm_get_tag
+/* */ (gcry_cipher_hd_t c,
+       unsigned char *outtag, size_t taglen);
+gcry_err_code_t _gcry_cipher_ccm_check_tag
+/* */ (gcry_cipher_hd_t c,
+       const unsigned char *intag, size_t taglen);
+
+
+/*-- cipher-cmac.c --*/
+gcry_err_code_t _gcry_cmac_generate_subkeys
+/* */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx);
+gcry_err_code_t _gcry_cmac_write
+/* */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx,
+       const byte * inbuf, size_t inlen);
+gcry_err_code_t _gcry_cmac_final
+/* */ (gcry_cipher_hd_t c, gcry_cmac_context_t *ctx);
+void _gcry_cmac_reset (gcry_cmac_context_t *ctx);
+
+
+/*-- cipher-eax.c --*/
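All of these per-mode entry points end up in the mode_ops table declared earlier in this header, so a public gcry_cipher_encrypt or gcry_cipher_decrypt call reduces to a single indirect call. A hypothetical sketch of that dispatch, assuming the types from this header; the real wiring lives in cipher.c and the function name here is illustrative only:

/* Editorial sketch of function-pointer dispatch through mode_ops. */
static gcry_err_code_t
do_encrypt (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen,
            const unsigned char *inbuf, size_t inbuflen)
{
  if (!c->mode_ops.encrypt)
    return GPG_ERR_INV_CIPHER_MODE;  /* Mode provides no encrypt operation. */
  return c->mode_ops.encrypt (c, outbuf, outbuflen, inbuf, inbuflen);
}

Keeping the pointers in the handle rather than in the module keeps the public ABI unchanged, which is the limitation the bulk-ops comment above alludes to.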
+gcry_err_code_t _gcry_cipher_eax_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_eax_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_eax_set_nonce +/* */ (gcry_cipher_hd_t c, + const unsigned char *nonce, size_t noncelen); +gcry_err_code_t _gcry_cipher_eax_authenticate +/* */ (gcry_cipher_hd_t c, + const unsigned char *aadbuf, size_t aadbuflen); +gcry_err_code_t _gcry_cipher_eax_get_tag +/* */ (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen); +gcry_err_code_t _gcry_cipher_eax_check_tag +/* */ (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen); +gcry_err_code_t _gcry_cipher_eax_setkey +/* */ (gcry_cipher_hd_t c); + + +/*-- cipher-gcm.c --*/ +gcry_err_code_t _gcry_cipher_gcm_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_gcm_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_gcm_setiv +/* */ (gcry_cipher_hd_t c, + const unsigned char *iv, size_t ivlen); +gcry_err_code_t _gcry_cipher_gcm_authenticate +/* */ (gcry_cipher_hd_t c, + const unsigned char *aadbuf, size_t aadbuflen); +gcry_err_code_t _gcry_cipher_gcm_get_tag +/* */ (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen); +gcry_err_code_t _gcry_cipher_gcm_check_tag +/* */ (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen); +void _gcry_cipher_gcm_setkey +/* */ (gcry_cipher_hd_t c); + + +/*-- cipher-poly1305.c --*/ +gcry_err_code_t _gcry_cipher_poly1305_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_poly1305_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_poly1305_setiv +/* */ (gcry_cipher_hd_t c, + const unsigned char *iv, size_t ivlen); +gcry_err_code_t _gcry_cipher_poly1305_authenticate +/* */ (gcry_cipher_hd_t c, + const unsigned char *aadbuf, size_t aadbuflen); +gcry_err_code_t _gcry_cipher_poly1305_get_tag +/* */ (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen); +gcry_err_code_t _gcry_cipher_poly1305_check_tag +/* */ (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen); +void _gcry_cipher_poly1305_setkey +/* */ (gcry_cipher_hd_t c); + + +/*-- chacha20.c --*/ +gcry_err_code_t _gcry_chacha20_poly1305_encrypt +/* */ (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf, + size_t length); +gcry_err_code_t _gcry_chacha20_poly1305_decrypt +/* */ (gcry_cipher_hd_t c, byte *outbuf, const byte *inbuf, + size_t length); + + +/*-- cipher-ocb.c --*/ +gcry_err_code_t _gcry_cipher_ocb_encrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_ocb_decrypt +/* */ (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_ocb_set_nonce +/* */ (gcry_cipher_hd_t c, const unsigned char *nonce, + size_t noncelen); +gcry_err_code_t _gcry_cipher_ocb_authenticate +/* */ (gcry_cipher_hd_t c, const unsigned char *abuf, size_t abuflen); +gcry_err_code_t 
_gcry_cipher_ocb_get_tag +/* */ (gcry_cipher_hd_t c, + unsigned char *outtag, size_t taglen); +gcry_err_code_t _gcry_cipher_ocb_check_tag +/* */ (gcry_cipher_hd_t c, + const unsigned char *intag, size_t taglen); +void _gcry_cipher_ocb_setkey +/* */ (gcry_cipher_hd_t c); + + +/*-- cipher-xts.c --*/ +gcry_err_code_t _gcry_cipher_xts_encrypt +/* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); +gcry_err_code_t _gcry_cipher_xts_decrypt +/* */ (gcry_cipher_hd_t c, unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen); + + +/* Return the L-value for block N. Note: 'cipher_ocb.c' ensures that N + * will never be multiple of 65536 (1 << OCB_L_TABLE_SIZE), thus N can + * be directly passed to _gcry_ctz() function and resulting index will + * never overflow the table. */ +static inline const unsigned char * +ocb_get_l (gcry_cipher_hd_t c, u64 n) +{ + unsigned long ntz; + +#if ((defined(__i386__) || defined(__x86_64__)) && __GNUC__ >= 4) + /* Assumes that N != 0. */ + asm ("rep;bsfl %k[low], %k[ntz]\n\t" + : [ntz] "=r" (ntz) + : [low] "r" ((unsigned long)n) + : "cc"); +#else + ntz = _gcry_ctz (n); +#endif + + return c->u_mode.ocb.L[ntz]; +} + + +/* Return bit-shift of blocksize. */ +static inline unsigned int _gcry_blocksize_shift(gcry_cipher_hd_t c) +{ + /* Only blocksizes 8 and 16 are used. Return value in such way + * that compiler can optimize calling functions based on this. */ + return c->spec->blocksize == 8 ? 3 : 4; +} + + +/* Optimized function for adding value to cipher block. */ +static inline void +cipher_block_add(void *_dstsrc, unsigned int add, size_t blocksize) +{ + byte *dstsrc = _dstsrc; + u64 s[2]; + + if (blocksize == 8) + { + buf_put_be64(dstsrc + 0, buf_get_be64(dstsrc + 0) + add); + } + else /* blocksize == 16 */ + { + s[0] = buf_get_be64(dstsrc + 8); + s[1] = buf_get_be64(dstsrc + 0); + s[0] += add; + s[1] += (s[0] < add); + buf_put_be64(dstsrc + 8, s[0]); + buf_put_be64(dstsrc + 0, s[1]); + } +} + + +/* Optimized function for cipher block copying */ +static inline void +cipher_block_cpy(void *_dst, const void *_src, size_t blocksize) +{ + byte *dst = _dst; + const byte *src = _src; + u64 s[2]; + + if (blocksize == 8) + { + buf_put_he64(dst + 0, buf_get_he64(src + 0)); + } + else /* blocksize == 16 */ + { + s[0] = buf_get_he64(src + 0); + s[1] = buf_get_he64(src + 8); + buf_put_he64(dst + 0, s[0]); + buf_put_he64(dst + 8, s[1]); + } +} + + +/* Optimized function for cipher block xoring */ +static inline void +cipher_block_xor(void *_dst, const void *_src1, const void *_src2, + size_t blocksize) +{ + byte *dst = _dst; + const byte *src1 = _src1; + const byte *src2 = _src2; + u64 s1[2]; + u64 s2[2]; + + if (blocksize == 8) + { + buf_put_he64(dst + 0, buf_get_he64(src1 + 0) ^ buf_get_he64(src2 + 0)); + } + else /* blocksize == 16 */ + { + s1[0] = buf_get_he64(src1 + 0); + s1[1] = buf_get_he64(src1 + 8); + s2[0] = buf_get_he64(src2 + 0); + s2[1] = buf_get_he64(src2 + 8); + buf_put_he64(dst + 0, s1[0] ^ s2[0]); + buf_put_he64(dst + 8, s1[1] ^ s2[1]); + } +} + + +/* Optimized function for in-place cipher block xoring */ +static inline void +cipher_block_xor_1(void *_dst, const void *_src, size_t blocksize) +{ + cipher_block_xor (_dst, _dst, _src, blocksize); +} + + +/* Optimized function for cipher block xoring with two destination cipher + blocks. Used mainly by CFB mode encryption. 
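ocb_get_l above relies on the OCB indexing identity Offset_i = Offset_{i-1} xor L_{ntz(i)}: because cipher-ocb.c never lets the block index reach a multiple of 2^OCB_L_TABLE_SIZE without taking the L_big path, a count-trailing-zeros of the low word is always a valid table index. A portable editorial sketch of the same update, with GCC's __builtin_ctzll standing in for _gcry_ctz:

#include <stdint.h>

/* Offset update Offset_i = Offset_{i-1} xor L_{ntz(i)}.  Requires
 * i != 0 and i not a multiple of 2^OCB_L_TABLE_SIZE, which the
 * callers guarantee between L_big refills. */
static void
ocb_update_offset (uint8_t offset[16], const uint8_t L[][16], uint64_t i)
{
  const uint8_t *l = L[__builtin_ctzll (i)];   /* L_{ntz(i)} */
  unsigned int k;

  for (k = 0; k < 16; k++)
    offset[k] ^= l[k];
}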
 */
+static inline void
+cipher_block_xor_2dst(void *_dst1, void *_dst2, const void *_src,
+                      size_t blocksize)
+{
+  byte *dst1 = _dst1;
+  byte *dst2 = _dst2;
+  const byte *src = _src;
+  u64 d2[2];
+  u64 s[2];
+
+  if (blocksize == 8)
+    {
+      d2[0] = buf_get_he64(dst2 + 0) ^ buf_get_he64(src + 0);
+      buf_put_he64(dst2 + 0, d2[0]);
+      buf_put_he64(dst1 + 0, d2[0]);
+    }
+  else /* blocksize == 16 */
+    {
+      s[0] = buf_get_he64(src + 0);
+      s[1] = buf_get_he64(src + 8);
+      d2[0] = buf_get_he64(dst2 + 0);
+      d2[1] = buf_get_he64(dst2 + 8);
+      d2[0] = d2[0] ^ s[0];
+      d2[1] = d2[1] ^ s[1];
+      buf_put_he64(dst2 + 0, d2[0]);
+      buf_put_he64(dst2 + 8, d2[1]);
+      buf_put_he64(dst1 + 0, d2[0]);
+      buf_put_he64(dst1 + 8, d2[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used mainly by CBC mode decryption. */
+static inline void
+cipher_block_xor_n_copy_2(void *_dst_xor, const void *_src_xor,
+                          void *_srcdst_cpy, const void *_src_cpy,
+                          size_t blocksize)
+{
+  byte *dst_xor = _dst_xor;
+  byte *srcdst_cpy = _srcdst_cpy;
+  const byte *src_xor = _src_xor;
+  const byte *src_cpy = _src_cpy;
+  u64 sc[2];
+  u64 sx[2];
+  u64 sdc[2];
+
+  if (blocksize == 8)
+    {
+      sc[0] = buf_get_he64(src_cpy + 0);
+      buf_put_he64(dst_xor + 0,
+                   buf_get_he64(srcdst_cpy + 0) ^ buf_get_he64(src_xor + 0));
+      buf_put_he64(srcdst_cpy + 0, sc[0]);
+    }
+  else /* blocksize == 16 */
+    {
+      sc[0] = buf_get_he64(src_cpy + 0);
+      sc[1] = buf_get_he64(src_cpy + 8);
+      sx[0] = buf_get_he64(src_xor + 0);
+      sx[1] = buf_get_he64(src_xor + 8);
+      sdc[0] = buf_get_he64(srcdst_cpy + 0);
+      sdc[1] = buf_get_he64(srcdst_cpy + 8);
+      sx[0] ^= sdc[0];
+      sx[1] ^= sdc[1];
+      buf_put_he64(dst_xor + 0, sx[0]);
+      buf_put_he64(dst_xor + 8, sx[1]);
+      buf_put_he64(srcdst_cpy + 0, sc[0]);
+      buf_put_he64(srcdst_cpy + 8, sc[1]);
+    }
+}
+
+
+/* Optimized function for combined cipher block xoring and copying.
+   Used mainly by CFB mode decryption. */
+static inline void
+cipher_block_xor_n_copy(void *_dst_xor, void *_srcdst_cpy, const void *_src,
+                        size_t blocksize)
+{
+  cipher_block_xor_n_copy_2(_dst_xor, _src, _srcdst_cpy, _src, blocksize);
+}
+
+
+#endif /*G10_CIPHER_INTERNAL_H*/
diff --git a/comm/third_party/libgcrypt/cipher/cipher-ocb.c b/comm/third_party/libgcrypt/cipher/cipher-ocb.c
new file mode 100644
index 0000000000..24db6a9e2c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/cipher-ocb.c
@@ -0,0 +1,761 @@
+/* cipher-ocb.c - OCB cipher mode
+ * Copyright (C) 2015, 2016 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ *
+ *
+ * OCB is covered by several patents but may be used freely by most
+ * software. See http://web.cs.ucdavis.edu/~rogaway/ocb/license.htm .
+ * In particular license 1 is suitable for Libgcrypt: See
+ * http://web.cs.ucdavis.edu/~rogaway/ocb/license1.pdf for the full
+ * license document; it basically says:
+ *
+ *   License 1 — License for Open-Source Software Implementations of OCB
+ *   (Jan 9, 2013)
+ *
+ *   Under this license, you are authorized to make, use, and
+ *   distribute open-source software implementations of OCB. This
+ *   license terminates for you if you sue someone over their
+ *   open-source software implementation of OCB claiming that you have
+ *   a patent covering their implementation.
+ */
+
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "./cipher-internal.h"
+
+
+/* Double the OCB_BLOCK_LEN sized block B in-place. */
+static inline void
+double_block (u64 b[2])
+{
+  u64 l_0, l, r;
+
+  l = b[1];
+  r = b[0];
+
+  l_0 = -(l >> 63);
+  l = (l + l) ^ (r >> 63);
+  r = (r + r) ^ (l_0 & 135);
+
+  b[1] = l;
+  b[0] = r;
+}
+
+
+/* Copy OCB_BLOCK_LEN from buffer S starting at bit offset BITOFF to
+ * buffer D. */
+static void
+bit_copy (unsigned char d[16], const unsigned char s[24], unsigned int bitoff)
+{
+  u64 s0l, s1l, s1r, s2r;
+  unsigned int shift;
+  unsigned int byteoff;
+
+  byteoff = bitoff / 8;
+  shift = bitoff % 8;
+
+  s0l = buf_get_be64 (s + byteoff + 0);
+  s1l = buf_get_be64 (s + byteoff + 8);
+  s1r = shift ? s1l : 0;
+  s2r = shift ? buf_get_be64 (s + 16) << (8 * byteoff) : 0;
+
+  buf_put_be64 (d + 0, (s0l << shift) | (s1r >> ((64 - shift) & 63)));
+  buf_put_be64 (d + 8, (s1l << shift) | (s2r >> ((64 - shift) & 63)));
+}
+
+
+/* Get L_big value for block N, where N is multiple of 65536. */
+static void
+ocb_get_L_big (gcry_cipher_hd_t c, u64 n, unsigned char *l_buf)
+{
+  int ntz = _gcry_ctz64 (n);
+  u64 L[2];
+
+  gcry_assert(ntz >= OCB_L_TABLE_SIZE);
+
+  L[1] = buf_get_be64 (c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1]);
+  L[0] = buf_get_be64 (c->u_mode.ocb.L[OCB_L_TABLE_SIZE - 1] + 8);
+
+  for (ntz -= OCB_L_TABLE_SIZE - 1; ntz; ntz--)
+    double_block (L);
+
+  buf_put_be64 (l_buf + 0, L[1]);
+  buf_put_be64 (l_buf + 8, L[0]);
+}
+
+
+/* Called after key has been set. Sets up L table. */
+void _gcry_cipher_ocb_setkey (gcry_cipher_hd_t c)
+{
+  unsigned char ktop[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+  u64 L[2];
+  int i;
+
+  /* L_star = E(zero_128) */
+  memset (ktop, 0, OCB_BLOCK_LEN);
+  nburn = c->spec->encrypt (&c->context.c, c->u_mode.ocb.L_star, ktop);
+  burn = nburn > burn ? nburn : burn;
+  /* L_dollar = double(L_star) */
+  L[1] = buf_get_be64 (c->u_mode.ocb.L_star);
+  L[0] = buf_get_be64 (c->u_mode.ocb.L_star + 8);
+  double_block (L);
+  buf_put_be64 (c->u_mode.ocb.L_dollar + 0, L[1]);
+  buf_put_be64 (c->u_mode.ocb.L_dollar + 8, L[0]);
+  /* L_0 = double(L_dollar), ... */
+  double_block (L);
+  buf_put_be64 (c->u_mode.ocb.L[0] + 0, L[1]);
+  buf_put_be64 (c->u_mode.ocb.L[0] + 8, L[0]);
+  for (i = 1; i < OCB_L_TABLE_SIZE; i++)
+    {
+      double_block (L);
+      buf_put_be64 (c->u_mode.ocb.L[i] + 0, L[1]);
+      buf_put_be64 (c->u_mode.ocb.L[i] + 8, L[0]);
+    }
+  /* Precalculated offset L0+L1 */
+  cipher_block_xor (c->u_mode.ocb.L0L1,
+                    c->u_mode.ocb.L[0], c->u_mode.ocb.L[1], OCB_BLOCK_LEN);
+
+  /* Cleanup */
+  wipememory (ktop, sizeof ktop);
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+}
+
+
+/* Set the nonce for OCB.  This requires that the key has been set.
+   Using it again starts a new encryption cycle using the same
+   key.
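double_block above is multiplication by x in GF(2^128) under OCB's polynomial x^128 + x^7 + x^2 + x + 1; the literal 135 in the code is 0x87, the low byte of that polynomial. The same operation byte by byte, as an editorial sketch (the patch code works on two u64 limbs instead):

#include <stdint.h>

/* OCB doubling: shift the 128-bit block left by one and, if the
 * dropped bit was set, xor 0x87 into the last byte, since x^128 is
 * congruent to x^7 + x^2 + x + 1. */
static void
ocb_double (uint8_t b[16])
{
  unsigned int carry = b[0] >> 7;   /* Most significant bit. */
  int i;

  for (i = 0; i < 15; i++)
    b[i] = (uint8_t)((b[i] << 1) | (b[i + 1] >> 7));
  b[15] = (uint8_t)((b[15] << 1) ^ (carry ? 0x87 : 0));
}

Chaining it from L_star gives exactly the setup sequence above: L_dollar = double(L_star), L_0 = double(L_dollar), L_i = double(L_{i-1}).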
+
+
+/* Set the nonce for OCB.  This requires that the key has been set.
+   Using it again starts a new encryption cycle using the same
+   key.  */
+gcry_err_code_t
+_gcry_cipher_ocb_set_nonce (gcry_cipher_hd_t c, const unsigned char *nonce,
+                            size_t noncelen)
+{
+  unsigned char ktop[OCB_BLOCK_LEN];
+  unsigned char stretch[OCB_BLOCK_LEN + 8];
+  unsigned int bottom;
+  unsigned int burn = 0;
+  unsigned int nburn;
+
+  /* Check args.  */
+  if (!c->marks.key)
+    return GPG_ERR_INV_STATE;  /* Key must have been set first.  */
+  switch (c->u_mode.ocb.taglen)
+    {
+    case 8:
+    case 12:
+    case 16:
+      break;
+    default:
+      return GPG_ERR_BUG; /* Invalid tag length. */
+    }
+
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (!nonce)
+    return GPG_ERR_INV_ARG;
+  /* 120 bit is the allowed maximum.  In addition we impose a minimum
+     of 64 bit.  */
+  if (noncelen > (120/8) || noncelen < (64/8) || noncelen >= OCB_BLOCK_LEN)
+    return GPG_ERR_INV_LENGTH;
+
+  /* Prepare the nonce.  */
+  memset (ktop, 0, OCB_BLOCK_LEN);
+  buf_cpy (ktop + (OCB_BLOCK_LEN - noncelen), nonce, noncelen);
+  ktop[0] = ((c->u_mode.ocb.taglen * 8) % 128) << 1;
+  ktop[OCB_BLOCK_LEN - noncelen - 1] |= 1;
+  bottom = ktop[OCB_BLOCK_LEN - 1] & 0x3f;
+  ktop[OCB_BLOCK_LEN - 1] &= 0xc0; /* Zero the bottom bits.  */
+  nburn = c->spec->encrypt (&c->context.c, ktop, ktop);
+  burn = nburn > burn ? nburn : burn;
+  /* Stretch = Ktop || (Ktop[1..64] xor Ktop[9..72]) */
+  cipher_block_cpy (stretch, ktop, OCB_BLOCK_LEN);
+  cipher_block_xor (stretch + OCB_BLOCK_LEN, ktop, ktop + 1, 8);
+  /* Offset_0 = Stretch[1+bottom..128+bottom]
+     (We use the IV field to store the offset)  */
+  bit_copy (c->u_iv.iv, stretch, bottom);
+  c->marks.iv = 1;
+
+  /* Checksum_0 = zeros(128)
+     (We use the CTR field to store the checksum)  */
+  memset (c->u_ctr.ctr, 0, OCB_BLOCK_LEN);
+
+  /* Clear AAD buffer.  */
+  memset (c->u_mode.ocb.aad_offset, 0, OCB_BLOCK_LEN);
+  memset (c->u_mode.ocb.aad_sum, 0, OCB_BLOCK_LEN);
+
+  /* Setup other values.  */
+  memset (c->lastiv, 0, sizeof(c->lastiv));
+  c->unused = 0;
+  c->marks.tag = 0;
+  c->marks.finalize = 0;
+  c->u_mode.ocb.data_nblocks = 0;
+  c->u_mode.ocb.aad_nblocks = 0;
+  c->u_mode.ocb.aad_nleftover = 0;
+  c->u_mode.ocb.data_finalized = 0;
+  c->u_mode.ocb.aad_finalized = 0;
+
+  /* log_printhex ("L_* ", c->u_mode.ocb.L_star, OCB_BLOCK_LEN); */
+  /* log_printhex ("L_$ ", c->u_mode.ocb.L_dollar, OCB_BLOCK_LEN); */
+  /* log_printhex ("L_0 ", c->u_mode.ocb.L[0], OCB_BLOCK_LEN); */
+  /* log_printhex ("L_1 ", c->u_mode.ocb.L[1], OCB_BLOCK_LEN); */
+  /* log_debug ( "bottom : %u (decimal)\n", bottom); */
+  /* log_printhex ("Ktop ", ktop, OCB_BLOCK_LEN); */
+  /* log_printhex ("Stretch ", stretch, sizeof stretch); */
+  /* log_printhex ("Offset_0 ", c->u_iv.iv, OCB_BLOCK_LEN); */
+
+  /* Cleanup */
+  wipememory (ktop, sizeof ktop);
+  wipememory (stretch, sizeof stretch);
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
+  return 0;
+}
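Seen from the public API, the checks above amount to two rules: set the key first, then a nonce of 8 to 15 bytes (64 to 120 bits). A minimal sketch, with hypothetical key and nonce values and error handling omitted:

#include <gcrypt.h>

void
ocb_nonce_example (void)
{
  gcry_cipher_hd_t hd;
  static const unsigned char key[16]   = { 0 };  /* hypothetical key */
  static const unsigned char nonce[12] = { 1 };  /* 96-bit nonce: allowed */

  gcry_cipher_open (&hd, GCRY_CIPHER_AES, GCRY_CIPHER_MODE_OCB, 0);
  gcry_cipher_setkey (hd, key, sizeof key);      /* must precede setiv */
  gcry_cipher_setiv (hd, nonce, sizeof nonce);
  /* A 7-byte or 16-byte nonce would fail with GPG_ERR_INV_LENGTH;
     calling setiv again later simply starts a new cycle. */
  gcry_cipher_close (hd);
}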
+
+
+/* Process additional authentication data.  This implementation allows
+   adding additional authentication data at any time before the final
+   gcry_cipher_gettag.  */
+gcry_err_code_t
+_gcry_cipher_ocb_authenticate (gcry_cipher_hd_t c, const unsigned char *abuf,
+                               size_t abuflen)
+{
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
+  unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+  size_t n;
+
+  /* Check that a nonce and thus a key has been set and that we have
+     not yet computed the tag.  We also return an error if the aad has
+     been finalized (i.e. a short block has been processed).  */
+  if (!c->marks.iv || c->marks.tag || c->u_mode.ocb.aad_finalized)
+    return GPG_ERR_INV_STATE;
+
+  /* Check correct usage and arguments.  */
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+
+  /* Process remaining data from the last call first.  */
+  if (c->u_mode.ocb.aad_nleftover)
+    {
+      n = abuflen;
+      if (n > OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover)
+        n = OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover;
+
+      buf_cpy (&c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover],
+               abuf, n);
+      c->u_mode.ocb.aad_nleftover += n;
+      abuf += n;
+      abuflen -= n;
+
+      if (c->u_mode.ocb.aad_nleftover == OCB_BLOCK_LEN)
+        {
+          c->u_mode.ocb.aad_nblocks++;
+
+          if ((c->u_mode.ocb.aad_nblocks % table_maxblks) == 0)
+            {
+              /* Table overflow, L needs to be generated.  Note that
+                 AAD_NBLOCKS is the multiple of 65536 here; passing
+                 AAD_NBLOCKS + 1 would trip the ntz assertion in
+                 ocb_get_L_big.  */
+              ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+            }
+          else
+            {
+              cipher_block_cpy (l_tmp, ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                                OCB_BLOCK_LEN);
+            }
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset,
+                            c->u_mode.ocb.aad_leftover, OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          c->u_mode.ocb.aad_nleftover = 0;
+        }
+    }
+
+  if (!abuflen)
+    {
+      if (burn > 0)
+        _gcry_burn_stack (burn + 4*sizeof(void*));
+
+      return 0;
+    }
+
+  /* Full blocks handling.  */
+  while (abuflen >= OCB_BLOCK_LEN)
+    {
+      size_t nblks = abuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
+
+      /* Check how many blocks to process till table overflow.  */
+      nmaxblks = (c->u_mode.ocb.aad_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+      if (nmaxblks == 0)
+        {
+          /* Table overflow, generate L and process one block.  */
+          c->u_mode.ocb.aad_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.aad_nblocks, l_tmp);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset, l_tmp, OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+                            OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry loop again.  Next overflow will
+           * happen after 65535 blocks.  */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_auth)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_auth (c, abuf, nblks);
+          ndone = nblks - nleft;
+
+          abuf += ndone * OCB_BLOCK_LEN;
+          abuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
+        }
+
+      /* Hash all full blocks.  */
+      while (nblks)
+        {
+          c->u_mode.ocb.aad_nblocks++;
+
+          gcry_assert(c->u_mode.ocb.aad_nblocks & table_size_mask);
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+                              ocb_get_l (c, c->u_mode.ocb.aad_nblocks),
+                              OCB_BLOCK_LEN);
+          /* Sum_i = Sum_{i-1} xor ENCIPHER(K, A_i xor Offset_i) */
+          cipher_block_xor (l_tmp, c->u_mode.ocb.aad_offset, abuf,
+                            OCB_BLOCK_LEN);
+          nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+          abuf += OCB_BLOCK_LEN;
+          abuflen -= OCB_BLOCK_LEN;
+          nblks--;
+        }
+    }
+
+  /* Store away the remaining data.  */
+  if (abuflen)
+    {
+      n = abuflen;
+      if (n > OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover)
+        n = OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover;
+
+      buf_cpy (&c->u_mode.ocb.aad_leftover[c->u_mode.ocb.aad_nleftover],
+               abuf, n);
+      c->u_mode.ocb.aad_nleftover += n;
+      abuf += n;
+      abuflen -= n;
+    }
+
+  gcry_assert (!abuflen);
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
+  return 0;
+}
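Because short AAD pieces are buffered in aad_leftover, callers may split the associated data at arbitrary positions; the result is identical to one contiguous call. A sketch (HD is assumed to be an open OCB handle with key and nonce already set; the split points are arbitrary):

#include <gcrypt.h>

void
aad_in_pieces (gcry_cipher_hd_t hd, const unsigned char aad[32])
{
  /* 5 bytes are buffered; the next call completes block A_1 (16
     bytes) and hashes it, leaving 11 bytes buffered; the last call
     completes A_2.  Same tag as one 32-byte call. */
  gcry_cipher_authenticate (hd, aad, 5);
  gcry_cipher_authenticate (hd, aad + 5, 22);
  gcry_cipher_authenticate (hd, aad + 27, 5);
}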
+
+
+/* Hash final partial AAD block.  */
+static void
+ocb_aad_finalize (gcry_cipher_hd_t c)
+{
+  unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+
+  /* Check that a nonce and thus a key has been set and that we have
+     not yet computed the tag.  We also skip this if the aad has been
+     finalized.  */
+  if (!c->marks.iv || c->marks.tag || c->u_mode.ocb.aad_finalized)
+    return;
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return;  /* Ooops.  */
+
+  /* Hash final partial block if any.  */
+  if (c->u_mode.ocb.aad_nleftover)
+    {
+      /* Offset_* = Offset_m xor L_* */
+      cipher_block_xor_1 (c->u_mode.ocb.aad_offset,
+                          c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      /* CipherInput = (A_* || 1 || zeros(127-bitlen(A_*))) xor Offset_* */
+      buf_cpy (l_tmp, c->u_mode.ocb.aad_leftover, c->u_mode.ocb.aad_nleftover);
+      memset (l_tmp + c->u_mode.ocb.aad_nleftover, 0,
+              OCB_BLOCK_LEN - c->u_mode.ocb.aad_nleftover);
+      l_tmp[c->u_mode.ocb.aad_nleftover] = 0x80;
+      cipher_block_xor_1 (l_tmp, c->u_mode.ocb.aad_offset, OCB_BLOCK_LEN);
+      /* Sum = Sum_m xor ENCIPHER(K, CipherInput) */
+      nburn = c->spec->encrypt (&c->context.c, l_tmp, l_tmp);
+      burn = nburn > burn ? nburn : burn;
+      cipher_block_xor_1 (c->u_mode.ocb.aad_sum, l_tmp, OCB_BLOCK_LEN);
+
+      c->u_mode.ocb.aad_nleftover = 0;
+    }
+
+  /* Mark AAD as finalized so that gcry_cipher_ocb_authenticate can
+   * return an error when called again.  */
+  c->u_mode.ocb.aad_finalized = 1;
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+}
+
+
+
+/* Checksumming for encrypt and decrypt.  */
+static void
+ocb_checksum (unsigned char *chksum, const unsigned char *plainbuf,
+              size_t nblks)
+{
+  while (nblks > 0)
+    {
+      /* Checksum_i = Checksum_{i-1} xor P_i  */
+      cipher_block_xor_1(chksum, plainbuf, OCB_BLOCK_LEN);
+
+      plainbuf += OCB_BLOCK_LEN;
+      nblks--;
+    }
+}
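ocb_crypt below, like _gcry_cipher_ocb_authenticate above, advances the per-block offset as Offset_i = Offset_{i-1} xor L_{ntz(i)}, indexing the precomputed table by the number of trailing zero bits of the 1-based block number. A portable sketch of that index calculation, standing in for the platform-optimized _gcry_ctz64 (illustration only, hypothetical name):

#include <stdint.h>

/* Trailing-zero count for a non-zero block number: block 1 uses
 * L[0], block 2 uses L[1], block 3 L[0], block 4 L[2], and so on.
 * Block numbers with 16 or more trailing zeros exceed the table and
 * are handled by ocb_get_L_big instead. */
static unsigned int
ntz64_ref (uint64_t n)
{
  unsigned int ntz = 0;

  while ((n & 1) == 0)   /* caller guarantees n != 0 */
    {
      ntz++;
      n >>= 1;
    }
  return ntz;
}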
+
+
+/* Common code for encrypt and decrypt.  */
+static gcry_err_code_t
+ocb_crypt (gcry_cipher_hd_t c, int encrypt,
+           unsigned char *outbuf, size_t outbuflen,
+           const unsigned char *inbuf, size_t inbuflen)
+{
+  const size_t table_maxblks = 1 << OCB_L_TABLE_SIZE;
+  const u32 table_size_mask = ((1 << OCB_L_TABLE_SIZE) - 1);
+  unsigned char l_tmp[OCB_BLOCK_LEN];
+  unsigned int burn = 0;
+  unsigned int nburn;
+  gcry_cipher_encrypt_t crypt_fn =
+    encrypt ? c->spec->encrypt : c->spec->decrypt;
+
+  /* Check that a nonce and thus a key has been set and that we are
+     not yet in end of data state. */
+  if (!c->marks.iv || c->u_mode.ocb.data_finalized)
+    return GPG_ERR_INV_STATE;
+
+  /* Check correct usage and arguments.  */
+  if (c->spec->blocksize != OCB_BLOCK_LEN)
+    return GPG_ERR_CIPHER_ALGO;
+  if (outbuflen < inbuflen)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (c->marks.finalize)
+    ; /* Allow arbitrary length. */
+  else if ((inbuflen % OCB_BLOCK_LEN))
+    return GPG_ERR_INV_LENGTH; /* We support only full blocks for now.  */
+
+  /* Full blocks handling.  */
+  while (inbuflen >= OCB_BLOCK_LEN)
+    {
+      size_t nblks = inbuflen / OCB_BLOCK_LEN;
+      size_t nmaxblks;
+
+      /* Check how many blocks to process till table overflow.  */
+      nmaxblks = (c->u_mode.ocb.data_nblocks + 1) % table_maxblks;
+      nmaxblks = (table_maxblks - nmaxblks) % table_maxblks;
+
+      if (nmaxblks == 0)
+        {
+          /* Table overflow, generate L and process one block.  */
+          c->u_mode.ocb.data_nblocks++;
+          ocb_get_L_big(c, c->u_mode.ocb.data_nblocks, l_tmp);
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, 1);
+            }
+
+          /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+          cipher_block_xor_1 (c->u_iv.iv, l_tmp, OCB_BLOCK_LEN);
+          /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+          cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+          nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+          burn = nburn > burn ? nburn : burn;
+          cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, outbuf, 1);
+            }
+
+          inbuf += OCB_BLOCK_LEN;
+          inbuflen -= OCB_BLOCK_LEN;
+          outbuf += OCB_BLOCK_LEN;
+          outbuflen -= OCB_BLOCK_LEN;
+          nblks--;
+
+          /* With overflow handled, retry loop again.  Next overflow will
+           * happen after 65535 blocks.  */
+          continue;
+        }
+
+      nblks = nblks < nmaxblks ? nblks : nmaxblks;
+
+      /* Since checksum xoring is done before/after encryption/decryption,
+         process input in 24KiB chunks to keep data loaded in L1 cache for
+         checksumming.  */
+      if (nblks > 24 * 1024 / OCB_BLOCK_LEN)
+        nblks = 24 * 1024 / OCB_BLOCK_LEN;
+
+      /* Use a bulk method if available.  */
+      if (nblks && c->bulk.ocb_crypt)
+        {
+          size_t nleft;
+          size_t ndone;
+
+          nleft = c->bulk.ocb_crypt (c, outbuf, inbuf, nblks, encrypt);
+          ndone = nblks - nleft;
+
+          inbuf += ndone * OCB_BLOCK_LEN;
+          outbuf += ndone * OCB_BLOCK_LEN;
+          inbuflen -= ndone * OCB_BLOCK_LEN;
+          outbuflen -= ndone * OCB_BLOCK_LEN;
+          nblks = nleft;
+        }
+
+      if (nblks)
+        {
+          size_t nblks_chksum = nblks;
+
+          if (encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr, inbuf, nblks_chksum);
+            }
+
+          /* Encrypt all full blocks.  */
+          while (nblks)
+            {
+              c->u_mode.ocb.data_nblocks++;
+
+              gcry_assert(c->u_mode.ocb.data_nblocks & table_size_mask);
+
+              /* Offset_i = Offset_{i-1} xor L_{ntz(i)} */
+              cipher_block_xor_1 (c->u_iv.iv,
+                                  ocb_get_l (c, c->u_mode.ocb.data_nblocks),
+                                  OCB_BLOCK_LEN);
+              /* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i)  */
+              cipher_block_xor (outbuf, c->u_iv.iv, inbuf, OCB_BLOCK_LEN);
+              nburn = crypt_fn (&c->context.c, outbuf, outbuf);
+              burn = nburn > burn ? nburn : burn;
+              cipher_block_xor_1 (outbuf, c->u_iv.iv, OCB_BLOCK_LEN);
+
+              inbuf += OCB_BLOCK_LEN;
+              inbuflen -= OCB_BLOCK_LEN;
+              outbuf += OCB_BLOCK_LEN;
+              outbuflen -= OCB_BLOCK_LEN;
+              nblks--;
+            }
+
+          if (!encrypt)
+            {
+              /* Checksum_i = Checksum_{i-1} xor P_i  */
+              ocb_checksum (c->u_ctr.ctr,
+                            outbuf - nblks_chksum * OCB_BLOCK_LEN,
+                            nblks_chksum);
+            }
+        }
+    }
+
+  /* Encrypt final partial block.  Note that we expect INBUFLEN to be
+     shorter than OCB_BLOCK_LEN (see above).  */
+  if (inbuflen)
+    {
+      unsigned char pad[OCB_BLOCK_LEN];
+
+      /* Offset_* = Offset_m xor L_*  */
+      cipher_block_xor_1 (c->u_iv.iv, c->u_mode.ocb.L_star, OCB_BLOCK_LEN);
+      /* Pad = ENCIPHER(K, Offset_*) */
+      nburn = c->spec->encrypt (&c->context.c, pad, c->u_iv.iv);
+      burn = nburn > burn ? nburn : burn;
+
+      if (encrypt)
+        {
+          /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
+          /* Note that INBUFLEN is less than OCB_BLOCK_LEN.  */
+          buf_cpy (l_tmp, inbuf, inbuflen);
+          memset (l_tmp + inbuflen, 0, OCB_BLOCK_LEN - inbuflen);
+          l_tmp[inbuflen] = 0x80;
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+          /* C_* = P_* xor Pad[1..bitlen(P_*)] */
+          buf_xor (outbuf, inbuf, pad, inbuflen);
+        }
+      else
+        {
+          /* P_* = C_* xor Pad[1..bitlen(C_*)] */
+          /* Checksum_* = Checksum_m xor (P_* || 1 || zeros(127-bitlen(P_*))) */
+          cipher_block_cpy (l_tmp, pad, OCB_BLOCK_LEN);
+          buf_cpy (l_tmp, inbuf, inbuflen);
+          cipher_block_xor_1 (l_tmp, pad, OCB_BLOCK_LEN);
+          l_tmp[inbuflen] = 0x80;
+          buf_cpy (outbuf, l_tmp, inbuflen);
+
+          cipher_block_xor_1 (c->u_ctr.ctr, l_tmp, OCB_BLOCK_LEN);
+        }
+    }
+
+  /* Compute the tag if the finalize flag has been set.  */
+  if (c->marks.finalize)
+    {
+      /* Tag = ENCIPHER(K, Checksum xor Offset xor L_$) xor HASH(K,A) */
+      cipher_block_xor (c->u_mode.ocb.tag, c->u_ctr.ctr, c->u_iv.iv,
+                        OCB_BLOCK_LEN);
+      cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.L_dollar,
+                          OCB_BLOCK_LEN);
+      nburn = c->spec->encrypt (&c->context.c,
+                                c->u_mode.ocb.tag, c->u_mode.ocb.tag);
+      burn = nburn > burn ? nburn : burn;
+
+      c->u_mode.ocb.data_finalized = 1;
+      /* Note that the final part of the tag computation is done
+         by _gcry_cipher_ocb_get_tag.  */
+    }
+
+  if (burn > 0)
+    _gcry_burn_stack (burn + 4*sizeof(void*));
+
+  return 0;
+}
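In caller terms, the finalize handling in ocb_crypt means the last chunk must be announced with gcry_cipher_final before a partial-length encrypt is accepted, after which the tag becomes available. A minimal in-place round-trip sketch (hypothetical buffer and length, error checking omitted):

#include <gcrypt.h>

void
ocb_encrypt_and_tag (gcry_cipher_hd_t hd, unsigned char *buf, size_t len)
{
  unsigned char tag[16];

  gcry_cipher_final (hd);                       /* allow a partial last block */
  gcry_cipher_encrypt (hd, buf, len, NULL, 0);  /* in-place encryption */
  gcry_cipher_gettag (hd, tag, sizeof tag);     /* finishes Tag ^= HASH(K,A) */
}

The matching decryption path uses gcry_cipher_decrypt followed by gcry_cipher_checktag, which maps to the constant-time comparison in _gcry_cipher_ocb_check_tag below.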
+
+
+/* Encrypt (INBUF,INBUFLEN) in OCB mode to OUTBUF.  OUTBUFLEN gives
+   the allocated size of OUTBUF.  This function accepts only multiples
+   of a full block unless gcry_cipher_final has been called in which
+   case the next block may have any length.  */
+gcry_err_code_t
+_gcry_cipher_ocb_encrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  return ocb_crypt (c, 1, outbuf, outbuflen, inbuf, inbuflen);
+}
+
+
+/* Decrypt (INBUF,INBUFLEN) in OCB mode to OUTBUF.  OUTBUFLEN gives
+   the allocated size of OUTBUF.  This function accepts only multiples
+   of a full block unless gcry_cipher_final has been called in which
+   case the next block may have any length.  */
+gcry_err_code_t
+_gcry_cipher_ocb_decrypt (gcry_cipher_hd_t c,
+                          unsigned char *outbuf, size_t outbuflen,
+                          const unsigned char *inbuf, size_t inbuflen)
+{
+  return ocb_crypt (c, 0, outbuf, outbuflen, inbuf, inbuflen);
+}
+
+
+/* Compute the tag.  The last data operation has already done some
+   part of it.  To allow adding AAD even after having done all data,
+   we finish the tag computation only here.  */
+static void
+compute_tag_if_needed (gcry_cipher_hd_t c)
+{
+  if (!c->marks.tag)
+    {
+      ocb_aad_finalize (c);
+      cipher_block_xor_1 (c->u_mode.ocb.tag, c->u_mode.ocb.aad_sum,
+                          OCB_BLOCK_LEN);
+      c->marks.tag = 1;
+    }
+}
+
+
+/* Copy the already computed tag to OUTTAG.  OUTTAGSIZE is the
+   allocated size of OUTTAG; the function returns an error if that is
+   too short to hold the tag.  */
+gcry_err_code_t
+_gcry_cipher_ocb_get_tag (gcry_cipher_hd_t c,
+                          unsigned char *outtag, size_t outtagsize)
+{
+  if (c->u_mode.ocb.taglen > outtagsize)
+    return GPG_ERR_BUFFER_TOO_SHORT;
+  if (!c->u_mode.ocb.data_finalized)
+    return GPG_ERR_INV_STATE; /* Data has not yet been finalized.  */
+
+  compute_tag_if_needed (c);
+
+  memcpy (outtag, c->u_mode.ocb.tag, c->u_mode.ocb.taglen);
+
+  return 0;
+}
+
+
+/* Check that the tag (INTAG,TAGLEN) matches the computed tag for the
+   handle C.
*/ +gcry_err_code_t +_gcry_cipher_ocb_check_tag (gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen) +{ + size_t n; + + if (!c->u_mode.ocb.data_finalized) + return GPG_ERR_INV_STATE; /* Data has not yet been finalized. */ + + compute_tag_if_needed (c); + + n = c->u_mode.ocb.taglen; + if (taglen < n) + n = taglen; + + if (!buf_eq_const (intag, c->u_mode.ocb.tag, n) + || c->u_mode.ocb.taglen != taglen) + return GPG_ERR_CHECKSUM; + + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-ofb.c b/comm/third_party/libgcrypt/cipher/cipher-ofb.c new file mode 100644 index 0000000000..09db397e65 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-ofb.c @@ -0,0 +1,108 @@ +/* cipher-ofb.c - Generic OFB mode implementation + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +gcry_err_code_t +_gcry_cipher_ofb_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + unsigned char *ivp; + gcry_cipher_encrypt_t enc_fn = c->spec->encrypt; + size_t blocksize_shift = _gcry_blocksize_shift(c); + size_t blocksize = 1 << blocksize_shift; + unsigned int burn, nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + + if ( inbuflen <= c->unused ) + { + /* Short enough to be encoded by the remaining XOR mask. */ + /* XOR the input with the IV */ + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor(outbuf, ivp, inbuf, inbuflen); + c->unused -= inbuflen; + return 0; + } + + burn = 0; + + if( c->unused ) + { + inbuflen -= c->unused; + ivp = c->u_iv.iv + blocksize - c->unused; + buf_xor(outbuf, ivp, inbuf, c->unused); + outbuf += c->unused; + inbuf += c->unused; + c->unused = 0; + } + + /* Now we can process complete blocks. */ + if (c->bulk.ofb_enc) + { + size_t nblocks = inbuflen >> blocksize_shift; + c->bulk.ofb_enc (&c->context.c, c->u_iv.iv, outbuf, inbuf, nblocks); + outbuf += nblocks << blocksize_shift; + inbuf += nblocks << blocksize_shift; + inbuflen -= nblocks << blocksize_shift; + } + else + { + while ( inbuflen >= blocksize ) + { + /* Encrypt the IV (and save the current one). */ + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? nburn : burn; + cipher_block_xor(outbuf, c->u_iv.iv, inbuf, blocksize); + outbuf += blocksize; + inbuf += blocksize; + inbuflen -= blocksize; + } + } + + if ( inbuflen ) + { /* process the remaining bytes */ + nburn = enc_fn ( &c->context.c, c->u_iv.iv, c->u_iv.iv ); + burn = nburn > burn ? 
nburn : burn; + c->unused = blocksize; + c->unused -= inbuflen; + buf_xor(outbuf, c->u_iv.iv, inbuf, inbuflen); + outbuf += inbuflen; + inbuf += inbuflen; + inbuflen = 0; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-poly1305.c b/comm/third_party/libgcrypt/cipher/cipher-poly1305.c new file mode 100644 index 0000000000..bb475236b8 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-poly1305.c @@ -0,0 +1,375 @@ +/* cipher-poly1305.c - Poly1305 based AEAD cipher mode, RFC-8439 + * Copyright (C) 2014 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" +#include "./poly1305-internal.h" + + +static inline int +poly1305_bytecounter_add (u32 ctr[2], size_t add) +{ + int overflow = 0; + + if (sizeof(add) > sizeof(u32)) + { + u32 high_add = ((add >> 31) >> 1) & 0xffffffff; + ctr[1] += high_add; + if (ctr[1] < high_add) + overflow = 1; + } + + ctr[0] += add; + if (ctr[0] >= add) + return overflow; + + ctr[1] += 1; + return (ctr[1] < 1) || overflow; +} + + +static void +poly1305_fill_bytecounts (gcry_cipher_hd_t c) +{ + u32 lenbuf[4]; + + lenbuf[0] = le_bswap32(c->u_mode.poly1305.aadcount[0]); + lenbuf[1] = le_bswap32(c->u_mode.poly1305.aadcount[1]); + lenbuf[2] = le_bswap32(c->u_mode.poly1305.datacount[0]); + lenbuf[3] = le_bswap32(c->u_mode.poly1305.datacount[1]); + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, (byte*)lenbuf, + sizeof(lenbuf)); + + wipememory(lenbuf, sizeof(lenbuf)); +} + + +static void +poly1305_do_padding (gcry_cipher_hd_t c, u32 ctr[2]) +{ + static const byte zero_padding_buf[15] = {}; + u32 padding_count; + + /* Padding to 16 byte boundary. */ + if (ctr[0] % 16 > 0) + { + padding_count = 16 - ctr[0] % 16; + + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, zero_padding_buf, + padding_count); + } +} + + +static void +poly1305_aad_finish (gcry_cipher_hd_t c) +{ + /* After AAD, feed padding bytes so we get 16 byte alignment. */ + poly1305_do_padding (c, c->u_mode.poly1305.aadcount); + + /* Start of encryption marks end of AAD stream. 
*/ + c->u_mode.poly1305.aad_finalized = 1; + + c->u_mode.poly1305.datacount[0] = 0; + c->u_mode.poly1305.datacount[1] = 0; +} + + +static gcry_err_code_t +poly1305_set_zeroiv (gcry_cipher_hd_t c) +{ + byte zero[8] = { 0, }; + + return _gcry_cipher_poly1305_setiv (c, zero, sizeof(zero)); +} + + +gcry_err_code_t +_gcry_cipher_poly1305_authenticate (gcry_cipher_hd_t c, + const byte * aadbuf, size_t aadbuflen) +{ + if (c->u_mode.poly1305.bytecount_over_limits) + return GPG_ERR_INV_LENGTH; + if (c->u_mode.poly1305.aad_finalized) + return GPG_ERR_INV_STATE; + if (c->marks.tag) + return GPG_ERR_INV_STATE; + + if (!c->marks.iv) + poly1305_set_zeroiv(c); + + if (poly1305_bytecounter_add(c->u_mode.poly1305.aadcount, aadbuflen)) + { + c->u_mode.poly1305.bytecount_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, aadbuf, aadbuflen); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_poly1305_encrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t err; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->marks.tag) + return GPG_ERR_INV_STATE; + if (c->u_mode.poly1305.bytecount_over_limits) + return GPG_ERR_INV_LENGTH; + + if (!c->marks.iv) + { + err = poly1305_set_zeroiv(c); + if (err) + return err; + } + + if (!c->u_mode.poly1305.aad_finalized) + poly1305_aad_finish(c); + + if (poly1305_bytecounter_add(c->u_mode.poly1305.datacount, inbuflen)) + { + c->u_mode.poly1305.bytecount_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + if (LIKELY(inbuflen > 0) && LIKELY(c->spec->algo == GCRY_CIPHER_CHACHA20)) + { + return _gcry_chacha20_poly1305_encrypt (c, outbuf, inbuf, inbuflen); + } + + while (inbuflen) + { + size_t currlen = inbuflen; + + /* Since checksumming is done after encryption, process input in 24KiB + * chunks to keep data loaded in L1 cache for checksumming. */ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + c->spec->stencrypt(&c->context.c, outbuf, (byte*)inbuf, currlen); + + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, outbuf, currlen); + + outbuf += currlen; + inbuf += currlen; + outbuflen -= currlen; + inbuflen -= currlen; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_poly1305_decrypt (gcry_cipher_hd_t c, + byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t err; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->marks.tag) + return GPG_ERR_INV_STATE; + if (c->u_mode.poly1305.bytecount_over_limits) + return GPG_ERR_INV_LENGTH; + + if (!c->marks.iv) + { + err = poly1305_set_zeroiv(c); + if (err) + return err; + } + + if (!c->u_mode.poly1305.aad_finalized) + poly1305_aad_finish(c); + + if (poly1305_bytecounter_add(c->u_mode.poly1305.datacount, inbuflen)) + { + c->u_mode.poly1305.bytecount_over_limits = 1; + return GPG_ERR_INV_LENGTH; + } + + if (LIKELY(inbuflen > 0) && LIKELY(c->spec->algo == GCRY_CIPHER_CHACHA20)) + { + return _gcry_chacha20_poly1305_decrypt (c, outbuf, inbuf, inbuflen); + } + + while (inbuflen) + { + size_t currlen = inbuflen; + + /* Since checksumming is done before decryption, process input in 24KiB + * chunks to keep data loaded in L1 cache for decryption. 
*/ + if (currlen > 24 * 1024) + currlen = 24 * 1024; + + _gcry_poly1305_update (&c->u_mode.poly1305.ctx, inbuf, currlen); + + c->spec->stdecrypt(&c->context.c, outbuf, (byte*)inbuf, currlen); + + outbuf += currlen; + inbuf += currlen; + outbuflen -= currlen; + inbuflen -= currlen; + } + + return 0; +} + + +static gcry_err_code_t +_gcry_cipher_poly1305_tag (gcry_cipher_hd_t c, + byte * outbuf, size_t outbuflen, int check) +{ + gcry_err_code_t err; + + if (outbuflen < POLY1305_TAGLEN) + return GPG_ERR_BUFFER_TOO_SHORT; + if (c->u_mode.poly1305.bytecount_over_limits) + return GPG_ERR_INV_LENGTH; + + if (!c->marks.iv) + { + err = poly1305_set_zeroiv(c); + if (err) + return err; + } + + if (!c->u_mode.poly1305.aad_finalized) + poly1305_aad_finish(c); + + if (!c->marks.tag) + { + /* After data, feed padding bytes so we get 16 byte alignment. */ + poly1305_do_padding (c, c->u_mode.poly1305.datacount); + + /* Write byte counts to poly1305. */ + poly1305_fill_bytecounts(c); + + _gcry_poly1305_finish(&c->u_mode.poly1305.ctx, c->u_iv.iv); + + c->marks.tag = 1; + } + + if (!check) + { + memcpy (outbuf, c->u_iv.iv, POLY1305_TAGLEN); + } + else + { + /* OUTBUFLEN gives the length of the user supplied tag in OUTBUF + * and thus we need to compare its length first. */ + if (outbuflen != POLY1305_TAGLEN + || !buf_eq_const (outbuf, c->u_iv.iv, POLY1305_TAGLEN)) + return GPG_ERR_CHECKSUM; + } + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_poly1305_get_tag (gcry_cipher_hd_t c, unsigned char *outtag, + size_t taglen) +{ + return _gcry_cipher_poly1305_tag (c, outtag, taglen, 0); +} + +gcry_err_code_t +_gcry_cipher_poly1305_check_tag (gcry_cipher_hd_t c, const unsigned char *intag, + size_t taglen) +{ + return _gcry_cipher_poly1305_tag (c, (unsigned char *) intag, taglen, 1); +} + + +void +_gcry_cipher_poly1305_setkey (gcry_cipher_hd_t c) +{ + c->u_mode.poly1305.aadcount[0] = 0; + c->u_mode.poly1305.aadcount[1] = 0; + + c->u_mode.poly1305.datacount[0] = 0; + c->u_mode.poly1305.datacount[1] = 0; + + c->u_mode.poly1305.bytecount_over_limits = 0; + c->u_mode.poly1305.aad_finalized = 0; + c->marks.tag = 0; + c->marks.iv = 0; +} + + +gcry_err_code_t +_gcry_cipher_poly1305_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) +{ + byte tmpbuf[64]; /* size of ChaCha20 block */ + gcry_err_code_t err; + + /* IV must be 96-bits */ + if (!iv && ivlen != (96 / 8)) + return GPG_ERR_INV_ARG; + + memset(&c->u_mode.poly1305.ctx, 0, sizeof(c->u_mode.poly1305.ctx)); + + c->u_mode.poly1305.aadcount[0] = 0; + c->u_mode.poly1305.aadcount[1] = 0; + + c->u_mode.poly1305.datacount[0] = 0; + c->u_mode.poly1305.datacount[1] = 0; + + c->u_mode.poly1305.bytecount_over_limits = 0; + c->u_mode.poly1305.aad_finalized = 0; + c->marks.tag = 0; + c->marks.iv = 0; + + /* Set up IV for stream cipher. */ + c->spec->setiv (&c->context.c, iv, ivlen); + + /* Get the first block from ChaCha20. */ + memset(tmpbuf, 0, sizeof(tmpbuf)); + c->spec->stencrypt(&c->context.c, tmpbuf, tmpbuf, sizeof(tmpbuf)); + + /* Use the first 32-bytes as Poly1305 key. 
*/ + err = _gcry_poly1305_init (&c->u_mode.poly1305.ctx, tmpbuf, POLY1305_KEYLEN); + + wipememory(tmpbuf, sizeof(tmpbuf)); + + if (err) + return err; + + c->marks.iv = 1; + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-selftest.c b/comm/third_party/libgcrypt/cipher/cipher-selftest.c new file mode 100644 index 0000000000..d7f38a4261 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-selftest.c @@ -0,0 +1,512 @@ +/* cipher-selftest.c - Helper functions for bulk encryption selftests. + * Copyright (C) 2013,2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#ifdef HAVE_SYSLOG +# include +#endif /*HAVE_SYSLOG*/ + +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-selftest.h" +#include "cipher-internal.h" + +#ifdef HAVE_STDINT_H +# include /* uintptr_t */ +#elif defined(HAVE_INTTYPES_H) +# include +#else +/* In this case, uintptr_t is provided by config.h. */ +#endif + +/* Helper macro to force alignment to 16 bytes. */ +#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED +# define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) +#else +# define ATTR_ALIGNED_16 +#endif + + +/* Return an allocated buffers of size CONTEXT_SIZE with an alignment + of 16. The caller must free that buffer using the address returned + at R_MEM. Returns NULL and sets ERRNO on failure. */ +void * +_gcry_cipher_selftest_alloc_ctx (const int context_size, unsigned char **r_mem) +{ + int offs; + unsigned int ctx_aligned_size, memsize; + + ctx_aligned_size = context_size + 15; + ctx_aligned_size -= ctx_aligned_size & 0xf; + + memsize = ctx_aligned_size + 16; + + *r_mem = xtrycalloc (1, memsize); + if (!*r_mem) + return NULL; + + offs = (16 - ((uintptr_t)*r_mem & 15)) & 15; + return (void*)(*r_mem + offs); +} + + +/* Run the self-tests for -CBC-, tests bulk CBC + decryption. Returns NULL on success. */ +const char * +_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey_func, + gcry_cipher_encrypt_t encrypt_one, + const int nblocks, const int blocksize, + const int context_size) +{ + cipher_bulk_ops_t bulk_ops = { 0, }; + int i, offs; + unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem; + unsigned int ctx_aligned_size, memsize; + + static const unsigned char key[16] ATTR_ALIGNED_16 = { + 0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, + 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22 + }; + + /* Allocate buffers, align first two elements to 16 bytes and latter to + block size. 
*/ + ctx_aligned_size = context_size + 15; + ctx_aligned_size -= ctx_aligned_size & 0xf; + + memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 16; + + mem = xtrycalloc (1, memsize); + if (!mem) + return "failed to allocate memory"; + + offs = (16 - ((uintptr_t)mem & 15)) & 15; + ctx = (void*)(mem + offs); + iv = ctx + ctx_aligned_size; + iv2 = iv + blocksize; + plaintext = iv2 + blocksize; + plaintext2 = plaintext + nblocks * blocksize; + ciphertext = plaintext2 + nblocks * blocksize; + + /* Initialize ctx */ + if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR) + { + xfree(mem); + return "setkey failed"; + } + + /* Test single block code path */ + memset (iv, 0x4e, blocksize); + memset (iv2, 0x4e, blocksize); + for (i = 0; i < blocksize; i++) + plaintext[i] = i; + + /* CBC manually. */ + buf_xor (ciphertext, iv, plaintext, blocksize); + encrypt_one (ctx, ciphertext, ciphertext); + memcpy (iv, ciphertext, blocksize); + + /* CBC decrypt. */ + bulk_ops.cbc_dec (ctx, iv2, plaintext2, ciphertext, 1); + if (memcmp (plaintext2, plaintext, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CBC-%d test failed (plaintext mismatch)", cipher, + blocksize * 8); +#else + (void)cipher; /* Not used. */ +#endif + return "selftest for CBC failed - see syslog for details"; + } + + if (memcmp (iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CBC-%d test failed (IV mismatch)", cipher, blocksize * 8); +#endif + return "selftest for CBC failed - see syslog for details"; + } + + /* Test parallelized code paths */ + memset (iv, 0x5f, blocksize); + memset (iv2, 0x5f, blocksize); + + for (i = 0; i < nblocks * blocksize; i++) + plaintext[i] = i; + + /* Create CBC ciphertext manually. */ + for (i = 0; i < nblocks * blocksize; i+=blocksize) + { + buf_xor (&ciphertext[i], iv, &plaintext[i], blocksize); + encrypt_one (ctx, &ciphertext[i], &ciphertext[i]); + memcpy (iv, &ciphertext[i], blocksize); + } + + /* Decrypt using bulk CBC and compare result. */ + bulk_ops.cbc_dec (ctx, iv2, plaintext2, ciphertext, nblocks); + + if (memcmp (plaintext2, plaintext, nblocks * blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CBC-%d test failed (plaintext mismatch, parallel path)", + cipher, blocksize * 8); +#endif + return "selftest for CBC failed - see syslog for details"; + } + if (memcmp (iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CBC-%d test failed (IV mismatch, parallel path)", + cipher, blocksize * 8); +#endif + return "selftest for CBC failed - see syslog for details"; + } + + xfree (mem); + return NULL; +} + +/* Run the self-tests for -CFB-, tests bulk CFB + decryption. Returns NULL on success. */ +const char * +_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey_func, + gcry_cipher_encrypt_t encrypt_one, + const int nblocks, const int blocksize, + const int context_size) +{ + cipher_bulk_ops_t bulk_ops = { 0, }; + int i, offs; + unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *iv, *iv2, *mem; + unsigned int ctx_aligned_size, memsize; + + static const unsigned char key[16] ATTR_ALIGNED_16 = { + 0x11,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F, + 0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x33 + }; + + /* Allocate buffers, align first two elements to 16 bytes and latter to + block size. 
 */
+  ctx_aligned_size = context_size + 15;
+  ctx_aligned_size -= ctx_aligned_size & 0xf;
+
+  memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 3) + 16;
+
+  mem = xtrycalloc (1, memsize);
+  if (!mem)
+    return "failed to allocate memory";
+
+  offs = (16 - ((uintptr_t)mem & 15)) & 15;
+  ctx = (void*)(mem + offs);
+  iv = ctx + ctx_aligned_size;
+  iv2 = iv + blocksize;
+  plaintext = iv2 + blocksize;
+  plaintext2 = plaintext + nblocks * blocksize;
+  ciphertext = plaintext2 + nblocks * blocksize;
+
+  /* Initialize ctx */
+  if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR)
+   {
+     xfree(mem);
+     return "setkey failed";
+   }
+
+  /* Test single block code path */
+  memset(iv, 0xd3, blocksize);
+  memset(iv2, 0xd3, blocksize);
+  for (i = 0; i < blocksize; i++)
+    plaintext[i] = i;
+
+  /* CFB manually.  */
+  encrypt_one (ctx, ciphertext, iv);
+  buf_xor_2dst (iv, ciphertext, plaintext, blocksize);
+
+  /* CFB decrypt.  */
+  bulk_ops.cfb_dec (ctx, iv2, plaintext2, ciphertext, 1);
+  if (memcmp(plaintext2, plaintext, blocksize))
+    {
+      xfree(mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CFB-%d test failed (plaintext mismatch)", cipher,
+              blocksize * 8);
+#else
+      (void)cipher; /* Not used.  */
+#endif
+      return "selftest for CFB failed - see syslog for details";
+    }
+
+  if (memcmp(iv2, iv, blocksize))
+    {
+      xfree(mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CFB-%d test failed (IV mismatch)", cipher, blocksize * 8);
+#endif
+      return "selftest for CFB failed - see syslog for details";
+    }
+
+  /* Test parallelized code paths */
+  memset(iv, 0xe6, blocksize);
+  memset(iv2, 0xe6, blocksize);
+
+  for (i = 0; i < nblocks * blocksize; i++)
+    plaintext[i] = i;
+
+  /* Create CFB ciphertext manually.  */
+  for (i = 0; i < nblocks * blocksize; i+=blocksize)
+    {
+      encrypt_one (ctx, &ciphertext[i], iv);
+      buf_xor_2dst (iv, &ciphertext[i], &plaintext[i], blocksize);
+    }
+
+  /* Decrypt using bulk CFB and compare result.  */
+  bulk_ops.cfb_dec (ctx, iv2, plaintext2, ciphertext, nblocks);
+
+  if (memcmp(plaintext2, plaintext, nblocks * blocksize))
+    {
+      xfree(mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CFB-%d test failed (plaintext mismatch, parallel path)",
+              cipher, blocksize * 8);
+#endif
+      return "selftest for CFB failed - see syslog for details";
+    }
+  if (memcmp(iv2, iv, blocksize))
+    {
+      xfree(mem);
+#ifdef HAVE_SYSLOG
+      syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: "
+              "%s-CFB-%d test failed (IV mismatch, parallel path)", cipher,
+              blocksize * 8);
+#endif
+      return "selftest for CFB failed - see syslog for details";
+    }
+
+  xfree(mem);
+  return NULL;
+}
+
+/* Run the self-tests for <block cipher>-CTR-<block size>, tests IV increment
+   of bulk CTR encryption.  Returns NULL on success. */
+const char *
+_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey_func,
+                           gcry_cipher_encrypt_t encrypt_one,
+                           const int nblocks, const int blocksize,
+                           const int context_size)
+{
+  cipher_bulk_ops_t bulk_ops = { 0, };
+  int i, j, offs, diff;
+  unsigned char *ctx, *plaintext, *plaintext2, *ciphertext, *ciphertext2,
+                *iv, *iv2, *mem;
+  unsigned int ctx_aligned_size, memsize;
+
+  static const unsigned char key[16] ATTR_ALIGNED_16 = {
+      0x06,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x21
+    };
+
+  /* Allocate buffers, align first two elements to 16 bytes and latter to
+     block size.
*/ + ctx_aligned_size = context_size + 15; + ctx_aligned_size -= ctx_aligned_size & 0xf; + + memsize = ctx_aligned_size + (blocksize * 2) + (blocksize * nblocks * 4) + 16; + + mem = xtrycalloc (1, memsize); + if (!mem) + return "failed to allocate memory"; + + offs = (16 - ((uintptr_t)mem & 15)) & 15; + ctx = (void*)(mem + offs); + iv = ctx + ctx_aligned_size; + iv2 = iv + blocksize; + plaintext = iv2 + blocksize; + plaintext2 = plaintext + nblocks * blocksize; + ciphertext = plaintext2 + nblocks * blocksize; + ciphertext2 = ciphertext + nblocks * blocksize; + + /* Initialize ctx */ + if (setkey_func (ctx, key, sizeof(key), &bulk_ops) != GPG_ERR_NO_ERROR) + { + xfree(mem); + return "setkey failed"; + } + + /* Test single block code path */ + memset (iv, 0xff, blocksize); + for (i = 0; i < blocksize; i++) + plaintext[i] = i; + + /* CTR manually. */ + encrypt_one (ctx, ciphertext, iv); + for (i = 0; i < blocksize; i++) + ciphertext[i] ^= plaintext[i]; + for (i = blocksize; i > 0; i--) + { + iv[i-1]++; + if (iv[i-1]) + break; + } + + memset (iv2, 0xff, blocksize); + bulk_ops.ctr_enc (ctx, iv2, plaintext2, ciphertext, 1); + + if (memcmp (plaintext2, plaintext, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (plaintext mismatch)", cipher, + blocksize * 8); +#else + (void)cipher; /* Not used. */ +#endif + return "selftest for CTR failed - see syslog for details"; + } + + if (memcmp (iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (IV mismatch)", cipher, + blocksize * 8); +#endif + return "selftest for CTR failed - see syslog for details"; + } + + /* Test bulk encryption with typical IV. */ + memset(iv, 0x57, blocksize-4); + iv[blocksize-1] = 1; + iv[blocksize-2] = 0; + iv[blocksize-3] = 0; + iv[blocksize-4] = 0; + memset(iv2, 0x57, blocksize-4); + iv2[blocksize-1] = 1; + iv2[blocksize-2] = 0; + iv2[blocksize-3] = 0; + iv2[blocksize-4] = 0; + + for (i = 0; i < blocksize * nblocks; i++) + plaintext2[i] = plaintext[i] = i; + + /* Create CTR ciphertext manually. */ + for (i = 0; i < blocksize * nblocks; i+=blocksize) + { + encrypt_one (ctx, &ciphertext[i], iv); + for (j = 0; j < blocksize; j++) + ciphertext[i+j] ^= plaintext[i+j]; + for (j = blocksize; j > 0; j--) + { + iv[j-1]++; + if (iv[j-1]) + break; + } + } + + bulk_ops.ctr_enc (ctx, iv2, ciphertext2, plaintext2, nblocks); + + if (memcmp (ciphertext2, ciphertext, blocksize * nblocks)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (ciphertext mismatch, bulk)", cipher, + blocksize * 8); +#endif + return "selftest for CTR failed - see syslog for details"; + } + if (memcmp(iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (IV mismatch, bulk)", cipher, + blocksize * 8); +#endif + return "selftest for CTR failed - see syslog for details"; + } + + /* Test parallelized code paths (check counter overflow handling) */ + for (diff = 0; diff < nblocks; diff++) { + memset(iv, 0xff, blocksize); + iv[blocksize-1] -= diff; + iv[0] = iv[1] = 0; + iv[2] = 0x07; + + for (i = 0; i < blocksize * nblocks; i++) + plaintext[i] = i; + + /* Create CTR ciphertext manually. 
*/ + for (i = 0; i < blocksize * nblocks; i+=blocksize) + { + encrypt_one (ctx, &ciphertext[i], iv); + for (j = 0; j < blocksize; j++) + ciphertext[i+j] ^= plaintext[i+j]; + for (j = blocksize; j > 0; j--) + { + iv[j-1]++; + if (iv[j-1]) + break; + } + } + + /* Decrypt using bulk CTR and compare result. */ + memset(iv2, 0xff, blocksize); + iv2[blocksize-1] -= diff; + iv2[0] = iv2[1] = 0; + iv2[2] = 0x07; + + bulk_ops.ctr_enc (ctx, iv2, plaintext2, ciphertext, nblocks); + + if (memcmp (plaintext2, plaintext, blocksize * nblocks)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (plaintext mismatch, diff: %d)", cipher, + blocksize * 8, diff); +#endif + return "selftest for CTR failed - see syslog for details"; + } + if (memcmp(iv2, iv, blocksize)) + { + xfree (mem); +#ifdef HAVE_SYSLOG + syslog (LOG_USER|LOG_WARNING, "Libgcrypt warning: " + "%s-CTR-%d test failed (IV mismatch, diff: %d)", cipher, + blocksize * 8, diff); +#endif + return "selftest for CTR failed - see syslog for details"; + } + } + + xfree (mem); + return NULL; +} diff --git a/comm/third_party/libgcrypt/cipher/cipher-selftest.h b/comm/third_party/libgcrypt/cipher/cipher-selftest.h new file mode 100644 index 0000000000..c3090ad122 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-selftest.h @@ -0,0 +1,69 @@ +/* cipher-selftest.h - Helper functions for bulk encryption selftests. + * Copyright (C) 2013,2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef G10_SELFTEST_HELP_H +#define G10_SELFTEST_HELP_H + +#include +#include "types.h" +#include "g10lib.h" +#include "cipher.h" + +typedef void (*gcry_cipher_bulk_cbc_dec_t)(void *context, unsigned char *iv, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks); + +typedef void (*gcry_cipher_bulk_cfb_dec_t)(void *context, unsigned char *iv, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks); + +typedef void (*gcry_cipher_bulk_ctr_enc_t)(void *context, unsigned char *iv, + void *outbuf_arg, + const void *inbuf_arg, + size_t nblocks); + +/* Helper function to allocate an aligned context for selftests. 
*/ +void *_gcry_cipher_selftest_alloc_ctx (const int context_size, + unsigned char **r_mem); + + +/* Helper function for bulk CBC decryption selftest */ +const char * +_gcry_selftest_helper_cbc (const char *cipher, gcry_cipher_setkey_t setkey, + gcry_cipher_encrypt_t encrypt_one, + const int nblocks, const int blocksize, + const int context_size); + +/* Helper function for bulk CFB decryption selftest */ +const char * +_gcry_selftest_helper_cfb (const char *cipher, gcry_cipher_setkey_t setkey, + gcry_cipher_encrypt_t encrypt_one, + const int nblocks, const int blocksize, + const int context_size); + +/* Helper function for bulk CTR encryption selftest */ +const char * +_gcry_selftest_helper_ctr (const char *cipher, gcry_cipher_setkey_t setkey, + gcry_cipher_encrypt_t encrypt_one, + const int nblocks, const int blocksize, + const int context_size); + +#endif /*G10_SELFTEST_HELP_H*/ diff --git a/comm/third_party/libgcrypt/cipher/cipher-xts.c b/comm/third_party/libgcrypt/cipher/cipher-xts.c new file mode 100644 index 0000000000..0522a271a1 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher-xts.c @@ -0,0 +1,189 @@ +/* cipher-xts.c - XTS mode implementation + * Copyright (C) 2017 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "./cipher-internal.h" + + +static inline void xts_gfmul_byA (unsigned char *out, const unsigned char *in) +{ + u64 hi = buf_get_le64 (in + 8); + u64 lo = buf_get_le64 (in + 0); + u64 carry = -(hi >> 63) & 0x87; + + hi = (hi << 1) + (lo >> 63); + lo = (lo << 1) ^ carry; + + buf_put_le64 (out + 8, hi); + buf_put_le64 (out + 0, lo); +} + + +static inline void xts_inc128 (unsigned char *seqno) +{ + u64 lo = buf_get_le64 (seqno + 0); + u64 hi = buf_get_le64 (seqno + 8); + + hi += !(++lo); + + buf_put_le64 (seqno + 0, lo); + buf_put_le64 (seqno + 8, hi); +} + + +gcry_err_code_t +_gcry_cipher_xts_crypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen, + int encrypt) +{ + gcry_cipher_encrypt_t tweak_fn = c->spec->encrypt; + gcry_cipher_encrypt_t crypt_fn = + encrypt ? c->spec->encrypt : c->spec->decrypt; + union + { + cipher_context_alignment_t xcx; + byte x1[GCRY_XTS_BLOCK_LEN]; + u64 x64[GCRY_XTS_BLOCK_LEN / sizeof(u64)]; + } tmp; + unsigned int burn, nburn; + size_t nblocks; + + if (c->spec->blocksize != GCRY_XTS_BLOCK_LEN) + return GPG_ERR_CIPHER_ALGO; + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if (inbuflen < GCRY_XTS_BLOCK_LEN) + return GPG_ERR_BUFFER_TOO_SHORT; + + /* Data-unit max length: 2^20 blocks. 
*/ + if (inbuflen > GCRY_XTS_BLOCK_LEN << 20) + return GPG_ERR_INV_LENGTH; + + nblocks = inbuflen / GCRY_XTS_BLOCK_LEN; + nblocks -= !encrypt && (inbuflen % GCRY_XTS_BLOCK_LEN) != 0; + + /* Generate first tweak value. */ + burn = tweak_fn (c->u_mode.xts.tweak_context, c->u_ctr.ctr, c->u_iv.iv); + + /* Use a bulk method if available. */ + if (nblocks && c->bulk.xts_crypt) + { + c->bulk.xts_crypt (&c->context.c, c->u_ctr.ctr, outbuf, inbuf, nblocks, + encrypt); + inbuf += nblocks * GCRY_XTS_BLOCK_LEN; + outbuf += nblocks * GCRY_XTS_BLOCK_LEN; + inbuflen -= nblocks * GCRY_XTS_BLOCK_LEN; + nblocks = 0; + } + + /* If we don't have a bulk method use the standard method. We also + use this method for the a remaining partial block. */ + + while (nblocks) + { + /* Xor-Encrypt/Decrypt-Xor block. */ + cipher_block_xor (tmp.x64, inbuf, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1); + burn = nburn > burn ? nburn : burn; + cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + + outbuf += GCRY_XTS_BLOCK_LEN; + inbuf += GCRY_XTS_BLOCK_LEN; + inbuflen -= GCRY_XTS_BLOCK_LEN; + nblocks--; + + /* Generate next tweak. */ + xts_gfmul_byA (c->u_ctr.ctr, c->u_ctr.ctr); + } + + /* Handle remaining data with ciphertext stealing. */ + if (inbuflen) + { + if (!encrypt) + { + gcry_assert (inbuflen > GCRY_XTS_BLOCK_LEN); + gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN * 2); + + /* Generate last tweak. */ + xts_gfmul_byA (tmp.x1, c->u_ctr.ctr); + + /* Decrypt last block first. */ + cipher_block_xor (outbuf, inbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); + nburn = crypt_fn (&c->context.c, outbuf, outbuf); + burn = nburn > burn ? nburn : burn; + cipher_block_xor (outbuf, outbuf, tmp.x64, GCRY_XTS_BLOCK_LEN); + + inbuflen -= GCRY_XTS_BLOCK_LEN; + inbuf += GCRY_XTS_BLOCK_LEN; + outbuf += GCRY_XTS_BLOCK_LEN; + } + + gcry_assert (inbuflen < GCRY_XTS_BLOCK_LEN); + outbuf -= GCRY_XTS_BLOCK_LEN; + + /* Steal ciphertext from previous block. */ + cipher_block_cpy (tmp.x64, outbuf, GCRY_XTS_BLOCK_LEN); + buf_cpy (tmp.x64, inbuf, inbuflen); + buf_cpy (outbuf + GCRY_XTS_BLOCK_LEN, outbuf, inbuflen); + + /* Decrypt/Encrypt last block. */ + cipher_block_xor (tmp.x64, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + nburn = crypt_fn (&c->context.c, tmp.x1, tmp.x1); + burn = nburn > burn ? nburn : burn; + cipher_block_xor (outbuf, tmp.x64, c->u_ctr.ctr, GCRY_XTS_BLOCK_LEN); + } + + /* Auto-increment data-unit sequence number */ + xts_inc128 (c->u_iv.iv); + + wipememory (&tmp, sizeof(tmp)); + wipememory (c->u_ctr.ctr, sizeof(c->u_ctr.ctr)); + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_xts_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 1); +} + + +gcry_err_code_t +_gcry_cipher_xts_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return _gcry_cipher_xts_crypt (c, outbuf, outbuflen, inbuf, inbuflen, 0); +} diff --git a/comm/third_party/libgcrypt/cipher/cipher.c b/comm/third_party/libgcrypt/cipher/cipher.c new file mode 100644 index 0000000000..1039dff728 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/cipher.c @@ -0,0 +1,1767 @@ +/* cipher.c - cipher dispatcher + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003 + * 2005, 2007, 2008, 2009, 2011 Free Software Foundation, Inc. 
+ * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "../src/gcrypt-testapi.h" +#include "cipher.h" +#include "./cipher-internal.h" + + +/* This is the list of the default ciphers, which are included in + libgcrypt. */ +static gcry_cipher_spec_t * const cipher_list[] = + { +#if USE_BLOWFISH + &_gcry_cipher_spec_blowfish, +#endif +#if USE_DES + &_gcry_cipher_spec_des, + &_gcry_cipher_spec_tripledes, +#endif +#if USE_ARCFOUR + &_gcry_cipher_spec_arcfour, +#endif +#if USE_CAST5 + &_gcry_cipher_spec_cast5, +#endif +#if USE_AES + &_gcry_cipher_spec_aes, + &_gcry_cipher_spec_aes192, + &_gcry_cipher_spec_aes256, +#endif +#if USE_TWOFISH + &_gcry_cipher_spec_twofish, + &_gcry_cipher_spec_twofish128, +#endif +#if USE_SERPENT + &_gcry_cipher_spec_serpent128, + &_gcry_cipher_spec_serpent192, + &_gcry_cipher_spec_serpent256, +#endif +#if USE_RFC2268 + &_gcry_cipher_spec_rfc2268_40, + &_gcry_cipher_spec_rfc2268_128, +#endif +#if USE_SEED + &_gcry_cipher_spec_seed, +#endif +#if USE_CAMELLIA + &_gcry_cipher_spec_camellia128, + &_gcry_cipher_spec_camellia192, + &_gcry_cipher_spec_camellia256, +#endif +#ifdef USE_IDEA + &_gcry_cipher_spec_idea, +#endif +#if USE_SALSA20 + &_gcry_cipher_spec_salsa20, + &_gcry_cipher_spec_salsa20r12, +#endif +#if USE_GOST28147 + &_gcry_cipher_spec_gost28147, + &_gcry_cipher_spec_gost28147_mesh, +#endif +#if USE_CHACHA20 + &_gcry_cipher_spec_chacha20, +#endif +#if USE_SM4 + &_gcry_cipher_spec_sm4, +#endif + NULL + }; + +/* Cipher implementations starting with index 0 (enum gcry_cipher_algos) */ +static gcry_cipher_spec_t * const cipher_list_algo0[] = + { + NULL, /* GCRY_CIPHER_NONE */ +#ifdef USE_IDEA + &_gcry_cipher_spec_idea, +#else + NULL, +#endif +#if USE_DES + &_gcry_cipher_spec_tripledes, +#else + NULL, +#endif +#if USE_CAST5 + &_gcry_cipher_spec_cast5, +#else + NULL, +#endif +#if USE_BLOWFISH + &_gcry_cipher_spec_blowfish, +#else + NULL, +#endif + NULL, /* GCRY_CIPHER_SAFER_SK128 */ + NULL, /* GCRY_CIPHER_DES_SK */ +#if USE_AES + &_gcry_cipher_spec_aes, + &_gcry_cipher_spec_aes192, + &_gcry_cipher_spec_aes256, +#else + NULL, + NULL, + NULL, +#endif +#if USE_TWOFISH + &_gcry_cipher_spec_twofish +#else + NULL +#endif + }; + +/* Cipher implementations starting with index 301 (enum gcry_cipher_algos) */ +static gcry_cipher_spec_t * const cipher_list_algo301[] = + { +#if USE_ARCFOUR + &_gcry_cipher_spec_arcfour, +#else + NULL, +#endif +#if USE_DES + &_gcry_cipher_spec_des, +#else + NULL, +#endif +#if USE_TWOFISH + &_gcry_cipher_spec_twofish128, +#else + NULL, +#endif +#if USE_SERPENT + &_gcry_cipher_spec_serpent128, + &_gcry_cipher_spec_serpent192, + &_gcry_cipher_spec_serpent256, +#else + NULL, + NULL, + NULL, +#endif +#if USE_RFC2268 + &_gcry_cipher_spec_rfc2268_40, + &_gcry_cipher_spec_rfc2268_128, +#else + NULL, + NULL, 
+#endif +#if USE_SEED + &_gcry_cipher_spec_seed, +#else + NULL, +#endif +#if USE_CAMELLIA + &_gcry_cipher_spec_camellia128, + &_gcry_cipher_spec_camellia192, + &_gcry_cipher_spec_camellia256, +#else + NULL, + NULL, + NULL, +#endif +#if USE_SALSA20 + &_gcry_cipher_spec_salsa20, + &_gcry_cipher_spec_salsa20r12, +#else + NULL, + NULL, +#endif +#if USE_GOST28147 + &_gcry_cipher_spec_gost28147, +#else + NULL, +#endif +#if USE_CHACHA20 + &_gcry_cipher_spec_chacha20, +#else + NULL, +#endif +#if USE_GOST28147 + &_gcry_cipher_spec_gost28147_mesh, +#else + NULL, +#endif +#if USE_SM4 + &_gcry_cipher_spec_sm4, +#else + NULL, +#endif + }; + + +static void _gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode); + + +static int +map_algo (int algo) +{ + return algo; +} + + +/* Return the spec structure for the cipher algorithm ALGO. For + an unknown algorithm NULL is returned. */ +static gcry_cipher_spec_t * +spec_from_algo (int algo) +{ + gcry_cipher_spec_t *spec = NULL; + + algo = map_algo (algo); + + if (algo >= 0 && algo < DIM(cipher_list_algo0)) + spec = cipher_list_algo0[algo]; + else if (algo >= 301 && algo < 301 + DIM(cipher_list_algo301)) + spec = cipher_list_algo301[algo - 301]; + + if (spec) + gcry_assert (spec->algo == algo); + + return spec; +} + + +/* Lookup a cipher's spec by its name. */ +static gcry_cipher_spec_t * +spec_from_name (const char *name) +{ + gcry_cipher_spec_t *spec; + int idx; + const char **aliases; + + for (idx=0; (spec = cipher_list[idx]); idx++) + { + if (!stricmp (name, spec->name)) + return spec; + if (spec->aliases) + { + for (aliases = spec->aliases; *aliases; aliases++) + if (!stricmp (name, *aliases)) + return spec; + } + } + + return NULL; +} + + +/* Lookup a cipher's spec by its OID. */ +static gcry_cipher_spec_t * +spec_from_oid (const char *oid) +{ + gcry_cipher_spec_t *spec; + gcry_cipher_oid_spec_t *oid_specs; + int idx, j; + + for (idx=0; (spec = cipher_list[idx]); idx++) + { + oid_specs = spec->oids; + if (oid_specs) + { + for (j = 0; oid_specs[j].oid; j++) + if (!stricmp (oid, oid_specs[j].oid)) + return spec; + } + } + + return NULL; +} + + +/* Locate the OID in the oid table and return the spec or NULL if not + found. An optional "oid." or "OID." prefix in OID is ignored, the + OID is expected to be in standard IETF dotted notation. A pointer + to the OID specification of the module implementing this algorithm + is return in OID_SPEC unless passed as NULL.*/ +static gcry_cipher_spec_t * +search_oid (const char *oid, gcry_cipher_oid_spec_t *oid_spec) +{ + gcry_cipher_spec_t *spec; + int i; + + if (!oid) + return NULL; + + if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4)) + oid += 4; + + spec = spec_from_oid (oid); + if (spec && spec->oids) + { + for (i = 0; spec->oids[i].oid; i++) + if (!stricmp (oid, spec->oids[i].oid)) + { + if (oid_spec) + *oid_spec = spec->oids[i]; + return spec; + } + } + + return NULL; +} + + +/* Map STRING to the cipher algorithm identifier. Returns the + algorithm ID of the cipher for the given name or 0 if the name is + not known. It is valid to pass NULL for STRING which results in a + return value of 0. */ +int +_gcry_cipher_map_name (const char *string) +{ + gcry_cipher_spec_t *spec; + + if (!string) + return 0; + + /* If the string starts with a digit (optionally prefixed with + either "OID." 
or "oid."), we first look into our table of ASN.1 + object identifiers to figure out the algorithm */ + + spec = search_oid (string, NULL); + if (spec) + return spec->algo; + + spec = spec_from_name (string); + if (spec) + return spec->algo; + + return 0; +} + + +/* Given a STRING with an OID in dotted decimal notation, this + function returns the cipher mode (GCRY_CIPHER_MODE_*) associated + with that OID or 0 if no mode is known. Passing NULL for string + yields a return value of 0. */ +int +_gcry_cipher_mode_from_oid (const char *string) +{ + gcry_cipher_spec_t *spec; + gcry_cipher_oid_spec_t oid_spec; + + if (!string) + return 0; + + spec = search_oid (string, &oid_spec); + if (spec) + return oid_spec.mode; + + return 0; +} + + +/* Map the cipher algorithm identifier ALGORITHM to a string + representing this algorithm. This string is the default name as + used by Libgcrypt. A "?" is returned for an unknown algorithm. + NULL is never returned. */ +const char * +_gcry_cipher_algo_name (int algorithm) +{ + gcry_cipher_spec_t *spec; + + spec = spec_from_algo (algorithm); + return spec? spec->name : "?"; +} + + +/* Flag the cipher algorithm with the identifier ALGORITHM as + disabled. There is no error return, the function does nothing for + unknown algorithms. Disabled algorithms are virtually not + available in Libgcrypt. This is not thread safe and should thus be + called early. */ +static void +disable_cipher_algo (int algo) +{ + gcry_cipher_spec_t *spec = spec_from_algo (algo); + + if (spec) + spec->flags.disabled = 1; +} + + +/* Return 0 if the cipher algorithm with identifier ALGORITHM is + available. Returns a basic error code value if it is not + available. */ +static gcry_err_code_t +check_cipher_algo (int algorithm) +{ + gcry_cipher_spec_t *spec; + + spec = spec_from_algo (algorithm); + if (spec && !spec->flags.disabled) + return 0; + + return GPG_ERR_CIPHER_ALGO; +} + + +/* Return the standard length in bits of the key for the cipher + algorithm with the identifier ALGORITHM. */ +static unsigned int +cipher_get_keylen (int algorithm) +{ + gcry_cipher_spec_t *spec; + unsigned len = 0; + + spec = spec_from_algo (algorithm); + if (spec) + { + len = spec->keylen; + if (!len) + log_bug ("cipher %d w/o key length\n", algorithm); + } + + return len; +} + + +/* Return the block length of the cipher algorithm with the identifier + ALGORITHM. This function return 0 for an invalid algorithm. */ +static unsigned int +cipher_get_blocksize (int algorithm) +{ + gcry_cipher_spec_t *spec; + unsigned len = 0; + + spec = spec_from_algo (algorithm); + if (spec) + { + len = spec->blocksize; + if (!len) + log_bug ("cipher %d w/o blocksize\n", algorithm); + } + + return len; +} + + +/* + Open a cipher handle for use with cipher algorithm ALGORITHM, using + the cipher mode MODE (one of the GCRY_CIPHER_MODE_*) and return a + handle in HANDLE. Put NULL into HANDLE and return an error code if + something goes wrong. FLAGS may be used to modify the + operation. The defined flags are: + + GCRY_CIPHER_SECURE: allocate all internal buffers in secure memory. + GCRY_CIPHER_ENABLE_SYNC: Enable the sync operation as used in OpenPGP. + GCRY_CIPHER_CBC_CTS: Enable CTS mode. + GCRY_CIPHER_CBC_MAC: Enable MAC mode. + + Values for these flags may be combined using OR. 
+ */ +gcry_err_code_t +_gcry_cipher_open (gcry_cipher_hd_t *handle, + int algo, int mode, unsigned int flags) +{ + gcry_err_code_t rc; + gcry_cipher_hd_t h = NULL; + + if (mode >= GCRY_CIPHER_MODE_INTERNAL) + rc = GPG_ERR_INV_CIPHER_MODE; + else + rc = _gcry_cipher_open_internal (&h, algo, mode, flags); + + *handle = rc ? NULL : h; + + return rc; +} + + +gcry_err_code_t +_gcry_cipher_open_internal (gcry_cipher_hd_t *handle, + int algo, int mode, unsigned int flags) +{ + int secure = (flags & GCRY_CIPHER_SECURE); + gcry_cipher_spec_t *spec; + gcry_cipher_hd_t h = NULL; + gcry_err_code_t err; + + /* If the application missed to call the random poll function, we do + it here to ensure that it is used once in a while. */ + _gcry_fast_random_poll (); + + spec = spec_from_algo (algo); + if (!spec) + err = GPG_ERR_CIPHER_ALGO; + else if (spec->flags.disabled) + err = GPG_ERR_CIPHER_ALGO; + else + err = 0; + + /* check flags */ + if ((! err) + && ((flags & ~(0 + | GCRY_CIPHER_SECURE + | GCRY_CIPHER_ENABLE_SYNC + | GCRY_CIPHER_CBC_CTS + | GCRY_CIPHER_CBC_MAC)) + || ((flags & GCRY_CIPHER_CBC_CTS) && (flags & GCRY_CIPHER_CBC_MAC)))) + err = GPG_ERR_CIPHER_ALGO; + + /* check that a valid mode has been requested */ + if (! err) + switch (mode) + { + case GCRY_CIPHER_MODE_CCM: + if (spec->blocksize != GCRY_CCM_BLOCK_LEN) + err = GPG_ERR_INV_CIPHER_MODE; + if (!spec->encrypt || !spec->decrypt) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_XTS: + if (spec->blocksize != GCRY_XTS_BLOCK_LEN) + err = GPG_ERR_INV_CIPHER_MODE; + if (!spec->encrypt || !spec->decrypt) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_ECB: + case GCRY_CIPHER_MODE_CBC: + case GCRY_CIPHER_MODE_CFB: + case GCRY_CIPHER_MODE_CFB8: + case GCRY_CIPHER_MODE_OFB: + case GCRY_CIPHER_MODE_CTR: + case GCRY_CIPHER_MODE_AESWRAP: + case GCRY_CIPHER_MODE_CMAC: + case GCRY_CIPHER_MODE_EAX: + case GCRY_CIPHER_MODE_GCM: + if (!spec->encrypt || !spec->decrypt) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_POLY1305: + if (!spec->stencrypt || !spec->stdecrypt || !spec->setiv) + err = GPG_ERR_INV_CIPHER_MODE; + else if (spec->algo != GCRY_CIPHER_CHACHA20) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_OCB: + /* Note that our implementation allows only for 128 bit block + length algorithms. Lower block lengths would be possible + but we do not implement them because they limit the + security too much. */ + if (!spec->encrypt || !spec->decrypt) + err = GPG_ERR_INV_CIPHER_MODE; + else if (spec->blocksize != (128/8)) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_STREAM: + if (!spec->stencrypt || !spec->stdecrypt) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_NONE: + /* This mode may be used for debugging. It copies the main + text verbatim to the ciphertext. We do not allow this in + fips mode or if no debug flag has been set. */ + if (fips_mode () || !_gcry_get_debug_flag (0)) + err = GPG_ERR_INV_CIPHER_MODE; + break; + + default: + err = GPG_ERR_INV_CIPHER_MODE; + } + + /* Perform selftest here and mark this with a flag in cipher_table? + No, we should not do this as it takes too long. Further it does + not make sense to exclude algorithms with failing selftests at + runtime: If a selftest fails there is something seriously wrong + with the system and thus we better die immediately. */ + + if (! 
err) + { + size_t size = (sizeof (*h) + + 2 * spec->contextsize + - sizeof (cipher_context_alignment_t) +#ifdef NEED_16BYTE_ALIGNED_CONTEXT + + 15 /* Space for leading alignment gap. */ +#endif /*NEED_16BYTE_ALIGNED_CONTEXT*/ + ); + + /* Space needed per mode. */ + switch (mode) + { + case GCRY_CIPHER_MODE_XTS: + /* Additional cipher context for tweak. */ + size += 2 * spec->contextsize + 15; + break; + + default: + break; + } + + if (secure) + h = xtrycalloc_secure (1, size); + else + h = xtrycalloc (1, size); + + if (! h) + err = gpg_err_code_from_syserror (); + else + { + size_t off = 0; + char *tc; + +#ifdef NEED_16BYTE_ALIGNED_CONTEXT + if ( ((uintptr_t)h & 0x0f) ) + { + /* The malloced block is not aligned on a 16 byte + boundary. Correct for this. */ + off = 16 - ((uintptr_t)h & 0x0f); + h = (void*)((char*)h + off); + } +#endif /*NEED_16BYTE_ALIGNED_CONTEXT*/ + + h->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL; + h->actual_handle_size = size - off; + h->handle_offset = off; + h->spec = spec; + h->algo = algo; + h->mode = mode; + h->flags = flags; + + /* Setup mode routines. */ + _gcry_cipher_setup_mode_ops(h, mode); + + /* Setup defaults depending on the mode. */ + switch (mode) + { + case GCRY_CIPHER_MODE_OCB: + h->u_mode.ocb.taglen = 16; /* Bytes. */ + break; + + case GCRY_CIPHER_MODE_XTS: + tc = h->context.c + spec->contextsize * 2; + tc += (16 - (uintptr_t)tc % 16) % 16; + h->u_mode.xts.tweak_context = tc; + + break; + + default: + break; + } + } + } + + /* Done. */ + + *handle = err ? NULL : h; + + return err; +} + + +/* Release all resources associated with the cipher handle H. H may be + NULL in which case this is a no-operation. */ +void +_gcry_cipher_close (gcry_cipher_hd_t h) +{ + size_t off; + + if (!h) + return; + + if ((h->magic != CTX_MAGIC_SECURE) + && (h->magic != CTX_MAGIC_NORMAL)) + _gcry_fatal_error(GPG_ERR_INTERNAL, + "gcry_cipher_close: already closed/invalid handle"); + else + h->magic = 0; + + /* We always want to wipe out the memory even when the context has + been allocated in secure memory. The user might have disabled + secure memory or is using his own implementation which does not + do the wiping. To accomplish this we need to keep track of the + actual size of this structure because we have no way to known + how large the allocated area was when using a standard malloc. */ + off = h->handle_offset; + wipememory (h, h->actual_handle_size); + + xfree ((char*)h - off); +} + + +/* Set the key to be used for the encryption context C to KEY with + length KEYLEN. The length should match the required length. */ +static gcry_err_code_t +cipher_setkey (gcry_cipher_hd_t c, byte *key, size_t keylen) +{ + gcry_err_code_t rc; + + if (c->mode == GCRY_CIPHER_MODE_XTS) + { + /* XTS uses two keys. */ + if (keylen % 2) + return GPG_ERR_INV_KEYLEN; + keylen /= 2; + + if (fips_mode ()) + { + /* Reject key if subkeys Key_1 and Key_2 are equal. + See "Implementation Guidance for FIPS 140-2, A.9 XTS-AES + Key Generation Requirements" for details. */ + if (buf_eq_const (key, key + keylen, keylen)) + return GPG_ERR_WEAK_KEY; + } + } + + rc = c->spec->setkey (&c->context.c, key, keylen, &c->bulk); + if (!rc || (c->marks.allow_weak_key && rc == GPG_ERR_WEAK_KEY)) + { + /* Duplicate initial context. 
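+         This keeps a pristine post-setkey copy of the cipher context
+         right behind the working copy, so that cipher_reset() can
+         restore it later with a plain memcpy instead of re-running the
+         key schedule.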
*/ + memcpy ((void *) ((char *) &c->context.c + c->spec->contextsize), + (void *) &c->context.c, + c->spec->contextsize); + c->marks.key = 1; + + switch (c->mode) + { + case GCRY_CIPHER_MODE_CMAC: + rc = _gcry_cipher_cmac_set_subkeys (c); + break; + + case GCRY_CIPHER_MODE_EAX: + rc = _gcry_cipher_eax_setkey (c); + break; + + case GCRY_CIPHER_MODE_GCM: + _gcry_cipher_gcm_setkey (c); + break; + + case GCRY_CIPHER_MODE_OCB: + _gcry_cipher_ocb_setkey (c); + break; + + case GCRY_CIPHER_MODE_POLY1305: + _gcry_cipher_poly1305_setkey (c); + break; + + case GCRY_CIPHER_MODE_XTS: + /* Setup tweak cipher with second part of XTS key. */ + rc = c->spec->setkey (c->u_mode.xts.tweak_context, key + keylen, + keylen, &c->bulk); + if (!rc || (c->marks.allow_weak_key && rc == GPG_ERR_WEAK_KEY)) + { + /* Duplicate initial tweak context. */ + memcpy (c->u_mode.xts.tweak_context + c->spec->contextsize, + c->u_mode.xts.tweak_context, c->spec->contextsize); + } + else + c->marks.key = 0; + break; + + default: + break; + }; + } + else + c->marks.key = 0; + + return rc; +} + + +/* Set the IV to be used for the encryption context C to IV with + length IVLEN. The length should match the required length. */ +static gcry_err_code_t +cipher_setiv (gcry_cipher_hd_t c, const byte *iv, size_t ivlen) +{ + /* If the cipher has its own IV handler, we use only this one. This + is currently used for stream ciphers requiring a nonce. */ + if (c->spec->setiv) + { + c->spec->setiv (&c->context.c, iv, ivlen); + return 0; + } + + memset (c->u_iv.iv, 0, c->spec->blocksize); + if (iv) + { + if (ivlen != c->spec->blocksize) + { + log_info ("WARNING: cipher_setiv: ivlen=%u blklen=%u\n", + (unsigned int)ivlen, (unsigned int)c->spec->blocksize); + fips_signal_error ("IV length does not match blocklength"); + } + if (ivlen > c->spec->blocksize) + ivlen = c->spec->blocksize; + memcpy (c->u_iv.iv, iv, ivlen); + c->marks.iv = 1; + } + else + c->marks.iv = 0; + c->unused = 0; + + return 0; +} + + +/* Reset the cipher context to the initial context. This is basically + the same as an release followed by a new. */ +static void +cipher_reset (gcry_cipher_hd_t c) +{ + unsigned int marks_key, marks_allow_weak_key; + + marks_key = c->marks.key; + marks_allow_weak_key = c->marks.allow_weak_key; + + memcpy (&c->context.c, + (char *) &c->context.c + c->spec->contextsize, + c->spec->contextsize); + memset (&c->marks, 0, sizeof c->marks); + memset (c->u_iv.iv, 0, c->spec->blocksize); + memset (c->lastiv, 0, c->spec->blocksize); + memset (c->u_ctr.ctr, 0, c->spec->blocksize); + c->unused = 0; + + c->marks.key = marks_key; + c->marks.allow_weak_key = marks_allow_weak_key; + + switch (c->mode) + { + case GCRY_CIPHER_MODE_CMAC: + _gcry_cmac_reset(&c->u_mode.cmac); + break; + + case GCRY_CIPHER_MODE_EAX: + _gcry_cmac_reset(&c->u_mode.eax.cmac_header); + _gcry_cmac_reset(&c->u_mode.eax.cmac_ciphertext); + break; + + case GCRY_CIPHER_MODE_GCM: + /* Only clear head of u_mode, keep ghash_key and gcm_table. 
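+         Both depend only on the key, not on per-message state, so they
+         remain valid after a reset; the pointer arithmetic below
+         computes the size of the u_mode region that precedes
+         u_ghash_key and wipes only that prefix.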
*/ + { + byte *u_mode_pos = (void *)&c->u_mode; + byte *ghash_key_pos = c->u_mode.gcm.u_ghash_key.key; + size_t u_mode_head_length = ghash_key_pos - u_mode_pos; + + memset (&c->u_mode, 0, u_mode_head_length); + } + break; + + case GCRY_CIPHER_MODE_POLY1305: + memset (&c->u_mode.poly1305, 0, sizeof c->u_mode.poly1305); + break; + + case GCRY_CIPHER_MODE_CCM: + memset (&c->u_mode.ccm, 0, sizeof c->u_mode.ccm); + break; + + case GCRY_CIPHER_MODE_OCB: + /* Do not clear precalculated L-values */ + { + byte *u_mode_head_pos = (void *)&c->u_mode.ocb; + byte *u_mode_tail_pos = (void *)&c->u_mode.ocb.tag; + size_t u_mode_head_length = u_mode_tail_pos - u_mode_head_pos; + size_t u_mode_tail_length = sizeof(c->u_mode.ocb) - u_mode_head_length; + + memset (u_mode_tail_pos, 0, u_mode_tail_length); + + /* Setup default taglen. */ + c->u_mode.ocb.taglen = 16; + } + break; + + case GCRY_CIPHER_MODE_XTS: + memcpy (c->u_mode.xts.tweak_context, + c->u_mode.xts.tweak_context + c->spec->contextsize, + c->spec->contextsize); + break; + + default: + break; /* u_mode unused by other modes. */ + } +} + + + +static gcry_err_code_t +do_ecb_crypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen, + gcry_cipher_encrypt_t crypt_fn) +{ + unsigned int blocksize = c->spec->blocksize; + size_t n, nblocks; + unsigned int burn, nburn; + + if (outbuflen < inbuflen) + return GPG_ERR_BUFFER_TOO_SHORT; + if ((inbuflen % blocksize)) + return GPG_ERR_INV_LENGTH; + + nblocks = inbuflen / blocksize; + burn = 0; + + for (n=0; n < nblocks; n++ ) + { + nburn = crypt_fn (&c->context.c, outbuf, inbuf); + burn = nburn > burn ? nburn : burn; + inbuf += blocksize; + outbuf += blocksize; + } + + if (burn > 0) + _gcry_burn_stack (burn + 4 * sizeof(void *)); + + return 0; +} + +static gcry_err_code_t +do_ecb_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, c->spec->encrypt); +} + +static gcry_err_code_t +do_ecb_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + return do_ecb_crypt (c, outbuf, outbuflen, inbuf, inbuflen, c->spec->decrypt); +} + + +static gcry_err_code_t +do_stream_encrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + (void)outbuflen; + c->spec->stencrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen); + return 0; +} + +static gcry_err_code_t +do_stream_decrypt (gcry_cipher_hd_t c, + unsigned char *outbuf, size_t outbuflen, + const unsigned char *inbuf, size_t inbuflen) +{ + (void)outbuflen; + c->spec->stdecrypt (&c->context.c, outbuf, (void *)inbuf, inbuflen); + return 0; +} + + +static gcry_err_code_t +do_encrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t rc; + + (void)outbuflen; + + switch (c->mode) + { + case GCRY_CIPHER_MODE_CMAC: + rc = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_NONE: + if (fips_mode () || !_gcry_get_debug_flag (0)) + { + fips_signal_error ("cipher mode NONE used"); + rc = GPG_ERR_INV_CIPHER_MODE; + } + else + { + if (inbuf != outbuf) + memmove (outbuf, inbuf, inbuflen); + rc = 0; + } + break; + + default: + log_fatal ("cipher_encrypt: invalid mode %d\n", c->mode ); + rc = GPG_ERR_INV_CIPHER_MODE; + break; + } + + return rc; +} + +static gcry_err_code_t 
+do_decrypt_none_unknown (gcry_cipher_hd_t c, byte *outbuf, size_t outbuflen, + const byte *inbuf, size_t inbuflen) +{ + gcry_err_code_t rc; + + (void)outbuflen; + + switch (c->mode) + { + case GCRY_CIPHER_MODE_CMAC: + rc = GPG_ERR_INV_CIPHER_MODE; + break; + + case GCRY_CIPHER_MODE_NONE: + if (fips_mode () || !_gcry_get_debug_flag (0)) + { + fips_signal_error ("cipher mode NONE used"); + rc = GPG_ERR_INV_CIPHER_MODE; + } + else + { + if (inbuf != outbuf) + memmove (outbuf, inbuf, inbuflen); + rc = 0; + } + break; + + default: + log_fatal ("cipher_decrypt: invalid mode %d\n", c->mode ); + rc = GPG_ERR_INV_CIPHER_MODE; + break; + } + + return rc; +} + + +/**************** + * Encrypt IN and write it to OUT. If IN is NULL, in-place encryption has + * been requested. + */ +gcry_err_code_t +_gcry_cipher_encrypt (gcry_cipher_hd_t h, void *out, size_t outsize, + const void *in, size_t inlen) +{ + gcry_err_code_t rc; + + if (!in) /* Caller requested in-place encryption. */ + { + in = out; + inlen = outsize; + } + + if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key) + { + log_error ("cipher_encrypt: key not set\n"); + return GPG_ERR_MISSING_KEY; + } + + rc = h->mode_ops.encrypt (h, out, outsize, in, inlen); + + /* Failsafe: Make sure that the plaintext will never make it into + OUT if the encryption returned an error. */ + if (rc && out) + memset (out, 0x42, outsize); + + return rc; +} + + +/**************** + * Decrypt IN and write it to OUT. If IN is NULL, in-place encryption has + * been requested. + */ +gcry_err_code_t +_gcry_cipher_decrypt (gcry_cipher_hd_t h, void *out, size_t outsize, + const void *in, size_t inlen) +{ + if (!in) /* Caller requested in-place encryption. */ + { + in = out; + inlen = outsize; + } + + if (h->mode != GCRY_CIPHER_MODE_NONE && !h->marks.key) + { + log_error ("cipher_decrypt: key not set\n"); + return GPG_ERR_MISSING_KEY; + } + + return h->mode_ops.decrypt (h, out, outsize, in, inlen); +} + + +/**************** + * Used for PGP's somewhat strange CFB mode. Only works if + * the corresponding flag is set. + */ +static void +cipher_sync (gcry_cipher_hd_t c) +{ + if ((c->flags & GCRY_CIPHER_ENABLE_SYNC) && c->unused) + { + memmove (c->u_iv.iv + c->unused, + c->u_iv.iv, c->spec->blocksize - c->unused); + memcpy (c->u_iv.iv, + c->lastiv + c->spec->blocksize - c->unused, c->unused); + c->unused = 0; + } +} + + +gcry_err_code_t +_gcry_cipher_setkey (gcry_cipher_hd_t hd, const void *key, size_t keylen) +{ + return cipher_setkey (hd, (void*)key, keylen); +} + + +gcry_err_code_t +_gcry_cipher_setiv (gcry_cipher_hd_t hd, const void *iv, size_t ivlen) +{ + return hd->mode_ops.setiv (hd, iv, ivlen); +} + + +/* Set counter for CTR mode. (CTR,CTRLEN) must denote a buffer of + block size length, or (NULL,0) to set the CTR to the all-zero + block. 
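+
+   For example, via the public wrapper (CTRBUF being a caller-provided
+   buffer of blocksize bytes, 16 for AES):
+
+     err = gcry_cipher_setctr (hd, ctrbuf, 16);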
*/ +gpg_err_code_t +_gcry_cipher_setctr (gcry_cipher_hd_t hd, const void *ctr, size_t ctrlen) +{ + if (ctr && ctrlen == hd->spec->blocksize) + { + memcpy (hd->u_ctr.ctr, ctr, hd->spec->blocksize); + hd->unused = 0; + } + else if (!ctr || !ctrlen) + { + memset (hd->u_ctr.ctr, 0, hd->spec->blocksize); + hd->unused = 0; + } + else + return GPG_ERR_INV_ARG; + + return 0; +} + +gpg_err_code_t +_gcry_cipher_getctr (gcry_cipher_hd_t hd, void *ctr, size_t ctrlen) +{ + if (ctr && ctrlen == hd->spec->blocksize) + memcpy (ctr, hd->u_ctr.ctr, hd->spec->blocksize); + else + return GPG_ERR_INV_ARG; + + return 0; +} + + +gcry_err_code_t +_gcry_cipher_authenticate (gcry_cipher_hd_t hd, const void *abuf, + size_t abuflen) +{ + gcry_err_code_t rc; + + if (hd->mode_ops.authenticate) + { + rc = hd->mode_ops.authenticate (hd, abuf, abuflen); + } + else + { + log_error ("gcry_cipher_authenticate: invalid mode %d\n", hd->mode); + rc = GPG_ERR_INV_CIPHER_MODE; + } + + return rc; +} + + +gcry_err_code_t +_gcry_cipher_gettag (gcry_cipher_hd_t hd, void *outtag, size_t taglen) +{ + gcry_err_code_t rc; + + if (hd->mode_ops.get_tag) + { + rc = hd->mode_ops.get_tag (hd, outtag, taglen); + } + else + { + log_error ("gcry_cipher_gettag: invalid mode %d\n", hd->mode); + rc = GPG_ERR_INV_CIPHER_MODE; + } + + return rc; +} + + +gcry_err_code_t +_gcry_cipher_checktag (gcry_cipher_hd_t hd, const void *intag, size_t taglen) +{ + gcry_err_code_t rc; + + if (hd->mode_ops.check_tag) + { + rc = hd->mode_ops.check_tag (hd, intag, taglen); + } + else + { + log_error ("gcry_cipher_checktag: invalid mode %d\n", hd->mode); + rc = GPG_ERR_INV_CIPHER_MODE; + } + + return rc; +} + + + +static void +_gcry_cipher_setup_mode_ops(gcry_cipher_hd_t c, int mode) +{ + /* Setup encryption and decryption routines. 
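+     Note that OFB and CTR install the same routine for both
+     directions: these modes only XOR a generated key stream into the
+     data, so encryption and decryption are the identical operation.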
*/ + switch (mode) + { + case GCRY_CIPHER_MODE_STREAM: + c->mode_ops.encrypt = do_stream_encrypt; + c->mode_ops.decrypt = do_stream_decrypt; + break; + + case GCRY_CIPHER_MODE_ECB: + c->mode_ops.encrypt = do_ecb_encrypt; + c->mode_ops.decrypt = do_ecb_decrypt; + break; + + case GCRY_CIPHER_MODE_CBC: + if (!(c->flags & GCRY_CIPHER_CBC_CTS)) + { + c->mode_ops.encrypt = _gcry_cipher_cbc_encrypt; + c->mode_ops.decrypt = _gcry_cipher_cbc_decrypt; + } + else + { + c->mode_ops.encrypt = _gcry_cipher_cbc_cts_encrypt; + c->mode_ops.decrypt = _gcry_cipher_cbc_cts_decrypt; + } + break; + + case GCRY_CIPHER_MODE_CFB: + c->mode_ops.encrypt = _gcry_cipher_cfb_encrypt; + c->mode_ops.decrypt = _gcry_cipher_cfb_decrypt; + break; + + case GCRY_CIPHER_MODE_CFB8: + c->mode_ops.encrypt = _gcry_cipher_cfb8_encrypt; + c->mode_ops.decrypt = _gcry_cipher_cfb8_decrypt; + break; + + case GCRY_CIPHER_MODE_OFB: + c->mode_ops.encrypt = _gcry_cipher_ofb_encrypt; + c->mode_ops.decrypt = _gcry_cipher_ofb_encrypt; + break; + + case GCRY_CIPHER_MODE_CTR: + c->mode_ops.encrypt = _gcry_cipher_ctr_encrypt; + c->mode_ops.decrypt = _gcry_cipher_ctr_encrypt; + break; + + case GCRY_CIPHER_MODE_AESWRAP: + c->mode_ops.encrypt = _gcry_cipher_aeswrap_encrypt; + c->mode_ops.decrypt = _gcry_cipher_aeswrap_decrypt; + break; + + case GCRY_CIPHER_MODE_CCM: + c->mode_ops.encrypt = _gcry_cipher_ccm_encrypt; + c->mode_ops.decrypt = _gcry_cipher_ccm_decrypt; + break; + + case GCRY_CIPHER_MODE_EAX: + c->mode_ops.encrypt = _gcry_cipher_eax_encrypt; + c->mode_ops.decrypt = _gcry_cipher_eax_decrypt; + break; + + case GCRY_CIPHER_MODE_GCM: + c->mode_ops.encrypt = _gcry_cipher_gcm_encrypt; + c->mode_ops.decrypt = _gcry_cipher_gcm_decrypt; + break; + + case GCRY_CIPHER_MODE_POLY1305: + c->mode_ops.encrypt = _gcry_cipher_poly1305_encrypt; + c->mode_ops.decrypt = _gcry_cipher_poly1305_decrypt; + break; + + case GCRY_CIPHER_MODE_OCB: + c->mode_ops.encrypt = _gcry_cipher_ocb_encrypt; + c->mode_ops.decrypt = _gcry_cipher_ocb_decrypt; + break; + + case GCRY_CIPHER_MODE_XTS: + c->mode_ops.encrypt = _gcry_cipher_xts_encrypt; + c->mode_ops.decrypt = _gcry_cipher_xts_decrypt; + break; + + default: + c->mode_ops.encrypt = do_encrypt_none_unknown; + c->mode_ops.decrypt = do_decrypt_none_unknown; + break; + } + + /* Setup IV setting routine. */ + switch (mode) + { + case GCRY_CIPHER_MODE_CCM: + c->mode_ops.setiv = _gcry_cipher_ccm_set_nonce; + break; + + case GCRY_CIPHER_MODE_EAX: + c->mode_ops.setiv = _gcry_cipher_eax_set_nonce; + break; + + case GCRY_CIPHER_MODE_GCM: + c->mode_ops.setiv = _gcry_cipher_gcm_setiv; + break; + + case GCRY_CIPHER_MODE_POLY1305: + c->mode_ops.setiv = _gcry_cipher_poly1305_setiv; + break; + + case GCRY_CIPHER_MODE_OCB: + c->mode_ops.setiv = _gcry_cipher_ocb_set_nonce; + break; + + default: + c->mode_ops.setiv = cipher_setiv; + break; + } + + + /* Setup authentication routines for AEAD modes. 
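+     Modes without an authentication component leave these hooks NULL,
+     which makes _gcry_cipher_authenticate, _gcry_cipher_gettag and
+     _gcry_cipher_checktag fail with GPG_ERR_INV_CIPHER_MODE.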
*/ + switch (mode) + { + case GCRY_CIPHER_MODE_CCM: + c->mode_ops.authenticate = _gcry_cipher_ccm_authenticate; + c->mode_ops.get_tag = _gcry_cipher_ccm_get_tag; + c->mode_ops.check_tag = _gcry_cipher_ccm_check_tag; + break; + + case GCRY_CIPHER_MODE_CMAC: + c->mode_ops.authenticate = _gcry_cipher_cmac_authenticate; + c->mode_ops.get_tag = _gcry_cipher_cmac_get_tag; + c->mode_ops.check_tag = _gcry_cipher_cmac_check_tag; + break; + + case GCRY_CIPHER_MODE_EAX: + c->mode_ops.authenticate = _gcry_cipher_eax_authenticate; + c->mode_ops.get_tag = _gcry_cipher_eax_get_tag; + c->mode_ops.check_tag = _gcry_cipher_eax_check_tag; + break; + + case GCRY_CIPHER_MODE_GCM: + c->mode_ops.authenticate = _gcry_cipher_gcm_authenticate; + c->mode_ops.get_tag = _gcry_cipher_gcm_get_tag; + c->mode_ops.check_tag = _gcry_cipher_gcm_check_tag; + break; + + case GCRY_CIPHER_MODE_POLY1305: + c->mode_ops.authenticate = _gcry_cipher_poly1305_authenticate; + c->mode_ops.get_tag = _gcry_cipher_poly1305_get_tag; + c->mode_ops.check_tag = _gcry_cipher_poly1305_check_tag; + break; + + case GCRY_CIPHER_MODE_OCB: + c->mode_ops.authenticate = _gcry_cipher_ocb_authenticate; + c->mode_ops.get_tag = _gcry_cipher_ocb_get_tag; + c->mode_ops.check_tag = _gcry_cipher_ocb_check_tag; + break; + + default: + c->mode_ops.authenticate = NULL; + c->mode_ops.get_tag = NULL; + c->mode_ops.check_tag = NULL; + break; + } +} + + +gcry_err_code_t +_gcry_cipher_ctl (gcry_cipher_hd_t h, int cmd, void *buffer, size_t buflen) +{ + gcry_err_code_t rc = 0; + + switch (cmd) + { + case GCRYCTL_RESET: + cipher_reset (h); + break; + + case GCRYCTL_FINALIZE: + if (!h || buffer || buflen) + return GPG_ERR_INV_ARG; + h->marks.finalize = 1; + break; + + case GCRYCTL_CFB_SYNC: + cipher_sync( h ); + break; + + case GCRYCTL_SET_CBC_CTS: + if (buflen) + if (h->flags & GCRY_CIPHER_CBC_MAC) + rc = GPG_ERR_INV_FLAG; + else + h->flags |= GCRY_CIPHER_CBC_CTS; + else + h->flags &= ~GCRY_CIPHER_CBC_CTS; + break; + + case GCRYCTL_SET_CBC_MAC: + if (buflen) + if (h->flags & GCRY_CIPHER_CBC_CTS) + rc = GPG_ERR_INV_FLAG; + else + h->flags |= GCRY_CIPHER_CBC_MAC; + else + h->flags &= ~GCRY_CIPHER_CBC_MAC; + break; + + case GCRYCTL_SET_CCM_LENGTHS: + { + u64 params[3]; + size_t encryptedlen; + size_t aadlen; + size_t authtaglen; + + if (h->mode != GCRY_CIPHER_MODE_CCM) + return GPG_ERR_INV_CIPHER_MODE; + + if (!buffer || buflen != 3 * sizeof(u64)) + return GPG_ERR_INV_ARG; + + /* This command is used to pass additional length parameters needed + by CCM mode to initialize CBC-MAC. */ + memcpy (params, buffer, sizeof(params)); + encryptedlen = params[0]; + aadlen = params[1]; + authtaglen = params[2]; + + rc = _gcry_cipher_ccm_set_lengths (h, encryptedlen, aadlen, authtaglen); + } + break; + + case GCRYCTL_SET_TAGLEN: + if (!h || !buffer || buflen != sizeof(int) ) + return GPG_ERR_INV_ARG; + switch (h->mode) + { + case GCRY_CIPHER_MODE_OCB: + switch (*(int*)buffer) + { + case 8: case 12: case 16: + h->u_mode.ocb.taglen = *(int*)buffer; + break; + default: + rc = GPG_ERR_INV_LENGTH; /* Invalid tag length. */ + break; + } + break; + + default: + rc =GPG_ERR_INV_CIPHER_MODE; + break; + } + break; + + case GCRYCTL_DISABLE_ALGO: + /* This command expects NULL for H and BUFFER to point to an + integer with the algo number. 
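+         For example, via the public wrapper (no handle is involved, so
+         H is passed as NULL):
+
+           int algo = GCRY_CIPHER_ARCFOUR;
+           gcry_cipher_ctl (NULL, GCRYCTL_DISABLE_ALGO, &algo, sizeof (int));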
*/ + if( h || !buffer || buflen != sizeof(int) ) + return GPG_ERR_CIPHER_ALGO; + disable_cipher_algo( *(int*)buffer ); + break; + + case PRIV_CIPHERCTL_DISABLE_WEAK_KEY: /* (private) */ + if (h->spec->set_extra_info) + rc = h->spec->set_extra_info + (&h->context.c, CIPHER_INFO_NO_WEAK_KEY, NULL, 0); + else + rc = GPG_ERR_NOT_SUPPORTED; + break; + + case PRIV_CIPHERCTL_GET_INPUT_VECTOR: /* (private) */ + /* This is the input block as used in CFB and OFB mode which has + initially been set as IV. The returned format is: + 1 byte Actual length of the block in bytes. + n byte The block. + If the provided buffer is too short, an error is returned. */ + if (buflen < (1 + h->spec->blocksize)) + rc = GPG_ERR_TOO_SHORT; + else + { + unsigned char *ivp; + unsigned char *dst = buffer; + int n = h->unused; + + if (!n) + n = h->spec->blocksize; + gcry_assert (n <= h->spec->blocksize); + *dst++ = n; + ivp = h->u_iv.iv + h->spec->blocksize - n; + while (n--) + *dst++ = *ivp++; + } + break; + + case GCRYCTL_SET_SBOX: + if (h->spec->set_extra_info) + rc = h->spec->set_extra_info + (&h->context.c, GCRYCTL_SET_SBOX, buffer, buflen); + else + rc = GPG_ERR_NOT_SUPPORTED; + break; + + case GCRYCTL_SET_ALLOW_WEAK_KEY: + /* Expecting BUFFER to be NULL and buflen to be on/off flag (0 or 1). */ + if (!h || buffer || buflen > 1) + return GPG_ERR_CIPHER_ALGO; + h->marks.allow_weak_key = buflen ? 1 : 0; + break; + + default: + rc = GPG_ERR_INV_OP; + } + + return rc; +} + + +/* Return information about the cipher handle H. CMD is the kind of + * information requested. + * + * CMD may be one of: + * + * GCRYCTL_GET_TAGLEN: + * Return the length of the tag for an AE algorithm mode. An + * error is returned for modes which do not support a tag. + * BUFFER must be given as NULL. On success the result is stored + * at NBYTES. The taglen is returned in bytes. + * + * The function returns 0 on success or an error code. + */ +gcry_err_code_t +_gcry_cipher_info (gcry_cipher_hd_t h, int cmd, void *buffer, size_t *nbytes) +{ + gcry_err_code_t rc = 0; + + switch (cmd) + { + case GCRYCTL_GET_TAGLEN: + if (!h || buffer || !nbytes) + rc = GPG_ERR_INV_ARG; + else + { + switch (h->mode) + { + case GCRY_CIPHER_MODE_OCB: + *nbytes = h->u_mode.ocb.taglen; + break; + + case GCRY_CIPHER_MODE_CCM: + *nbytes = h->u_mode.ccm.authlen; + break; + + case GCRY_CIPHER_MODE_EAX: + *nbytes = h->spec->blocksize; + break; + + case GCRY_CIPHER_MODE_GCM: + *nbytes = GCRY_GCM_BLOCK_LEN; + break; + + case GCRY_CIPHER_MODE_POLY1305: + *nbytes = POLY1305_TAGLEN; + break; + + default: + rc = GPG_ERR_INV_CIPHER_MODE; + break; + } + } + break; + + default: + rc = GPG_ERR_INV_OP; + } + + return rc; +} + +/* Return information about the given cipher algorithm ALGO. + + WHAT select the kind of information returned: + + GCRYCTL_GET_KEYLEN: + Return the length of the key. If the algorithm ALGO + supports multiple key lengths, the maximum supported key length + is returned. The key length is returned as number of octets. + BUFFER and NBYTES must be zero. + + GCRYCTL_GET_BLKLEN: + Return the blocklength of the algorithm ALGO counted in octets. + BUFFER and NBYTES must be zero. + + GCRYCTL_TEST_ALGO: + Returns 0 if the specified algorithm ALGO is available for use. + BUFFER and NBYTES must be zero. + + Note: Because this function is in most cases used to return an + integer value, we can make it easier for the caller to just look at + the return value. The caller will in all cases consult the value + and thereby detecting whether a error occurred or not (i.e. 
while + checking the block size) + */ +gcry_err_code_t +_gcry_cipher_algo_info (int algo, int what, void *buffer, size_t *nbytes) +{ + gcry_err_code_t rc = 0; + unsigned int ui; + + switch (what) + { + case GCRYCTL_GET_KEYLEN: + if (buffer || (! nbytes)) + rc = GPG_ERR_CIPHER_ALGO; + else + { + ui = cipher_get_keylen (algo); + if ((ui > 0) && (ui <= 512)) + *nbytes = (size_t) ui / 8; + else + /* The only reason for an error is an invalid algo. */ + rc = GPG_ERR_CIPHER_ALGO; + } + break; + + case GCRYCTL_GET_BLKLEN: + if (buffer || (! nbytes)) + rc = GPG_ERR_CIPHER_ALGO; + else + { + ui = cipher_get_blocksize (algo); + if ((ui > 0) && (ui < 10000)) + *nbytes = ui; + else + { + /* The only reason is an invalid algo or a strange + blocksize. */ + rc = GPG_ERR_CIPHER_ALGO; + } + } + break; + + case GCRYCTL_TEST_ALGO: + if (buffer || nbytes) + rc = GPG_ERR_INV_ARG; + else + rc = check_cipher_algo (algo); + break; + + default: + rc = GPG_ERR_INV_OP; + } + + return rc; +} + + +/* This function returns length of the key for algorithm ALGO. If the + algorithm supports multiple key lengths, the maximum supported key + length is returned. On error 0 is returned. The key length is + returned as number of octets. + + This is a convenience functions which should be preferred over + gcry_cipher_algo_info because it allows for proper type + checking. */ +size_t +_gcry_cipher_get_algo_keylen (int algo) +{ + size_t n; + + if (_gcry_cipher_algo_info (algo, GCRYCTL_GET_KEYLEN, NULL, &n)) + n = 0; + return n; +} + + +/* This functions returns the blocklength of the algorithm ALGO + counted in octets. On error 0 is returned. + + This is a convenience functions which should be preferred over + gcry_cipher_algo_info because it allows for proper type + checking. */ +size_t +_gcry_cipher_get_algo_blklen (int algo) +{ + size_t n; + + if (_gcry_cipher_algo_info( algo, GCRYCTL_GET_BLKLEN, NULL, &n)) + n = 0; + return n; +} + + +/* Explicitly initialize this module. */ +gcry_err_code_t +_gcry_cipher_init (void) +{ + if (fips_mode()) + { + /* disable algorithms that are disallowed in fips */ + int idx; + gcry_cipher_spec_t *spec; + + for (idx = 0; (spec = cipher_list[idx]); idx++) + if (!spec->flags.fips) + spec->flags.disabled = 1; + } + + return 0; +} + + +/* Run the selftests for cipher algorithm ALGO with optional reporting + function REPORT. */ +gpg_error_t +_gcry_cipher_selftest (int algo, int extended, selftest_report_func_t report) +{ + gcry_err_code_t ec = 0; + gcry_cipher_spec_t *spec; + + spec = spec_from_algo (algo); + if (spec && !spec->flags.disabled && spec->selftest) + ec = spec->selftest (algo, extended, report); + else + { + ec = GPG_ERR_CIPHER_ALGO; + if (report) + report ("cipher", algo, "module", + (spec && !spec->flags.disabled)? + "no selftest available" : + spec? "algorithm disabled" : "algorithm not found"); + } + + return gpg_error (ec); +} diff --git a/comm/third_party/libgcrypt/cipher/crc-armv8-aarch64-ce.S b/comm/third_party/libgcrypt/cipher/crc-armv8-aarch64-ce.S new file mode 100644 index 0000000000..060abdfe9a --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/crc-armv8-aarch64-ce.S @@ -0,0 +1,497 @@ +/* crc-armv8-aarch64-ce.S - ARMv8/CE PMULL accelerated CRC implementation + * Copyright (C) 2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. 
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include "asm-common-aarch64.h" + +#if defined(__AARCH64EL__) && \ + defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO) + +.cpu generic+simd+crypto + +.text + + +/* Structure of crc32_consts_s */ + +#define consts_k(idx) ((idx) * 8) +#define consts_my_p(idx) (consts_k(6) + (idx) * 8) + +/* Constants */ + +.align 6 +.Lcrc32_constants: +.Lcrc32_partial_fold_input_mask: + .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + .byte 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 +.Lcrc32_refl_shuf_shift: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 + .byte 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f +.Lcrc32_shuf_shift: + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff +.Lcrc32_bswap_shuf: + .byte 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08 + .byte 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00 + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + .byte 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + + +/* + * void _gcry_crc32r_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, + * const struct crc32_consts_s *consts); + */ +.align 3 +.globl _gcry_crc32r_armv8_ce_bulk +ELF(.type _gcry_crc32r_armv8_ce_bulk,%function;) +_gcry_crc32r_armv8_ce_bulk: + /* input: + * x0: pcrc + * x1: inbuf + * x2: inlen + * x3: consts + */ + CFI_STARTPROC() + + GET_DATA_POINTER(x7, .Lcrc32_constants) + add x9, x3, #consts_k(5 - 1) + cmp x2, #128 + + b.lo .Lcrc32r_fold_by_one_setup + + eor v4.16b, v4.16b, v4.16b + add x4, x3, #consts_k(1 - 1) + ld1 {v4.s}[0], [x0] /* load pcrc */ + ld1 {v0.16b-v3.16b}, [x1], #64 /* load 64 bytes of input */ + sub x2, x2, #64 + ld1 {v6.16b}, [x4] + eor v0.16b, v0.16b, v4.16b + + add x4, x3, #consts_k(3 - 1) + add x5, x3, #consts_my_p(0) + +.Lcrc32r_fold_by_four: + + /* Fold by 4. */ + ld1 {v16.16b-v19.16b}, [x1], #64 /* load 64 bytes of input */ + sub x2, x2, #64 + pmull v20.1q, v0.1d, v6.1d + pmull v21.1q, v1.1d, v6.1d + pmull v22.1q, v2.1d, v6.1d + pmull v23.1q, v3.1d, v6.1d + cmp x2, #64 + pmull2 v24.1q, v0.2d, v6.2d + pmull2 v25.1q, v1.2d, v6.2d + pmull2 v26.1q, v2.2d, v6.2d + pmull2 v27.1q, v3.2d, v6.2d + eor v0.16b, v20.16b, v16.16b + eor v1.16b, v21.16b, v17.16b + eor v2.16b, v22.16b, v18.16b + eor v3.16b, v23.16b, v19.16b + eor v0.16b, v0.16b, v24.16b + eor v1.16b, v1.16b, v25.16b + eor v2.16b, v2.16b, v26.16b + eor v3.16b, v3.16b, v27.16b + b.hs .Lcrc32r_fold_by_four + + ld1 {v6.16b}, [x4] + ld1 {v5.16b}, [x5] + + cmp x2, #16 + + /* Fold 4 to 1. 
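+     (Each pmull/pmull2 pair below multiplies the 128-bit accumulator
+     v0 by the k3/k4 constants in v6, low and high 64-bit halves
+     respectively, and the two products are XORed into the next
+     buffered vector v1, then v2, then v3, collapsing the four
+     fold-by-4 accumulators into one in v0.)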
*/ + + pmull v16.1q, v0.1d, v6.1d + pmull2 v4.1q, v0.2d, v6.2d + eor v0.16b, v16.16b, v1.16b + eor v0.16b, v0.16b, v4.16b + + pmull v16.1q, v0.1d, v6.1d + pmull2 v4.1q, v0.2d, v6.2d + eor v0.16b, v16.16b, v2.16b + eor v0.16b, v0.16b, v4.16b + + pmull v16.1q, v0.1d, v6.1d + pmull2 v4.1q, v0.2d, v6.2d + eor v0.16b, v16.16b, v3.16b + eor v0.16b, v0.16b, v4.16b + + b.lo .Lcrc32r_fold_by_one_done + b .Lcrc32r_fold_by_one + +.Lcrc32r_fold_by_one_setup: + + eor v1.16b, v1.16b, v1.16b + add x4, x3, #consts_k(3 - 1) + add x5, x3, #consts_my_p(0) + sub x2, x2, #16 + ld1 {v1.s}[0], [x0] /* load pcrc */ + ld1 {v0.16b}, [x1], #16 /* load 16 bytes of input */ + cmp x2, #16 + ld1 {v6.16b}, [x4] /* load k3k4 */ + ld1 {v5.16b}, [x5] /* load my_p */ + eor v0.16b, v0.16b, v1.16b + b.lo .Lcrc32r_fold_by_one_done + +.Lcrc32r_fold_by_one: + sub x2, x2, #16 + ld1 {v2.16b}, [x1], #16 /* load 16 bytes of input */ + pmull v3.1q, v0.1d, v6.1d + pmull2 v1.1q, v0.2d, v6.2d + cmp x2, #16 + eor v0.16b, v3.16b, v2.16b + eor v0.16b, v0.16b, v1.16b + + b.hs .Lcrc32r_fold_by_one + +.Lcrc32r_fold_by_one_done: + + cmp x2, #0 + b.eq .Lcrc32r_final_fold + + /* Partial fold. */ + + add x4, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants + add x5, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants + 16 + add x6, x7, #.Lcrc32_partial_fold_input_mask - .Lcrc32_constants + sub x8, x2, #16 + add x4, x4, x2 + add x5, x5, x2 + add x6, x6, x2 + add x8, x1, x8 + + /* Load last input and add padding zeros. */ + ld1 {v4.16b}, [x4] + eor x2, x2, x2 + ld1 {v3.16b}, [x5] + ld1 {v2.16b}, [x6] + tbl v30.16b, {v0.16b}, v4.16b + ld1 {v4.16b}, [x8] + tbl v1.16b, {v0.16b}, v3.16b + + pmull v0.1q, v30.1d, v6.1d + and v2.16b, v2.16b, v4.16b + pmull2 v31.1q, v30.2d, v6.2d + orr v2.16b, v2.16b, v1.16b + eor v0.16b, v0.16b, v31.16b + eor v0.16b, v0.16b, v2.16b + +.Lcrc32r_final_fold: + + /* Final fold. 
*/ + + eor v2.16b, v2.16b, v2.16b /* zero reg */ + ld1 {v7.16b}, [x9] + + /* reduce 128-bits to 96-bits */ + ext v6.16b, v6.16b, v6.16b, #8 /* swap high and low parts */ + mov v1.16b, v0.16b + pmull v0.1q, v0.1d, v6.1d + ext v6.16b, v5.16b, v5.16b, #8 /* swap high and low parts */ + ext v1.16b, v1.16b, v2.16b, #8 /* high to low, high zeroed */ + eor v3.16b, v0.16b, v1.16b + + /* reduce 96-bits to 64-bits */ + eor v1.16b, v1.16b, v1.16b + ext v0.16b, v3.16b, v2.16b, #4 /* [00][00][x2][x1] */ + mov v1.s[0], v3.s[0] /* [00][00][00][x0] */ + eor v3.16b, v3.16b, v3.16b + pmull v1.1q, v1.1d, v7.1d /* [00][00][xx][xx] */ + eor v0.16b, v0.16b, v1.16b /* top 64-bit are zero */ + + /* barrett reduction */ + mov v3.s[1], v0.s[0] /* [00][00][x1][00] */ + ext v0.16b, v2.16b, v0.16b, #12 /* [??][x1][??][00] */ + pmull v1.1q, v3.1d, v5.1d /* [00][xx][xx][00] */ + pmull v1.1q, v1.1d, v6.1d /* [00][xx][xx][00] */ + eor v0.16b, v0.16b, v1.16b + + /* store CRC */ + st1 {v0.s}[2], [x0] + + ret + CFI_ENDPROC() +ELF(.size _gcry_crc32r_armv8_ce_bulk,.-_gcry_crc32r_armv8_ce_bulk;) + +/* + * void _gcry_crc32r_armv8_ce_reduction_4 (u32 *pcrc, u32 data, u32 crc, + * const struct crc32_consts_s *consts); + */ +.align 3 +.globl _gcry_crc32r_armv8_ce_reduction_4 +ELF(.type _gcry_crc32r_armv8_ce_reduction_4,%function;) +_gcry_crc32r_armv8_ce_reduction_4: + /* input: + * w0: data + * w1: crc + * x2: crc32 constants + */ + CFI_STARTPROC() + + eor v0.16b, v0.16b, v0.16b + add x2, x2, #consts_my_p(0) + eor v1.16b, v1.16b, v1.16b + ld1 {v5.16b}, [x2] + + mov v0.s[0], w0 + pmull v0.1q, v0.1d, v5.1d /* [00][00][xx][xx] */ + mov v1.s[1], w1 + mov v0.s[2], v0.s[0] /* [00][x0][x1][x0] */ + pmull2 v0.1q, v0.2d, v5.2d /* [00][00][xx][xx] */ + eor v0.16b, v0.16b, v1.16b + + mov w0, v0.s[1] + + ret + CFI_ENDPROC() +ELF(.size _gcry_crc32r_armv8_ce_reduction_4,.-_gcry_crc32r_armv8_ce_reduction_4;) + +/* + * void _gcry_crc32_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, + * const struct crc32_consts_s *consts); + */ +.align 3 +.globl _gcry_crc32_armv8_ce_bulk +ELF(.type _gcry_crc32_armv8_ce_bulk,%function;) +_gcry_crc32_armv8_ce_bulk: + /* input: + * x0: pcrc + * x1: inbuf + * x2: inlen + * x3: consts + */ + CFI_STARTPROC() + + GET_DATA_POINTER(x7, .Lcrc32_constants) + add x4, x7, #.Lcrc32_bswap_shuf - .Lcrc32_constants + cmp x2, #128 + ld1 {v7.16b}, [x4] + + b.lo .Lcrc32_fold_by_one_setup + + eor v4.16b, v4.16b, v4.16b + add x4, x3, #consts_k(1 - 1) + ld1 {v4.s}[0], [x0] /* load pcrc */ + ld1 {v0.16b-v3.16b}, [x1], #64 /* load 64 bytes of input */ + sub x2, x2, #64 + ld1 {v6.16b}, [x4] + eor v0.16b, v0.16b, v4.16b + ext v4.16b, v6.16b, v6.16b, #8 + tbl v0.16b, { v0.16b }, v7.16b /* byte swap */ + tbl v1.16b, { v1.16b }, v7.16b /* byte swap */ + tbl v2.16b, { v2.16b }, v7.16b /* byte swap */ + tbl v3.16b, { v3.16b }, v7.16b /* byte swap */ + + add x4, x3, #consts_k(3 - 1) + add x5, x3, #consts_my_p(0) + +.Lcrc32_fold_by_four: + + /* Fold by 4. 
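+     (Same fold-by-4 structure as the reflected routine above, except
+     that each input vector is first byte-swapped with tbl and the
+     .Lcrc32_bswap_shuf mask, and the folding constant halves are used
+     swapped, via ext, to match the non-reflected bit order.)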
*/ + ld1 {v16.16b-v19.16b}, [x1], #64 /* load 64 bytes of input */ + sub x2, x2, #64 + tbl v16.16b, { v16.16b }, v7.16b /* byte swap */ + tbl v17.16b, { v17.16b }, v7.16b /* byte swap */ + tbl v18.16b, { v18.16b }, v7.16b /* byte swap */ + tbl v19.16b, { v19.16b }, v7.16b /* byte swap */ + cmp x2, #64 + pmull2 v20.1q, v0.2d, v4.2d + pmull2 v21.1q, v1.2d, v4.2d + pmull2 v22.1q, v2.2d, v4.2d + pmull2 v23.1q, v3.2d, v4.2d + pmull v24.1q, v0.1d, v4.1d + pmull v25.1q, v1.1d, v4.1d + pmull v26.1q, v2.1d, v4.1d + pmull v27.1q, v3.1d, v4.1d + eor v0.16b, v20.16b, v16.16b + eor v1.16b, v21.16b, v17.16b + eor v2.16b, v22.16b, v18.16b + eor v3.16b, v23.16b, v19.16b + eor v0.16b, v0.16b, v24.16b + eor v1.16b, v1.16b, v25.16b + eor v2.16b, v2.16b, v26.16b + eor v3.16b, v3.16b, v27.16b + b.hs .Lcrc32_fold_by_four + + ld1 {v6.16b}, [x4] + ld1 {v5.16b}, [x5] + ext v6.16b, v6.16b, v6.16b, #8 + ext v5.16b, v5.16b, v5.16b, #8 + + cmp x2, #16 + + /* Fold 4 to 1. */ + + pmull2 v16.1q, v0.2d, v6.2d + pmull v4.1q, v0.1d, v6.1d + eor v0.16b, v16.16b, v1.16b + eor v0.16b, v0.16b, v4.16b + + pmull2 v16.1q, v0.2d, v6.2d + pmull v4.1q, v0.1d, v6.1d + eor v0.16b, v16.16b, v2.16b + eor v0.16b, v0.16b, v4.16b + + pmull2 v16.1q, v0.2d, v6.2d + pmull v4.1q, v0.1d, v6.1d + eor v0.16b, v16.16b, v3.16b + eor v0.16b, v0.16b, v4.16b + + b.lo .Lcrc32_fold_by_one_done + b .Lcrc32_fold_by_one + +.Lcrc32_fold_by_one_setup: + + eor v1.16b, v1.16b, v1.16b + add x4, x3, #consts_k(3 - 1) + add x5, x3, #consts_my_p(0) + ld1 {v1.s}[0], [x0] /* load pcrc */ + sub x2, x2, #16 + ld1 {v0.16b}, [x1], #16 /* load 16 bytes of input */ + ld1 {v6.16b}, [x4] /* load k3k4 */ + ld1 {v5.16b}, [x5] /* load my_p */ + eor v0.16b, v0.16b, v1.16b + cmp x2, #16 + ext v6.16b, v6.16b, v6.16b, #8 /* swap high and low parts */ + ext v5.16b, v5.16b, v5.16b, #8 /* swap high and low parts */ + tbl v0.16b, { v0.16b }, v7.16b /* byte swap */ + b.lo .Lcrc32_fold_by_one_done + +.Lcrc32_fold_by_one: + sub x2, x2, #16 + ld1 {v2.16b}, [x1], #16 /* load 16 bytes of input */ + pmull2 v3.1q, v0.2d, v6.2d + tbl v2.16b, { v2.16b }, v7.16b /* byte swap */ + pmull v1.1q, v0.1d, v6.1d + cmp x2, #16 + eor v0.16b, v3.16b, v2.16b + eor v0.16b, v0.16b, v1.16b + + b.hs .Lcrc32_fold_by_one + +.Lcrc32_fold_by_one_done: + + cmp x2, #0 + b.eq .Lcrc32_final_fold + + /* Partial fold. */ + + add x4, x7, #.Lcrc32_refl_shuf_shift - .Lcrc32_constants + 32 + add x5, x7, #.Lcrc32_shuf_shift - .Lcrc32_constants + 16 + add x6, x7, #.Lcrc32_partial_fold_input_mask - .Lcrc32_constants + sub x8, x2, #16 + sub x4, x4, x2 + add x5, x5, x2 + add x6, x6, x2 + add x8, x1, x8 + + /* Load last input and add padding zeros. */ + ld1 {v4.16b}, [x4] + eor x2, x2, x2 + ld1 {v3.16b}, [x5] + ld1 {v2.16b}, [x6] + tbl v30.16b, {v0.16b}, v4.16b + ld1 {v4.16b}, [x8] + tbl v1.16b, {v0.16b}, v3.16b + and v2.16b, v2.16b, v4.16b + + pmull2 v0.1q, v30.2d, v6.2d + orr v2.16b, v2.16b, v1.16b + pmull v1.1q, v30.1d, v6.1d + tbl v2.16b, {v2.16b}, v7.16b /* byte swap */ + eor v0.16b, v0.16b, v1.16b + eor v0.16b, v0.16b, v2.16b + +.Lcrc32_final_fold: + + /* Final fold. 
*/ + + eor v2.16b, v2.16b, v2.16b /* zero reg */ + + /* reduce 128-bits to 96-bits */ + add x4, x3, #consts_k(4) + ext v3.16b, v6.16b, v6.16b, #8 /* swap high and low parts */ + eor v6.16b, v6.16b, v6.16b + mov v1.16b, v0.16b + pmull2 v0.1q, v0.2d, v3.2d + ld1 {v6.d}[1], [x4] /* load k4 */ + ext v1.16b, v2.16b, v1.16b, #8 /* low to high, low zeroed */ + eor v3.16b, v0.16b, v1.16b /* bottom 32-bit are zero */ + + /* reduce 96-bits to 64-bits */ + eor v0.16b, v0.16b, v0.16b + eor v1.16b, v1.16b, v1.16b + mov v0.s[1], v3.s[1] /* [00][00][x1][00] */ + mov v1.s[2], v3.s[3] /* [00][x3][00][00] */ + mov v0.s[2], v3.s[2] /* [00][x2][x1][00] */ + eor v3.16b, v3.16b, v3.16b + pmull2 v1.1q, v1.2d, v6.2d /* [00][xx][xx][00] */ + eor v0.16b, v0.16b, v1.16b /* top and bottom 32-bit are zero */ + + /* barrett reduction */ + mov v3.s[0], v0.s[1] /* [00][00][00][x1] */ + pmull2 v0.1q, v0.2d, v5.2d /* [00][xx][xx][xx] */ + ext v0.16b, v0.16b, v2.16b, #4 /* [00][00][xx][xx] */ + pmull v0.1q, v0.1d, v5.1d + eor v0.16b, v0.16b, v3.16b + + /* store CRC in input endian */ + rev32 v0.8b, v0.8b /* byte swap */ + st1 {v0.s}[0], [x0] + + ret + CFI_ENDPROC() +ELF(.size _gcry_crc32_armv8_ce_bulk,.-_gcry_crc32_armv8_ce_bulk;) + +/* + * void _gcry_crc32_armv8_ce_reduction_4 (u32 *pcrc, u32 data, u32 crc, + * const struct crc32_consts_s *consts); + */ +.align 3 +.globl _gcry_crc32_armv8_ce_reduction_4 +ELF(.type _gcry_crc32_armv8_ce_reduction_4,%function;) +_gcry_crc32_armv8_ce_reduction_4: + /* input: + * w0: data + * w1: crc + * x2: crc32 constants + */ + CFI_STARTPROC() + + eor v0.16b, v0.16b, v0.16b + add x2, x2, #consts_my_p(0) + eor v1.16b, v1.16b, v1.16b + ld1 {v5.16b}, [x2] + + mov v0.s[1], w0 + pmull v0.1q, v0.1d, v5.1d /* [00][xx][xx][00] */ + mov v1.s[0], w1 + pmull2 v0.1q, v0.2d, v5.2d /* [00][00][xx][xx] */ + eor v0.16b, v0.16b, v1.16b + + rev32 v0.8b, v0.8b /* Return in input endian */ + mov w0, v0.s[0] + + ret + CFI_ENDPROC() +ELF(.size _gcry_crc32_armv8_ce_reduction_4,.-_gcry_crc32_armv8_ce_reduction_4;) + +#endif diff --git a/comm/third_party/libgcrypt/cipher/crc-armv8-ce.c b/comm/third_party/libgcrypt/cipher/crc-armv8-ce.c new file mode 100644 index 0000000000..17e5554821 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/crc-armv8-ce.c @@ -0,0 +1,229 @@ +/* crc-armv8-ce.c - ARMv8-CE PMULL accelerated CRC implementation + * Copyright (C) 2019 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_ARM_CRYPTO_SUPPORT) && defined(__AARCH64EL__) && \
+    defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+    defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+
+
+#define ALIGNED_16 __attribute__ ((aligned (16)))
+
+
+struct u16_unaligned_s
+{
+  u16 a;
+} __attribute__((packed, aligned (1), may_alias));
+
+struct u32_unaligned_s
+{
+  u32 a;
+} __attribute__((packed, aligned (1), may_alias));
+
+
+/* Constants structure for generic reflected/non-reflected CRC32 PMULL
+ * functions. */
+struct crc32_consts_s
+{
+  /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
+  u64 k[6];
+  /* my_p: { floor(x^64 / P(x)), P(x) } */
+  u64 my_p[2];
+};
+
+/* PMULL constants for CRC32 and CRC32RFC1510. */
+static const struct crc32_consts_s crc32_consts ALIGNED_16 =
+{
+  { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
+    U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
+    U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
+    U64_C(0x163cd6124), 0                   /* y = 2 */
+  },
+  { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
+    U64_C(0x1f7011641), U64_C(0x1db710641)
+  }
+};
+
+/* PMULL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */
+static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 =
+{
+  { /* k[6] = x^(32*y) mod P(x) << 32*/
+    U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */
+    U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */
+    U64_C(0xd9fe8c00) << 32, 0                        /* y = 2 */
+  },
+  { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */
+    U64_C(0x1f845fe24), U64_C(0x1864cfb00)
+  }
+};
+
+
+u32 _gcry_crc32r_armv8_ce_reduction_4 (u32 data, u32 crc,
+                                       const struct crc32_consts_s *consts);
+void _gcry_crc32r_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+                                 const struct crc32_consts_s *consts);
+
+u32 _gcry_crc32_armv8_ce_reduction_4 (u32 data, u32 crc,
+                                      const struct crc32_consts_s *consts);
+void _gcry_crc32_armv8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen,
+                                const struct crc32_consts_s *consts);
+
+
+static inline void
+crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+                     const struct crc32_consts_s *consts)
+{
+  u32 crc = *pcrc;
+  u32 data;
+
+  while (inlen >= 4)
+    {
+      data = ((const struct u32_unaligned_s *)inbuf)->a;
+      data ^= crc;
+
+      inlen -= 4;
+      inbuf += 4;
+
+      crc = _gcry_crc32r_armv8_ce_reduction_4 (data, 0, consts);
+    }
+
+  switch (inlen)
+    {
+    case 0:
+      break;
+    case 1:
+      data = inbuf[0];
+      data ^= crc;
+      data <<= 24;
+      crc >>= 8;
+      crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 2:
+      data = ((const struct u16_unaligned_s *)inbuf)->a;
+      data ^= crc;
+      data <<= 16;
+      crc >>= 16;
+      crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 3:
+      data = ((const struct u16_unaligned_s *)inbuf)->a;
+      data |= inbuf[2] << 16;
+      data ^= crc;
+      data <<= 8;
+      crc >>= 24;
+      crc = _gcry_crc32r_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    }
+
+  *pcrc = crc;
+}
+
+static inline void
+crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen,
+                    const struct crc32_consts_s *consts)
+{
+  u32 crc = *pcrc;
+  u32 data;
+
+  while (inlen >= 4)
+    {
+      data = ((const struct u32_unaligned_s *)inbuf)->a;
+      data ^= crc;
+      data = _gcry_bswap32(data);
+
+      inlen -= 4;
+      inbuf += 4;
+
+      crc = _gcry_crc32_armv8_ce_reduction_4 (data, 0, consts);
+    }
+
+  switch (inlen)
+    {
+    case 0:
+      break;
+    case 1:
+      data = inbuf[0];
+      data ^= crc;
+      data = data & 0xffU;
+      crc = _gcry_bswap32(crc >> 8);
+      crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 2:
+      data = ((const struct u16_unaligned_s *)inbuf)->a;
+      data ^= crc;
+      data = _gcry_bswap32(data << 16);
+      crc = _gcry_bswap32(crc >> 16);
+      crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    case 3:
+      data = ((const struct u16_unaligned_s *)inbuf)->a;
+      data |= inbuf[2] << 16;
+      data ^= crc;
+      data = _gcry_bswap32(data << 8);
+      crc = crc & 0xff000000U;
+      crc = _gcry_crc32_armv8_ce_reduction_4 (data, crc, consts);
+      break;
+    }
+
+  *pcrc = crc;
+}
+
+void
+_gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+  const struct crc32_consts_s *consts = &crc32_consts;
+
+  if (!inlen)
+    return;
+
+  if (inlen >= 16)
+    _gcry_crc32r_armv8_ce_bulk (pcrc, inbuf, inlen, consts);
+  else
+    crc32r_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+void
+_gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen)
+{
+  const struct crc32_consts_s *consts = &crc24rfc2440_consts;
+
+  if (!inlen)
+    return;
+
+  /* Note: *pcrc in input endian. */
+
+  if (inlen >= 16)
+    _gcry_crc32_armv8_ce_bulk (pcrc, inbuf, inlen, consts);
+  else
+    crc32_less_than_16 (pcrc, inbuf, inlen, consts);
+}
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/crc-intel-pclmul.c b/comm/third_party/libgcrypt/cipher/crc-intel-pclmul.c
new file mode 100644
index 0000000000..8c8b1915ab
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/crc-intel-pclmul.c
@@ -0,0 +1,939 @@
+/* crc-intel-pclmul.c - Intel PCLMUL accelerated CRC implementation
+ * Copyright (C) 2016 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT) && \
+    __GNUC__ >= 4 && \
+    ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+
+
+#if _GCRY_GCC_VERSION >= 40400 /* 4.4 */
+/* Prevent compiler from issuing SSE instructions between asm blocks.
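+   The CRC routines below keep live values in xmm registers across
+   consecutive asm statements without listing them as clobbers, so any
+   compiler-generated SSE code scheduled in between could corrupt that
+   state.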
*/ +# pragma GCC target("no-sse") +#endif +#if __clang__ +# pragma clang attribute push (__attribute__((target("no-sse"))), apply_to = function) +#endif + + +#define ALWAYS_INLINE inline __attribute__((always_inline)) +#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function)) + +#define ASM_FUNC_ATTR NO_INSTRUMENT_FUNCTION +#define ASM_FUNC_ATTR_INLINE ASM_FUNC_ATTR ALWAYS_INLINE + + +#define ALIGNED_16 __attribute__ ((aligned (16))) + + +struct u16_unaligned_s +{ + u16 a; +} __attribute__((packed, aligned (1), may_alias)); + + +/* Constants structure for generic reflected/non-reflected CRC32 CLMUL + * functions. */ +struct crc32_consts_s +{ + /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */ + u64 k[6]; + /* my_p: { floor(x^64 / P(x)), P(x) } */ + u64 my_p[2]; +}; + + +/* CLMUL constants for CRC32 and CRC32RFC1510. */ +static const struct crc32_consts_s crc32_consts ALIGNED_16 = +{ + { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */ + U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */ + U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */ + U64_C(0x163cd6124), 0 /* y = 2 */ + }, + { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */ + U64_C(0x1f7011641), U64_C(0x1db710641) + } +}; + +/* CLMUL constants for CRC24RFC2440 (polynomial multiplied with x⁸). */ +static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_16 = +{ + { /* k[6] = x^(32*y) mod P(x) << 32*/ + U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */ + U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */ + U64_C(0xd9fe8c00) << 32, 0 /* y = 2 */ + }, + { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */ + U64_C(0x1f845fe24), U64_C(0x1864cfb00) + } +}; + +/* Common constants for CRC32 algorithms. 
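+   The shuffle tables below act as sliding windows for pshufb: a mask
+   byte with its high bit set (0xff here) produces a zero output byte,
+   so picking a 16-byte window at an offset derived from the input
+   length shifts a partial block into place while zero-padding the rest.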
*/ +static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_16 = + { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; +static const byte crc32_shuf_shift[3 * 16] ALIGNED_16 = + { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; +static const byte *crc32_bswap_shuf = &crc32_shuf_shift[16]; +static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_16 = + { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; +static const u64 crc32_merge9to15_shuf[15 - 9 + 1][2] ALIGNED_16 = + { + { U64_C(0x0706050403020100), U64_C(0xffffffffffffff0f) }, /* 9 */ + { U64_C(0x0706050403020100), U64_C(0xffffffffffff0f0e) }, + { U64_C(0x0706050403020100), U64_C(0xffffffffff0f0e0d) }, + { U64_C(0x0706050403020100), U64_C(0xffffffff0f0e0d0c) }, + { U64_C(0x0706050403020100), U64_C(0xffffff0f0e0d0c0b) }, + { U64_C(0x0706050403020100), U64_C(0xffff0f0e0d0c0b0a) }, + { U64_C(0x0706050403020100), U64_C(0xff0f0e0d0c0b0a09) }, /* 15 */ + }; +static const u64 crc32_merge5to7_shuf[7 - 5 + 1][2] ALIGNED_16 = + { + { U64_C(0xffffff0703020100), U64_C(0xffffffffffffffff) }, /* 5 */ + { U64_C(0xffff070603020100), U64_C(0xffffffffffffffff) }, + { U64_C(0xff07060503020100), U64_C(0xffffffffffffffff) }, /* 7 */ + }; + +/* PCLMUL functions for reflected CRC32. */ +static ASM_FUNC_ATTR_INLINE void +crc32_reflected_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + if (inlen >= 8 * 16) + { + asm volatile ("movd %[crc], %%xmm4\n\t" + "movdqu %[inbuf_0], %%xmm0\n\t" + "movdqu %[inbuf_1], %%xmm1\n\t" + "movdqu %[inbuf_2], %%xmm2\n\t" + "movdqu %[inbuf_3], %%xmm3\n\t" + "pxor %%xmm4, %%xmm0\n\t" + : + : [inbuf_0] "m" (inbuf[0 * 16]), + [inbuf_1] "m" (inbuf[1 * 16]), + [inbuf_2] "m" (inbuf[2 * 16]), + [inbuf_3] "m" (inbuf[3 * 16]), + [crc] "m" (*pcrc) + ); + + inbuf += 4 * 16; + inlen -= 4 * 16; + + asm volatile ("movdqa %[k1k2], %%xmm4\n\t" + : + : [k1k2] "m" (consts->k[1 - 1]) + ); + + /* Fold by 4. 
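+         (Each pclmulqdq below selects 64-bit operand halves via its
+         immediate: $0x00 multiplies the two low qwords, $0x11 the two
+         high qwords, of the data register and the k1/k2 constants in
+         xmm4; XORing both products into the next input block folds
+         64 bytes per iteration.)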
*/ + while (inlen >= 4 * 16) + { + asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t" + "movdqa %%xmm0, %%xmm6\n\t" + "pclmulqdq $0x00, %%xmm4, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + + "movdqu %[inbuf_1], %%xmm5\n\t" + "movdqa %%xmm1, %%xmm6\n\t" + "pclmulqdq $0x00, %%xmm4, %%xmm1\n\t" + "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "pxor %%xmm6, %%xmm1\n\t" + + "movdqu %[inbuf_2], %%xmm5\n\t" + "movdqa %%xmm2, %%xmm6\n\t" + "pclmulqdq $0x00, %%xmm4, %%xmm2\n\t" + "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm6, %%xmm2\n\t" + + "movdqu %[inbuf_3], %%xmm5\n\t" + "movdqa %%xmm3, %%xmm6\n\t" + "pclmulqdq $0x00, %%xmm4, %%xmm3\n\t" + "pclmulqdq $0x11, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm6, %%xmm3\n\t" + : + : [inbuf_0] "m" (inbuf[0 * 16]), + [inbuf_1] "m" (inbuf[1 * 16]), + [inbuf_2] "m" (inbuf[2 * 16]), + [inbuf_3] "m" (inbuf[3 * 16]) + ); + + inbuf += 4 * 16; + inlen -= 4 * 16; + } + + asm volatile ("movdqa %[k3k4], %%xmm6\n\t" + "movdqa %[my_p], %%xmm5\n\t" + : + : [k3k4] "m" (consts->k[3 - 1]), + [my_p] "m" (consts->my_p[0]) + ); + + /* Fold 4 to 1. */ + + asm volatile ("movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + + "movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + + "movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm4\n\t" + "pxor %%xmm3, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + : + : + ); + } + else + { + asm volatile ("movd %[crc], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "movdqa %[k3k4], %%xmm6\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "movdqa %[my_p], %%xmm5\n\t" + : + : [inbuf] "m" (*inbuf), + [crc] "m" (*pcrc), + [k3k4] "m" (consts->k[3 - 1]), + [my_p] "m" (consts->my_p[0]) + ); + + inbuf += 16; + inlen -= 16; + } + + /* Fold by 1. */ + if (inlen >= 16) + { + while (inlen >= 16) + { + /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */ + asm volatile ("movdqu %[inbuf], %%xmm2\n\t" + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf) + ); + + inbuf += 16; + inlen -= 16; + } + } + + /* Partial fold. */ + if (inlen) + { + /* Load last input and add padding zeros. */ + asm volatile ("movdqu %[shr_shuf], %%xmm3\n\t" + "movdqu %[shl_shuf], %%xmm4\n\t" + "movdqu %[mask], %%xmm2\n\t" + + "movdqa %%xmm0, %%xmm1\n\t" + "pshufb %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf], %%xmm4\n\t" + "pshufb %%xmm3, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "por %%xmm1, %%xmm2\n\t" + + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x00, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm1\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + : + : [inbuf] "m" (*(inbuf - 16 + inlen)), + [mask] "m" (crc32_partial_fold_input_mask[inlen]), + [shl_shuf] "m" (crc32_refl_shuf_shift[inlen]), + [shr_shuf] "m" (crc32_refl_shuf_shift[inlen + 16]) + ); + + inbuf += inlen; + inlen -= inlen; + } + + /* Final fold. 
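+   *
+   * Reduce the remaining 128-bit value to the 32-bit CRC: fold with k4
+   * (128 -> 96 bits), fold with k5 = x^(32*2) mod P(x) (96 -> 64 bits),
+   * then apply Barrett reduction with my_p = { floor(x^64 / P(x)), P(x) }
+   * to get the remainder modulo P(x).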
*/ + asm volatile (/* reduce 128-bits to 96-bits */ + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t" + "psrldq $8, %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" + + /* reduce 96-bits to 64-bits */ + "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */ + "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */ + "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */ + "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */ + + /* barrett reduction */ + "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */ + "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */ + "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC */ + "pextrd $2, %%xmm0, %[out]\n\t" + : [out] "=m" (*pcrc) + : [k5] "m" (consts->k[5 - 1]) + ); +} + +static ASM_FUNC_ATTR_INLINE void +crc32_reflected_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + if (inlen < 4) + { + u32 crc = *pcrc; + u32 data; + + asm volatile ("movdqa %[my_p], %%xmm5\n\t" + : + : [my_p] "m" (consts->my_p[0]) + ); + + if (inlen == 1) + { + data = inbuf[0]; + data ^= crc; + data <<= 24; + crc >>= 8; + } + else if (inlen == 2) + { + data = ((const struct u16_unaligned_s *)inbuf)->a; + data ^= crc; + data <<= 16; + crc >>= 16; + } + else + { + data = ((const struct u16_unaligned_s *)inbuf)->a; + data |= inbuf[2] << 16; + data ^= crc; + data <<= 8; + crc >>= 24; + } + + /* Barrett reduction */ + asm volatile ("movd %[in], %%xmm0\n\t" + "movd %[crc], %%xmm1\n\t" + + "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + "psllq $32, %%xmm1\n\t" + "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */ + "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pxor %%xmm1, %%xmm0\n\t" + + "pextrd $1, %%xmm0, %[out]\n\t" + : [out] "=m" (*pcrc) + : [in] "rm" (data), + [crc] "rm" (crc) + ); + } + else if (inlen == 4) + { + /* Barrett reduction */ + asm volatile ("movd %[crc], %%xmm1\n\t" + "movd %[in], %%xmm0\n\t" + "movdqa %[my_p], %%xmm5\n\t" + "pxor %%xmm1, %%xmm0\n\t" + + "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pshufd $0xfc, %%xmm0, %%xmm0\n\t" /* [00][00][00][x] */ + "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + + "pextrd $1, %%xmm0, %[out]\n\t" + : [out] "=m" (*pcrc) + : [in] "m" (*inbuf), + [crc] "m" (*pcrc), + [my_p] "m" (consts->my_p[0]) + ); + } + else + { + asm volatile ("movdqu %[shuf], %%xmm4\n\t" + "movd %[crc], %%xmm1\n\t" + "movdqa %[my_p], %%xmm5\n\t" + "movdqa %[k3k4], %%xmm6\n\t" + : + : [shuf] "m" (crc32_refl_shuf_shift[inlen]), + [crc] "m" (*pcrc), + [my_p] "m" (consts->my_p[0]), + [k3k4] "m" (consts->k[3 - 1]) + ); + + if (inlen >= 8) + { + asm volatile ("movq %[inbuf], %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf) + ); + if (inlen > 8) + { + asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/ + "movq %[inbuf_tail], %%xmm2\n\t" + "punpcklqdq %%xmm2, %%xmm0\n\t" + "pshufb %[merge_shuf], %%xmm0\n\t" + : + : [inbuf_tail] "m" (inbuf[inlen - 8]), + [merge_shuf] "m" + (*crc32_merge9to15_shuf[inlen - 9]) + ); + } + } + else + { + asm volatile ("movd %[inbuf], %%xmm0\n\t" + "pinsrd $1, %[inbuf_tail], %%xmm0\n\t" + "pshufb %[merge_shuf], %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf), + [inbuf_tail] "m" (inbuf[inlen - 4]), + [merge_shuf] "m" + (*crc32_merge5to7_shuf[inlen - 5]) + ); + } + + /* Final fold. 
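+       *
+       * The 5..15 byte input assembled in XMM0 is XORed with the current
+       * CRC, moved into position with the shuffle mask, and then reduced
+       * with the same k4/k5 folds and Barrett step as the bulk code.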
*/ + asm volatile ("pxor %%xmm1, %%xmm0\n\t" + "pshufb %%xmm4, %%xmm0\n\t" + + /* reduce 128-bits to 96-bits */ + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm0\n\t" + "psrldq $8, %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" /* top 32-bit are zero */ + + /* reduce 96-bits to 64-bits */ + "pshufd $0xfc, %%xmm0, %%xmm1\n\t" /* [00][00][00][x] */ + "pshufd $0xf9, %%xmm0, %%xmm0\n\t" /* [00][00][x>>64][x>>32] */ + "pclmulqdq $0x00, %[k5], %%xmm1\n\t" /* [00][00][xx][xx] */ + "pxor %%xmm1, %%xmm0\n\t" /* top 64-bit are zero */ + + /* barrett reduction */ + "pshufd $0xf3, %%xmm0, %%xmm1\n\t" /* [00][00][x>>32][00] */ + "pslldq $4, %%xmm0\n\t" /* [??][x>>32][??][??] */ + "pclmulqdq $0x00, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x10, %%xmm5, %%xmm1\n\t" /* [00][xx][xx][00] */ + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC */ + "pextrd $2, %%xmm0, %[out]\n\t" + : [out] "=m" (*pcrc) + : [k5] "m" (consts->k[5 - 1]) + ); + } +} + +/* PCLMUL functions for non-reflected CRC32. */ +static ASM_FUNC_ATTR_INLINE void +crc32_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + asm volatile ("movdqa %[bswap], %%xmm7\n\t" + : + : [bswap] "m" (*crc32_bswap_shuf) + ); + + if (inlen >= 8 * 16) + { + asm volatile ("movd %[crc], %%xmm4\n\t" + "movdqu %[inbuf_0], %%xmm0\n\t" + "movdqu %[inbuf_1], %%xmm1\n\t" + "movdqu %[inbuf_2], %%xmm2\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf_3], %%xmm3\n\t" + "pshufb %%xmm7, %%xmm0\n\t" + "pshufb %%xmm7, %%xmm1\n\t" + "pshufb %%xmm7, %%xmm2\n\t" + "pshufb %%xmm7, %%xmm3\n\t" + : + : [inbuf_0] "m" (inbuf[0 * 16]), + [inbuf_1] "m" (inbuf[1 * 16]), + [inbuf_2] "m" (inbuf[2 * 16]), + [inbuf_3] "m" (inbuf[3 * 16]), + [crc] "m" (*pcrc) + ); + + inbuf += 4 * 16; + inlen -= 4 * 16; + + asm volatile ("movdqa %[k1k2], %%xmm4\n\t" + : + : [k1k2] "m" (consts->k[1 - 1]) + ); + + /* Fold by 4. */ + while (inlen >= 4 * 16) + { + asm volatile ("movdqu %[inbuf_0], %%xmm5\n\t" + "movdqa %%xmm0, %%xmm6\n\t" + "pshufb %%xmm7, %%xmm5\n\t" + "pclmulqdq $0x01, %%xmm4, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm0\n\t" + "pxor %%xmm6, %%xmm0\n\t" + + "movdqu %[inbuf_1], %%xmm5\n\t" + "movdqa %%xmm1, %%xmm6\n\t" + "pshufb %%xmm7, %%xmm5\n\t" + "pclmulqdq $0x01, %%xmm4, %%xmm1\n\t" + "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "pxor %%xmm6, %%xmm1\n\t" + + "movdqu %[inbuf_2], %%xmm5\n\t" + "movdqa %%xmm2, %%xmm6\n\t" + "pshufb %%xmm7, %%xmm5\n\t" + "pclmulqdq $0x01, %%xmm4, %%xmm2\n\t" + "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm2\n\t" + "pxor %%xmm6, %%xmm2\n\t" + + "movdqu %[inbuf_3], %%xmm5\n\t" + "movdqa %%xmm3, %%xmm6\n\t" + "pshufb %%xmm7, %%xmm5\n\t" + "pclmulqdq $0x01, %%xmm4, %%xmm3\n\t" + "pclmulqdq $0x10, %%xmm4, %%xmm6\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "pxor %%xmm6, %%xmm3\n\t" + : + : [inbuf_0] "m" (inbuf[0 * 16]), + [inbuf_1] "m" (inbuf[1 * 16]), + [inbuf_2] "m" (inbuf[2 * 16]), + [inbuf_3] "m" (inbuf[3 * 16]) + ); + + inbuf += 4 * 16; + inlen -= 4 * 16; + } + + asm volatile ("movdqa %[k3k4], %%xmm6\n\t" + "movdqa %[my_p], %%xmm5\n\t" + : + : [k3k4] "m" (consts->k[3 - 1]), + [my_p] "m" (consts->my_p[0]) + ); + + /* Fold 4 to 1. 
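+       *
+       * Fold XMM0 forward 128 bits with k3/k4 and absorb XMM1, then
+       * repeat to absorb XMM2 and finally XMM3, leaving one 128-bit
+       * remainder in XMM0.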
*/ + + asm volatile ("movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + + "movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + + "movdqa %%xmm0, %%xmm4\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm4\n\t" + "pxor %%xmm3, %%xmm0\n\t" + "pxor %%xmm4, %%xmm0\n\t" + : + : + ); + } + else + { + asm volatile ("movd %[crc], %%xmm1\n\t" + "movdqu %[inbuf], %%xmm0\n\t" + "movdqa %[k3k4], %%xmm6\n\t" + "pxor %%xmm1, %%xmm0\n\t" + "movdqa %[my_p], %%xmm5\n\t" + "pshufb %%xmm7, %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf), + [crc] "m" (*pcrc), + [k3k4] "m" (consts->k[3 - 1]), + [my_p] "m" (consts->my_p[0]) + ); + + inbuf += 16; + inlen -= 16; + } + + /* Fold by 1. */ + if (inlen >= 16) + { + while (inlen >= 16) + { + /* Load next block to XMM2. Fold XMM0 to XMM0:XMM1. */ + asm volatile ("movdqu %[inbuf], %%xmm2\n\t" + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pshufb %%xmm7, %%xmm2\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf) + ); + + inbuf += 16; + inlen -= 16; + } + } + + /* Partial fold. */ + if (inlen) + { + /* Load last input and add padding zeros. */ + asm volatile ("movdqu %[shl_shuf], %%xmm4\n\t" + "movdqu %[shr_shuf], %%xmm3\n\t" + "movdqu %[mask], %%xmm2\n\t" + + "movdqa %%xmm0, %%xmm1\n\t" + "pshufb %%xmm4, %%xmm0\n\t" + "movdqu %[inbuf], %%xmm4\n\t" + "pshufb %%xmm3, %%xmm1\n\t" + "pand %%xmm4, %%xmm2\n\t" + "por %%xmm1, %%xmm2\n\t" + + "pshufb %%xmm7, %%xmm2\n\t" + + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x01, %%xmm6, %%xmm0\n\t" + "pclmulqdq $0x10, %%xmm6, %%xmm1\n\t" + "pxor %%xmm2, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + : + : [inbuf] "m" (*(inbuf - 16 + inlen)), + [mask] "m" (crc32_partial_fold_input_mask[inlen]), + [shl_shuf] "m" (crc32_refl_shuf_shift[32 - inlen]), + [shr_shuf] "m" (crc32_shuf_shift[inlen + 16]) + ); + + inbuf += inlen; + inlen -= inlen; + } + + /* Final fold. 
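+   *
+   * Same 128 -> 96 -> 64 bit reduction and Barrett step as the reflected
+   * path, but working from the high end of the register since the data
+   * was byte-swapped on load; the 32-bit result is swapped back with
+   * 'bswapl' so that *pcrc stays in input byte order.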
*/ + asm volatile (/* reduce 128-bits to 96-bits */ + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t" + "pslldq $8, %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */ + + /* reduce 96-bits to 64-bits */ + "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */ + "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */ + "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */ + + /* barrett reduction */ + "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */ + "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */ + "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC in input endian */ + "movd %%xmm0, %%eax\n\t" + "bswapl %%eax\n\t" + "movl %%eax, %[out]\n\t" + : [out] "=m" (*pcrc) + : [k5] "m" (consts->k[5 - 1]) + : "eax" ); +} + +static ASM_FUNC_ATTR_INLINE void +crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + if (inlen < 4) + { + u32 crc = *pcrc; + u32 data; + + asm volatile ("movdqa %[my_p], %%xmm5\n\t" + : + : [my_p] "m" (consts->my_p[0]) + ); + + if (inlen == 1) + { + data = inbuf[0]; + data ^= crc; + data = _gcry_bswap32(data << 24); + crc = _gcry_bswap32(crc >> 8); + } + else if (inlen == 2) + { + data = ((const struct u16_unaligned_s *)inbuf)->a; + data ^= crc; + data = _gcry_bswap32(data << 16); + crc = _gcry_bswap32(crc >> 16); + } + else + { + data = ((const struct u16_unaligned_s *)inbuf)->a; + data |= inbuf[2] << 16; + data ^= crc; + data = _gcry_bswap32(data << 8); + crc = _gcry_bswap32(crc >> 24); + } + + /* Barrett reduction */ + asm volatile ("movd %[in], %%xmm0\n\t" + "psllq $32, %%xmm0\n\t" /* [00][00][xx][00] */ + "movd %[crc], %%xmm1\n\t" + + "pclmulqdq $0x00, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC in input endian */ + "movd %%xmm0, %%eax\n\t" + "bswapl %%eax\n\t" + "movl %%eax, %[out]\n\t" + : [out] "=m" (*pcrc) + : [in] "r" (data), + [crc] "r" (crc) + : "eax" ); + } + else if (inlen == 4) + { + /* Barrett reduction */ + asm volatile ("movd %[crc], %%xmm0\n\t" + "movd %[in], %%xmm1\n\t" + "movdqa %[my_p], %%xmm5\n\t" + : + : [in] "m" (*inbuf), + [crc] "m" (*pcrc), + [my_p] "m" (consts->my_p[0]) + : "cc" ); + + asm volatile ("pxor %%xmm1, %%xmm0\n\t" + "pshufb %[bswap], %%xmm0\n\t" /* [xx][00][00][00] */ + + "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x11, %%xmm5, %%xmm0\n\t" /* [00][00][xx][xx] */ + : + : [bswap] "m" (*crc32_bswap_shuf) + : "cc" ); + + asm volatile (/* store CRC in input endian */ + "movd %%xmm0, %%eax\n\t" + "bswapl %%eax\n\t" + "movl %%eax, %[out]\n\t" + : [out] "=m" (*pcrc) + : + : "eax", "cc" ); + } + else + { + asm volatile ("movdqu %[shuf], %%xmm7\n\t" + "movd %[crc], %%xmm1\n\t" + "movdqa %[my_p], %%xmm5\n\t" + "movdqa %[k3k4], %%xmm6\n\t" + : + : [shuf] "m" (crc32_shuf_shift[32 - inlen]), + [crc] "m" (*pcrc), + [my_p] "m" (consts->my_p[0]), + [k3k4] "m" (consts->k[3 - 1]) + ); + + if (inlen >= 8) + { + asm volatile ("movq %[inbuf], %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf) + ); + if (inlen > 8) + { + asm volatile (/*"pinsrq $1, %[inbuf_tail], %%xmm0\n\t"*/ + "movq %[inbuf_tail], %%xmm2\n\t" + "punpcklqdq %%xmm2, %%xmm0\n\t" + "pshufb %[merge_shuf], %%xmm0\n\t" + : + : [inbuf_tail] "m" (inbuf[inlen - 8]), + [merge_shuf] "m" + 
(*crc32_merge9to15_shuf[inlen - 9]) + ); + } + } + else + { + asm volatile ("movd %[inbuf], %%xmm0\n\t" + "pinsrd $1, %[inbuf_tail], %%xmm0\n\t" + "pshufb %[merge_shuf], %%xmm0\n\t" + : + : [inbuf] "m" (*inbuf), + [inbuf_tail] "m" (inbuf[inlen - 4]), + [merge_shuf] "m" + (*crc32_merge5to7_shuf[inlen - 5]) + ); + } + + /* Final fold. */ + asm volatile ("pxor %%xmm1, %%xmm0\n\t" + "pshufb %%xmm7, %%xmm0\n\t" + + /* reduce 128-bits to 96-bits */ + "movdqa %%xmm0, %%xmm1\n\t" + "pclmulqdq $0x11, %%xmm6, %%xmm0\n\t" + "pslldq $8, %%xmm1\n\t" + "pxor %%xmm1, %%xmm0\n\t" /* bottom 32-bit are zero */ + + /* reduce 96-bits to 64-bits */ + "pshufd $0x30, %%xmm0, %%xmm1\n\t" /* [00][x>>96][00][00] */ + "pshufd $0x24, %%xmm0, %%xmm0\n\t" /* [00][xx][xx][00] */ + "pclmulqdq $0x01, %[k5], %%xmm1\n\t" /* [00][xx][xx][00] */ + "pxor %%xmm1, %%xmm0\n\t" /* top and bottom 32-bit are zero */ + + /* barrett reduction */ + "pshufd $0x01, %%xmm0, %%xmm1\n\t" /* [00][00][00][x>>32] */ + "pclmulqdq $0x01, %%xmm5, %%xmm0\n\t" /* [00][xx][xx][xx] */ + "psrldq $4, %%xmm0\n\t" /* [00][00][xx][xx] */ + "pclmulqdq $0x10, %%xmm5, %%xmm0\n\t" + "pxor %%xmm1, %%xmm0\n\t" + + /* store CRC in input endian */ + "movd %%xmm0, %%eax\n\t" + "bswapl %%eax\n\t" + "movl %%eax, %[out]\n\t" + : [out] "=m" (*pcrc) + : [k5] "m" (consts->k[5 - 1]) + : "eax" ); + } +} + +void ASM_FUNC_ATTR +_gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) +{ + const struct crc32_consts_s *consts = &crc32_consts; +#if defined(__x86_64__) && defined(__WIN64__) + char win64tmp[2 * 16]; + + /* XMM6-XMM7 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif + + if (!inlen) + return; + + if (inlen >= 16) + crc32_reflected_bulk(pcrc, inbuf, inlen, consts); + else + crc32_reflected_less_than_16(pcrc, inbuf, inlen, consts); + +#if defined(__x86_64__) && defined(__WIN64__) + /* Restore used registers. */ + asm volatile("movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif +} + +void ASM_FUNC_ATTR +_gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen) +{ + const struct crc32_consts_s *consts = &crc24rfc2440_consts; +#if defined(__x86_64__) && defined(__WIN64__) + char win64tmp[2 * 16]; + + /* XMM6-XMM7 need to be restored after use. */ + asm volatile ("movdqu %%xmm6, 0*16(%0)\n\t" + "movdqu %%xmm7, 1*16(%0)\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif + + if (!inlen) + return; + + /* Note: *pcrc in input endian. */ + + if (inlen >= 16) + crc32_bulk(pcrc, inbuf, inlen, consts); + else + crc32_less_than_16(pcrc, inbuf, inlen, consts); + +#if defined(__x86_64__) && defined(__WIN64__) + /* Restore used registers. */ + asm volatile("movdqu 0*16(%0), %%xmm6\n\t" + "movdqu 1*16(%0), %%xmm7\n\t" + : + : "r" (win64tmp) + : "memory"); +#endif +} + +#if __clang__ +# pragma clang attribute pop +#endif + +#endif /* USE_INTEL_PCLMUL */ diff --git a/comm/third_party/libgcrypt/cipher/crc-ppc.c b/comm/third_party/libgcrypt/cipher/crc-ppc.c new file mode 100644 index 0000000000..b9a40130ce --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/crc-ppc.c @@ -0,0 +1,656 @@ +/* crc-ppc.c - POWER8 vpmsum accelerated CRC implementation + * Copyright (C) 2019-2020 Jussi Kivilinna + * + * This file is part of Libgcrypt. 
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+#if defined(ENABLE_PPC_CRYPTO_SUPPORT) && \
+    defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+    defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC) && \
+    __GNUC__ >= 4
+
+#include <altivec.h>
+#include "bufhelp.h"
+
+
+#define ALWAYS_INLINE inline __attribute__((always_inline))
+#define NO_INLINE __attribute__((noinline))
+#define NO_INSTRUMENT_FUNCTION __attribute__((no_instrument_function))
+
+#define ASM_FUNC_ATTR          NO_INSTRUMENT_FUNCTION
+#define ASM_FUNC_ATTR_INLINE   ASM_FUNC_ATTR ALWAYS_INLINE
+#define ASM_FUNC_ATTR_NOINLINE ASM_FUNC_ATTR NO_INLINE
+
+#define ALIGNED_64 __attribute__ ((aligned (64)))
+
+
+typedef vector unsigned char vector16x_u8;
+typedef vector unsigned int vector4x_u32;
+typedef vector unsigned long long vector2x_u64;
+
+
+/* Constants structure for generic reflected/non-reflected CRC32 PMULL
+ * functions. */
+struct crc32_consts_s
+{
+  /* k: { x^(32*17), x^(32*15), x^(32*5), x^(32*3), x^(32*2), 0 } mod P(x) */
+  unsigned long long k[6];
+  /* my_p: { floor(x^64 / P(x)), P(x) } */
+  unsigned long long my_p[2];
+};
+
+/* PMULL constants for CRC32 and CRC32RFC1510. */
+static const struct crc32_consts_s crc32_consts ALIGNED_64 =
+{
+  { /* k[6] = reverse_33bits( x^(32*y) mod P(x) ) */
+    U64_C(0x154442bd4), U64_C(0x1c6e41596), /* y = { 17, 15 } */
+    U64_C(0x1751997d0), U64_C(0x0ccaa009e), /* y = { 5, 3 } */
+    U64_C(0x163cd6124), 0                   /* y = 2 */
+  },
+  { /* my_p[2] = reverse_33bits ( { floor(x^64 / P(x)), P(x) } ) */
+    U64_C(0x1f7011641), U64_C(0x1db710641)
+  }
+};
+
+/* PMULL constants for CRC24RFC2440 (polynomial multiplied with x⁸).
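+ *
+ * CRC24 reuses the 32-bit folding code unchanged: the polynomial is
+ * pre-multiplied by x^8 (as seen in the k[] and my_p[] values below), so
+ * the 24-bit CRC can be carried in a 32-bit state.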
*/ +static const struct crc32_consts_s crc24rfc2440_consts ALIGNED_64 = +{ + { /* k[6] = x^(32*y) mod P(x) << 32*/ + U64_C(0x08289a00) << 32, U64_C(0x74b44a00) << 32, /* y = { 17, 15 } */ + U64_C(0xc4b14d00) << 32, U64_C(0xfd7e0c00) << 32, /* y = { 5, 3 } */ + U64_C(0xd9fe8c00) << 32, 0 /* y = 2 */ + }, + { /* my_p[2] = { floor(x^64 / P(x)), P(x) } */ + U64_C(0x1f845fe24), U64_C(0x1864cfb00) + } +}; + + +static ASM_FUNC_ATTR_INLINE vector2x_u64 +asm_vpmsumd(vector2x_u64 a, vector2x_u64 b) +{ + __asm__("vpmsumd %0, %1, %2" + : "=v" (a) + : "v" (a), "v" (b)); + return a; +} + + +static ASM_FUNC_ATTR_INLINE vector2x_u64 +asm_swap_u64(vector2x_u64 a) +{ + __asm__("xxswapd %x0, %x1" + : "=wa" (a) + : "wa" (a)); + return a; +} + + +static ASM_FUNC_ATTR_INLINE vector4x_u32 +vec_sld_u32(vector4x_u32 a, vector4x_u32 b, unsigned int idx) +{ + return vec_sld (a, b, (4 * idx) & 15); +} + + +static const byte crc32_partial_fold_input_mask[16 + 16] ALIGNED_64 = + { + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + }; +static const byte crc32_shuf_shift[3 * 16] ALIGNED_64 = + { + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x0f, 0x0e, 0x0d, 0x0c, 0x0b, 0x0a, 0x09, 0x08, + 0x07, 0x06, 0x05, 0x04, 0x03, 0x02, 0x01, 0x00, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + }; +static const byte crc32_refl_shuf_shift[3 * 16] ALIGNED_64 = + { + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, 0x1f, + }; +static const vector16x_u8 bswap_const ALIGNED_64 = + { 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 }; + + +#define CRC_VEC_SWAP(v) ({ vector2x_u64 __vecu64 = (v); \ + vec_perm(__vecu64, __vecu64, bswap_const); }) + +#ifdef WORDS_BIGENDIAN +# define CRC_VEC_U64_DEF(lo, hi) { (hi), (lo) } +# define CRC_VEC_U64_LOAD(offs, ptr) \ + asm_swap_u64(asm_vec_u64_load(offs, ptr)) +# define CRC_VEC_U64_LOAD_LE(offs, ptr) \ + CRC_VEC_SWAP(asm_vec_u64_load(offs, ptr)) +# define CRC_VEC_U64_LOAD_BE(offs, ptr) \ + asm_vec_u64_load(offs, ptr) +# define CRC_VEC_SWAP_TO_LE(v) CRC_VEC_SWAP(v) +# define CRC_VEC_SWAP_TO_BE(v) (v) +# define VEC_U64_LO 1 +# define VEC_U64_HI 0 + +static ASM_FUNC_ATTR_INLINE vector2x_u64 +asm_vec_u64_load(unsigned long offset, const void *ptr) +{ + vector2x_u64 vecu64; +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("lxvd2x %x0,0,%1\n\t" + : "=wa" (vecu64) + : "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("lxvd2x %x0,%1,%2\n\t" + : "=wa" (vecu64) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + return vecu64; +} +#else +# define CRC_VEC_U64_DEF(lo, hi) { (lo), (hi) } +# define CRC_VEC_U64_LOAD(offs, ptr) asm_vec_u64_load_le(offs, ptr) +# define CRC_VEC_U64_LOAD_LE(offs, ptr) asm_vec_u64_load_le(offs, ptr) +# define CRC_VEC_U64_LOAD_BE(offs, ptr) asm_vec_u64_load_be(offs, ptr) +# define CRC_VEC_SWAP_TO_LE(v) (v) +# define CRC_VEC_SWAP_TO_BE(v) CRC_VEC_SWAP(v) +# define VEC_U64_LO 0 +# define VEC_U64_HI 1 + +static ASM_FUNC_ATTR_INLINE vector2x_u64 +asm_vec_u64_load_le(unsigned long offset, const void *ptr) +{ + 
vector2x_u64 vecu64; +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ volatile ("lxvd2x %x0,0,%1\n\t" + : "=wa" (vecu64) + : "r" ((uintptr_t)ptr) + : "memory"); + else +#endif + __asm__ volatile ("lxvd2x %x0,%1,%2\n\t" + : "=wa" (vecu64) + : "r" (offset), "r" ((uintptr_t)ptr) + : "memory", "r0"); + return asm_swap_u64(vecu64); +} + +static ASM_FUNC_ATTR_INLINE vector2x_u64 +asm_vec_u64_load_be(unsigned int offset, const void *ptr) +{ + static const vector16x_u8 vec_load_le_const = + { ~7, ~6, ~5, ~4, ~3, ~2, ~1, ~0, ~15, ~14, ~13, ~12, ~11, ~10, ~9, ~8 }; + vector2x_u64 vecu64; + +#if __GNUC__ >= 4 + if (__builtin_constant_p (offset) && offset == 0) + __asm__ ("lxvd2x %%vs32,0,%1\n\t" + "vperm %0,%%v0,%%v0,%2\n\t" + : "=v" (vecu64) + : "r" ((uintptr_t)(ptr)), "v" (vec_load_le_const) + : "memory", "v0"); +#endif + else + __asm__ ("lxvd2x %%vs32,%1,%2\n\t" + "vperm %0,%%v0,%%v0,%3\n\t" + : "=v" (vecu64) + : "r" (offset), "r" ((uintptr_t)(ptr)), + "v" (vec_load_le_const) + : "memory", "r0", "v0"); + + return vecu64; +} +#endif + + +static ASM_FUNC_ATTR_INLINE void +crc32r_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + vector4x_u32 zero = { 0, 0, 0, 0 }; + vector2x_u64 low_64bit_mask = CRC_VEC_U64_DEF((u64)-1, 0); + vector2x_u64 low_32bit_mask = CRC_VEC_U64_DEF((u32)-1, 0); + vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]); + vector2x_u64 k1k2 = CRC_VEC_U64_LOAD(0, &consts->k[1 - 1]); + vector2x_u64 k3k4 = CRC_VEC_U64_LOAD(0, &consts->k[3 - 1]); + vector2x_u64 k4lo = CRC_VEC_U64_DEF(k3k4[VEC_U64_HI], 0); + vector2x_u64 k5lo = CRC_VEC_U64_LOAD(0, &consts->k[5 - 1]); + vector2x_u64 crc = CRC_VEC_U64_DEF(*pcrc, 0); + vector2x_u64 crc0, crc1, crc2, crc3; + vector2x_u64 v0; + + if (inlen >= 8 * 16) + { + crc0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf); + crc0 ^= crc; + crc1 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf); + crc2 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf); + crc3 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf); + + inbuf += 4 * 16; + inlen -= 4 * 16; + + /* Fold by 4. */ + while (inlen >= 4 * 16) + { + v0 = CRC_VEC_U64_LOAD_LE(0 * 16, inbuf); + crc0 = asm_vpmsumd(crc0, k1k2) ^ v0; + + v0 = CRC_VEC_U64_LOAD_LE(1 * 16, inbuf); + crc1 = asm_vpmsumd(crc1, k1k2) ^ v0; + + v0 = CRC_VEC_U64_LOAD_LE(2 * 16, inbuf); + crc2 = asm_vpmsumd(crc2, k1k2) ^ v0; + + v0 = CRC_VEC_U64_LOAD_LE(3 * 16, inbuf); + crc3 = asm_vpmsumd(crc3, k1k2) ^ v0; + + inbuf += 4 * 16; + inlen -= 4 * 16; + } + + /* Fold 4 to 1. */ + crc1 ^= asm_vpmsumd(crc0, k3k4); + crc2 ^= asm_vpmsumd(crc1, k3k4); + crc3 ^= asm_vpmsumd(crc2, k3k4); + crc = crc3; + } + else + { + v0 = CRC_VEC_U64_LOAD_LE(0, inbuf); + crc ^= v0; + + inbuf += 16; + inlen -= 16; + } + + /* Fold by 1. */ + while (inlen >= 16) + { + v0 = CRC_VEC_U64_LOAD_LE(0, inbuf); + crc = asm_vpmsumd(k3k4, crc); + crc ^= v0; + + inbuf += 16; + inlen -= 16; + } + + /* Partial fold. */ + if (inlen) + { + /* Load last input and add padding zeros. 
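+       *
+       * The 1..15 byte tail is handled by re-reading the last 16 bytes
+       * of the input (safe, since at least 16 bytes were consumed above),
+       * masking off the already-processed bytes, merging in the bytes
+       * shifted out of the accumulator, and performing one more fold.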
*/ + vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask); + vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(inlen, crc32_refl_shuf_shift); + vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_refl_shuf_shift); + + v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf); + v0 &= mask; + + crc = CRC_VEC_SWAP_TO_LE(crc); + v0 |= (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero, + (vector16x_u8)shr_shuf); + crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero, + (vector16x_u8)shl_shuf); + crc = asm_vpmsumd(k3k4, crc); + crc ^= v0; + + inbuf += inlen; + inlen -= inlen; + } + + /* Final fold. */ + + /* reduce 128-bits to 96-bits */ + v0 = asm_swap_u64(crc); + v0 &= low_64bit_mask; + crc = asm_vpmsumd(k4lo, crc); + crc ^= v0; + + /* reduce 96-bits to 64-bits */ + v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc, + (vector4x_u32)crc, 3); /* [x0][x3][x2][x1] */ + v0 &= low_64bit_mask; /* [00][00][x2][x1] */ + crc = crc & low_32bit_mask; /* [00][00][00][x0] */ + crc = v0 ^ asm_vpmsumd(k5lo, crc); /* [00][00][xx][xx] */ + + /* barrett reduction */ + v0 = crc << 32; /* [00][00][x0][00] */ + v0 = asm_vpmsumd(my_p, v0); + v0 = asm_swap_u64(v0); + v0 = asm_vpmsumd(my_p, v0); + crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc, + zero, 1); /* [00][x1][x0][00] */ + crc ^= v0; + + *pcrc = (u32)crc[VEC_U64_HI]; +} + + +static ASM_FUNC_ATTR_INLINE u32 +crc32r_ppc8_ce_reduction_4 (u32 data, u32 crc, + const struct crc32_consts_s *consts) +{ + vector4x_u32 zero = { 0, 0, 0, 0 }; + vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]); + vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data, 0); + v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */ + v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0, + zero, 3); /* [x0][00][00][00] */ + v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)v0, + (vector4x_u32)v0, 3); /* [00][x0][00][00] */ + v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */ + return (v0[VEC_U64_LO] >> 32) ^ crc; +} + + +static ASM_FUNC_ATTR_INLINE void +crc32r_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + u32 crc = *pcrc; + u32 data; + + while (inlen >= 4) + { + data = buf_get_le32(inbuf); + data ^= crc; + + inlen -= 4; + inbuf += 4; + + crc = crc32r_ppc8_ce_reduction_4 (data, 0, consts); + } + + switch (inlen) + { + case 0: + break; + case 1: + data = inbuf[0]; + data ^= crc; + data <<= 24; + crc >>= 8; + crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts); + break; + case 2: + data = inbuf[0] << 0; + data |= inbuf[1] << 8; + data ^= crc; + data <<= 16; + crc >>= 16; + crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts); + break; + case 3: + data = inbuf[0] << 0; + data |= inbuf[1] << 8; + data |= inbuf[2] << 16; + data ^= crc; + data <<= 8; + crc >>= 24; + crc = crc32r_ppc8_ce_reduction_4 (data, crc, consts); + break; + } + + *pcrc = crc; +} + + +static ASM_FUNC_ATTR_INLINE void +crc32_ppc8_ce_bulk (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + vector4x_u32 zero = { 0, 0, 0, 0 }; + vector2x_u64 low_96bit_mask = CRC_VEC_U64_DEF(~0, ~((u64)(u32)-1 << 32)); + vector2x_u64 p_my = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->my_p[0])); + vector2x_u64 p_my_lo, p_my_hi; + vector2x_u64 k2k1 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[1 - 1])); + vector2x_u64 k4k3 = asm_swap_u64(CRC_VEC_U64_LOAD(0, &consts->k[3 - 1])); + vector2x_u64 k4hi = CRC_VEC_U64_DEF(0, consts->k[4 - 1]); + vector2x_u64 k5hi = CRC_VEC_U64_DEF(0, consts->k[5 - 1]); + vector2x_u64 crc = CRC_VEC_U64_DEF(0, 
_gcry_bswap64(*pcrc)); + vector2x_u64 crc0, crc1, crc2, crc3; + vector2x_u64 v0; + + if (inlen >= 8 * 16) + { + crc0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf); + crc0 ^= crc; + crc1 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf); + crc2 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf); + crc3 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf); + + inbuf += 4 * 16; + inlen -= 4 * 16; + + /* Fold by 4. */ + while (inlen >= 4 * 16) + { + v0 = CRC_VEC_U64_LOAD_BE(0 * 16, inbuf); + crc0 = asm_vpmsumd(crc0, k2k1) ^ v0; + + v0 = CRC_VEC_U64_LOAD_BE(1 * 16, inbuf); + crc1 = asm_vpmsumd(crc1, k2k1) ^ v0; + + v0 = CRC_VEC_U64_LOAD_BE(2 * 16, inbuf); + crc2 = asm_vpmsumd(crc2, k2k1) ^ v0; + + v0 = CRC_VEC_U64_LOAD_BE(3 * 16, inbuf); + crc3 = asm_vpmsumd(crc3, k2k1) ^ v0; + + inbuf += 4 * 16; + inlen -= 4 * 16; + } + + /* Fold 4 to 1. */ + crc1 ^= asm_vpmsumd(crc0, k4k3); + crc2 ^= asm_vpmsumd(crc1, k4k3); + crc3 ^= asm_vpmsumd(crc2, k4k3); + crc = crc3; + } + else + { + v0 = CRC_VEC_U64_LOAD_BE(0, inbuf); + crc ^= v0; + + inbuf += 16; + inlen -= 16; + } + + /* Fold by 1. */ + while (inlen >= 16) + { + v0 = CRC_VEC_U64_LOAD_BE(0, inbuf); + crc = asm_vpmsumd(k4k3, crc); + crc ^= v0; + + inbuf += 16; + inlen -= 16; + } + + /* Partial fold. */ + if (inlen) + { + /* Load last input and add padding zeros. */ + vector2x_u64 mask = CRC_VEC_U64_LOAD_LE(inlen, crc32_partial_fold_input_mask); + vector2x_u64 shl_shuf = CRC_VEC_U64_LOAD_LE(32 - inlen, crc32_refl_shuf_shift); + vector2x_u64 shr_shuf = CRC_VEC_U64_LOAD_LE(inlen + 16, crc32_shuf_shift); + + v0 = CRC_VEC_U64_LOAD_LE(inlen - 16, inbuf); + v0 &= mask; + + crc = CRC_VEC_SWAP_TO_LE(crc); + crc2 = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero, + (vector16x_u8)shr_shuf); + v0 |= crc2; + v0 = CRC_VEC_SWAP(v0); + crc = (vector2x_u64)vec_perm((vector16x_u8)crc, (vector16x_u8)zero, + (vector16x_u8)shl_shuf); + crc = asm_vpmsumd(k4k3, crc); + crc ^= v0; + + inbuf += inlen; + inlen -= inlen; + } + + /* Final fold. 
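+   *
+   * Same 128 -> 96 -> 64 bit reduction and Barrett step as on x86, with
+   * vec_sld/vpmsumd in place of pshufd/pclmulqdq; my_p is split into
+   * separate high and low halves for the two Barrett multiplications and
+   * the result is byte-swapped back to input order.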
*/ + + /* reduce 128-bits to 96-bits */ + v0 = (vector2x_u64)vec_sld_u32((vector4x_u32)crc, + (vector4x_u32)zero, 2); + crc = asm_vpmsumd(k4hi, crc); + crc ^= v0; /* bottom 32-bit are zero */ + + /* reduce 96-bits to 64-bits */ + v0 = crc & low_96bit_mask; /* [00][x2][x1][00] */ + crc >>= 32; /* [00][x3][00][x0] */ + crc = asm_vpmsumd(k5hi, crc); /* [00][xx][xx][00] */ + crc ^= v0; /* top and bottom 32-bit are zero */ + + /* barrett reduction */ + p_my_hi = p_my; + p_my_lo = p_my; + p_my_hi[VEC_U64_LO] = 0; + p_my_lo[VEC_U64_HI] = 0; + v0 = crc >> 32; /* [00][00][00][x1] */ + crc = asm_vpmsumd(p_my_hi, crc); /* [00][xx][xx][xx] */ + crc = (vector2x_u64)vec_sld_u32((vector4x_u32)crc, + (vector4x_u32)crc, 3); /* [x0][00][x2][x1] */ + crc = asm_vpmsumd(p_my_lo, crc); /* [00][xx][xx][xx] */ + crc ^= v0; + + *pcrc = _gcry_bswap32(crc[VEC_U64_LO]); +} + + +static ASM_FUNC_ATTR_INLINE u32 +crc32_ppc8_ce_reduction_4 (u32 data, u32 crc, + const struct crc32_consts_s *consts) +{ + vector2x_u64 my_p = CRC_VEC_U64_LOAD(0, &consts->my_p[0]); + vector2x_u64 v0 = CRC_VEC_U64_DEF((u64)data << 32, 0); + v0 = asm_vpmsumd(v0, my_p); /* [00][x1][x0][00] */ + v0[VEC_U64_LO] = 0; /* [00][x1][00][00] */ + v0 = asm_vpmsumd(v0, my_p); /* [00][00][xx][xx] */ + return _gcry_bswap32(v0[VEC_U64_LO]) ^ crc; +} + + +static ASM_FUNC_ATTR_INLINE void +crc32_less_than_16 (u32 *pcrc, const byte *inbuf, size_t inlen, + const struct crc32_consts_s *consts) +{ + u32 crc = *pcrc; + u32 data; + + while (inlen >= 4) + { + data = buf_get_le32(inbuf); + data ^= crc; + data = _gcry_bswap32(data); + + inlen -= 4; + inbuf += 4; + + crc = crc32_ppc8_ce_reduction_4 (data, 0, consts); + } + + switch (inlen) + { + case 0: + break; + case 1: + data = inbuf[0]; + data ^= crc; + data = data & 0xffU; + crc = crc >> 8; + crc = crc32_ppc8_ce_reduction_4 (data, crc, consts); + break; + case 2: + data = inbuf[0] << 0; + data |= inbuf[1] << 8; + data ^= crc; + data = _gcry_bswap32(data << 16); + crc = crc >> 16; + crc = crc32_ppc8_ce_reduction_4 (data, crc, consts); + break; + case 3: + data = inbuf[0] << 0; + data |= inbuf[1] << 8; + data |= inbuf[2] << 16; + data ^= crc; + data = _gcry_bswap32(data << 8); + crc = crc >> 24; + crc = crc32_ppc8_ce_reduction_4 (data, crc, consts); + break; + } + + *pcrc = crc; +} + +void ASM_FUNC_ATTR +_gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen) +{ + const struct crc32_consts_s *consts = &crc32_consts; + + if (!inlen) + return; + + if (inlen >= 16) + crc32r_ppc8_ce_bulk (pcrc, inbuf, inlen, consts); + else + crc32r_less_than_16 (pcrc, inbuf, inlen, consts); +} + +void ASM_FUNC_ATTR +_gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen) +{ + const struct crc32_consts_s *consts = &crc24rfc2440_consts; + + if (!inlen) + return; + + /* Note: *pcrc in input endian. */ + + if (inlen >= 16) + crc32_ppc8_ce_bulk (pcrc, inbuf, inlen, consts); + else + crc32_less_than_16 (pcrc, inbuf, inlen, consts); +} + +#endif diff --git a/comm/third_party/libgcrypt/cipher/crc.c b/comm/third_party/libgcrypt/cipher/crc.c new file mode 100644 index 0000000000..6d70f644f7 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/crc.c @@ -0,0 +1,955 @@ +/* crc.c - Cyclic redundancy checks. + * Copyright (C) 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. 
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "cipher.h"
+
+#include "bithelp.h"
+#include "bufhelp.h"
+
+
+/* USE_INTEL_PCLMUL indicates whether to compile CRC with Intel PCLMUL/SSE4.1
+ * code. */
+#undef USE_INTEL_PCLMUL
+#if defined(ENABLE_PCLMUL_SUPPORT) && defined(ENABLE_SSE41_SUPPORT)
+# if ((defined(__i386__) && SIZEOF_UNSIGNED_LONG == 4) || defined(__x86_64__))
+#  if __GNUC__ >= 4
+#   define USE_INTEL_PCLMUL 1
+#  endif
+# endif
+#endif /* USE_INTEL_PCLMUL */
+
+/* USE_ARM_PMULL indicates whether to compile CRC with ARMv8 PMULL code. */
+#undef USE_ARM_PMULL
+#if defined(ENABLE_ARM_CRYPTO_SUPPORT)
+# if defined(__AARCH64EL__) && \
+     defined(HAVE_COMPATIBLE_GCC_AARCH64_PLATFORM_AS) && \
+     defined(HAVE_GCC_INLINE_ASM_AARCH64_CRYPTO)
+#  define USE_ARM_PMULL 1
+# endif
+#endif /* USE_ARM_PMULL */
+
+/* USE_PPC_VPMSUM indicates whether to enable PowerPC vector
+ * accelerated code. */
+#undef USE_PPC_VPMSUM
+#ifdef ENABLE_PPC_CRYPTO_SUPPORT
+# if defined(HAVE_COMPATIBLE_CC_PPC_ALTIVEC) && \
+     defined(HAVE_GCC_INLINE_ASM_PPC_ALTIVEC)
+#  if __GNUC__ >= 4
+#   define USE_PPC_VPMSUM 1
+#  endif
+# endif
+#endif /* USE_PPC_VPMSUM */
+
+
+typedef struct
+{
+  u32 CRC;
+#ifdef USE_INTEL_PCLMUL
+  unsigned int use_pclmul:1;           /* Intel PCLMUL shall be used. */
+#endif
+#ifdef USE_ARM_PMULL
+  unsigned int use_pmull:1;            /* ARMv8 PMULL shall be used. */
+#endif
+#ifdef USE_PPC_VPMSUM
+  unsigned int use_vpmsum:1;           /* POWER vpmsum shall be used.
*/ +#endif + byte buf[4]; +} +CRC_CONTEXT; + + +#ifdef USE_INTEL_PCLMUL +/*-- crc-intel-pclmul.c --*/ +void _gcry_crc32_intel_pclmul (u32 *pcrc, const byte *inbuf, size_t inlen); +void _gcry_crc24rfc2440_intel_pclmul (u32 *pcrc, const byte *inbuf, + size_t inlen); +#endif + +#ifdef USE_ARM_PMULL +/*-- crc-armv8-ce.c --*/ +void _gcry_crc32_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, size_t inlen); +void _gcry_crc24rfc2440_armv8_ce_pmull (u32 *pcrc, const byte *inbuf, + size_t inlen); +#endif + +#ifdef USE_PPC_VPMSUM +/*-- crc-ppc.c --*/ +void _gcry_crc32_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, size_t inlen); +void _gcry_crc24rfc2440_ppc8_vpmsum (u32 *pcrc, const byte *inbuf, + size_t inlen); +#endif + + +/* + * Code generated by universal_crc by Danjel McGougan + * + * CRC parameters used: + * bits: 32 + * poly: 0x04c11db7 + * init: 0xffffffff + * xor: 0xffffffff + * reverse: true + * non-direct: false + * + * CRC of the string "123456789" is 0xcbf43926 + */ + +static const u32 crc32_table[1024] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 
0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d, + 0x00000000, 0x191b3141, 0x32366282, 0x2b2d53c3, + 0x646cc504, 0x7d77f445, 0x565aa786, 0x4f4196c7, + 0xc8d98a08, 0xd1c2bb49, 0xfaefe88a, 0xe3f4d9cb, + 0xacb54f0c, 0xb5ae7e4d, 0x9e832d8e, 0x87981ccf, + 0x4ac21251, 0x53d92310, 0x78f470d3, 0x61ef4192, + 0x2eaed755, 0x37b5e614, 0x1c98b5d7, 0x05838496, + 0x821b9859, 0x9b00a918, 0xb02dfadb, 0xa936cb9a, + 0xe6775d5d, 0xff6c6c1c, 0xd4413fdf, 0xcd5a0e9e, + 0x958424a2, 0x8c9f15e3, 0xa7b24620, 0xbea97761, + 0xf1e8e1a6, 0xe8f3d0e7, 0xc3de8324, 0xdac5b265, + 0x5d5daeaa, 0x44469feb, 0x6f6bcc28, 0x7670fd69, + 0x39316bae, 0x202a5aef, 0x0b07092c, 0x121c386d, + 0xdf4636f3, 0xc65d07b2, 0xed705471, 0xf46b6530, + 0xbb2af3f7, 0xa231c2b6, 0x891c9175, 0x9007a034, + 0x179fbcfb, 0x0e848dba, 0x25a9de79, 0x3cb2ef38, + 0x73f379ff, 0x6ae848be, 0x41c51b7d, 0x58de2a3c, + 0xf0794f05, 0xe9627e44, 0xc24f2d87, 0xdb541cc6, + 0x94158a01, 0x8d0ebb40, 0xa623e883, 0xbf38d9c2, + 0x38a0c50d, 0x21bbf44c, 0x0a96a78f, 0x138d96ce, + 0x5ccc0009, 0x45d73148, 0x6efa628b, 0x77e153ca, + 0xbabb5d54, 0xa3a06c15, 0x888d3fd6, 0x91960e97, + 0xded79850, 0xc7cca911, 0xece1fad2, 0xf5facb93, + 0x7262d75c, 0x6b79e61d, 0x4054b5de, 0x594f849f, + 0x160e1258, 0x0f152319, 0x243870da, 0x3d23419b, + 0x65fd6ba7, 0x7ce65ae6, 0x57cb0925, 0x4ed03864, + 0x0191aea3, 0x188a9fe2, 0x33a7cc21, 0x2abcfd60, + 0xad24e1af, 0xb43fd0ee, 0x9f12832d, 0x8609b26c, + 0xc94824ab, 0xd05315ea, 0xfb7e4629, 0xe2657768, + 0x2f3f79f6, 0x362448b7, 0x1d091b74, 0x04122a35, + 0x4b53bcf2, 0x52488db3, 0x7965de70, 0x607eef31, + 0xe7e6f3fe, 0xfefdc2bf, 0xd5d0917c, 0xcccba03d, + 0x838a36fa, 0x9a9107bb, 0xb1bc5478, 0xa8a76539, + 0x3b83984b, 0x2298a90a, 0x09b5fac9, 0x10aecb88, + 0x5fef5d4f, 0x46f46c0e, 0x6dd93fcd, 0x74c20e8c, + 0xf35a1243, 0xea412302, 0xc16c70c1, 0xd8774180, + 0x9736d747, 0x8e2de606, 0xa500b5c5, 0xbc1b8484, + 0x71418a1a, 0x685abb5b, 0x4377e898, 0x5a6cd9d9, + 0x152d4f1e, 0x0c367e5f, 0x271b2d9c, 0x3e001cdd, + 0xb9980012, 0xa0833153, 0x8bae6290, 0x92b553d1, + 0xddf4c516, 0xc4eff457, 0xefc2a794, 0xf6d996d5, + 0xae07bce9, 0xb71c8da8, 0x9c31de6b, 0x852aef2a, + 0xca6b79ed, 0xd37048ac, 0xf85d1b6f, 0xe1462a2e, + 0x66de36e1, 0x7fc507a0, 0x54e85463, 0x4df36522, + 0x02b2f3e5, 0x1ba9c2a4, 0x30849167, 0x299fa026, + 0xe4c5aeb8, 0xfdde9ff9, 0xd6f3cc3a, 0xcfe8fd7b, + 0x80a96bbc, 0x99b25afd, 0xb29f093e, 0xab84387f, + 0x2c1c24b0, 0x350715f1, 0x1e2a4632, 0x07317773, + 0x4870e1b4, 0x516bd0f5, 0x7a468336, 0x635db277, + 0xcbfad74e, 0xd2e1e60f, 0xf9ccb5cc, 0xe0d7848d, + 0xaf96124a, 0xb68d230b, 0x9da070c8, 0x84bb4189, + 0x03235d46, 0x1a386c07, 0x31153fc4, 0x280e0e85, + 0x674f9842, 0x7e54a903, 0x5579fac0, 0x4c62cb81, + 0x8138c51f, 0x9823f45e, 0xb30ea79d, 0xaa1596dc, + 0xe554001b, 0xfc4f315a, 0xd7626299, 0xce7953d8, + 0x49e14f17, 0x50fa7e56, 0x7bd72d95, 0x62cc1cd4, + 0x2d8d8a13, 0x3496bb52, 0x1fbbe891, 0x06a0d9d0, + 0x5e7ef3ec, 0x4765c2ad, 0x6c48916e, 0x7553a02f, + 0x3a1236e8, 0x230907a9, 
0x0824546a, 0x113f652b, + 0x96a779e4, 0x8fbc48a5, 0xa4911b66, 0xbd8a2a27, + 0xf2cbbce0, 0xebd08da1, 0xc0fdde62, 0xd9e6ef23, + 0x14bce1bd, 0x0da7d0fc, 0x268a833f, 0x3f91b27e, + 0x70d024b9, 0x69cb15f8, 0x42e6463b, 0x5bfd777a, + 0xdc656bb5, 0xc57e5af4, 0xee530937, 0xf7483876, + 0xb809aeb1, 0xa1129ff0, 0x8a3fcc33, 0x9324fd72, + 0x00000000, 0x01c26a37, 0x0384d46e, 0x0246be59, + 0x0709a8dc, 0x06cbc2eb, 0x048d7cb2, 0x054f1685, + 0x0e1351b8, 0x0fd13b8f, 0x0d9785d6, 0x0c55efe1, + 0x091af964, 0x08d89353, 0x0a9e2d0a, 0x0b5c473d, + 0x1c26a370, 0x1de4c947, 0x1fa2771e, 0x1e601d29, + 0x1b2f0bac, 0x1aed619b, 0x18abdfc2, 0x1969b5f5, + 0x1235f2c8, 0x13f798ff, 0x11b126a6, 0x10734c91, + 0x153c5a14, 0x14fe3023, 0x16b88e7a, 0x177ae44d, + 0x384d46e0, 0x398f2cd7, 0x3bc9928e, 0x3a0bf8b9, + 0x3f44ee3c, 0x3e86840b, 0x3cc03a52, 0x3d025065, + 0x365e1758, 0x379c7d6f, 0x35dac336, 0x3418a901, + 0x3157bf84, 0x3095d5b3, 0x32d36bea, 0x331101dd, + 0x246be590, 0x25a98fa7, 0x27ef31fe, 0x262d5bc9, + 0x23624d4c, 0x22a0277b, 0x20e69922, 0x2124f315, + 0x2a78b428, 0x2bbade1f, 0x29fc6046, 0x283e0a71, + 0x2d711cf4, 0x2cb376c3, 0x2ef5c89a, 0x2f37a2ad, + 0x709a8dc0, 0x7158e7f7, 0x731e59ae, 0x72dc3399, + 0x7793251c, 0x76514f2b, 0x7417f172, 0x75d59b45, + 0x7e89dc78, 0x7f4bb64f, 0x7d0d0816, 0x7ccf6221, + 0x798074a4, 0x78421e93, 0x7a04a0ca, 0x7bc6cafd, + 0x6cbc2eb0, 0x6d7e4487, 0x6f38fade, 0x6efa90e9, + 0x6bb5866c, 0x6a77ec5b, 0x68315202, 0x69f33835, + 0x62af7f08, 0x636d153f, 0x612bab66, 0x60e9c151, + 0x65a6d7d4, 0x6464bde3, 0x662203ba, 0x67e0698d, + 0x48d7cb20, 0x4915a117, 0x4b531f4e, 0x4a917579, + 0x4fde63fc, 0x4e1c09cb, 0x4c5ab792, 0x4d98dda5, + 0x46c49a98, 0x4706f0af, 0x45404ef6, 0x448224c1, + 0x41cd3244, 0x400f5873, 0x4249e62a, 0x438b8c1d, + 0x54f16850, 0x55330267, 0x5775bc3e, 0x56b7d609, + 0x53f8c08c, 0x523aaabb, 0x507c14e2, 0x51be7ed5, + 0x5ae239e8, 0x5b2053df, 0x5966ed86, 0x58a487b1, + 0x5deb9134, 0x5c29fb03, 0x5e6f455a, 0x5fad2f6d, + 0xe1351b80, 0xe0f771b7, 0xe2b1cfee, 0xe373a5d9, + 0xe63cb35c, 0xe7fed96b, 0xe5b86732, 0xe47a0d05, + 0xef264a38, 0xeee4200f, 0xeca29e56, 0xed60f461, + 0xe82fe2e4, 0xe9ed88d3, 0xebab368a, 0xea695cbd, + 0xfd13b8f0, 0xfcd1d2c7, 0xfe976c9e, 0xff5506a9, + 0xfa1a102c, 0xfbd87a1b, 0xf99ec442, 0xf85cae75, + 0xf300e948, 0xf2c2837f, 0xf0843d26, 0xf1465711, + 0xf4094194, 0xf5cb2ba3, 0xf78d95fa, 0xf64fffcd, + 0xd9785d60, 0xd8ba3757, 0xdafc890e, 0xdb3ee339, + 0xde71f5bc, 0xdfb39f8b, 0xddf521d2, 0xdc374be5, + 0xd76b0cd8, 0xd6a966ef, 0xd4efd8b6, 0xd52db281, + 0xd062a404, 0xd1a0ce33, 0xd3e6706a, 0xd2241a5d, + 0xc55efe10, 0xc49c9427, 0xc6da2a7e, 0xc7184049, + 0xc25756cc, 0xc3953cfb, 0xc1d382a2, 0xc011e895, + 0xcb4dafa8, 0xca8fc59f, 0xc8c97bc6, 0xc90b11f1, + 0xcc440774, 0xcd866d43, 0xcfc0d31a, 0xce02b92d, + 0x91af9640, 0x906dfc77, 0x922b422e, 0x93e92819, + 0x96a63e9c, 0x976454ab, 0x9522eaf2, 0x94e080c5, + 0x9fbcc7f8, 0x9e7eadcf, 0x9c381396, 0x9dfa79a1, + 0x98b56f24, 0x99770513, 0x9b31bb4a, 0x9af3d17d, + 0x8d893530, 0x8c4b5f07, 0x8e0de15e, 0x8fcf8b69, + 0x8a809dec, 0x8b42f7db, 0x89044982, 0x88c623b5, + 0x839a6488, 0x82580ebf, 0x801eb0e6, 0x81dcdad1, + 0x8493cc54, 0x8551a663, 0x8717183a, 0x86d5720d, + 0xa9e2d0a0, 0xa820ba97, 0xaa6604ce, 0xaba46ef9, + 0xaeeb787c, 0xaf29124b, 0xad6fac12, 0xacadc625, + 0xa7f18118, 0xa633eb2f, 0xa4755576, 0xa5b73f41, + 0xa0f829c4, 0xa13a43f3, 0xa37cfdaa, 0xa2be979d, + 0xb5c473d0, 0xb40619e7, 0xb640a7be, 0xb782cd89, + 0xb2cddb0c, 0xb30fb13b, 0xb1490f62, 0xb08b6555, + 0xbbd72268, 0xba15485f, 0xb853f606, 0xb9919c31, + 0xbcde8ab4, 0xbd1ce083, 0xbf5a5eda, 0xbe9834ed, + 0x00000000, 0xb8bc6765, 
0xaa09c88b, 0x12b5afee, + 0x8f629757, 0x37def032, 0x256b5fdc, 0x9dd738b9, + 0xc5b428ef, 0x7d084f8a, 0x6fbde064, 0xd7018701, + 0x4ad6bfb8, 0xf26ad8dd, 0xe0df7733, 0x58631056, + 0x5019579f, 0xe8a530fa, 0xfa109f14, 0x42acf871, + 0xdf7bc0c8, 0x67c7a7ad, 0x75720843, 0xcdce6f26, + 0x95ad7f70, 0x2d111815, 0x3fa4b7fb, 0x8718d09e, + 0x1acfe827, 0xa2738f42, 0xb0c620ac, 0x087a47c9, + 0xa032af3e, 0x188ec85b, 0x0a3b67b5, 0xb28700d0, + 0x2f503869, 0x97ec5f0c, 0x8559f0e2, 0x3de59787, + 0x658687d1, 0xdd3ae0b4, 0xcf8f4f5a, 0x7733283f, + 0xeae41086, 0x525877e3, 0x40edd80d, 0xf851bf68, + 0xf02bf8a1, 0x48979fc4, 0x5a22302a, 0xe29e574f, + 0x7f496ff6, 0xc7f50893, 0xd540a77d, 0x6dfcc018, + 0x359fd04e, 0x8d23b72b, 0x9f9618c5, 0x272a7fa0, + 0xbafd4719, 0x0241207c, 0x10f48f92, 0xa848e8f7, + 0x9b14583d, 0x23a83f58, 0x311d90b6, 0x89a1f7d3, + 0x1476cf6a, 0xaccaa80f, 0xbe7f07e1, 0x06c36084, + 0x5ea070d2, 0xe61c17b7, 0xf4a9b859, 0x4c15df3c, + 0xd1c2e785, 0x697e80e0, 0x7bcb2f0e, 0xc377486b, + 0xcb0d0fa2, 0x73b168c7, 0x6104c729, 0xd9b8a04c, + 0x446f98f5, 0xfcd3ff90, 0xee66507e, 0x56da371b, + 0x0eb9274d, 0xb6054028, 0xa4b0efc6, 0x1c0c88a3, + 0x81dbb01a, 0x3967d77f, 0x2bd27891, 0x936e1ff4, + 0x3b26f703, 0x839a9066, 0x912f3f88, 0x299358ed, + 0xb4446054, 0x0cf80731, 0x1e4da8df, 0xa6f1cfba, + 0xfe92dfec, 0x462eb889, 0x549b1767, 0xec277002, + 0x71f048bb, 0xc94c2fde, 0xdbf98030, 0x6345e755, + 0x6b3fa09c, 0xd383c7f9, 0xc1366817, 0x798a0f72, + 0xe45d37cb, 0x5ce150ae, 0x4e54ff40, 0xf6e89825, + 0xae8b8873, 0x1637ef16, 0x048240f8, 0xbc3e279d, + 0x21e91f24, 0x99557841, 0x8be0d7af, 0x335cb0ca, + 0xed59b63b, 0x55e5d15e, 0x47507eb0, 0xffec19d5, + 0x623b216c, 0xda874609, 0xc832e9e7, 0x708e8e82, + 0x28ed9ed4, 0x9051f9b1, 0x82e4565f, 0x3a58313a, + 0xa78f0983, 0x1f336ee6, 0x0d86c108, 0xb53aa66d, + 0xbd40e1a4, 0x05fc86c1, 0x1749292f, 0xaff54e4a, + 0x322276f3, 0x8a9e1196, 0x982bbe78, 0x2097d91d, + 0x78f4c94b, 0xc048ae2e, 0xd2fd01c0, 0x6a4166a5, + 0xf7965e1c, 0x4f2a3979, 0x5d9f9697, 0xe523f1f2, + 0x4d6b1905, 0xf5d77e60, 0xe762d18e, 0x5fdeb6eb, + 0xc2098e52, 0x7ab5e937, 0x680046d9, 0xd0bc21bc, + 0x88df31ea, 0x3063568f, 0x22d6f961, 0x9a6a9e04, + 0x07bda6bd, 0xbf01c1d8, 0xadb46e36, 0x15080953, + 0x1d724e9a, 0xa5ce29ff, 0xb77b8611, 0x0fc7e174, + 0x9210d9cd, 0x2aacbea8, 0x38191146, 0x80a57623, + 0xd8c66675, 0x607a0110, 0x72cfaefe, 0xca73c99b, + 0x57a4f122, 0xef189647, 0xfdad39a9, 0x45115ecc, + 0x764dee06, 0xcef18963, 0xdc44268d, 0x64f841e8, + 0xf92f7951, 0x41931e34, 0x5326b1da, 0xeb9ad6bf, + 0xb3f9c6e9, 0x0b45a18c, 0x19f00e62, 0xa14c6907, + 0x3c9b51be, 0x842736db, 0x96929935, 0x2e2efe50, + 0x2654b999, 0x9ee8defc, 0x8c5d7112, 0x34e11677, + 0xa9362ece, 0x118a49ab, 0x033fe645, 0xbb838120, + 0xe3e09176, 0x5b5cf613, 0x49e959fd, 0xf1553e98, + 0x6c820621, 0xd43e6144, 0xc68bceaa, 0x7e37a9cf, + 0xd67f4138, 0x6ec3265d, 0x7c7689b3, 0xc4caeed6, + 0x591dd66f, 0xe1a1b10a, 0xf3141ee4, 0x4ba87981, + 0x13cb69d7, 0xab770eb2, 0xb9c2a15c, 0x017ec639, + 0x9ca9fe80, 0x241599e5, 0x36a0360b, 0x8e1c516e, + 0x866616a7, 0x3eda71c2, 0x2c6fde2c, 0x94d3b949, + 0x090481f0, 0xb1b8e695, 0xa30d497b, 0x1bb12e1e, + 0x43d23e48, 0xfb6e592d, 0xe9dbf6c3, 0x516791a6, + 0xccb0a91f, 0x740cce7a, 0x66b96194, 0xde0506f1 +}; + +/* CRC32 */ + +static inline u32 +crc32_next (u32 crc, byte data) +{ + return (crc >> 8) ^ crc32_table[(crc & 0xff) ^ data]; +} + +/* + * Process 4 bytes in one go + */ +static inline u32 +crc32_next4 (u32 crc, u32 data) +{ + crc ^= data; + crc = crc32_table[(crc & 0xff) + 0x300] ^ + crc32_table[((crc >> 8) & 0xff) + 0x200] ^ + crc32_table[((crc >> 16) & 0xff) + 0x100] ^ 
+ crc32_table[(crc >> 24) & 0xff]; + return crc; +} + +static void +crc32_init (void *context, unsigned int flags) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + u32 hwf = _gcry_get_hw_features (); + +#ifdef USE_INTEL_PCLMUL + ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL); +#endif +#ifdef USE_ARM_PMULL + ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL); +#endif +#ifdef USE_PPC_VPMSUM + ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07); +#endif + + (void)flags; + (void)hwf; + + ctx->CRC = 0 ^ 0xffffffffL; +} + +static void +crc32_write (void *context, const void *inbuf_arg, size_t inlen) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + const byte *inbuf = inbuf_arg; + u32 crc; + +#ifdef USE_INTEL_PCLMUL + if (ctx->use_pclmul) + { + _gcry_crc32_intel_pclmul(&ctx->CRC, inbuf, inlen); + return; + } +#endif +#ifdef USE_ARM_PMULL + if (ctx->use_pmull) + { + _gcry_crc32_armv8_ce_pmull(&ctx->CRC, inbuf, inlen); + return; + } +#endif +#ifdef USE_PPC_VPMSUM + if (ctx->use_vpmsum) + { + _gcry_crc32_ppc8_vpmsum(&ctx->CRC, inbuf, inlen); + return; + } +#endif + + if (!inbuf || !inlen) + return; + + crc = ctx->CRC; + + while (inlen >= 16) + { + inlen -= 16; + crc = crc32_next4(crc, buf_get_le32(&inbuf[0])); + crc = crc32_next4(crc, buf_get_le32(&inbuf[4])); + crc = crc32_next4(crc, buf_get_le32(&inbuf[8])); + crc = crc32_next4(crc, buf_get_le32(&inbuf[12])); + inbuf += 16; + } + + while (inlen >= 4) + { + inlen -= 4; + crc = crc32_next4(crc, buf_get_le32(inbuf)); + inbuf += 4; + } + + while (inlen--) + { + crc = crc32_next(crc, *inbuf++); + } + + ctx->CRC = crc; +} + +static byte * +crc32_read (void *context) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + return ctx->buf; +} + +static void +crc32_final (void *context) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + ctx->CRC ^= 0xffffffffL; + buf_put_be32 (ctx->buf, ctx->CRC); +} + +/* CRC32 a'la RFC 1510 */ +/* CRC of the string "123456789" is 0x2dfd2d88 */ + +static void +crc32rfc1510_init (void *context, unsigned int flags) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + u32 hwf = _gcry_get_hw_features (); + +#ifdef USE_INTEL_PCLMUL + ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL); +#endif +#ifdef USE_ARM_PMULL + ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL); +#endif +#ifdef USE_PPC_VPMSUM + ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07); +#endif + + (void)flags; + (void)hwf; + + ctx->CRC = 0; +} + +static void +crc32rfc1510_final (void *context) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + buf_put_be32(ctx->buf, ctx->CRC); +} + +/* CRC24 a'la RFC 2440 */ +/* + * Code generated by universal_crc by Danjel McGougan + * + * CRC parameters used: + * bits: 24 + * poly: 0x864cfb + * init: 0xb704ce + * xor: 0x000000 + * reverse: false + * non-direct: false + * + * CRC of the string "123456789" is 0x21cf02 + */ + +static const u32 crc24_table[1024] = +{ + 0x00000000, 0x00fb4c86, 0x000dd58a, 0x00f6990c, + 0x00e1e693, 0x001aaa15, 0x00ec3319, 0x00177f9f, + 0x003981a1, 0x00c2cd27, 0x0034542b, 0x00cf18ad, + 0x00d86732, 0x00232bb4, 0x00d5b2b8, 0x002efe3e, + 0x00894ec5, 0x00720243, 0x00849b4f, 0x007fd7c9, + 0x0068a856, 0x0093e4d0, 0x00657ddc, 0x009e315a, + 0x00b0cf64, 0x004b83e2, 0x00bd1aee, 0x00465668, + 0x005129f7, 0x00aa6571, 0x005cfc7d, 0x00a7b0fb, + 0x00e9d10c, 0x00129d8a, 0x00e40486, 0x001f4800, + 0x0008379f, 0x00f37b19, 0x0005e215, 0x00feae93, + 0x00d050ad, 0x002b1c2b, 0x00dd8527, 0x0026c9a1, + 0x0031b63e, 0x00cafab8, 0x003c63b4, 0x00c72f32, + 0x00609fc9, 
0x009bd34f, 0x006d4a43, 0x009606c5, + 0x0081795a, 0x007a35dc, 0x008cacd0, 0x0077e056, + 0x00591e68, 0x00a252ee, 0x0054cbe2, 0x00af8764, + 0x00b8f8fb, 0x0043b47d, 0x00b52d71, 0x004e61f7, + 0x00d2a319, 0x0029ef9f, 0x00df7693, 0x00243a15, + 0x0033458a, 0x00c8090c, 0x003e9000, 0x00c5dc86, + 0x00eb22b8, 0x00106e3e, 0x00e6f732, 0x001dbbb4, + 0x000ac42b, 0x00f188ad, 0x000711a1, 0x00fc5d27, + 0x005beddc, 0x00a0a15a, 0x00563856, 0x00ad74d0, + 0x00ba0b4f, 0x004147c9, 0x00b7dec5, 0x004c9243, + 0x00626c7d, 0x009920fb, 0x006fb9f7, 0x0094f571, + 0x00838aee, 0x0078c668, 0x008e5f64, 0x007513e2, + 0x003b7215, 0x00c03e93, 0x0036a79f, 0x00cdeb19, + 0x00da9486, 0x0021d800, 0x00d7410c, 0x002c0d8a, + 0x0002f3b4, 0x00f9bf32, 0x000f263e, 0x00f46ab8, + 0x00e31527, 0x001859a1, 0x00eec0ad, 0x00158c2b, + 0x00b23cd0, 0x00497056, 0x00bfe95a, 0x0044a5dc, + 0x0053da43, 0x00a896c5, 0x005e0fc9, 0x00a5434f, + 0x008bbd71, 0x0070f1f7, 0x008668fb, 0x007d247d, + 0x006a5be2, 0x00911764, 0x00678e68, 0x009cc2ee, + 0x00a44733, 0x005f0bb5, 0x00a992b9, 0x0052de3f, + 0x0045a1a0, 0x00beed26, 0x0048742a, 0x00b338ac, + 0x009dc692, 0x00668a14, 0x00901318, 0x006b5f9e, + 0x007c2001, 0x00876c87, 0x0071f58b, 0x008ab90d, + 0x002d09f6, 0x00d64570, 0x0020dc7c, 0x00db90fa, + 0x00ccef65, 0x0037a3e3, 0x00c13aef, 0x003a7669, + 0x00148857, 0x00efc4d1, 0x00195ddd, 0x00e2115b, + 0x00f56ec4, 0x000e2242, 0x00f8bb4e, 0x0003f7c8, + 0x004d963f, 0x00b6dab9, 0x004043b5, 0x00bb0f33, + 0x00ac70ac, 0x00573c2a, 0x00a1a526, 0x005ae9a0, + 0x0074179e, 0x008f5b18, 0x0079c214, 0x00828e92, + 0x0095f10d, 0x006ebd8b, 0x00982487, 0x00636801, + 0x00c4d8fa, 0x003f947c, 0x00c90d70, 0x003241f6, + 0x00253e69, 0x00de72ef, 0x0028ebe3, 0x00d3a765, + 0x00fd595b, 0x000615dd, 0x00f08cd1, 0x000bc057, + 0x001cbfc8, 0x00e7f34e, 0x00116a42, 0x00ea26c4, + 0x0076e42a, 0x008da8ac, 0x007b31a0, 0x00807d26, + 0x009702b9, 0x006c4e3f, 0x009ad733, 0x00619bb5, + 0x004f658b, 0x00b4290d, 0x0042b001, 0x00b9fc87, + 0x00ae8318, 0x0055cf9e, 0x00a35692, 0x00581a14, + 0x00ffaaef, 0x0004e669, 0x00f27f65, 0x000933e3, + 0x001e4c7c, 0x00e500fa, 0x001399f6, 0x00e8d570, + 0x00c62b4e, 0x003d67c8, 0x00cbfec4, 0x0030b242, + 0x0027cddd, 0x00dc815b, 0x002a1857, 0x00d154d1, + 0x009f3526, 0x006479a0, 0x0092e0ac, 0x0069ac2a, + 0x007ed3b5, 0x00859f33, 0x0073063f, 0x00884ab9, + 0x00a6b487, 0x005df801, 0x00ab610d, 0x00502d8b, + 0x00475214, 0x00bc1e92, 0x004a879e, 0x00b1cb18, + 0x00167be3, 0x00ed3765, 0x001bae69, 0x00e0e2ef, + 0x00f79d70, 0x000cd1f6, 0x00fa48fa, 0x0001047c, + 0x002ffa42, 0x00d4b6c4, 0x00222fc8, 0x00d9634e, + 0x00ce1cd1, 0x00355057, 0x00c3c95b, 0x003885dd, + 0x00000000, 0x00488f66, 0x00901ecd, 0x00d891ab, + 0x00db711c, 0x0093fe7a, 0x004b6fd1, 0x0003e0b7, + 0x00b6e338, 0x00fe6c5e, 0x0026fdf5, 0x006e7293, + 0x006d9224, 0x00251d42, 0x00fd8ce9, 0x00b5038f, + 0x006cc771, 0x00244817, 0x00fcd9bc, 0x00b456da, + 0x00b7b66d, 0x00ff390b, 0x0027a8a0, 0x006f27c6, + 0x00da2449, 0x0092ab2f, 0x004a3a84, 0x0002b5e2, + 0x00015555, 0x0049da33, 0x00914b98, 0x00d9c4fe, + 0x00d88ee3, 0x00900185, 0x0048902e, 0x00001f48, + 0x0003ffff, 0x004b7099, 0x0093e132, 0x00db6e54, + 0x006e6ddb, 0x0026e2bd, 0x00fe7316, 0x00b6fc70, + 0x00b51cc7, 0x00fd93a1, 0x0025020a, 0x006d8d6c, + 0x00b44992, 0x00fcc6f4, 0x0024575f, 0x006cd839, + 0x006f388e, 0x0027b7e8, 0x00ff2643, 0x00b7a925, + 0x0002aaaa, 0x004a25cc, 0x0092b467, 0x00da3b01, + 0x00d9dbb6, 0x009154d0, 0x0049c57b, 0x00014a1d, + 0x004b5141, 0x0003de27, 0x00db4f8c, 0x0093c0ea, + 0x0090205d, 0x00d8af3b, 0x00003e90, 0x0048b1f6, + 0x00fdb279, 0x00b53d1f, 0x006dacb4, 0x002523d2, + 0x0026c365, 
0x006e4c03, 0x00b6dda8, 0x00fe52ce, + 0x00279630, 0x006f1956, 0x00b788fd, 0x00ff079b, + 0x00fce72c, 0x00b4684a, 0x006cf9e1, 0x00247687, + 0x00917508, 0x00d9fa6e, 0x00016bc5, 0x0049e4a3, + 0x004a0414, 0x00028b72, 0x00da1ad9, 0x009295bf, + 0x0093dfa2, 0x00db50c4, 0x0003c16f, 0x004b4e09, + 0x0048aebe, 0x000021d8, 0x00d8b073, 0x00903f15, + 0x00253c9a, 0x006db3fc, 0x00b52257, 0x00fdad31, + 0x00fe4d86, 0x00b6c2e0, 0x006e534b, 0x0026dc2d, + 0x00ff18d3, 0x00b797b5, 0x006f061e, 0x00278978, + 0x002469cf, 0x006ce6a9, 0x00b47702, 0x00fcf864, + 0x0049fbeb, 0x0001748d, 0x00d9e526, 0x00916a40, + 0x00928af7, 0x00da0591, 0x0002943a, 0x004a1b5c, + 0x0096a282, 0x00de2de4, 0x0006bc4f, 0x004e3329, + 0x004dd39e, 0x00055cf8, 0x00ddcd53, 0x00954235, + 0x002041ba, 0x0068cedc, 0x00b05f77, 0x00f8d011, + 0x00fb30a6, 0x00b3bfc0, 0x006b2e6b, 0x0023a10d, + 0x00fa65f3, 0x00b2ea95, 0x006a7b3e, 0x0022f458, + 0x002114ef, 0x00699b89, 0x00b10a22, 0x00f98544, + 0x004c86cb, 0x000409ad, 0x00dc9806, 0x00941760, + 0x0097f7d7, 0x00df78b1, 0x0007e91a, 0x004f667c, + 0x004e2c61, 0x0006a307, 0x00de32ac, 0x0096bdca, + 0x00955d7d, 0x00ddd21b, 0x000543b0, 0x004dccd6, + 0x00f8cf59, 0x00b0403f, 0x0068d194, 0x00205ef2, + 0x0023be45, 0x006b3123, 0x00b3a088, 0x00fb2fee, + 0x0022eb10, 0x006a6476, 0x00b2f5dd, 0x00fa7abb, + 0x00f99a0c, 0x00b1156a, 0x006984c1, 0x00210ba7, + 0x00940828, 0x00dc874e, 0x000416e5, 0x004c9983, + 0x004f7934, 0x0007f652, 0x00df67f9, 0x0097e89f, + 0x00ddf3c3, 0x00957ca5, 0x004ded0e, 0x00056268, + 0x000682df, 0x004e0db9, 0x00969c12, 0x00de1374, + 0x006b10fb, 0x00239f9d, 0x00fb0e36, 0x00b38150, + 0x00b061e7, 0x00f8ee81, 0x00207f2a, 0x0068f04c, + 0x00b134b2, 0x00f9bbd4, 0x00212a7f, 0x0069a519, + 0x006a45ae, 0x0022cac8, 0x00fa5b63, 0x00b2d405, + 0x0007d78a, 0x004f58ec, 0x0097c947, 0x00df4621, + 0x00dca696, 0x009429f0, 0x004cb85b, 0x0004373d, + 0x00057d20, 0x004df246, 0x009563ed, 0x00ddec8b, + 0x00de0c3c, 0x0096835a, 0x004e12f1, 0x00069d97, + 0x00b39e18, 0x00fb117e, 0x002380d5, 0x006b0fb3, + 0x0068ef04, 0x00206062, 0x00f8f1c9, 0x00b07eaf, + 0x0069ba51, 0x00213537, 0x00f9a49c, 0x00b12bfa, + 0x00b2cb4d, 0x00fa442b, 0x0022d580, 0x006a5ae6, + 0x00df5969, 0x0097d60f, 0x004f47a4, 0x0007c8c2, + 0x00042875, 0x004ca713, 0x009436b8, 0x00dcb9de, + 0x00000000, 0x00d70983, 0x00555f80, 0x00825603, + 0x0051f286, 0x0086fb05, 0x0004ad06, 0x00d3a485, + 0x0059a88b, 0x008ea108, 0x000cf70b, 0x00dbfe88, + 0x00085a0d, 0x00df538e, 0x005d058d, 0x008a0c0e, + 0x00491c91, 0x009e1512, 0x001c4311, 0x00cb4a92, + 0x0018ee17, 0x00cfe794, 0x004db197, 0x009ab814, + 0x0010b41a, 0x00c7bd99, 0x0045eb9a, 0x0092e219, + 0x0041469c, 0x00964f1f, 0x0014191c, 0x00c3109f, + 0x006974a4, 0x00be7d27, 0x003c2b24, 0x00eb22a7, + 0x00388622, 0x00ef8fa1, 0x006dd9a2, 0x00bad021, + 0x0030dc2f, 0x00e7d5ac, 0x006583af, 0x00b28a2c, + 0x00612ea9, 0x00b6272a, 0x00347129, 0x00e378aa, + 0x00206835, 0x00f761b6, 0x007537b5, 0x00a23e36, + 0x00719ab3, 0x00a69330, 0x0024c533, 0x00f3ccb0, + 0x0079c0be, 0x00aec93d, 0x002c9f3e, 0x00fb96bd, + 0x00283238, 0x00ff3bbb, 0x007d6db8, 0x00aa643b, + 0x0029a4ce, 0x00fead4d, 0x007cfb4e, 0x00abf2cd, + 0x00785648, 0x00af5fcb, 0x002d09c8, 0x00fa004b, + 0x00700c45, 0x00a705c6, 0x002553c5, 0x00f25a46, + 0x0021fec3, 0x00f6f740, 0x0074a143, 0x00a3a8c0, + 0x0060b85f, 0x00b7b1dc, 0x0035e7df, 0x00e2ee5c, + 0x00314ad9, 0x00e6435a, 0x00641559, 0x00b31cda, + 0x003910d4, 0x00ee1957, 0x006c4f54, 0x00bb46d7, + 0x0068e252, 0x00bfebd1, 0x003dbdd2, 0x00eab451, + 0x0040d06a, 0x0097d9e9, 0x00158fea, 0x00c28669, + 0x001122ec, 0x00c62b6f, 0x00447d6c, 0x009374ef, + 0x001978e1, 
0x00ce7162, 0x004c2761, 0x009b2ee2, + 0x00488a67, 0x009f83e4, 0x001dd5e7, 0x00cadc64, + 0x0009ccfb, 0x00dec578, 0x005c937b, 0x008b9af8, + 0x00583e7d, 0x008f37fe, 0x000d61fd, 0x00da687e, + 0x00506470, 0x00876df3, 0x00053bf0, 0x00d23273, + 0x000196f6, 0x00d69f75, 0x0054c976, 0x0083c0f5, + 0x00a9041b, 0x007e0d98, 0x00fc5b9b, 0x002b5218, + 0x00f8f69d, 0x002fff1e, 0x00ada91d, 0x007aa09e, + 0x00f0ac90, 0x0027a513, 0x00a5f310, 0x0072fa93, + 0x00a15e16, 0x00765795, 0x00f40196, 0x00230815, + 0x00e0188a, 0x00371109, 0x00b5470a, 0x00624e89, + 0x00b1ea0c, 0x0066e38f, 0x00e4b58c, 0x0033bc0f, + 0x00b9b001, 0x006eb982, 0x00ecef81, 0x003be602, + 0x00e84287, 0x003f4b04, 0x00bd1d07, 0x006a1484, + 0x00c070bf, 0x0017793c, 0x00952f3f, 0x004226bc, + 0x00918239, 0x00468bba, 0x00c4ddb9, 0x0013d43a, + 0x0099d834, 0x004ed1b7, 0x00cc87b4, 0x001b8e37, + 0x00c82ab2, 0x001f2331, 0x009d7532, 0x004a7cb1, + 0x00896c2e, 0x005e65ad, 0x00dc33ae, 0x000b3a2d, + 0x00d89ea8, 0x000f972b, 0x008dc128, 0x005ac8ab, + 0x00d0c4a5, 0x0007cd26, 0x00859b25, 0x005292a6, + 0x00813623, 0x00563fa0, 0x00d469a3, 0x00036020, + 0x0080a0d5, 0x0057a956, 0x00d5ff55, 0x0002f6d6, + 0x00d15253, 0x00065bd0, 0x00840dd3, 0x00530450, + 0x00d9085e, 0x000e01dd, 0x008c57de, 0x005b5e5d, + 0x0088fad8, 0x005ff35b, 0x00dda558, 0x000aacdb, + 0x00c9bc44, 0x001eb5c7, 0x009ce3c4, 0x004bea47, + 0x00984ec2, 0x004f4741, 0x00cd1142, 0x001a18c1, + 0x009014cf, 0x00471d4c, 0x00c54b4f, 0x001242cc, + 0x00c1e649, 0x0016efca, 0x0094b9c9, 0x0043b04a, + 0x00e9d471, 0x003eddf2, 0x00bc8bf1, 0x006b8272, + 0x00b826f7, 0x006f2f74, 0x00ed7977, 0x003a70f4, + 0x00b07cfa, 0x00677579, 0x00e5237a, 0x00322af9, + 0x00e18e7c, 0x003687ff, 0x00b4d1fc, 0x0063d87f, + 0x00a0c8e0, 0x0077c163, 0x00f59760, 0x00229ee3, + 0x00f13a66, 0x002633e5, 0x00a465e6, 0x00736c65, + 0x00f9606b, 0x002e69e8, 0x00ac3feb, 0x007b3668, + 0x00a892ed, 0x007f9b6e, 0x00fdcd6d, 0x002ac4ee, + 0x00000000, 0x00520936, 0x00a4126c, 0x00f61b5a, + 0x004825d8, 0x001a2cee, 0x00ec37b4, 0x00be3e82, + 0x006b0636, 0x00390f00, 0x00cf145a, 0x009d1d6c, + 0x002323ee, 0x00712ad8, 0x00873182, 0x00d538b4, + 0x00d60c6c, 0x0084055a, 0x00721e00, 0x00201736, + 0x009e29b4, 0x00cc2082, 0x003a3bd8, 0x006832ee, + 0x00bd0a5a, 0x00ef036c, 0x00191836, 0x004b1100, + 0x00f52f82, 0x00a726b4, 0x00513dee, 0x000334d8, + 0x00ac19d8, 0x00fe10ee, 0x00080bb4, 0x005a0282, + 0x00e43c00, 0x00b63536, 0x00402e6c, 0x0012275a, + 0x00c71fee, 0x009516d8, 0x00630d82, 0x003104b4, + 0x008f3a36, 0x00dd3300, 0x002b285a, 0x0079216c, + 0x007a15b4, 0x00281c82, 0x00de07d8, 0x008c0eee, + 0x0032306c, 0x0060395a, 0x00962200, 0x00c42b36, + 0x00111382, 0x00431ab4, 0x00b501ee, 0x00e708d8, + 0x0059365a, 0x000b3f6c, 0x00fd2436, 0x00af2d00, + 0x00a37f36, 0x00f17600, 0x00076d5a, 0x0055646c, + 0x00eb5aee, 0x00b953d8, 0x004f4882, 0x001d41b4, + 0x00c87900, 0x009a7036, 0x006c6b6c, 0x003e625a, + 0x00805cd8, 0x00d255ee, 0x00244eb4, 0x00764782, + 0x0075735a, 0x00277a6c, 0x00d16136, 0x00836800, + 0x003d5682, 0x006f5fb4, 0x009944ee, 0x00cb4dd8, + 0x001e756c, 0x004c7c5a, 0x00ba6700, 0x00e86e36, + 0x005650b4, 0x00045982, 0x00f242d8, 0x00a04bee, + 0x000f66ee, 0x005d6fd8, 0x00ab7482, 0x00f97db4, + 0x00474336, 0x00154a00, 0x00e3515a, 0x00b1586c, + 0x006460d8, 0x003669ee, 0x00c072b4, 0x00927b82, + 0x002c4500, 0x007e4c36, 0x0088576c, 0x00da5e5a, + 0x00d96a82, 0x008b63b4, 0x007d78ee, 0x002f71d8, + 0x00914f5a, 0x00c3466c, 0x00355d36, 0x00675400, + 0x00b26cb4, 0x00e06582, 0x00167ed8, 0x004477ee, + 0x00fa496c, 0x00a8405a, 0x005e5b00, 0x000c5236, + 0x0046ff6c, 0x0014f65a, 0x00e2ed00, 0x00b0e436, + 0x000edab4, 
0x005cd382, 0x00aac8d8, 0x00f8c1ee, + 0x002df95a, 0x007ff06c, 0x0089eb36, 0x00dbe200, + 0x0065dc82, 0x0037d5b4, 0x00c1ceee, 0x0093c7d8, + 0x0090f300, 0x00c2fa36, 0x0034e16c, 0x0066e85a, + 0x00d8d6d8, 0x008adfee, 0x007cc4b4, 0x002ecd82, + 0x00fbf536, 0x00a9fc00, 0x005fe75a, 0x000dee6c, + 0x00b3d0ee, 0x00e1d9d8, 0x0017c282, 0x0045cbb4, + 0x00eae6b4, 0x00b8ef82, 0x004ef4d8, 0x001cfdee, + 0x00a2c36c, 0x00f0ca5a, 0x0006d100, 0x0054d836, + 0x0081e082, 0x00d3e9b4, 0x0025f2ee, 0x0077fbd8, + 0x00c9c55a, 0x009bcc6c, 0x006dd736, 0x003fde00, + 0x003cead8, 0x006ee3ee, 0x0098f8b4, 0x00caf182, + 0x0074cf00, 0x0026c636, 0x00d0dd6c, 0x0082d45a, + 0x0057ecee, 0x0005e5d8, 0x00f3fe82, 0x00a1f7b4, + 0x001fc936, 0x004dc000, 0x00bbdb5a, 0x00e9d26c, + 0x00e5805a, 0x00b7896c, 0x00419236, 0x00139b00, + 0x00ada582, 0x00ffacb4, 0x0009b7ee, 0x005bbed8, + 0x008e866c, 0x00dc8f5a, 0x002a9400, 0x00789d36, + 0x00c6a3b4, 0x0094aa82, 0x0062b1d8, 0x0030b8ee, + 0x00338c36, 0x00618500, 0x00979e5a, 0x00c5976c, + 0x007ba9ee, 0x0029a0d8, 0x00dfbb82, 0x008db2b4, + 0x00588a00, 0x000a8336, 0x00fc986c, 0x00ae915a, + 0x0010afd8, 0x0042a6ee, 0x00b4bdb4, 0x00e6b482, + 0x00499982, 0x001b90b4, 0x00ed8bee, 0x00bf82d8, + 0x0001bc5a, 0x0053b56c, 0x00a5ae36, 0x00f7a700, + 0x00229fb4, 0x00709682, 0x00868dd8, 0x00d484ee, + 0x006aba6c, 0x0038b35a, 0x00cea800, 0x009ca136, + 0x009f95ee, 0x00cd9cd8, 0x003b8782, 0x00698eb4, + 0x00d7b036, 0x0085b900, 0x0073a25a, 0x0021ab6c, + 0x00f493d8, 0x00a69aee, 0x005081b4, 0x00028882, + 0x00bcb600, 0x00eebf36, 0x0018a46c, 0x004aad5a +}; + +static inline +u32 crc24_init (void) +{ + /* Transformed to 32-bit CRC by multiplied by x⁸ and then byte swapped. */ + return 0xce04b7; /* _gcry_bswap(0xb704ce << 8) */ +} + +static inline +u32 crc24_next (u32 crc, byte data) +{ + return (crc >> 8) ^ crc24_table[(crc & 0xff) ^ data]; +} + +/* + * Process 4 bytes in one go + */ +static inline +u32 crc24_next4 (u32 crc, u32 data) +{ + crc ^= data; + crc = crc24_table[(crc & 0xff) + 0x300] ^ + crc24_table[((crc >> 8) & 0xff) + 0x200] ^ + crc24_table[((crc >> 16) & 0xff) + 0x100] ^ + crc24_table[(data >> 24) & 0xff]; + return crc; +} + +static inline +u32 crc24_final (u32 crc) +{ + return crc & 0xffffff; +} + +static void +crc24rfc2440_init (void *context, unsigned int flags) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + u32 hwf = _gcry_get_hw_features (); + +#ifdef USE_INTEL_PCLMUL + ctx->use_pclmul = (hwf & HWF_INTEL_SSE4_1) && (hwf & HWF_INTEL_PCLMUL); +#endif +#ifdef USE_ARM_PMULL + ctx->use_pmull = (hwf & HWF_ARM_NEON) && (hwf & HWF_ARM_PMULL); +#endif +#ifdef USE_PPC_VPMSUM + ctx->use_vpmsum = !!(hwf & HWF_PPC_ARCH_2_07); +#endif + + (void)hwf; + (void)flags; + + ctx->CRC = crc24_init(); +} + +static void +crc24rfc2440_write (void *context, const void *inbuf_arg, size_t inlen) +{ + const unsigned char *inbuf = inbuf_arg; + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + u32 crc; + +#ifdef USE_INTEL_PCLMUL + if (ctx->use_pclmul) + { + _gcry_crc24rfc2440_intel_pclmul(&ctx->CRC, inbuf, inlen); + return; + } +#endif +#ifdef USE_ARM_PMULL + if (ctx->use_pmull) + { + _gcry_crc24rfc2440_armv8_ce_pmull(&ctx->CRC, inbuf, inlen); + return; + } +#endif +#ifdef USE_PPC_VPMSUM + if (ctx->use_vpmsum) + { + _gcry_crc24rfc2440_ppc8_vpmsum(&ctx->CRC, inbuf, inlen); + return; + } +#endif + + if (!inbuf || !inlen) + return; + + crc = ctx->CRC; + + while (inlen >= 16) + { + inlen -= 16; + crc = crc24_next4(crc, buf_get_le32(&inbuf[0])); + crc = crc24_next4(crc, buf_get_le32(&inbuf[4])); + crc = crc24_next4(crc, buf_get_le32(&inbuf[8])); + crc 
= crc24_next4(crc, buf_get_le32(&inbuf[12])); + inbuf += 16; + } + + while (inlen >= 4) + { + inlen -= 4; + crc = crc24_next4(crc, buf_get_le32(inbuf)); + inbuf += 4; + } + + while (inlen--) + { + crc = crc24_next(crc, *inbuf++); + } + + ctx->CRC = crc; +} + +static void +crc24rfc2440_final (void *context) +{ + CRC_CONTEXT *ctx = (CRC_CONTEXT *) context; + ctx->CRC = crc24_final(ctx->CRC); + buf_put_le32 (ctx->buf, ctx->CRC); +} + +/* We allow the CRC algorithms even in FIPS mode because they are + actually no cryptographic primitives. */ + +gcry_md_spec_t _gcry_digest_spec_crc32 = + { + GCRY_MD_CRC32, {0, 1}, + "CRC32", NULL, 0, NULL, 4, + crc32_init, crc32_write, crc32_final, crc32_read, NULL, + NULL, NULL, + sizeof (CRC_CONTEXT) + }; + +gcry_md_spec_t _gcry_digest_spec_crc32_rfc1510 = + { + GCRY_MD_CRC32_RFC1510, {0, 1}, + "CRC32RFC1510", NULL, 0, NULL, 4, + crc32rfc1510_init, crc32_write, crc32rfc1510_final, crc32_read, NULL, + NULL, NULL, + sizeof (CRC_CONTEXT) + }; + +gcry_md_spec_t _gcry_digest_spec_crc24_rfc2440 = + { + GCRY_MD_CRC24_RFC2440, {0, 1}, + "CRC24RFC2440", NULL, 0, NULL, 3, + crc24rfc2440_init, crc24rfc2440_write, crc24rfc2440_final, crc32_read, NULL, + NULL, NULL, + sizeof (CRC_CONTEXT) + }; diff --git a/comm/third_party/libgcrypt/cipher/des-amd64.S b/comm/third_party/libgcrypt/cipher/des-amd64.S new file mode 100644 index 0000000000..a211dac38a --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/des-amd64.S @@ -0,0 +1,1111 @@ +/* des-amd64.S - AMD64 assembly implementation of 3DES cipher + * + * Copyright (C) 2014 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
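These three gcry_md_spec_t registrations are what expose the CRC code through libgcrypt's generic message-digest interface; CRC24RFC2440 is the only 3-byte digest, and all three share crc32_read because each variant's final() deposits its output bytes in ctx->buf. As an illustration (not part of this patch), the sketch below pairs a bit-at-a-time CRC-32, the slow equivalent of the table-driven crc32_write/crc32_final path with the usual check value crc32("123456789") = 0xCBF43926, with a gcry_md computation of the RFC 2440 CRC-24, which per the comment above should print 21cf02. The RFC 1510 CRC-32 variant would differ from crc32_ref only by starting at 0 and skipping the final XOR.

    /* Illustrative only; assumes a system libgcrypt (cc demo.c -lgcrypt). */
    #include <stdio.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <gcrypt.h>

    /* Bitwise reflected CRC-32: polynomial 0xEDB88320, init and final
       XOR 0xFFFFFFFF, matching the table-driven code above. */
    static uint32_t
    crc32_ref (const void *buf, size_t len)
    {
      const unsigned char *p = buf;
      uint32_t crc = 0xffffffff;
      int bit;

      while (len--)
        {
          crc ^= *p++;
          for (bit = 0; bit < 8; bit++)
            crc = (crc >> 1) ^ ((crc & 1) ? 0xedb88320 : 0);
        }
      return crc ^ 0xffffffff;
    }

    int
    main (void)
    {
      gcry_md_hd_t hd;
      unsigned char *d;

      gcry_check_version (NULL);
      printf ("crc32(\"123456789\") = %08x\n",
              (unsigned int)crc32_ref ("123456789", 9));

      /* CRC-24 as used for OpenPGP ASCII armor checksums. */
      if (gcry_md_open (&hd, GCRY_MD_CRC24_RFC2440, 0))
        return 1;
      gcry_md_write (hd, "123456789", 9);
      d = gcry_md_read (hd, 0);
      printf ("crc24(\"123456789\") = %02x%02x%02x\n", d[0], d[1], d[2]);
      gcry_md_close (hd);
      return 0;
    }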
+ */ + +#ifdef __x86_64 +#include +#if defined(USE_DES) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) + +#include "asm-common-amd64.h" + +.text + +#define s1 0 +#define s2 ((s1) + (64*8)) +#define s3 ((s2) + (64*8)) +#define s4 ((s3) + (64*8)) +#define s5 ((s4) + (64*8)) +#define s6 ((s5) + (64*8)) +#define s7 ((s6) + (64*8)) +#define s8 ((s7) + (64*8)) + +/* register macros */ +#define CTX %rdi +#define SBOXES %rbp + +#define RL0 %r8 +#define RL1 %r9 +#define RL2 %r10 + +#define RL0d %r8d +#define RL1d %r9d +#define RL2d %r10d + +#define RR0 %r11 +#define RR1 %r12 +#define RR2 %r13 + +#define RR0d %r11d +#define RR1d %r12d +#define RR2d %r13d + +#define RW0 %rax +#define RW1 %rbx +#define RW2 %rcx + +#define RW0d %eax +#define RW1d %ebx +#define RW2d %ecx + +#define RW0bl %al +#define RW1bl %bl +#define RW2bl %cl + +#define RW0bh %ah +#define RW1bh %bh +#define RW2bh %ch + +#define RT0 %r15 +#define RT1 %rsi +#define RT2 %r14 +#define RT3 %rdx + +#define RT0d %r15d +#define RT1d %esi +#define RT2d %r14d +#define RT3d %edx + +/*********************************************************************** + * 1-way 3DES + ***********************************************************************/ +#define do_permutation(a, b, offset, mask) \ + movl a, RT0d; \ + shrl $(offset), RT0d; \ + xorl b, RT0d; \ + andl $(mask), RT0d; \ + xorl RT0d, b; \ + shll $(offset), RT0d; \ + xorl RT0d, a; + +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define initial_permutation(left, right) \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + movl left##d, RW0d; \ + roll $1, right##d; \ + xorl right##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##d; \ + xorl RW0d, right##d; \ + roll $1, left##d; \ + expand_to_64bits(right, RT3); \ + expand_to_64bits(left, RT3); + +#define final_permutation(left, right) \ + compress_to_64bits(right); \ + compress_to_64bits(left); \ + movl right##d, RW0d; \ + rorl $1, left##d; \ + xorl left##d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##d; \ + xorl RW0d, left##d; \ + rorl $1, right##d; \ + do_permutation(right##d, left##d, 8, 0x00ff00ff); \ + do_permutation(right##d, left##d, 2, 0x33333333); \ + do_permutation(left##d, right##d, 16, 0x0000ffff); \ + do_permutation(left##d, right##d, 4, 0x0f0f0f0f); + +#define round1(n, from, to, load_next_key) \ + xorq from, RW0; \ + \ + movzbl RW0bl, RT0d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + shrq $16, RW0; \ + movq s8(SBOXES, RT0, 8), RT0; \ + xorq s6(SBOXES, RT1, 8), to; \ + movzbl RW0bl, RL1d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s4(SBOXES, RT2, 8), RT0; \ + xorq s2(SBOXES, RT3, 8), to; \ + movzbl RW0bl, RT2d; \ + movzbl RW0bh, RT3d; \ + xorq s7(SBOXES, RL1, 8), RT0; \ + xorq s5(SBOXES, RT1, 8), to; \ + xorq s3(SBOXES, RT2, 8), RT0; \ + load_next_key(n, RW0); \ + xorq RT0, to; \ + xorq s1(SBOXES, RT3, 8), to; \ + +#define load_next_key(n, RWx) \ + movq (((n) + 1) * 8)(CTX), RWx; + +#define dummy2(a, b) /*_*/ + +#define read_block(io, left, right) \ + movl (io), left##d; \ + movl 4(io), 
right##d; \ + bswapl left##d; \ + bswapl right##d; + +#define write_block(io, left, right) \ + bswapl left##d; \ + bswapl right##d; \ + movl left##d, (io); \ + movl right##d, 4(io); + +.align 8 +.globl _gcry_3des_amd64_crypt_block +ELF(.type _gcry_3des_amd64_crypt_block,@function;) + +_gcry_3des_amd64_crypt_block: + /* input: + * %rdi: round keys, CTX + * %rsi: dst + * %rdx: src + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + pushq %r14; + CFI_PUSH(%r14); + pushq %r15; + CFI_PUSH(%r15); + pushq %rsi; /*dst*/ + CFI_PUSH(%rsi); + + leaq .L_s1 rRIP, SBOXES; + + read_block(%rdx, RL0, RR0); + initial_permutation(RL0, RR0); + + movq (CTX), RW0; + + round1(0, RR0, RL0, load_next_key); + round1(1, RL0, RR0, load_next_key); + round1(2, RR0, RL0, load_next_key); + round1(3, RL0, RR0, load_next_key); + round1(4, RR0, RL0, load_next_key); + round1(5, RL0, RR0, load_next_key); + round1(6, RR0, RL0, load_next_key); + round1(7, RL0, RR0, load_next_key); + round1(8, RR0, RL0, load_next_key); + round1(9, RL0, RR0, load_next_key); + round1(10, RR0, RL0, load_next_key); + round1(11, RL0, RR0, load_next_key); + round1(12, RR0, RL0, load_next_key); + round1(13, RL0, RR0, load_next_key); + round1(14, RR0, RL0, load_next_key); + round1(15, RL0, RR0, load_next_key); + + round1(16+0, RL0, RR0, load_next_key); + round1(16+1, RR0, RL0, load_next_key); + round1(16+2, RL0, RR0, load_next_key); + round1(16+3, RR0, RL0, load_next_key); + round1(16+4, RL0, RR0, load_next_key); + round1(16+5, RR0, RL0, load_next_key); + round1(16+6, RL0, RR0, load_next_key); + round1(16+7, RR0, RL0, load_next_key); + round1(16+8, RL0, RR0, load_next_key); + round1(16+9, RR0, RL0, load_next_key); + round1(16+10, RL0, RR0, load_next_key); + round1(16+11, RR0, RL0, load_next_key); + round1(16+12, RL0, RR0, load_next_key); + round1(16+13, RR0, RL0, load_next_key); + round1(16+14, RL0, RR0, load_next_key); + round1(16+15, RR0, RL0, load_next_key); + + round1(32+0, RR0, RL0, load_next_key); + round1(32+1, RL0, RR0, load_next_key); + round1(32+2, RR0, RL0, load_next_key); + round1(32+3, RL0, RR0, load_next_key); + round1(32+4, RR0, RL0, load_next_key); + round1(32+5, RL0, RR0, load_next_key); + round1(32+6, RR0, RL0, load_next_key); + round1(32+7, RL0, RR0, load_next_key); + round1(32+8, RR0, RL0, load_next_key); + round1(32+9, RL0, RR0, load_next_key); + round1(32+10, RR0, RL0, load_next_key); + round1(32+11, RL0, RR0, load_next_key); + round1(32+12, RR0, RL0, load_next_key); + round1(32+13, RL0, RR0, load_next_key); + round1(32+14, RR0, RL0, load_next_key); + round1(32+15, RL0, RR0, dummy2); + + popq RW2; /*dst*/ + CFI_POP_TMP_REG(); + final_permutation(RR0, RL0); + write_block(RW2, RR0, RL0); + + popq %r15; + CFI_POP(%r15); + popq %r14; + CFI_POP(%r14); + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_3des_amd64_crypt_block,.-_gcry_3des_amd64_crypt_block;) + +/*********************************************************************** + * 3-way 3DES + ***********************************************************************/ +#define expand_to_64bits(val, mask) \ + movl val##d, RT0d; \ + rorl $4, RT0d; \ + shlq $32, RT0; \ + orq RT0, val; \ + andq mask, val; + +#define compress_to_64bits(val) \ + movq val, RT0; \ + shrq $32, RT0; \ + roll $4, RT0d; \ + orl RT0d, val##d; + +#define 
initial_permutation3(left, right) \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + \ + movabs $0x3f3f3f3f3f3f3f3f, RT3; \ + \ + movl left##0d, RW0d; \ + roll $1, right##0d; \ + xorl right##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, left##0d; \ + xorl RW0d, right##0d; \ + roll $1, left##0d; \ + expand_to_64bits(right##0, RT3); \ + expand_to_64bits(left##0, RT3); \ + movl left##1d, RW1d; \ + roll $1, right##1d; \ + xorl right##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, left##1d; \ + xorl RW1d, right##1d; \ + roll $1, left##1d; \ + expand_to_64bits(right##1, RT3); \ + expand_to_64bits(left##1, RT3); \ + movl left##2d, RW2d; \ + roll $1, right##2d; \ + xorl right##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, left##2d; \ + xorl RW2d, right##2d; \ + roll $1, left##2d; \ + expand_to_64bits(right##2, RT3); \ + expand_to_64bits(left##2, RT3); + +#define final_permutation3(left, right) \ + compress_to_64bits(right##0); \ + compress_to_64bits(left##0); \ + movl right##0d, RW0d; \ + rorl $1, left##0d; \ + xorl left##0d, RW0d; \ + andl $0xaaaaaaaa, RW0d; \ + xorl RW0d, right##0d; \ + xorl RW0d, left##0d; \ + rorl $1, right##0d; \ + compress_to_64bits(right##1); \ + compress_to_64bits(left##1); \ + movl right##1d, RW1d; \ + rorl $1, left##1d; \ + xorl left##1d, RW1d; \ + andl $0xaaaaaaaa, RW1d; \ + xorl RW1d, right##1d; \ + xorl RW1d, left##1d; \ + rorl $1, right##1d; \ + compress_to_64bits(right##2); \ + compress_to_64bits(left##2); \ + movl right##2d, RW2d; \ + rorl $1, left##2d; \ + xorl left##2d, RW2d; \ + andl $0xaaaaaaaa, RW2d; \ + xorl RW2d, right##2d; \ + xorl RW2d, left##2d; \ + rorl $1, right##2d; \ + \ + do_permutation(right##0d, left##0d, 8, 0x00ff00ff); \ + do_permutation(right##0d, left##0d, 2, 0x33333333); \ + do_permutation(right##1d, left##1d, 8, 0x00ff00ff); \ + do_permutation(right##1d, left##1d, 2, 0x33333333); \ + do_permutation(right##2d, left##2d, 8, 0x00ff00ff); \ + do_permutation(right##2d, left##2d, 2, 0x33333333); \ + \ + do_permutation(left##0d, right##0d, 16, 0x0000ffff); \ + do_permutation(left##0d, right##0d, 4, 0x0f0f0f0f); \ + do_permutation(left##1d, right##1d, 16, 0x0000ffff); \ + do_permutation(left##1d, right##1d, 4, 0x0f0f0f0f); \ + do_permutation(left##2d, right##2d, 16, 0x0000ffff); \ + do_permutation(left##2d, right##2d, 4, 0x0f0f0f0f); + +#define round3(n, from, to, load_next_key, do_movq) \ + xorq from##0, RW0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s8(SBOXES, RT3, 8), to##0; \ + xorq s6(SBOXES, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrq $16, RW0; \ + xorq s4(SBOXES, RT3, 8), to##0; \ + xorq s2(SBOXES, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + shrl $16, RW0d; \ + xorq s7(SBOXES, RT3, 8), to##0; \ + xorq s5(SBOXES, RT1, 8), to##0; \ + movzbl RW0bl, RT3d; \ + movzbl RW0bh, RT1d; \ + load_next_key(n, RW0); \ + xorq 
s3(SBOXES, RT3, 8), to##0; \ + xorq s1(SBOXES, RT1, 8), to##0; \ + xorq from##1, RW1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s8(SBOXES, RT3, 8), to##1; \ + xorq s6(SBOXES, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrq $16, RW1; \ + xorq s4(SBOXES, RT3, 8), to##1; \ + xorq s2(SBOXES, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + shrl $16, RW1d; \ + xorq s7(SBOXES, RT3, 8), to##1; \ + xorq s5(SBOXES, RT1, 8), to##1; \ + movzbl RW1bl, RT3d; \ + movzbl RW1bh, RT1d; \ + do_movq(RW0, RW1); \ + xorq s3(SBOXES, RT3, 8), to##1; \ + xorq s1(SBOXES, RT1, 8), to##1; \ + xorq from##2, RW2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s8(SBOXES, RT3, 8), to##2; \ + xorq s6(SBOXES, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrq $16, RW2; \ + xorq s4(SBOXES, RT3, 8), to##2; \ + xorq s2(SBOXES, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + shrl $16, RW2d; \ + xorq s7(SBOXES, RT3, 8), to##2; \ + xorq s5(SBOXES, RT1, 8), to##2; \ + movzbl RW2bl, RT3d; \ + movzbl RW2bh, RT1d; \ + do_movq(RW0, RW2); \ + xorq s3(SBOXES, RT3, 8), to##2; \ + xorq s1(SBOXES, RT1, 8), to##2; + +#define __movq(src, dst) \ + movq src, dst; + +#define read_block(io, left, right) \ + movl (io), left##d; \ + movl 4(io), right##d; \ + bswapl left##d; \ + bswapl right##d; + +#define write_block(io, left, right) \ + bswapl left##d; \ + bswapl right##d; \ + movl left##d, (io); \ + movl right##d, 4(io); + +.align 8 +ELF(.type _gcry_3des_amd64_crypt_blk3,@function;) +_gcry_3des_amd64_crypt_blk3: + /* input: + * %rdi: round keys, CTX + * RL0d, RR0d, RL1d, RR1d, RL2d, RR2d: 3 input blocks + * RR0d, RL0d, RR1d, RL1d, RR2d, RL2d: 3 output blocks + */ + CFI_STARTPROC(); + + leaq .L_s1 rRIP, SBOXES; + + initial_permutation3(RL, RR); + + movq 0(CTX), RW0; + movq RW0, RW1; + movq RW0, RW2; + + round3(0, RR, RL, load_next_key, __movq); + round3(1, RL, RR, load_next_key, __movq); + round3(2, RR, RL, load_next_key, __movq); + round3(3, RL, RR, load_next_key, __movq); + round3(4, RR, RL, load_next_key, __movq); + round3(5, RL, RR, load_next_key, __movq); + round3(6, RR, RL, load_next_key, __movq); + round3(7, RL, RR, load_next_key, __movq); + round3(8, RR, RL, load_next_key, __movq); + round3(9, RL, RR, load_next_key, __movq); + round3(10, RR, RL, load_next_key, __movq); + round3(11, RL, RR, load_next_key, __movq); + round3(12, RR, RL, load_next_key, __movq); + round3(13, RL, RR, load_next_key, __movq); + round3(14, RR, RL, load_next_key, __movq); + round3(15, RL, RR, load_next_key, __movq); + + round3(16+0, RL, RR, load_next_key, __movq); + round3(16+1, RR, RL, load_next_key, __movq); + round3(16+2, RL, RR, load_next_key, __movq); + round3(16+3, RR, RL, load_next_key, __movq); + round3(16+4, RL, RR, load_next_key, __movq); + round3(16+5, RR, RL, load_next_key, __movq); + round3(16+6, RL, RR, load_next_key, __movq); + round3(16+7, RR, RL, load_next_key, __movq); + round3(16+8, RL, RR, load_next_key, __movq); + round3(16+9, RR, RL, load_next_key, __movq); + round3(16+10, RL, RR, load_next_key, __movq); + round3(16+11, RR, RL, load_next_key, __movq); + round3(16+12, RL, RR, load_next_key, __movq); + round3(16+13, RR, RL, load_next_key, __movq); + round3(16+14, RL, RR, load_next_key, __movq); + round3(16+15, RR, RL, load_next_key, __movq); + + round3(32+0, RR, RL, load_next_key, __movq); + round3(32+1, RL, RR, load_next_key, __movq); + round3(32+2, RR, RL, load_next_key, __movq); + round3(32+3, 
RL, RR, load_next_key, __movq); + round3(32+4, RR, RL, load_next_key, __movq); + round3(32+5, RL, RR, load_next_key, __movq); + round3(32+6, RR, RL, load_next_key, __movq); + round3(32+7, RL, RR, load_next_key, __movq); + round3(32+8, RR, RL, load_next_key, __movq); + round3(32+9, RL, RR, load_next_key, __movq); + round3(32+10, RR, RL, load_next_key, __movq); + round3(32+11, RL, RR, load_next_key, __movq); + round3(32+12, RR, RL, load_next_key, __movq); + round3(32+13, RL, RR, load_next_key, __movq); + round3(32+14, RR, RL, load_next_key, __movq); + round3(32+15, RL, RR, dummy2, dummy2); + + final_permutation3(RR, RL); + + ret; + CFI_ENDPROC(); +ELF(.size _gcry_3des_amd64_crypt_blk3,.-_gcry_3des_amd64_crypt_blk3;) + +.align 8 +.globl _gcry_3des_amd64_cbc_dec +ELF(.type _gcry_3des_amd64_cbc_dec,@function;) +_gcry_3des_amd64_cbc_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: iv (64bit) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + pushq %r14; + CFI_PUSH(%r14); + pushq %r15; + CFI_PUSH(%r15); + + pushq %rsi; /*dst*/ + CFI_PUSH(%rsi); + pushq %rdx; /*src*/ + CFI_PUSH(%rdx); + pushq %rcx; /*iv*/ + CFI_PUSH(%rcx); + + /* load input */ + movl 0 * 4(%rdx), RL0d; + movl 1 * 4(%rdx), RR0d; + movl 2 * 4(%rdx), RL1d; + movl 3 * 4(%rdx), RR1d; + movl 4 * 4(%rdx), RL2d; + movl 5 * 4(%rdx), RR2d; + + bswapl RL0d; + bswapl RR0d; + bswapl RL1d; + bswapl RR1d; + bswapl RL2d; + bswapl RR2d; + + call _gcry_3des_amd64_crypt_blk3; + + popq %rcx; /*iv*/ + CFI_POP_TMP_REG(); + popq %rdx; /*src*/ + CFI_POP_TMP_REG(); + popq %rsi; /*dst*/ + CFI_POP_TMP_REG(); + + bswapl RR0d; + bswapl RL0d; + bswapl RR1d; + bswapl RL1d; + bswapl RR2d; + bswapl RL2d; + + movq 2 * 8(%rdx), RT0; + xorl 0 * 4(%rcx), RR0d; + xorl 1 * 4(%rcx), RL0d; + xorl 0 * 4(%rdx), RR1d; + xorl 1 * 4(%rdx), RL1d; + xorl 2 * 4(%rdx), RR2d; + xorl 3 * 4(%rdx), RL2d; + movq RT0, (%rcx); /* store new IV */ + + movl RR0d, 0 * 4(%rsi); + movl RL0d, 1 * 4(%rsi); + movl RR1d, 2 * 4(%rsi); + movl RL1d, 3 * 4(%rsi); + movl RR2d, 4 * 4(%rsi); + movl RL2d, 5 * 4(%rsi); + + popq %r15; + CFI_POP(%r15); + popq %r14; + CFI_POP(%r14); + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;) + +.align 8 +.globl _gcry_3des_amd64_ctr_enc +ELF(.type _gcry_3des_amd64_ctr_enc,@function;) +_gcry_3des_amd64_ctr_enc: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: iv (64bit) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + pushq %r14; + CFI_PUSH(%r14); + pushq %r15; + CFI_PUSH(%r15); + + pushq %rsi; /*dst*/ + CFI_PUSH(%rsi); + pushq %rdx; /*src*/ + CFI_PUSH(%rdx); + movq %rcx, RW2; + + /* load IV and byteswap */ + movq (RW2), RT0; + bswapq RT0; + movq RT0, RR0; + + /* construct IVs */ + leaq 1(RT0), RR1; + leaq 2(RT0), RR2; + leaq 3(RT0), RT0; + movq RR0, RL0; + movq RR1, RL1; + movq RR2, RL2; + bswapq RT0; + shrq $32, RL0; + shrq $32, RL1; + shrq $32, RL2; + + /* store new IV */ + movq RT0, (RW2); + + call _gcry_3des_amd64_crypt_blk3; + + popq %rdx; /*src*/ + CFI_POP_TMP_REG(); + popq %rsi; /*dst*/ + CFI_POP_TMP_REG(); + + bswapl RR0d; + bswapl 
RL0d; + bswapl RR1d; + bswapl RL1d; + bswapl RR2d; + bswapl RL2d; + + xorl 0 * 4(%rdx), RR0d; + xorl 1 * 4(%rdx), RL0d; + xorl 2 * 4(%rdx), RR1d; + xorl 3 * 4(%rdx), RL1d; + xorl 4 * 4(%rdx), RR2d; + xorl 5 * 4(%rdx), RL2d; + + movl RR0d, 0 * 4(%rsi); + movl RL0d, 1 * 4(%rsi); + movl RR1d, 2 * 4(%rsi); + movl RL1d, 3 * 4(%rsi); + movl RR2d, 4 * 4(%rsi); + movl RL2d, 5 * 4(%rsi); + + popq %r15; + CFI_POP(%r15); + popq %r14; + CFI_POP(%r14); + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_3des_amd64_cbc_dec,.-_gcry_3des_amd64_cbc_dec;) + +.align 8 +.globl _gcry_3des_amd64_cfb_dec +ELF(.type _gcry_3des_amd64_cfb_dec,@function;) +_gcry_3des_amd64_cfb_dec: + /* input: + * %rdi: ctx, CTX + * %rsi: dst (3 blocks) + * %rdx: src (3 blocks) + * %rcx: iv (64bit) + */ + CFI_STARTPROC(); + ENTER_SYSV_FUNC_PARAMS_0_4 + + pushq %rbp; + CFI_PUSH(%rbp); + pushq %rbx; + CFI_PUSH(%rbx); + pushq %r12; + CFI_PUSH(%r12); + pushq %r13; + CFI_PUSH(%r13); + pushq %r14; + CFI_PUSH(%r14); + pushq %r15; + CFI_PUSH(%r15); + + pushq %rsi; /*dst*/ + CFI_PUSH(%rsi); + pushq %rdx; /*src*/ + CFI_PUSH(%rdx); + movq %rcx, RW2; + + /* Load input */ + movl 0 * 4(RW2), RL0d; + movl 1 * 4(RW2), RR0d; + movl 0 * 4(%rdx), RL1d; + movl 1 * 4(%rdx), RR1d; + movl 2 * 4(%rdx), RL2d; + movl 3 * 4(%rdx), RR2d; + + bswapl RL0d; + bswapl RR0d; + bswapl RL1d; + bswapl RR1d; + bswapl RL2d; + bswapl RR2d; + + /* Update IV */ + movq 4 * 4(%rdx), RW0; + movq RW0, (RW2); + + call _gcry_3des_amd64_crypt_blk3; + + popq %rdx; /*src*/ + CFI_POP_TMP_REG(); + popq %rsi; /*dst*/ + CFI_POP_TMP_REG(); + + bswapl RR0d; + bswapl RL0d; + bswapl RR1d; + bswapl RL1d; + bswapl RR2d; + bswapl RL2d; + + xorl 0 * 4(%rdx), RR0d; + xorl 1 * 4(%rdx), RL0d; + xorl 2 * 4(%rdx), RR1d; + xorl 3 * 4(%rdx), RL1d; + xorl 4 * 4(%rdx), RR2d; + xorl 5 * 4(%rdx), RL2d; + + movl RR0d, 0 * 4(%rsi); + movl RL0d, 1 * 4(%rsi); + movl RR1d, 2 * 4(%rsi); + movl RL1d, 3 * 4(%rsi); + movl RR2d, 4 * 4(%rsi); + movl RL2d, 5 * 4(%rsi); + + popq %r15; + CFI_POP(%r15); + popq %r14; + CFI_POP(%r14); + popq %r13; + CFI_POP(%r13); + popq %r12; + CFI_POP(%r12); + popq %rbx; + CFI_POP(%rbx); + popq %rbp; + CFI_POP(%rbp); + + EXIT_SYSV_FUNC + ret; + CFI_ENDPROC(); +ELF(.size _gcry_3des_amd64_cfb_dec,.-_gcry_3des_amd64_cfb_dec;) + +.align 16 +.L_s1: + .quad 0x0010100001010400, 0x0000000000000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0010100001010004, 0x0000100000010404 + .quad 0x0000000000000004, 0x0000100000010000 + .quad 0x0000000000000400, 0x0010100001010400 + .quad 0x0010100001010404, 0x0000000000000400 + .quad 0x0010000001000404, 0x0010100001010004 + .quad 0x0010000001000000, 0x0000000000000004 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000100000010400 + .quad 0x0000100000010400, 0x0010100001010000 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0000100000010004, 0x0010000001000004 + .quad 0x0010000001000004, 0x0000100000010004 + .quad 0x0000000000000000, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010000001000000 + .quad 0x0000100000010000, 0x0010100001010404 + .quad 0x0000000000000004, 0x0010100001010000 + .quad 0x0010100001010400, 0x0010000001000000 + .quad 0x0010000001000000, 0x0000000000000400 + .quad 0x0010100001010004, 0x0000100000010000 + .quad 0x0000100000010400, 0x0010000001000004 + .quad 0x0000000000000400, 0x0000000000000004 + .quad 0x0010000001000404, 0x0000100000010404 + .quad 
0x0010100001010404, 0x0000100000010004 + .quad 0x0010100001010000, 0x0010000001000404 + .quad 0x0010000001000004, 0x0000000000000404 + .quad 0x0000100000010404, 0x0010100001010400 + .quad 0x0000000000000404, 0x0010000001000400 + .quad 0x0010000001000400, 0x0000000000000000 + .quad 0x0000100000010004, 0x0000100000010400 + .quad 0x0000000000000000, 0x0010100001010004 +.L_s2: + .quad 0x0801080200100020, 0x0800080000000000 + .quad 0x0000080000000000, 0x0001080200100020 + .quad 0x0001000000100000, 0x0000000200000020 + .quad 0x0801000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0801080200100020 + .quad 0x0801080000100000, 0x0800000000000000 + .quad 0x0800080000000000, 0x0001000000100000 + .quad 0x0000000200000020, 0x0801000200100020 + .quad 0x0001080000100000, 0x0001000200100020 + .quad 0x0800080200000020, 0x0000000000000000 + .quad 0x0800000000000000, 0x0000080000000000 + .quad 0x0001080200100020, 0x0801000000100000 + .quad 0x0001000200100020, 0x0800000200000020 + .quad 0x0000000000000000, 0x0001080000100000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0801000000100000, 0x0000080200000020 + .quad 0x0000000000000000, 0x0001080200100020 + .quad 0x0801000200100020, 0x0001000000100000 + .quad 0x0800080200000020, 0x0801000000100000 + .quad 0x0801080000100000, 0x0000080000000000 + .quad 0x0801000000100000, 0x0800080000000000 + .quad 0x0000000200000020, 0x0801080200100020 + .quad 0x0001080200100020, 0x0000000200000020 + .quad 0x0000080000000000, 0x0800000000000000 + .quad 0x0000080200000020, 0x0801080000100000 + .quad 0x0001000000100000, 0x0800000200000020 + .quad 0x0001000200100020, 0x0800080200000020 + .quad 0x0800000200000020, 0x0001000200100020 + .quad 0x0001080000100000, 0x0000000000000000 + .quad 0x0800080000000000, 0x0000080200000020 + .quad 0x0800000000000000, 0x0801000200100020 + .quad 0x0801080200100020, 0x0001080000100000 +.L_s3: + .quad 0x0000002000000208, 0x0000202008020200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000202000020208, 0x0000002008000200 + .quad 0x0000200000020008, 0x0000000008000008 + .quad 0x0000000008000008, 0x0000200000020000 + .quad 0x0000202008020208, 0x0000200000020008 + .quad 0x0000200008020000, 0x0000002000000208 + .quad 0x0000000008000000, 0x0000000000000008 + .quad 0x0000202008020200, 0x0000002000000200 + .quad 0x0000202000020200, 0x0000200008020000 + .quad 0x0000200008020008, 0x0000202000020208 + .quad 0x0000002008000208, 0x0000202000020200 + .quad 0x0000200000020000, 0x0000002008000208 + .quad 0x0000000000000008, 0x0000202008020208 + .quad 0x0000002000000200, 0x0000000008000000 + .quad 0x0000202008020200, 0x0000000008000000 + .quad 0x0000200000020008, 0x0000002000000208 + .quad 0x0000200000020000, 0x0000202008020200 + .quad 0x0000002008000200, 0x0000000000000000 + .quad 0x0000002000000200, 0x0000200000020008 + .quad 0x0000202008020208, 0x0000002008000200 + .quad 0x0000000008000008, 0x0000002000000200 + .quad 0x0000000000000000, 0x0000200008020008 + .quad 0x0000002008000208, 0x0000200000020000 + .quad 0x0000000008000000, 0x0000202008020208 + .quad 0x0000000000000008, 0x0000202000020208 + .quad 0x0000202000020200, 0x0000000008000008 + .quad 0x0000200008020000, 0x0000002008000208 + .quad 0x0000002000000208, 0x0000200008020000 + .quad 0x0000202000020208, 0x0000000000000008 + .quad 0x0000200008020008, 0x0000202000020200 +.L_s4: + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 
0x1008000000000001, 0x1000020000002001 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x0000020000002000, 0x0008020800002000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x0008000800000000, 0x1008000000000001 + .quad 0x0008020000002000, 0x1008020800002001 + .quad 0x1000000800000001, 0x0000000000000000 + .quad 0x0000000000000000, 0x0008020000002000 + .quad 0x0000020800002000, 0x0008000800000000 + .quad 0x1008000800000001, 0x1000000000000001 + .quad 0x1008020000002001, 0x1000020800002001 + .quad 0x1000020800002001, 0x0000000800000000 + .quad 0x1008020800002001, 0x1000000800000001 + .quad 0x1000000000000001, 0x0000020000002000 + .quad 0x1008000000000001, 0x1000020000002001 + .quad 0x0008020800002000, 0x1008000800000001 + .quad 0x1000020000002001, 0x0000020800002000 + .quad 0x0008000000000000, 0x1008020000002001 + .quad 0x0000000800000000, 0x0008000000000000 + .quad 0x0000020000002000, 0x0008020800002000 +.L_s5: + .quad 0x0000001000000100, 0x0020001002080100 + .quad 0x0020000002080000, 0x0420001002000100 + .quad 0x0000000000080000, 0x0000001000000100 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0400001000080100, 0x0000000000080000 + .quad 0x0020001002000100, 0x0400001000080100 + .quad 0x0420001002000100, 0x0420000002080000 + .quad 0x0000001000080100, 0x0400000000000000 + .quad 0x0020000002000000, 0x0400000000080000 + .quad 0x0400000000080000, 0x0000000000000000 + .quad 0x0400001000000100, 0x0420001002080100 + .quad 0x0420001002080100, 0x0020001002000100 + .quad 0x0420000002080000, 0x0400001000000100 + .quad 0x0000000000000000, 0x0420000002000000 + .quad 0x0020001002080100, 0x0020000002000000 + .quad 0x0420000002000000, 0x0000001000080100 + .quad 0x0000000000080000, 0x0420001002000100 + .quad 0x0000001000000100, 0x0020000002000000 + .quad 0x0400000000000000, 0x0020000002080000 + .quad 0x0420001002000100, 0x0400001000080100 + .quad 0x0020001002000100, 0x0400000000000000 + .quad 0x0420000002080000, 0x0020001002080100 + .quad 0x0400001000080100, 0x0000001000000100 + .quad 0x0020000002000000, 0x0420000002080000 + .quad 0x0420001002080100, 0x0000001000080100 + .quad 0x0420000002000000, 0x0420001002080100 + .quad 0x0020000002080000, 0x0000000000000000 + .quad 0x0400000000080000, 0x0420000002000000 + .quad 0x0000001000080100, 0x0020001002000100 + .quad 0x0400001000000100, 0x0000000000080000 + .quad 0x0000000000000000, 0x0400000000080000 + .quad 0x0020001002080100, 0x0400001000000100 +.L_s6: + .quad 0x0200000120000010, 0x0204000020000000 + .quad 0x0000040000000000, 0x0204040120000010 + .quad 0x0204000020000000, 0x0000000100000010 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0200040020000000, 0x0004040100000010 + .quad 0x0004000000000000, 0x0200000120000010 + .quad 0x0004000100000010, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0000000000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000040000000000 + .quad 0x0004040000000000, 0x0200040120000010 + .quad 0x0000000100000010, 0x0204000120000010 + .quad 0x0204000120000010, 0x0000000000000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 
0x0000040100000010, 0x0004040000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0200040020000000, 0x0000000100000010 + .quad 0x0204000120000010, 0x0004040000000000 + .quad 0x0204040120000010, 0x0004000000000000 + .quad 0x0000040100000010, 0x0200000120000010 + .quad 0x0004000000000000, 0x0200040020000000 + .quad 0x0200000020000000, 0x0000040100000010 + .quad 0x0200000120000010, 0x0204040120000010 + .quad 0x0004040000000000, 0x0204000020000000 + .quad 0x0004040100000010, 0x0204040020000000 + .quad 0x0000000000000000, 0x0204000120000010 + .quad 0x0000000100000010, 0x0000040000000000 + .quad 0x0204000020000000, 0x0004040100000010 + .quad 0x0000040000000000, 0x0004000100000010 + .quad 0x0200040120000010, 0x0000000000000000 + .quad 0x0204040020000000, 0x0200000020000000 + .quad 0x0004000100000010, 0x0200040120000010 +.L_s7: + .quad 0x0002000000200000, 0x2002000004200002 + .quad 0x2000000004000802, 0x0000000000000000 + .quad 0x0000000000000800, 0x2000000004000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2002000004200802, 0x0002000000200000 + .quad 0x0000000000000000, 0x2000000004000002 + .quad 0x2000000000000002, 0x0000000004000000 + .quad 0x2002000004200002, 0x2000000000000802 + .quad 0x0000000004000800, 0x2002000000200802 + .quad 0x2002000000200002, 0x0000000004000800 + .quad 0x2000000004000002, 0x0002000004200000 + .quad 0x0002000004200800, 0x2002000000200002 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000000000802, 0x2002000004200802 + .quad 0x0002000000200800, 0x2000000000000002 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0000000004000000, 0x0002000000200800 + .quad 0x0002000000200000, 0x2000000004000802 + .quad 0x2000000004000802, 0x2002000004200002 + .quad 0x2002000004200002, 0x2000000000000002 + .quad 0x2002000000200002, 0x0000000004000000 + .quad 0x0000000004000800, 0x0002000000200000 + .quad 0x0002000004200800, 0x2000000000000802 + .quad 0x2002000000200802, 0x0002000004200800 + .quad 0x2000000000000802, 0x2000000004000002 + .quad 0x2002000004200802, 0x0002000004200000 + .quad 0x0002000000200800, 0x0000000000000000 + .quad 0x2000000000000002, 0x2002000004200802 + .quad 0x0000000000000000, 0x2002000000200802 + .quad 0x0002000004200000, 0x0000000000000800 + .quad 0x2000000004000002, 0x0000000004000800 + .quad 0x0000000000000800, 0x2002000000200002 +.L_s8: + .quad 0x0100010410001000, 0x0000010000001000 + .quad 0x0000000000040000, 0x0100010410041000 + .quad 0x0100000010000000, 0x0100010410001000 + .quad 0x0000000400000000, 0x0100000010000000 + .quad 0x0000000400040000, 0x0100000010040000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0100010010041000, 0x0000010400041000 + .quad 0x0000010000001000, 0x0000000400000000 + .quad 0x0100000010040000, 0x0100000410000000 + .quad 0x0100010010001000, 0x0000010400001000 + .quad 0x0000010000041000, 0x0000000400040000 + .quad 0x0100000410040000, 0x0100010010041000 + .quad 0x0000010400001000, 0x0000000000000000 + .quad 0x0000000000000000, 0x0100000410040000 + .quad 0x0100000410000000, 0x0100010010001000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0000010400041000, 0x0000000000040000 + .quad 0x0100010010041000, 0x0000010000001000 + .quad 0x0000000400000000, 0x0100000410040000 + .quad 0x0000010000001000, 0x0000010400041000 + .quad 0x0100010010001000, 0x0000000400000000 + .quad 0x0100000410000000, 0x0100000010040000 + .quad 0x0100000410040000, 0x0100000010000000 + .quad 0x0000000000040000, 0x0100010410001000 + .quad 0x0000000000000000, 0x0100010410041000 + .quad 
0x0000000400040000, 0x0100000410000000 + .quad 0x0100000010040000, 0x0100010010001000 + .quad 0x0100010410001000, 0x0000000000000000 + .quad 0x0100010410041000, 0x0000010000041000 + .quad 0x0000010000041000, 0x0000010400001000 + .quad 0x0000010400001000, 0x0000000400040000 + .quad 0x0100000010000000, 0x0100010010041000 + +#endif +#endif diff --git a/comm/third_party/libgcrypt/cipher/des.c b/comm/third_party/libgcrypt/cipher/des.c new file mode 100644 index 0000000000..1580ea4ec5 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/des.c @@ -0,0 +1,1507 @@ +/* des.c - DES and Triple-DES encryption/decryption Algorithm + * Copyright (C) 1998, 1999, 2001, 2002, 2003, + * 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * For a description of triple encryption, see: + * Bruce Schneier: Applied Cryptography. Second Edition. + * John Wiley & Sons, 1996. ISBN 0-471-12845-7. Pages 358 ff. + * This implementation is according to the definition of DES in FIPS + * PUB 46-2 from December 1993. + */ + + +/* + * Written by Michael Roth , September 1998 + */ + + +/* + * U S A G E + * =========== + * + * For DES or Triple-DES encryption/decryption you must initialize a proper + * encryption context with a key. + * + * A DES key is 64bit wide but only 56bits of the key are used. The remaining + * bits are parity bits and they will _not_ checked in this implementation, but + * simply ignored. + * + * For Triple-DES you could use either two 64bit keys or three 64bit keys. + * The parity bits will _not_ checked, too. + * + * After initializing a context with a key you could use this context to + * encrypt or decrypt data in 64bit blocks in Electronic Codebook Mode. + * + * (In the examples below the slashes at the beginning and ending of comments + * are omitted.) + * + * DES Example + * ----------- + * unsigned char key[8]; + * unsigned char plaintext[8]; + * unsigned char ciphertext[8]; + * unsigned char recoverd[8]; + * des_ctx context; + * + * * Fill 'key' and 'plaintext' with some data * + * .... 
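The do_permutation macro used throughout the assembly above is a classic "delta swap": it exchanges the bit group of b selected by mask with the bits of a sitting offset positions further left, and chaining a handful of such swaps realizes the DES initial and final permutations without any large bit-permutation tables. A C rendering of one step (illustrative names, mirroring the macro's xor/and/xor sequence; this is not code from the patch):

    #include <stdint.h>

    /* Exchange the bits of *b selected by MASK with the bits of *a
       located OFFSET positions to the left, exactly as the
       do_permutation assembler macro does with RT0 as its temporary. */
    static void
    delta_swap (uint32_t *a, uint32_t *b, unsigned offset, uint32_t mask)
    {
      uint32_t t = ((*a >> offset) ^ *b) & mask;
      *b ^= t;
      *a ^= t << offset;
    }

For example, the first two steps of initial_permutation correspond to delta_swap(&left, &right, 4, 0x0f0f0f0f) followed by delta_swap(&left, &right, 16, 0x0000ffff).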
+ * + * * Set up the DES encryption context * + * des_setkey(context, key); + * + * * Encrypt the plaintext * + * des_ecb_encrypt(context, plaintext, ciphertext); + * + * * To recover the original plaintext from ciphertext use: * + * des_ecb_decrypt(context, ciphertext, recoverd); + * + * + * Triple-DES Example + * ------------------ + * unsigned char key1[8]; + * unsigned char key2[8]; + * unsigned char key3[8]; + * unsigned char plaintext[8]; + * unsigned char ciphertext[8]; + * unsigned char recoverd[8]; + * tripledes_ctx context; + * + * * If you would like to use two 64bit keys, fill 'key1' and'key2' + * then setup the encryption context: * + * tripledes_set2keys(context, key1, key2); + * + * * To use three 64bit keys with Triple-DES use: * + * tripledes_set3keys(context, key1, key2, key3); + * + * * Encrypting plaintext with Triple-DES * + * tripledes_ecb_encrypt(context, plaintext, ciphertext); + * + * * Decrypting ciphertext to recover the plaintext with Triple-DES * + * tripledes_ecb_decrypt(context, ciphertext, recoverd); + * + * + * Selftest + * -------- + * char *error_msg; + * + * * To perform a selftest of this DES/Triple-DES implementation use the + * function selftest(). It will return an error string if there are + * some problems with this library. * + * + * if ( (error_msg = selftest()) ) + * { + * fprintf(stderr, "An error in the DES/Triple-DES implementation occurred: %s\n", error_msg); + * abort(); + * } + */ + + +#include +#include +#include /* memcpy, memcmp */ +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" +#include "bufhelp.h" +#include "cipher-internal.h" +#include "cipher-selftest.h" + + +#define DES_BLOCKSIZE 8 + + +/* USE_AMD64_ASM indicates whether to use AMD64 assembly code. */ +#undef USE_AMD64_ASM +#if defined(__x86_64__) && (defined(HAVE_COMPATIBLE_GCC_AMD64_PLATFORM_AS) || \ + defined(HAVE_COMPATIBLE_GCC_WIN64_PLATFORM_AS)) +# define USE_AMD64_ASM 1 +#endif + +/* Helper macro to force alignment to 16 bytes. */ +#ifdef HAVE_GCC_ATTRIBUTE_ALIGNED +# define ATTR_ALIGNED_16 __attribute__ ((aligned (16))) +#else +# define ATTR_ALIGNED_16 +#endif + +#if defined(__GNUC__) && defined(__GNU_LIBRARY__) +# define working_memcmp memcmp +#else +/* + * According to the SunOS man page, memcmp returns indeterminate sign + * depending on whether characters are signed or not. 
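The sbox1 through sbox8 tables defined below are not the raw FIPS s-boxes: as their comment notes, each 32-bit entry already has the primitive function P applied and is rotated one bit to the left, so a whole Feistel round reduces to eight table lookups XORed into the opposite half. A hedged sketch of how such combined SP tables are consumed, assuming two 32-bit subkey words per round, each packing four 6-bit indices one per byte (illustrative; the file's own round macro, not shown in this excerpt, is authoritative):

    /* One DES round over the combined SP tables; RIGHT is mixed into
       LEFT through eight 64-entry lookups. */
    static uint32_t
    des_round_sketch (uint32_t right, uint32_t left, const uint32_t *subkey)
    {
      uint32_t work;

      work = right ^ subkey[0];
      left ^= sbox8[ work        & 0x3f];
      left ^= sbox6[(work >>  8) & 0x3f];
      left ^= sbox4[(work >> 16) & 0x3f];
      left ^= sbox2[(work >> 24) & 0x3f];

      /* Rotate right by 4 so the other four 6-bit groups line up. */
      work = ((right << 28) | (right >> 4)) ^ subkey[1];
      left ^= sbox7[ work        & 0x3f];
      left ^= sbox5[(work >>  8) & 0x3f];
      left ^= sbox3[(work >> 16) & 0x3f];
      left ^= sbox1[(work >> 24) & 0x3f];

      return left;
    }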
+ */ +static int +working_memcmp( const void *_a, const void *_b, size_t n ) +{ + const char *a = _a; + const char *b = _b; + for( ; n; n--, a++, b++ ) + if( *a != *b ) + return (int)(*(byte*)a) - (int)(*(byte*)b); + return 0; +} +#endif + +/* + * Encryption/Decryption context of DES + */ +typedef struct _des_ctx + { + u32 encrypt_subkeys[32]; + u32 decrypt_subkeys[32]; + } +des_ctx[1]; + +/* + * Encryption/Decryption context of Triple-DES + */ +typedef struct _tripledes_ctx + { + u32 encrypt_subkeys[96]; + u32 decrypt_subkeys[96]; + struct { + int no_weak_key; + } flags; + } +tripledes_ctx[1]; + +static void des_key_schedule (const byte *, u32 *); +static int des_setkey (struct _des_ctx *, const byte *); +static int des_ecb_crypt (struct _des_ctx *, const byte *, byte *, int); +static int tripledes_set2keys (struct _tripledes_ctx *, + const byte *, const byte *); +static int tripledes_set3keys (struct _tripledes_ctx *, + const byte *, const byte *, const byte *); +static int tripledes_ecb_crypt (struct _tripledes_ctx *, + const byte *, byte *, int); +static int is_weak_key ( const byte *key ); +static const char *selftest (void); +static unsigned int do_tripledes_encrypt(void *context, byte *outbuf, + const byte *inbuf ); +static unsigned int do_tripledes_decrypt(void *context, byte *outbuf, + const byte *inbuf ); +static gcry_err_code_t do_tripledes_setkey(void *context, const byte *key, + unsigned keylen, + cipher_bulk_ops_t *bulk_ops); + +static int initialized; + + + + +/* + * The s-box values are permuted according to the 'primitive function P' + * and are rotated one bit to the left. + */ +static u32 sbox1[64] = +{ + 0x01010400, 0x00000000, 0x00010000, 0x01010404, 0x01010004, 0x00010404, 0x00000004, 0x00010000, + 0x00000400, 0x01010400, 0x01010404, 0x00000400, 0x01000404, 0x01010004, 0x01000000, 0x00000004, + 0x00000404, 0x01000400, 0x01000400, 0x00010400, 0x00010400, 0x01010000, 0x01010000, 0x01000404, + 0x00010004, 0x01000004, 0x01000004, 0x00010004, 0x00000000, 0x00000404, 0x00010404, 0x01000000, + 0x00010000, 0x01010404, 0x00000004, 0x01010000, 0x01010400, 0x01000000, 0x01000000, 0x00000400, + 0x01010004, 0x00010000, 0x00010400, 0x01000004, 0x00000400, 0x00000004, 0x01000404, 0x00010404, + 0x01010404, 0x00010004, 0x01010000, 0x01000404, 0x01000004, 0x00000404, 0x00010404, 0x01010400, + 0x00000404, 0x01000400, 0x01000400, 0x00000000, 0x00010004, 0x00010400, 0x00000000, 0x01010004 +}; + +static u32 sbox2[64] = +{ + 0x80108020, 0x80008000, 0x00008000, 0x00108020, 0x00100000, 0x00000020, 0x80100020, 0x80008020, + 0x80000020, 0x80108020, 0x80108000, 0x80000000, 0x80008000, 0x00100000, 0x00000020, 0x80100020, + 0x00108000, 0x00100020, 0x80008020, 0x00000000, 0x80000000, 0x00008000, 0x00108020, 0x80100000, + 0x00100020, 0x80000020, 0x00000000, 0x00108000, 0x00008020, 0x80108000, 0x80100000, 0x00008020, + 0x00000000, 0x00108020, 0x80100020, 0x00100000, 0x80008020, 0x80100000, 0x80108000, 0x00008000, + 0x80100000, 0x80008000, 0x00000020, 0x80108020, 0x00108020, 0x00000020, 0x00008000, 0x80000000, + 0x00008020, 0x80108000, 0x00100000, 0x80000020, 0x00100020, 0x80008020, 0x80000020, 0x00100020, + 0x00108000, 0x00000000, 0x80008000, 0x00008020, 0x80000000, 0x80100020, 0x80108020, 0x00108000 +}; + +static u32 sbox3[64] = +{ + 0x00000208, 0x08020200, 0x00000000, 0x08020008, 0x08000200, 0x00000000, 0x00020208, 0x08000200, + 0x00020008, 0x08000008, 0x08000008, 0x00020000, 0x08020208, 0x00020008, 0x08020000, 0x00000208, + 0x08000000, 0x00000008, 0x08020200, 0x00000200, 0x00020200, 0x08020000, 
0x08020008, 0x00020208, + 0x08000208, 0x00020200, 0x00020000, 0x08000208, 0x00000008, 0x08020208, 0x00000200, 0x08000000, + 0x08020200, 0x08000000, 0x00020008, 0x00000208, 0x00020000, 0x08020200, 0x08000200, 0x00000000, + 0x00000200, 0x00020008, 0x08020208, 0x08000200, 0x08000008, 0x00000200, 0x00000000, 0x08020008, + 0x08000208, 0x00020000, 0x08000000, 0x08020208, 0x00000008, 0x00020208, 0x00020200, 0x08000008, + 0x08020000, 0x08000208, 0x00000208, 0x08020000, 0x00020208, 0x00000008, 0x08020008, 0x00020200 +}; + +static u32 sbox4[64] = +{ + 0x00802001, 0x00002081, 0x00002081, 0x00000080, 0x00802080, 0x00800081, 0x00800001, 0x00002001, + 0x00000000, 0x00802000, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00800080, 0x00800001, + 0x00000001, 0x00002000, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002001, 0x00002080, + 0x00800081, 0x00000001, 0x00002080, 0x00800080, 0x00002000, 0x00802080, 0x00802081, 0x00000081, + 0x00800080, 0x00800001, 0x00802000, 0x00802081, 0x00000081, 0x00000000, 0x00000000, 0x00802000, + 0x00002080, 0x00800080, 0x00800081, 0x00000001, 0x00802001, 0x00002081, 0x00002081, 0x00000080, + 0x00802081, 0x00000081, 0x00000001, 0x00002000, 0x00800001, 0x00002001, 0x00802080, 0x00800081, + 0x00002001, 0x00002080, 0x00800000, 0x00802001, 0x00000080, 0x00800000, 0x00002000, 0x00802080 +}; + +static u32 sbox5[64] = +{ + 0x00000100, 0x02080100, 0x02080000, 0x42000100, 0x00080000, 0x00000100, 0x40000000, 0x02080000, + 0x40080100, 0x00080000, 0x02000100, 0x40080100, 0x42000100, 0x42080000, 0x00080100, 0x40000000, + 0x02000000, 0x40080000, 0x40080000, 0x00000000, 0x40000100, 0x42080100, 0x42080100, 0x02000100, + 0x42080000, 0x40000100, 0x00000000, 0x42000000, 0x02080100, 0x02000000, 0x42000000, 0x00080100, + 0x00080000, 0x42000100, 0x00000100, 0x02000000, 0x40000000, 0x02080000, 0x42000100, 0x40080100, + 0x02000100, 0x40000000, 0x42080000, 0x02080100, 0x40080100, 0x00000100, 0x02000000, 0x42080000, + 0x42080100, 0x00080100, 0x42000000, 0x42080100, 0x02080000, 0x00000000, 0x40080000, 0x42000000, + 0x00080100, 0x02000100, 0x40000100, 0x00080000, 0x00000000, 0x40080000, 0x02080100, 0x40000100 +}; + +static u32 sbox6[64] = +{ + 0x20000010, 0x20400000, 0x00004000, 0x20404010, 0x20400000, 0x00000010, 0x20404010, 0x00400000, + 0x20004000, 0x00404010, 0x00400000, 0x20000010, 0x00400010, 0x20004000, 0x20000000, 0x00004010, + 0x00000000, 0x00400010, 0x20004010, 0x00004000, 0x00404000, 0x20004010, 0x00000010, 0x20400010, + 0x20400010, 0x00000000, 0x00404010, 0x20404000, 0x00004010, 0x00404000, 0x20404000, 0x20000000, + 0x20004000, 0x00000010, 0x20400010, 0x00404000, 0x20404010, 0x00400000, 0x00004010, 0x20000010, + 0x00400000, 0x20004000, 0x20000000, 0x00004010, 0x20000010, 0x20404010, 0x00404000, 0x20400000, + 0x00404010, 0x20404000, 0x00000000, 0x20400010, 0x00000010, 0x00004000, 0x20400000, 0x00404010, + 0x00004000, 0x00400010, 0x20004010, 0x00000000, 0x20404000, 0x20000000, 0x00400010, 0x20004010 +}; + +static u32 sbox7[64] = +{ + 0x00200000, 0x04200002, 0x04000802, 0x00000000, 0x00000800, 0x04000802, 0x00200802, 0x04200800, + 0x04200802, 0x00200000, 0x00000000, 0x04000002, 0x00000002, 0x04000000, 0x04200002, 0x00000802, + 0x04000800, 0x00200802, 0x00200002, 0x04000800, 0x04000002, 0x04200000, 0x04200800, 0x00200002, + 0x04200000, 0x00000800, 0x00000802, 0x04200802, 0x00200800, 0x00000002, 0x04000000, 0x00200800, + 0x04000000, 0x00200800, 0x00200000, 0x04000802, 0x04000802, 0x04200002, 0x04200002, 0x00000002, + 0x00200002, 0x04000000, 0x04000800, 0x00200000, 0x04200800, 
0x00000802, 0x00200802, 0x04200800,
+  0x00000802, 0x04000002, 0x04200802, 0x04200000, 0x00200800, 0x00000000, 0x00000002, 0x04200802,
+  0x00000000, 0x00200802, 0x04200000, 0x00000800, 0x04000002, 0x04000800, 0x00000800, 0x00200002
+};
+
+static u32 sbox8[64] =
+{
+  0x10001040, 0x00001000, 0x00040000, 0x10041040, 0x10000000, 0x10001040, 0x00000040, 0x10000000,
+  0x00040040, 0x10040000, 0x10041040, 0x00041000, 0x10041000, 0x00041040, 0x00001000, 0x00000040,
+  0x10040000, 0x10000040, 0x10001000, 0x00001040, 0x00041000, 0x00040040, 0x10040040, 0x10041000,
+  0x00001040, 0x00000000, 0x00000000, 0x10040040, 0x10000040, 0x10001000, 0x00041040, 0x00040000,
+  0x00041040, 0x00040000, 0x10041000, 0x00001000, 0x00000040, 0x10040040, 0x00001000, 0x00041040,
+  0x10001000, 0x00000040, 0x10000040, 0x10040000, 0x10040040, 0x10000000, 0x00040000, 0x10001040,
+  0x00000000, 0x10041040, 0x00040040, 0x10000040, 0x10040000, 0x10001000, 0x10001040, 0x00000000,
+  0x10041040, 0x00041000, 0x00041000, 0x00001040, 0x00001040, 0x00040040, 0x10000000, 0x10041000
+};
+
+
+/*
+ * These two tables are part of the 'permuted choice 1' function.
+ * In this implementation several speed improvements have been made.
+ */
+static u32 leftkey_swap[16] =
+{
+  0x00000000, 0x00000001, 0x00000100, 0x00000101,
+  0x00010000, 0x00010001, 0x00010100, 0x00010101,
+  0x01000000, 0x01000001, 0x01000100, 0x01000101,
+  0x01010000, 0x01010001, 0x01010100, 0x01010101
+};
+
+static u32 rightkey_swap[16] =
+{
+  0x00000000, 0x01000000, 0x00010000, 0x01010000,
+  0x00000100, 0x01000100, 0x00010100, 0x01010100,
+  0x00000001, 0x01000001, 0x00010001, 0x01010001,
+  0x00000101, 0x01000101, 0x00010101, 0x01010101,
+};
+
+
+
+/*
+ * Numbers of left shifts per round for encryption subkeys.
+ * To calculate the decryption subkeys we just reverse the
+ * ordering of the calculated encryption subkeys, so there
+ * is no need for a decryption rotate table.
+ */
+static byte encrypt_rotate_tab[16] =
+{
+  1, 1, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2, 2, 1
+};
+
+
+
+/*
+ * Table with weak DES keys sorted in ascending order.
+ * In DES there are 64 known keys that are weak. They are weak
+ * because they produce only one, two or four different
+ * subkeys in the subkey scheduling process.
+ * The keys in this table have all their parity bits cleared.
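+ * (Annotation, not in the upstream comment: the entries marked 'w'
+ * below are DES's four truly weak keys; the 'sw' entries belong to
+ * the semi-weak pairs, for which encryption under one key equals
+ * decryption under its partner.)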
+ */ +static byte weak_keys[64][8] = +{ + { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 }, /*w*/ + { 0x00, 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e }, + { 0x00, 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0 }, + { 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe }, + { 0x00, 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e }, /*sw*/ + { 0x00, 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00 }, + { 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe }, + { 0x00, 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0 }, + { 0x00, 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0 }, /*sw*/ + { 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe }, + { 0x00, 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00 }, + { 0x00, 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e }, + { 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe }, /*sw*/ + { 0x00, 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0 }, + { 0x00, 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e }, + { 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00 }, + { 0x1e, 0x00, 0x00, 0x1e, 0x0e, 0x00, 0x00, 0x0e }, + { 0x1e, 0x00, 0x1e, 0x00, 0x0e, 0x00, 0x0e, 0x00 }, /*sw*/ + { 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0, 0xfe }, + { 0x1e, 0x00, 0xfe, 0xe0, 0x0e, 0x00, 0xfe, 0xf0 }, + { 0x1e, 0x1e, 0x00, 0x00, 0x0e, 0x0e, 0x00, 0x00 }, + { 0x1e, 0x1e, 0x1e, 0x1e, 0x0e, 0x0e, 0x0e, 0x0e }, /*w*/ + { 0x1e, 0x1e, 0xe0, 0xe0, 0x0e, 0x0e, 0xf0, 0xf0 }, + { 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe, 0xfe }, + { 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00, 0xfe }, + { 0x1e, 0xe0, 0x1e, 0xe0, 0x0e, 0xf0, 0x0e, 0xf0 }, /*sw*/ + { 0x1e, 0xe0, 0xe0, 0x1e, 0x0e, 0xf0, 0xf0, 0x0e }, + { 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0, 0xfe, 0x00 }, + { 0x1e, 0xfe, 0x00, 0xe0, 0x0e, 0xfe, 0x00, 0xf0 }, + { 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e, 0xfe }, /*sw*/ + { 0x1e, 0xfe, 0xe0, 0x00, 0x0e, 0xfe, 0xf0, 0x00 }, + { 0x1e, 0xfe, 0xfe, 0x1e, 0x0e, 0xfe, 0xfe, 0x0e }, + { 0xe0, 0x00, 0x00, 0xe0, 0xf0, 0x00, 0x00, 0xf0 }, + { 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e, 0xfe }, + { 0xe0, 0x00, 0xe0, 0x00, 0xf0, 0x00, 0xf0, 0x00 }, /*sw*/ + { 0xe0, 0x00, 0xfe, 0x1e, 0xf0, 0x00, 0xfe, 0x0e }, + { 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00, 0xfe }, + { 0xe0, 0x1e, 0x1e, 0xe0, 0xf0, 0x0e, 0x0e, 0xf0 }, + { 0xe0, 0x1e, 0xe0, 0x1e, 0xf0, 0x0e, 0xf0, 0x0e }, /*sw*/ + { 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e, 0xfe, 0x00 }, + { 0xe0, 0xe0, 0x00, 0x00, 0xf0, 0xf0, 0x00, 0x00 }, + { 0xe0, 0xe0, 0x1e, 0x1e, 0xf0, 0xf0, 0x0e, 0x0e }, + { 0xe0, 0xe0, 0xe0, 0xe0, 0xf0, 0xf0, 0xf0, 0xf0 }, /*w*/ + { 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe, 0xfe }, + { 0xe0, 0xfe, 0x00, 0x1e, 0xf0, 0xfe, 0x00, 0x0e }, + { 0xe0, 0xfe, 0x1e, 0x00, 0xf0, 0xfe, 0x0e, 0x00 }, + { 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0, 0xfe }, /*sw*/ + { 0xe0, 0xfe, 0xfe, 0xe0, 0xf0, 0xfe, 0xfe, 0xf0 }, + { 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00, 0xfe }, + { 0xfe, 0x00, 0x1e, 0xe0, 0xfe, 0x00, 0x0e, 0xf0 }, + { 0xfe, 0x00, 0xe0, 0x1e, 0xfe, 0x00, 0xf0, 0x0e }, + { 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00, 0xfe, 0x00 }, /*sw*/ + { 0xfe, 0x1e, 0x00, 0xe0, 0xfe, 0x0e, 0x00, 0xf0 }, + { 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e, 0xfe }, + { 0xfe, 0x1e, 0xe0, 0x00, 0xfe, 0x0e, 0xf0, 0x00 }, + { 0xfe, 0x1e, 0xfe, 0x1e, 0xfe, 0x0e, 0xfe, 0x0e }, /*sw*/ + { 0xfe, 0xe0, 0x00, 0x1e, 0xfe, 0xf0, 0x00, 0x0e }, + { 0xfe, 0xe0, 0x1e, 0x00, 0xfe, 0xf0, 0x0e, 0x00 }, + { 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0, 0xfe }, + { 0xfe, 0xe0, 0xfe, 0xe0, 0xfe, 0xf0, 0xfe, 0xf0 }, /*sw*/ + { 0xfe, 0xfe, 0x00, 0x00, 0xfe, 0xfe, 0x00, 0x00 }, + { 0xfe, 0xfe, 0x1e, 0x1e, 0xfe, 0xfe, 0x0e, 0x0e }, + { 0xfe, 0xfe, 0xe0, 0xe0, 0xfe, 0xfe, 0xf0, 0xf0 }, + { 0xfe, 
0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe } /*w*/
+};
+static unsigned char weak_keys_chksum[20] = {
+  0xD0, 0xCF, 0x07, 0x38, 0x93, 0x70, 0x8A, 0x83, 0x7D, 0xD7,
+  0x8A, 0x36, 0x65, 0x29, 0x6C, 0x1F, 0x7C, 0x3F, 0xD3, 0x41
+};
+
+
+
+/*
+ * Macro to swap bits across two words.
+ */
+#define DO_PERMUTATION(a, temp, b, offset, mask) \
+    temp = ((a>>offset) ^ b) & mask; \
+    b ^= temp; \
+    a ^= temp<<offset;
+
+
+/*
+ * This performs the 'initial permutation' of the data to be encrypted
+ * or decrypted. Additionally the resulting two words are rotated one
+ * bit to the left.
+ */
+#define INITIAL_PERMUTATION(left, temp, right) \
+    DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f) \
+    DO_PERMUTATION(left, temp, right, 16, 0x0000ffff) \
+    DO_PERMUTATION(right, temp, left, 2, 0x33333333) \
+    DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff) \
+    right = (right << 1) | (right >> 31); \
+    temp = (left ^ right) & 0xaaaaaaaa; \
+    right ^= temp; \
+    left ^= temp; \
+    left = (left << 1) | (left >> 31);
+
+/*
+ * The 'inverse initial permutation'.
+ */
+#define FINAL_PERMUTATION(left, temp, right) \
+    left = (left << 31) | (left >> 1); \
+    temp = (left ^ right) & 0xaaaaaaaa; \
+    left ^= temp; \
+    right ^= temp; \
+    right = (right << 31) | (right >> 1); \
+    DO_PERMUTATION(right, temp, left, 8, 0x00ff00ff) \
+    DO_PERMUTATION(right, temp, left, 2, 0x33333333) \
+    DO_PERMUTATION(left, temp, right, 16, 0x0000ffff) \
+    DO_PERMUTATION(left, temp, right, 4, 0x0f0f0f0f)
+
+
+/*
+ * A full DES round including 'expansion function', 'sbox substitution'
+ * and 'primitive function P' but without swapping the left and right word.
+ * Please note: The data in 'from' and 'to' is already rotated one bit to
+ * the left, done in the initial permutation.
+ */
+#define DES_ROUND(from, to, work, subkey) \
+    work = from ^ *subkey++; \
+    to ^= sbox8[ work & 0x3f ]; \
+    to ^= sbox6[ (work>>8) & 0x3f ]; \
+    to ^= sbox4[ (work>>16) & 0x3f ]; \
+    to ^= sbox2[ (work>>24) & 0x3f ]; \
+    work = ((from << 28) | (from >> 4)) ^ *subkey++; \
+    to ^= sbox7[ work & 0x3f ]; \
+    to ^= sbox5[ (work>>8) & 0x3f ]; \
+    to ^= sbox3[ (work>>16) & 0x3f ]; \
+    to ^= sbox1[ (work>>24) & 0x3f ];
+
+/*
+ * Macros to convert 8 bytes from/to 32bit words.
+ */
+#define READ_64BIT_DATA(data, left, right) \
+    left = buf_get_be32(data + 0); \
+    right = buf_get_be32(data + 4);
+
+#define WRITE_64BIT_DATA(data, left, right) \
+    buf_put_be32(data + 0, left); \
+    buf_put_be32(data + 4, right);
+
+/*
+ * Handy macros for encryption and decryption of data
+ */
+#define des_ecb_encrypt(ctx, from, to) des_ecb_crypt(ctx, from, to, 0)
+#define des_ecb_decrypt(ctx, from, to) des_ecb_crypt(ctx, from, to, 1)
+#define tripledes_ecb_encrypt(ctx, from, to) tripledes_ecb_crypt(ctx,from,to,0)
+#define tripledes_ecb_decrypt(ctx, from, to) tripledes_ecb_crypt(ctx,from,to,1)
+
+
+
+
+
+
+/*
+ * des_key_schedule(): Calculate 16 subkey pairs (even/odd) for
+ *                     16 encryption rounds.
+ *                     To calculate subkeys for decryption the caller
+ *                     has to reorder the generated subkeys.
+ *
+ *    rawkey:   8 Bytes of key data
+ *    subkey:   Array of at least 32 u32s. Will be filled
+ *              with calculated subkeys.
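+ *
+ * (Annotation, not in the upstream comment: the 32 words form 16
+ * even/odd pairs, one pair per round; DES_ROUND above consumes
+ * exactly two words per invocation via *subkey++.)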
+ * + */ +static void +des_key_schedule (const byte * rawkey, u32 * subkey) +{ + u32 left, right, work; + int round; + + READ_64BIT_DATA (rawkey, left, right) + + DO_PERMUTATION (right, work, left, 4, 0x0f0f0f0f) + DO_PERMUTATION (right, work, left, 0, 0x10101010) + + left = ((leftkey_swap[(left >> 0) & 0xf] << 3) + | (leftkey_swap[(left >> 8) & 0xf] << 2) + | (leftkey_swap[(left >> 16) & 0xf] << 1) + | (leftkey_swap[(left >> 24) & 0xf]) + | (leftkey_swap[(left >> 5) & 0xf] << 7) + | (leftkey_swap[(left >> 13) & 0xf] << 6) + | (leftkey_swap[(left >> 21) & 0xf] << 5) + | (leftkey_swap[(left >> 29) & 0xf] << 4)); + + left &= 0x0fffffff; + + right = ((rightkey_swap[(right >> 1) & 0xf] << 3) + | (rightkey_swap[(right >> 9) & 0xf] << 2) + | (rightkey_swap[(right >> 17) & 0xf] << 1) + | (rightkey_swap[(right >> 25) & 0xf]) + | (rightkey_swap[(right >> 4) & 0xf] << 7) + | (rightkey_swap[(right >> 12) & 0xf] << 6) + | (rightkey_swap[(right >> 20) & 0xf] << 5) + | (rightkey_swap[(right >> 28) & 0xf] << 4)); + + right &= 0x0fffffff; + + for (round = 0; round < 16; ++round) + { + left = ((left << encrypt_rotate_tab[round]) + | (left >> (28 - encrypt_rotate_tab[round]))) & 0x0fffffff; + right = ((right << encrypt_rotate_tab[round]) + | (right >> (28 - encrypt_rotate_tab[round]))) & 0x0fffffff; + + *subkey++ = (((left << 4) & 0x24000000) + | ((left << 28) & 0x10000000) + | ((left << 14) & 0x08000000) + | ((left << 18) & 0x02080000) + | ((left << 6) & 0x01000000) + | ((left << 9) & 0x00200000) + | ((left >> 1) & 0x00100000) + | ((left << 10) & 0x00040000) + | ((left << 2) & 0x00020000) + | ((left >> 10) & 0x00010000) + | ((right >> 13) & 0x00002000) + | ((right >> 4) & 0x00001000) + | ((right << 6) & 0x00000800) + | ((right >> 1) & 0x00000400) + | ((right >> 14) & 0x00000200) + | (right & 0x00000100) + | ((right >> 5) & 0x00000020) + | ((right >> 10) & 0x00000010) + | ((right >> 3) & 0x00000008) + | ((right >> 18) & 0x00000004) + | ((right >> 26) & 0x00000002) + | ((right >> 24) & 0x00000001)); + + *subkey++ = (((left << 15) & 0x20000000) + | ((left << 17) & 0x10000000) + | ((left << 10) & 0x08000000) + | ((left << 22) & 0x04000000) + | ((left >> 2) & 0x02000000) + | ((left << 1) & 0x01000000) + | ((left << 16) & 0x00200000) + | ((left << 11) & 0x00100000) + | ((left << 3) & 0x00080000) + | ((left >> 6) & 0x00040000) + | ((left << 15) & 0x00020000) + | ((left >> 4) & 0x00010000) + | ((right >> 2) & 0x00002000) + | ((right << 8) & 0x00001000) + | ((right >> 14) & 0x00000808) + | ((right >> 9) & 0x00000400) + | ((right) & 0x00000200) + | ((right << 7) & 0x00000100) + | ((right >> 7) & 0x00000020) + | ((right >> 3) & 0x00000011) + | ((right << 2) & 0x00000004) + | ((right >> 21) & 0x00000002)); + } +} + + +/* + * Fill a DES context with subkeys calculated from a 64bit key. + * Does not check parity bits, but simply ignore them. + * Does not check for weak keys. 
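+ *
+ * Minimal usage sketch (annotation; hypothetical caller and buffers,
+ * not part of this file; a return value of 0 means success):
+ *
+ *   struct _des_ctx ctx;
+ *   static const byte k[8] = {0x01,0x23,0x45,0x67,0x89,0xab,0xcd,0xef};
+ *   if (!des_setkey (&ctx, k))
+ *     des_ecb_encrypt (&ctx, in, out);    (in/out: 8-byte blocks)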
+ */ +static int +des_setkey (struct _des_ctx *ctx, const byte * key) +{ + static const char *selftest_failed; + int i; + + if (!fips_mode () && !initialized) + { + initialized = 1; + selftest_failed = selftest (); + + if (selftest_failed) + log_error ("%s\n", selftest_failed); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + des_key_schedule (key, ctx->encrypt_subkeys); + _gcry_burn_stack (32); + + for(i=0; i<32; i+=2) + { + ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[30-i]; + ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[31-i]; + } + + return 0; +} + + + +/* + * Electronic Codebook Mode DES encryption/decryption of data according + * to 'mode'. + */ +static int +des_ecb_crypt (struct _des_ctx *ctx, const byte * from, byte * to, int mode) +{ + u32 left, right, work; + u32 *keys; + + keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys; + + READ_64BIT_DATA (from, left, right) + INITIAL_PERMUTATION (left, work, right) + + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + + FINAL_PERMUTATION (right, work, left) + WRITE_64BIT_DATA (to, right, left) + + return 0; +} + + + +/* + * Fill a Triple-DES context with subkeys calculated from two 64bit keys. + * Does not check the parity bits of the keys, but simply ignore them. + * Does not check for weak keys. + */ +static int +tripledes_set2keys (struct _tripledes_ctx *ctx, + const byte * key1, + const byte * key2) +{ + int i; + + des_key_schedule (key1, ctx->encrypt_subkeys); + des_key_schedule (key2, &(ctx->decrypt_subkeys[32])); + _gcry_burn_stack (32); + + for(i=0; i<32; i+=2) + { + ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[30-i]; + ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[31-i]; + + ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i]; + ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i]; + + ctx->encrypt_subkeys[i+64] = ctx->encrypt_subkeys[i]; + ctx->encrypt_subkeys[i+65] = ctx->encrypt_subkeys[i+1]; + + ctx->decrypt_subkeys[i+64] = ctx->decrypt_subkeys[i]; + ctx->decrypt_subkeys[i+65] = ctx->decrypt_subkeys[i+1]; + } + + return 0; +} + + + +/* + * Fill a Triple-DES context with subkeys calculated from three 64bit keys. + * Does not check the parity bits of the keys, but simply ignore them. + * Does not check for weak keys. 
+ */ +static int +tripledes_set3keys (struct _tripledes_ctx *ctx, + const byte * key1, + const byte * key2, + const byte * key3) +{ + static const char *selftest_failed; + int i; + + if (!fips_mode () && !initialized) + { + initialized = 1; + selftest_failed = selftest (); + + if (selftest_failed) + log_error ("%s\n", selftest_failed); + } + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + des_key_schedule (key1, ctx->encrypt_subkeys); + des_key_schedule (key2, &(ctx->decrypt_subkeys[32])); + des_key_schedule (key3, &(ctx->encrypt_subkeys[64])); + _gcry_burn_stack (32); + + for(i=0; i<32; i+=2) + { + ctx->decrypt_subkeys[i] = ctx->encrypt_subkeys[94-i]; + ctx->decrypt_subkeys[i+1] = ctx->encrypt_subkeys[95-i]; + + ctx->encrypt_subkeys[i+32] = ctx->decrypt_subkeys[62-i]; + ctx->encrypt_subkeys[i+33] = ctx->decrypt_subkeys[63-i]; + + ctx->decrypt_subkeys[i+64] = ctx->encrypt_subkeys[30-i]; + ctx->decrypt_subkeys[i+65] = ctx->encrypt_subkeys[31-i]; + } + + return 0; +} + + + +#ifdef USE_AMD64_ASM + +/* Assembly implementation of triple-DES. */ +extern void _gcry_3des_amd64_crypt_block(const void *keys, byte *out, + const byte *in); + +/* These assembly implementations process three blocks in parallel. */ +extern void _gcry_3des_amd64_ctr_enc(const void *keys, byte *out, + const byte *in, byte *ctr); + +extern void _gcry_3des_amd64_cbc_dec(const void *keys, byte *out, + const byte *in, byte *iv); + +extern void _gcry_3des_amd64_cfb_dec(const void *keys, byte *out, + const byte *in, byte *iv); + +#define TRIPLEDES_ECB_BURN_STACK (8 * sizeof(void *)) + + +/* + * Electronic Codebook Mode Triple-DES encryption/decryption of data + * according to 'mode'. Sometimes this mode is named 'EDE' mode + * (Encryption-Decryption-Encryption). + */ +static inline int +tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from, + byte * to, int mode) +{ + u32 *keys; + + keys = mode ? ctx->decrypt_subkeys : ctx->encrypt_subkeys; + + _gcry_3des_amd64_crypt_block(keys, to, from); + + return 0; +} + +static inline void +tripledes_amd64_ctr_enc(const void *keys, byte *out, const byte *in, byte *ctr) +{ + _gcry_3des_amd64_ctr_enc(keys, out, in, ctr); +} + +static inline void +tripledes_amd64_cbc_dec(const void *keys, byte *out, const byte *in, byte *iv) +{ + _gcry_3des_amd64_cbc_dec(keys, out, in, iv); +} + +static inline void +tripledes_amd64_cfb_dec(const void *keys, byte *out, const byte *in, byte *iv) +{ + _gcry_3des_amd64_cfb_dec(keys, out, in, iv); +} + +#else /*USE_AMD64_ASM*/ + +#define TRIPLEDES_ECB_BURN_STACK 32 + +/* + * Electronic Codebook Mode Triple-DES encryption/decryption of data + * according to 'mode'. Sometimes this mode is named 'EDE' mode + * (Encryption-Decryption-Encryption). + */ +static int +tripledes_ecb_crypt (struct _tripledes_ctx *ctx, const byte * from, + byte * to, int mode) +{ + u32 left, right, work; + u32 *keys; + + keys = mode ? 
ctx->decrypt_subkeys : ctx->encrypt_subkeys; + + READ_64BIT_DATA (from, left, right) + INITIAL_PERMUTATION (left, work, right) + + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + DES_ROUND (left, right, work, keys) DES_ROUND (right, left, work, keys) + + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + DES_ROUND (right, left, work, keys) DES_ROUND (left, right, work, keys) + + FINAL_PERMUTATION (right, work, left) + WRITE_64BIT_DATA (to, right, left) + + return 0; +} + +#endif /*!USE_AMD64_ASM*/ + + + +/* Bulk encryption of complete blocks in CTR mode. This function is only + intended for the bulk encryption feature of cipher.c. CTR is expected to be + of size DES_BLOCKSIZE. */ +static void +_gcry_3des_ctr_enc(void *context, unsigned char *ctr, void *outbuf_arg, + const void *inbuf_arg, size_t nblocks) +{ + struct _tripledes_ctx *ctx = context; + unsigned char *outbuf = outbuf_arg; + const unsigned char *inbuf = inbuf_arg; + unsigned char tmpbuf[DES_BLOCKSIZE]; + int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK; + +#ifdef USE_AMD64_ASM + { + int asm_burn_depth = 9 * sizeof(void *); + + if (nblocks >= 3 && burn_stack_depth < asm_burn_depth) + burn_stack_depth = asm_burn_depth; + + /* Process data in 3 block chunks. */ + while (nblocks >= 3) + { + tripledes_amd64_ctr_enc(ctx->encrypt_subkeys, outbuf, inbuf, ctr); + + nblocks -= 3; + outbuf += 3 * DES_BLOCKSIZE; + inbuf += 3 * DES_BLOCKSIZE; + } + + /* Use generic code to handle smaller chunks... */ + } +#endif + + for ( ;nblocks; nblocks-- ) + { + /* Encrypt the counter. */ + tripledes_ecb_encrypt (ctx, ctr, tmpbuf); + /* XOR the input with the encrypted counter and store in output. */ + cipher_block_xor(outbuf, tmpbuf, inbuf, DES_BLOCKSIZE); + outbuf += DES_BLOCKSIZE; + inbuf += DES_BLOCKSIZE; + /* Increment the counter. */ + cipher_block_add(ctr, 1, DES_BLOCKSIZE); + } + + wipememory(tmpbuf, sizeof(tmpbuf)); + _gcry_burn_stack(burn_stack_depth); +} + + +/* Bulk decryption of complete blocks in CBC mode. 
This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_3des_cbc_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks)
+{
+  struct _tripledes_ctx *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  unsigned char savebuf[DES_BLOCKSIZE];
+  int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+  {
+    int asm_burn_depth = 10 * sizeof(void *);
+
+    if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+      burn_stack_depth = asm_burn_depth;
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        tripledes_amd64_cbc_dec(ctx->decrypt_subkeys, outbuf, inbuf, iv);
+
+        nblocks -= 3;
+        outbuf += 3 * DES_BLOCKSIZE;
+        inbuf += 3 * DES_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      /* INBUF is needed later and it may be identical to OUTBUF, so store
+         the intermediate result to SAVEBUF. */
+      tripledes_ecb_decrypt (ctx, inbuf, savebuf);
+
+      cipher_block_xor_n_copy_2(outbuf, savebuf, iv, inbuf, DES_BLOCKSIZE);
+      inbuf += DES_BLOCKSIZE;
+      outbuf += DES_BLOCKSIZE;
+    }
+
+  wipememory(savebuf, sizeof(savebuf));
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/* Bulk decryption of complete blocks in CFB mode. This function is only
+   intended for the bulk encryption feature of cipher.c. */
+static void
+_gcry_3des_cfb_dec(void *context, unsigned char *iv, void *outbuf_arg,
+                   const void *inbuf_arg, size_t nblocks)
+{
+  struct _tripledes_ctx *ctx = context;
+  unsigned char *outbuf = outbuf_arg;
+  const unsigned char *inbuf = inbuf_arg;
+  int burn_stack_depth = TRIPLEDES_ECB_BURN_STACK;
+
+#ifdef USE_AMD64_ASM
+  {
+    int asm_burn_depth = 9 * sizeof(void *);
+
+    if (nblocks >= 3 && burn_stack_depth < asm_burn_depth)
+      burn_stack_depth = asm_burn_depth;
+
+    /* Process data in 3 block chunks. */
+    while (nblocks >= 3)
+      {
+        tripledes_amd64_cfb_dec(ctx->encrypt_subkeys, outbuf, inbuf, iv);
+
+        nblocks -= 3;
+        outbuf += 3 * DES_BLOCKSIZE;
+        inbuf += 3 * DES_BLOCKSIZE;
+      }
+
+    /* Use generic code to handle smaller chunks... */
+  }
+#endif
+
+  for ( ;nblocks; nblocks-- )
+    {
+      tripledes_ecb_encrypt (ctx, iv, iv);
+      cipher_block_xor_n_copy(outbuf, iv, inbuf, DES_BLOCKSIZE);
+      outbuf += DES_BLOCKSIZE;
+      inbuf += DES_BLOCKSIZE;
+    }
+
+  _gcry_burn_stack(burn_stack_depth);
+}
+
+
+/*
+ * Check whether the 8 byte key is weak.
+ * Does not check the parity bits of the key but simply ignores them.
+ */
+static int
+is_weak_key ( const byte *key )
+{
+  byte work[8];
+  int i, left, right, middle, cmp_result;
+
+  /* clear parity bits */
+  for(i=0; i<8; ++i)
+     work[i] = key[i] & 0xfe;
+
+  /* binary search in the weak key table */
+  left = 0;
+  right = 63;
+  while(left <= right)
+    {
+      middle = (left + right) / 2;
+
+      if ( !(cmp_result=working_memcmp(work, weak_keys[middle], 8)) )
+          return -1;
+
+      if ( cmp_result > 0 )
+          left = middle + 1;
+      else
+          right = middle - 1;
+    }
+
+  return 0;
+}
+
+
+/* Alternative setkey for selftests; needs a larger key than the default.
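+   (Annotation: 3DES consumes a 24-byte key, i.e. three 8-byte DES
+   keys, so the selftest helpers substitute the fixed key below.)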
 */
+static gcry_err_code_t
+bulk_selftest_setkey (void *context, const byte *__key, unsigned __keylen,
+                      cipher_bulk_ops_t *bulk_ops)
+{
+  static const unsigned char key[24] ATTR_ALIGNED_16 = {
+      0x66,0x9A,0x00,0x7F,0xC7,0x6A,0x45,0x9F,
+      0x98,0xBA,0xF9,0x17,0xFE,0xDF,0x95,0x22,
+      0x18,0x2A,0x39,0x47,0x5E,0x6F,0x75,0x82
+    };
+
+  (void)__key;
+  (void)__keylen;
+
+  return do_tripledes_setkey(context, key, sizeof(key), bulk_ops);
+}
+
+
+/* Run the self-tests for DES-CTR, tests IV increment of bulk CTR
+   encryption.  Returns NULL on success. */
+static const char *
+selftest_ctr (void)
+{
+  const int nblocks = 3+1;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_ctr("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for DES-CBC, tests bulk CBC decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cbc (void)
+{
+  const int nblocks = 3+2;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_cbc("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/* Run the self-tests for DES-CFB, tests bulk CFB decryption.
+   Returns NULL on success. */
+static const char *
+selftest_cfb (void)
+{
+  const int nblocks = 3+2;
+  const int blocksize = DES_BLOCKSIZE;
+  const int context_size = sizeof(struct _tripledes_ctx);
+
+  return _gcry_selftest_helper_cfb("3DES", &bulk_selftest_setkey,
+           &do_tripledes_encrypt, nblocks, blocksize, context_size);
+}
+
+
+/*
+ * Performs a selftest of this DES/Triple-DES implementation.
+ * Returns a string with the error text on failure.
+ * Returns NULL if all is ok.
+ */
+static const char *
+selftest (void)
+{
+  const char *r;
+
+  /*
+   * Check if 'u32' is really 32 bits wide.  This DES / 3DES
+   * implementation needs this.
+   */
+  if (sizeof (u32) != 4)
+    return "Wrong word size for DES configured.";
+
+  /*
+   * DES Maintenance Test
+   */
+  {
+    int i;
+    byte key[8] =
+      {0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55, 0x55};
+    byte input[8] =
+      {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff};
+    byte result[8] =
+      {0x24, 0x6e, 0x9d, 0xb9, 0xc5, 0x50, 0x38, 0x1a};
+    byte temp1[8], temp2[8], temp3[8];
+    des_ctx des;
+
+    for (i = 0; i < 64; ++i)
+      {
+        des_setkey (des, key);
+        des_ecb_encrypt (des, input, temp1);
+        des_ecb_encrypt (des, temp1, temp2);
+        des_setkey (des, temp2);
+        des_ecb_decrypt (des, temp1, temp3);
+        memcpy (key, temp3, 8);
+        memcpy (input, temp1, 8);
+      }
+    if (memcmp (temp3, result, 8))
+      return "DES maintenance test failed.";
+  }
+
+
+  /*
+   * Self-made Triple-DES test  (does somebody know an official test?)
+   */
+  {
+    int i;
+    byte input[8] =
+      {0xfe, 0xdc, 0xba, 0x98, 0x76, 0x54, 0x32, 0x10};
+    byte key1[8] =
+      {0x12, 0x34, 0x56, 0x78, 0x9a, 0xbc, 0xde, 0xf0};
+    byte key2[8] =
+      {0x11, 0x22, 0x33, 0x44, 0xff, 0xaa, 0xcc, 0xdd};
+    byte result[8] =
+      {0x7b, 0x38, 0x3b, 0x23, 0xa2, 0x7d, 0x26, 0xd3};
+
+    tripledes_ctx des3;
+
+    for (i = 0; i < 16; ++i)
+      {
+        tripledes_set2keys (des3, key1, key2);
+        tripledes_ecb_encrypt (des3, input, key1);
+        tripledes_ecb_decrypt (des3, input, key2);
+        tripledes_set3keys (des3, key1, input, key2);
+        tripledes_ecb_encrypt (des3, input, input);
+      }
+    if (memcmp (input, result, 8))
+      return "Triple-DES test failed.";
+  }
+
+  /*
+   * More Triple-DES tests.  These are test vectors as used by SSLeay,
+   * thanks to Jeroen C. van Gelderen.
+ */
+  {
+    static const struct { byte key[24]; byte plain[8]; byte cipher[8]; }
+      testdata[] = {
+        { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 },
+          { 0x95,0xF8,0xA5,0xE5,0xDD,0x31,0xD9,0x00 },
+          { 0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00 }
+        },
+
+        { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01 },
+          { 0x9D,0x64,0x55,0x5A,0x9A,0x10,0xB8,0x52 },
+          { 0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x00 }
+        },
+        { { 0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E,
+            0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E,
+            0x38,0x49,0x67,0x4C,0x26,0x02,0x31,0x9E },
+          { 0x51,0x45,0x4B,0x58,0x2D,0xDF,0x44,0x0A },
+          { 0x71,0x78,0x87,0x6E,0x01,0xF1,0x9B,0x2A }
+        },
+        { { 0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6,
+            0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6,
+            0x04,0xB9,0x15,0xBA,0x43,0xFE,0xB5,0xB6 },
+          { 0x42,0xFD,0x44,0x30,0x59,0x57,0x7F,0xA2 },
+          { 0xAF,0x37,0xFB,0x42,0x1F,0x8C,0x40,0x95 }
+        },
+        { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
+            0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
+            0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF },
+          { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 },
+          { 0x3D,0x12,0x4F,0xE2,0x19,0x8B,0xA3,0x18 }
+        },
+        { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
+            0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,
+            0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF },
+          { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 },
+          { 0xFB,0xAB,0xA1,0xFF,0x9D,0x05,0xE9,0xB1 }
+        },
+        { { 0x01,0x23,0x45,0x67,0x89,0xAB,0xCD,0xEF,
+            0x55,0x55,0x55,0x55,0x55,0x55,0x55,0x55,
+            0xFE,0xDC,0xBA,0x98,0x76,0x54,0x32,0x10 },
+          { 0x73,0x6F,0x6D,0x65,0x64,0x61,0x74,0x61 },
+          { 0x18,0xd7,0x48,0xe5,0x63,0x62,0x05,0x72 }
+        },
+        { { 0x03,0x52,0x02,0x07,0x67,0x20,0x82,0x17,
+            0x86,0x02,0x87,0x66,0x59,0x08,0x21,0x98,
+            0x64,0x05,0x6A,0xBD,0xFE,0xA9,0x34,0x57 },
+          { 0x73,0x71,0x75,0x69,0x67,0x67,0x6C,0x65 },
+          { 0xc0,0x7d,0x2a,0x0f,0xa5,0x66,0xfa,0x30 }
+        },
+        { { 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x80,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+            0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x02 },
+          { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
+          { 0xe6,0xe6,0xdd,0x5b,0x7e,0x72,0x29,0x74 }
+        },
+        { { 0x10,0x46,0x10,0x34,0x89,0x98,0x80,0x20,
+            0x91,0x07,0xD0,0x15,0x89,0x19,0x01,0x01,
+            0x19,0x07,0x92,0x10,0x98,0x1A,0x01,0x01 },
+          { 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00 },
+          { 0xe1,0xef,0x62,0xc3,0x32,0xfe,0x82,0x5b }
+        }
+      };
+
+    byte result[8];
+    int i;
+    tripledes_ctx des3;
+
+    for (i=0; i<sizeof(testdata)/sizeof(*testdata); ++i)
+      {
+        tripledes_set3keys (des3, testdata[i].key,
+                            testdata[i].key + 8, testdata[i].key + 16);
+
+        tripledes_ecb_encrypt (des3, testdata[i].plain, result);
+        if (memcmp (testdata[i].cipher, result, 8))
+          return "Triple-DES SSLeay test failed on encryption.";
+
+        tripledes_ecb_decrypt (des3, testdata[i].cipher, result);
+        if (memcmp (testdata[i].plain, result, 8))
+          return "Triple-DES SSLeay test failed on decryption.";
+      }
+  }
+
+  /*
+   * Check the weak key detection.  We simply assume that the table
+   * with the weak keys is sorted (as the binary search in
+   * is_weak_key() requires) and verify it against a checksum.
+   */
+  {
+    int i;
+    unsigned char *p;
+    gcry_md_hd_t h;
+
+    if (_gcry_md_open (&h, GCRY_MD_SHA1, 0))
+      return "SHA1 not available";
+
+    for (i = 0; i < 64; i++)
+      _gcry_md_write (h, weak_keys[i], 8);
+    p = _gcry_md_read (h, GCRY_MD_SHA1);
+    i = memcmp (p, weak_keys_chksum, 20);
+    _gcry_md_close (h);
+    if (i)
+      return "weak key table defect";
+
+    for (i = 0; i < 64; i++)
+      if (!is_weak_key(weak_keys[i]))
+        return "DES weak key detection failed";
+  }
+
+  if ( (r = selftest_cbc ()) )
+    return r;
+
+  if ( (r = selftest_cfb ()) )
+    return r;
+
+  if ( (r = selftest_ctr ()) )
+    return r;
+
+  return 0;
+}
+
+
+static gcry_err_code_t
+do_tripledes_setkey ( void *context, const byte *key, unsigned keylen,
+                      cipher_bulk_ops_t *bulk_ops )
+{
+  struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context;
+
+  if( keylen != 24 )
+    return GPG_ERR_INV_KEYLEN;
+
+  /* Set the bulk helpers used by cipher.c.  */
+  bulk_ops->cbc_dec = _gcry_3des_cbc_dec;
+  bulk_ops->cfb_dec = _gcry_3des_cfb_dec;
+  bulk_ops->ctr_enc = _gcry_3des_ctr_enc;
+
+  tripledes_set3keys ( ctx, key, key+8, key+16);
+
+  if (ctx->flags.no_weak_key)
+    ; /* Detection has been disabled.
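+         See CIPHER_INFO_NO_WEAK_KEY in do_tripledes_set_extra_info
+         below.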
*/ + else if (is_weak_key (key) || is_weak_key (key+8) || is_weak_key (key+16)) + { + _gcry_burn_stack (64); + return GPG_ERR_WEAK_KEY; + } + _gcry_burn_stack (64); + + return GPG_ERR_NO_ERROR; +} + + +static gcry_err_code_t +do_tripledes_set_extra_info (void *context, int what, + const void *buffer, size_t buflen) +{ + struct _tripledes_ctx *ctx = (struct _tripledes_ctx *)context; + gpg_err_code_t ec = 0; + + (void)buffer; + (void)buflen; + + switch (what) + { + case CIPHER_INFO_NO_WEAK_KEY: + ctx->flags.no_weak_key = 1; + break; + + default: + ec = GPG_ERR_INV_OP; + break; + } + return ec; +} + + +static unsigned int +do_tripledes_encrypt( void *context, byte *outbuf, const byte *inbuf ) +{ + struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context; + + tripledes_ecb_encrypt ( ctx, inbuf, outbuf ); + return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK; +} + +static unsigned int +do_tripledes_decrypt( void *context, byte *outbuf, const byte *inbuf ) +{ + struct _tripledes_ctx *ctx = (struct _tripledes_ctx *) context; + tripledes_ecb_decrypt ( ctx, inbuf, outbuf ); + return /*burn_stack*/ TRIPLEDES_ECB_BURN_STACK; +} + +static gcry_err_code_t +do_des_setkey (void *context, const byte *key, unsigned keylen, + cipher_bulk_ops_t *bulk_ops) +{ + struct _des_ctx *ctx = (struct _des_ctx *) context; + + (void)bulk_ops; + + if (keylen != 8) + return GPG_ERR_INV_KEYLEN; + + des_setkey (ctx, key); + + if (is_weak_key (key)) { + _gcry_burn_stack (64); + return GPG_ERR_WEAK_KEY; + } + _gcry_burn_stack (64); + + return GPG_ERR_NO_ERROR; +} + + +static unsigned int +do_des_encrypt( void *context, byte *outbuf, const byte *inbuf ) +{ + struct _des_ctx *ctx = (struct _des_ctx *) context; + + des_ecb_encrypt ( ctx, inbuf, outbuf ); + return /*burn_stack*/ (32); +} + +static unsigned int +do_des_decrypt( void *context, byte *outbuf, const byte *inbuf ) +{ + struct _des_ctx *ctx = (struct _des_ctx *) context; + + des_ecb_decrypt ( ctx, inbuf, outbuf ); + return /*burn_stack*/ (32); +} + + + + +/* + Self-test section. + */ + + +/* Selftest for TripleDES. */ +static gpg_err_code_t +selftest_fips (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + + (void)extended; /* No extended tests available. */ + + what = "low-level"; + errtxt = selftest (); + if (errtxt) + goto failed; + + /* The low-level self-tests are quite extensive and thus we can do + without high level tests. This is also justified because we have + no custom block code implementation for 3des but always use the + standard high level block code. */ + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cipher", GCRY_CIPHER_3DES, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_CIPHER_3DES: + ec = selftest_fips (extended, report); + break; + default: + ec = GPG_ERR_CIPHER_ALGO; + break; + + } + return ec; +} + + + +gcry_cipher_spec_t _gcry_cipher_spec_des = + { + GCRY_CIPHER_DES, {0, 0}, + "DES", NULL, NULL, 8, 64, sizeof (struct _des_ctx), + do_des_setkey, do_des_encrypt, do_des_decrypt + }; + +static gcry_cipher_oid_spec_t oids_tripledes[] = + { + { "1.2.840.113549.3.7", GCRY_CIPHER_MODE_CBC }, + /* Teletrust specific OID for 3DES. 
 */
+    { "1.3.36.3.1.3.2.1",      GCRY_CIPHER_MODE_CBC },
+    /* pbeWithSHAAnd3_KeyTripleDES_CBC */
+    { "1.2.840.113549.1.12.1.3", GCRY_CIPHER_MODE_CBC },
+    { NULL }
+  };
+
+gcry_cipher_spec_t _gcry_cipher_spec_tripledes =
+  {
+    GCRY_CIPHER_3DES, {0, 1},
+    "3DES", NULL, oids_tripledes, 8, 192, sizeof (struct _tripledes_ctx),
+    do_tripledes_setkey, do_tripledes_encrypt, do_tripledes_decrypt,
+    NULL, NULL,
+    run_selftests,
+    do_tripledes_set_extra_info
+  };
diff --git a/comm/third_party/libgcrypt/cipher/dsa-common.c b/comm/third_party/libgcrypt/cipher/dsa-common.c
new file mode 100644
index 0000000000..fe49248dd6
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/dsa-common.c
@@ -0,0 +1,418 @@
+/* dsa-common.c - Common code for DSA
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/*
+ * Modify K so that the computation time difference stays small,
+ * by making K large enough.
+ *
+ * Originally, (EC)DSA computation requires k where 0 < k < q.  Here,
+ * we add q (the order) to keep k in a range: q < k < 2*q (or, adding
+ * q once more, to keep k in a range: 2*q < k < 3*q), so that the
+ * timing difference of the EC multiply (or exponentiation) operation
+ * can be small.  The result of the (EC)DSA computation is the same.
+ */
+void
+_gcry_dsa_modify_k (gcry_mpi_t k, gcry_mpi_t q, int qbits)
+{
+  gcry_mpi_t k1 = mpi_new (qbits+2);
+
+  mpi_resize (k, (qbits+2+BITS_PER_MPI_LIMB-1) / BITS_PER_MPI_LIMB);
+  k->nlimbs = k->alloced;
+  mpi_add (k, k, q);
+  mpi_add (k1, k, q);
+  mpi_set_cond (k, k1, !mpi_test_bit (k, qbits));
+
+  mpi_free (k1);
+}
+
+/*
+ * Generate a random secret exponent K less than Q.
+ * Note that ECDSA uses this code also to generate D.
+ */
+gcry_mpi_t
+_gcry_dsa_gen_k (gcry_mpi_t q, int security_level)
+{
+  gcry_mpi_t k = mpi_alloc_secure (mpi_get_nlimbs (q));
+  unsigned int nbits = mpi_get_nbits (q);
+  unsigned int nbytes = (nbits+7)/8;
+  char *rndbuf = NULL;
+
+  /* To learn why we don't use mpi_mod to get the requested bit size,
+     read the paper: "The Insecurity of the Digital Signature
+     Algorithm with Partially Known Nonces" by Nguyen and Shparlinski.
+     Journal of Cryptology, New York. Vol 15, nr 3 (2003) */
+
+  if (DBG_CIPHER)
+    log_debug ("choosing a random k of %u bits at seclevel %d\n",
+               nbits, security_level);
+  for (;;)
+    {
+      if ( !rndbuf || nbits < 32 )
+        {
+          xfree (rndbuf);
+          rndbuf = _gcry_random_bytes_secure (nbytes, security_level);
+        }
+      else
+        { /* Change only some of the higher bits.  We could improve
+             this by directly requesting more memory at the first call
+             to get_random_bytes() and use these extra bytes here.
+             However the required management code is more complex and
+             thus we prefer this simple method.
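+             (Annotation, not in the upstream comment: the retry is
+             taken when k >= q or k == 0, which is typically decided
+             by the top bits, so refreshing the four most significant
+             bytes is enough to make the loop progress.)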
 */
+          char *pp = _gcry_random_bytes_secure (4, security_level);
+          memcpy (rndbuf, pp, 4);
+          xfree (pp);
+        }
+      _gcry_mpi_set_buffer (k, rndbuf, nbytes, 0);
+
+      /* Make sure we have the requested number of bits.  This code
+         looks a bit funny but it is easy to understand if you
+         consider that mpi_set_highbit clears all higher bits.  We
+         don't have a clear_highbit, thus we first set the high bit
+         and then clear it again.  */
+      if (mpi_test_bit (k, nbits-1))
+        mpi_set_highbit (k, nbits-1);
+      else
+        {
+          mpi_set_highbit (k, nbits-1);
+          mpi_clear_bit (k, nbits-1);
+        }
+
+      if (!(mpi_cmp (k, q) < 0))    /* check: k < q */
+        {
+          if (DBG_CIPHER)
+            log_debug ("\tk too large - again\n");
+          continue; /* no  */
+        }
+      if (!(mpi_cmp_ui (k, 0) > 0)) /* check: k > 0 */
+        {
+          if (DBG_CIPHER)
+            log_debug ("\tk is zero - again\n");
+          continue; /* no */
+        }
+      break; /* okay */
+    }
+  xfree (rndbuf);
+
+  return k;
+}
+
+
+/* Turn VALUE into an octet string and store it in an allocated buffer
+   at R_FRAME.  If the resulting octet string is shorter than NBYTES
+   the result will be left padded with zeroes.  If VALUE does not fit
+   into NBYTES an error code is returned.  */
+static gpg_err_code_t
+int2octets (unsigned char **r_frame, gcry_mpi_t value, size_t nbytes)
+{
+  gpg_err_code_t rc;
+  size_t nframe, noff, n;
+  unsigned char *frame;
+
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, NULL, 0, &nframe, value);
+  if (rc)
+    return rc;
+  if (nframe > nbytes)
+    return GPG_ERR_TOO_LARGE; /* Value too long to fit into NBYTES.  */
+
+  noff = (nframe < nbytes)? nbytes - nframe : 0;
+  n = nframe + noff;
+  frame = mpi_is_secure (value)? xtrymalloc_secure (n) : xtrymalloc (n);
+  if (!frame)
+    return gpg_err_code_from_syserror ();
+  if (noff)
+    memset (frame, 0, noff);
+  nframe += noff;
+  rc = _gcry_mpi_print (GCRYMPI_FMT_USG, frame+noff, nframe-noff, NULL, value);
+  if (rc)
+    {
+      xfree (frame);
+      return rc;
+    }
+
+  *r_frame = frame;
+  return 0;
+}
+
+
+/* Convert the bit string BITS of length NBITS into an octet string
+   with a length of (QBITS+7)/8 bytes.  On success store the result at
+   R_FRAME.  */
+static gpg_err_code_t
+bits2octets (unsigned char **r_frame,
+             const void *bits, unsigned int nbits,
+             gcry_mpi_t q, unsigned int qbits)
+{
+  gpg_err_code_t rc;
+  gcry_mpi_t z1;
+
+  /* z1 = bits2int (b) */
+  rc = _gcry_mpi_scan (&z1, GCRYMPI_FMT_USG, bits, (nbits+7)/8, NULL);
+  if (rc)
+    return rc;
+  if (nbits > qbits)
+    mpi_rshift (z1, z1, nbits - qbits);
+
+  /* z2 = z1 mod q */
+  if (mpi_cmp (z1, q) >= 0)
+    mpi_sub (z1, z1, q);
+
+  /* Convert to an octet string.  */
+  rc = int2octets (r_frame, z1, (qbits+7)/8);
+
+  mpi_free (z1);
+  return rc;
+}
+
+
+/*
+ * Generate a deterministic secret exponent K less than DSA_Q.  H1 is
+ * the digest to be signed, with a length of HLEN bytes.  HALGO is the
+ * algorithm used to create the hash.  On success the value for K is
+ * stored at R_K.
+ */
+gpg_err_code_t
+_gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k,
+                         gcry_mpi_t dsa_q, gcry_mpi_t dsa_x,
+                         const unsigned char *h1, unsigned int hlen,
+                         int halgo, unsigned int extraloops)
+{
+  gpg_err_code_t rc;
+  unsigned char *V = NULL;
+  unsigned char *K = NULL;
+  unsigned char *x_buf = NULL;
+  unsigned char *h1_buf = NULL;
+  gcry_md_hd_t hd = NULL;
+  unsigned char *t = NULL;
+  gcry_mpi_t k = NULL;
+  unsigned int tbits, qbits;
+  int i;
+
+  qbits = mpi_get_nbits (dsa_q);
+
+  if (!qbits || !h1 || !hlen)
+    return GPG_ERR_EINVAL;
+
+  if (_gcry_md_get_algo_dlen (halgo) != hlen)
+    return GPG_ERR_DIGEST_ALGO;
+
+  /* Step b:  V = 0x01 0x01 0x01 ...
0x01 */ + V = xtrymalloc (hlen); + if (!V) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + for (i=0; i < hlen; i++) + V[i] = 1; + + /* Step c: K = 0x00 0x00 0x00 ... 0x00 */ + K = xtrycalloc (1, hlen); + if (!K) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + rc = int2octets (&x_buf, dsa_x, (qbits+7)/8); + if (rc) + goto leave; + + rc = bits2octets (&h1_buf, h1, hlen*8, dsa_q, qbits); + if (rc) + goto leave; + + /* Create a handle to compute the HMACs. */ + rc = _gcry_md_open (&hd, halgo, (GCRY_MD_FLAG_SECURE | GCRY_MD_FLAG_HMAC)); + if (rc) + goto leave; + + /* Step d: K = HMAC_K(V || 0x00 || int2octets(x) || bits2octets(h1) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + _gcry_md_write (hd, "", 1); + _gcry_md_write (hd, x_buf, (qbits+7)/8); + _gcry_md_write (hd, h1_buf, (qbits+7)/8); + memcpy (K, _gcry_md_read (hd, 0), hlen); + + /* Step e: V = HMAC_K(V) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + memcpy (V, _gcry_md_read (hd, 0), hlen); + + /* Step f: K = HMAC_K(V || 0x01 || int2octets(x) || bits2octets(h1) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + _gcry_md_write (hd, "\x01", 1); + _gcry_md_write (hd, x_buf, (qbits+7)/8); + _gcry_md_write (hd, h1_buf, (qbits+7)/8); + memcpy (K, _gcry_md_read (hd, 0), hlen); + + /* Step g: V = HMAC_K(V) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + memcpy (V, _gcry_md_read (hd, 0), hlen); + + /* Step h. */ + t = xtrymalloc_secure ((qbits+7)/8+hlen); + if (!t) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + again: + for (tbits = 0; tbits < qbits;) + { + /* V = HMAC_K(V) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + memcpy (V, _gcry_md_read (hd, 0), hlen); + + /* T = T || V */ + memcpy (t+(tbits+7)/8, V, hlen); + tbits += 8*hlen; + } + + /* k = bits2int (T) */ + mpi_free (k); + k = NULL; + rc = _gcry_mpi_scan (&k, GCRYMPI_FMT_USG, t, (tbits+7)/8, NULL); + if (rc) + goto leave; + if (tbits > qbits) + mpi_rshift (k, k, tbits - qbits); + + /* Check: k < q and k > 1 */ + if (!(mpi_cmp (k, dsa_q) < 0 && mpi_cmp_ui (k, 0) > 0)) + { + /* K = HMAC_K(V || 0x00) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + _gcry_md_write (hd, "", 1); + memcpy (K, _gcry_md_read (hd, 0), hlen); + + /* V = HMAC_K(V) */ + rc = _gcry_md_setkey (hd, K, hlen); + if (rc) + goto leave; + _gcry_md_write (hd, V, hlen); + memcpy (V, _gcry_md_read (hd, 0), hlen); + + goto again; + } + + /* The caller may have requested that we introduce some extra loops. + This is for example useful if the caller wants another value for + K because the last returned one yielded an R of 0. Because this + is very unlikely we implement it in a straightforward way. 
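+     (Annotation: each extra loop applies the same K/V update as the
+     out-of-range branch above, so the sequence of candidate k values
+     stays fully deterministic.)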
 */
+  if (extraloops)
+    {
+      extraloops--;
+
+      /* K = HMAC_K(V || 0x00) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      _gcry_md_write (hd, "", 1);
+      memcpy (K, _gcry_md_read (hd, 0), hlen);
+
+      /* V = HMAC_K(V) */
+      rc = _gcry_md_setkey (hd, K, hlen);
+      if (rc)
+        goto leave;
+      _gcry_md_write (hd, V, hlen);
+      memcpy (V, _gcry_md_read (hd, 0), hlen);
+
+      goto again;
+    }
+
+  /* log_mpidump ("  k", k); */
+
+ leave:
+  xfree (t);
+  _gcry_md_close (hd);
+  xfree (h1_buf);
+  xfree (x_buf);
+  xfree (K);
+  xfree (V);
+
+  if (rc)
+    mpi_free (k);
+  else
+    *r_k = k;
+  return rc;
+}
+
+/*
+ * Truncate the opaque hash value to qbits for DSA.
+ * Non-opaque input is not truncated, in the hope that the user
+ * knows what is being passed.  It is not possible to correctly
+ * truncate non-opaque inputs.
+ */
+gpg_err_code_t
+_gcry_dsa_normalize_hash (gcry_mpi_t input,
+                          gcry_mpi_t *out,
+                          unsigned int qbits)
+{
+  gpg_err_code_t rc = 0;
+  const void *abuf;
+  unsigned int abits;
+  gcry_mpi_t hash;
+
+  if (mpi_is_opaque (input))
+    {
+      abuf = mpi_get_opaque (input, &abits);
+      rc = _gcry_mpi_scan (&hash, GCRYMPI_FMT_USG, abuf, (abits+7)/8, NULL);
+      if (rc)
+        return rc;
+      if (abits > qbits)
+        mpi_rshift (hash, hash, abits - qbits);
+    }
+  else
+    hash = input;
+
+  *out = hash;
+
+  return rc;
+}
diff --git a/comm/third_party/libgcrypt/cipher/dsa.c b/comm/third_party/libgcrypt/cipher/dsa.c
new file mode 100644
index 0000000000..d793b9aaf2
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/dsa.c
@@ -0,0 +1,1394 @@
+/* dsa.c - DSA signature algorithm
+ * Copyright (C) 1998, 2000, 2001, 2002, 2003,
+ *               2006, 2008 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+typedef struct
+{
+  gcry_mpi_t p;       /* prime */
+  gcry_mpi_t q;       /* group order */
+  gcry_mpi_t g;       /* group generator */
+  gcry_mpi_t y;       /* g^x mod p */
+} DSA_public_key;
+
+
+typedef struct
+{
+  gcry_mpi_t p;       /* prime */
+  gcry_mpi_t q;       /* group order */
+  gcry_mpi_t g;       /* group generator */
+  gcry_mpi_t y;       /* g^x mod p */
+  gcry_mpi_t x;       /* secret exponent */
+} DSA_secret_key;
+
+
+/* A structure used to hold domain parameters.  */
+typedef struct
+{
+  gcry_mpi_t p;       /* prime */
+  gcry_mpi_t q;       /* group order */
+  gcry_mpi_t g;       /* group generator */
+} dsa_domain_t;
+
+
+static const char *dsa_names[] =
+  {
+    "dsa",
+    "openpgp-dsa",
+    NULL,
+  };
+
+
+/* A sample 1024 bit DSA key used for the selftests.  No longer
+ * used; kept only for reference.
*/ +#if 0 +static const char sample_secret_key_1024[] = +"(private-key" +" (dsa" +" (p #00AD7C0025BA1A15F775F3F2D673718391D00456978D347B33D7B49E7F32EDAB" +" 96273899DD8B2BB46CD6ECA263FAF04A28903503D59062A8865D2AE8ADFB5191" +" CF36FFB562D0E2F5809801A1F675DAE59698A9E01EFE8D7DCFCA084F4C6F5A44" +" 44D499A06FFAEA5E8EF5E01F2FD20A7B7EF3F6968AFBA1FB8D91F1559D52D8777B#)" +" (q #00EB7B5751D25EBBB7BD59D920315FD840E19AEBF9#)" +" (g #1574363387FDFD1DDF38F4FBE135BB20C7EE4772FB94C337AF86EA8E49666503" +" AE04B6BE81A2F8DD095311E0217ACA698A11E6C5D33CCDAE71498ED35D13991E" +" B02F09AB40BD8F4C5ED8C75DA779D0AE104BC34C960B002377068AB4B5A1F984" +" 3FBA91F537F1B7CAC4D8DD6D89B0D863AF7025D549F9C765D2FC07EE208F8D15#)" +" (y #64B11EF8871BE4AB572AA810D5D3CA11A6CDBC637A8014602C72960DB135BF46" +" A1816A724C34F87330FC9E187C5D66897A04535CC2AC9164A7150ABFA8179827" +" 6E45831AB811EEE848EBB24D9F5F2883B6E5DDC4C659DEF944DCFD80BF4D0A20" +" 42CAA7DC289F0C5A9D155F02D3D551DB741A81695B74D4C8F477F9C7838EB0FB#)" +" (x #11D54E4ADBD3034160F2CED4B7CD292A4EBF3EC0#)))"; +/* A sample 1024 bit DSA key used for the selftests (public only). */ +static const char sample_public_key_1024[] = +"(public-key" +" (dsa" +" (p #00AD7C0025BA1A15F775F3F2D673718391D00456978D347B33D7B49E7F32EDAB" +" 96273899DD8B2BB46CD6ECA263FAF04A28903503D59062A8865D2AE8ADFB5191" +" CF36FFB562D0E2F5809801A1F675DAE59698A9E01EFE8D7DCFCA084F4C6F5A44" +" 44D499A06FFAEA5E8EF5E01F2FD20A7B7EF3F6968AFBA1FB8D91F1559D52D8777B#)" +" (q #00EB7B5751D25EBBB7BD59D920315FD840E19AEBF9#)" +" (g #1574363387FDFD1DDF38F4FBE135BB20C7EE4772FB94C337AF86EA8E49666503" +" AE04B6BE81A2F8DD095311E0217ACA698A11E6C5D33CCDAE71498ED35D13991E" +" B02F09AB40BD8F4C5ED8C75DA779D0AE104BC34C960B002377068AB4B5A1F984" +" 3FBA91F537F1B7CAC4D8DD6D89B0D863AF7025D549F9C765D2FC07EE208F8D15#)" +" (y #64B11EF8871BE4AB572AA810D5D3CA11A6CDBC637A8014602C72960DB135BF46" +" A1816A724C34F87330FC9E187C5D66897A04535CC2AC9164A7150ABFA8179827" +" 6E45831AB811EEE848EBB24D9F5F2883B6E5DDC4C659DEF944DCFD80BF4D0A20" +" 42CAA7DC289F0C5A9D155F02D3D551DB741A81695B74D4C8F477F9C7838EB0FB#)))"; +#endif /*0*/ + +/* 2048 DSA key from RFC 6979 A.2.2 */ +static const char sample_public_key_2048[] = +"(public-key" +" (dsa" +" (p #9DB6FB5951B66BB6FE1E140F1D2CE5502374161FD6538DF1648218642F0B5C48C8F7A41AADFA187324B87674FA1822B00F1ECF8136943D7C55757264E5A1A44FFE012E9936E00C1D3E9310B01C7D179805D3058B2A9F4BB6F9716BFE6117C6B5B3CC4D9BE341104AD4A80AD6C94E005F4B993E14F091EB51743BF33050C38DE235567E1B34C3D6A5C0CEAA1A0F368213C3D19843D0B4B09DCB9FC72D39C8DE41F1BF14D4BB4563CA28371621CAD3324B6A2D392145BEBFAC748805236F5CA2FE92B871CD8F9C36D3292B5509CA8CAA77A2ADFC7BFD77DDA6F71125A7456FEA153E433256A2261C6A06ED3693797E7995FAD5AABBCFBE3EDA2741E375404AE25B#)" +" (q #F2C3119374CE76C9356990B465374A17F23F9ED35089BD969F61C6DDE9998C1F#)" +" (g #5C7FF6B06F8F143FE8288433493E4769C4D988ACE5BE25A0E24809670716C613D7B0CEE6932F8FAA7C44D2CB24523DA53FBE4F6EC3595892D1AA58C4328A06C46A15662E7EAA703A1DECF8BBB2D05DBE2EB956C142A338661D10461C0D135472085057F3494309FFA73C611F78B32ADBB5740C361C9F35BE90997DB2014E2EF5AA61782F52ABEB8BD6432C4DD097BC5423B285DAFB60DC364E8161F4A2A35ACA3A10B1C4D203CC76A470A33AFDCBDD92959859ABD8B56E1725252D78EAC66E71BA9AE3F1DD2487199874393CD4D832186800654760E1E34C09E4D155179F9EC0DC4473F996BDCE6EED1CABED8B6F116F7AD9CF505DF0F998E34AB27514B0FFE7#)" +" (y 
#667098C654426C78D7F8201EAC6C203EF030D43605032C2F1FA937E5237DBD949F34A0A2564FE126DC8B715C5141802CE0979C8246463C40E6B6BDAA2513FA611728716C2E4FD53BC95B89E69949D96512E873B9C8F8DFD499CC312882561ADECB31F658E934C0C197F2C4D96B05CBAD67381E7B768891E4DA3843D24D94CDFB5126E9B8BF21E8358EE0E0A30EF13FD6A664C0DCE3731F7FB49A4845A4FD8254687972A2D382599C9BAC4E0ED7998193078913032558134976410B89D2C171D123AC35FD977219597AA7D15C1A9A428E59194F75C721EBCBCFAE44696A499AFA74E04299F132026601638CB87AB79190D4A0986315DA8EEC6561C938996BEADF#)))"; + +static const char sample_secret_key_2048[] = +"(private-key" +" (dsa" +" (p #9DB6FB5951B66BB6FE1E140F1D2CE5502374161FD6538DF1648218642F0B5C48C8F7A41AADFA187324B87674FA1822B00F1ECF8136943D7C55757264E5A1A44FFE012E9936E00C1D3E9310B01C7D179805D3058B2A9F4BB6F9716BFE6117C6B5B3CC4D9BE341104AD4A80AD6C94E005F4B993E14F091EB51743BF33050C38DE235567E1B34C3D6A5C0CEAA1A0F368213C3D19843D0B4B09DCB9FC72D39C8DE41F1BF14D4BB4563CA28371621CAD3324B6A2D392145BEBFAC748805236F5CA2FE92B871CD8F9C36D3292B5509CA8CAA77A2ADFC7BFD77DDA6F71125A7456FEA153E433256A2261C6A06ED3693797E7995FAD5AABBCFBE3EDA2741E375404AE25B#)" +" (q #F2C3119374CE76C9356990B465374A17F23F9ED35089BD969F61C6DDE9998C1F#)" +" (g #5C7FF6B06F8F143FE8288433493E4769C4D988ACE5BE25A0E24809670716C613D7B0CEE6932F8FAA7C44D2CB24523DA53FBE4F6EC3595892D1AA58C4328A06C46A15662E7EAA703A1DECF8BBB2D05DBE2EB956C142A338661D10461C0D135472085057F3494309FFA73C611F78B32ADBB5740C361C9F35BE90997DB2014E2EF5AA61782F52ABEB8BD6432C4DD097BC5423B285DAFB60DC364E8161F4A2A35ACA3A10B1C4D203CC76A470A33AFDCBDD92959859ABD8B56E1725252D78EAC66E71BA9AE3F1DD2487199874393CD4D832186800654760E1E34C09E4D155179F9EC0DC4473F996BDCE6EED1CABED8B6F116F7AD9CF505DF0F998E34AB27514B0FFE7#)" +" (y #667098C654426C78D7F8201EAC6C203EF030D43605032C2F1FA937E5237DBD949F34A0A2564FE126DC8B715C5141802CE0979C8246463C40E6B6BDAA2513FA611728716C2E4FD53BC95B89E69949D96512E873B9C8F8DFD499CC312882561ADECB31F658E934C0C197F2C4D96B05CBAD67381E7B768891E4DA3843D24D94CDFB5126E9B8BF21E8358EE0E0A30EF13FD6A664C0DCE3731F7FB49A4845A4FD8254687972A2D382599C9BAC4E0ED7998193078913032558134976410B89D2C171D123AC35FD977219597AA7D15C1A9A428E59194F75C721EBCBCFAE44696A499AFA74E04299F132026601638CB87AB79190D4A0986315DA8EEC6561C938996BEADF#)" +" (x #69C7548C21D0DFEA6B9A51C9EAD4E27C33D3B3F180316E5BCAB92C933F0E4DBC#)))"; + + + +static int test_keys (DSA_secret_key *sk, unsigned int qbits); +static int check_secret_key (DSA_secret_key *sk); +static gpg_err_code_t generate (DSA_secret_key *sk, + unsigned int nbits, + unsigned int qbits, + int transient_key, + dsa_domain_t *domain, + gcry_mpi_t **ret_factors); +static gpg_err_code_t sign (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, + DSA_secret_key *skey, int flags, int hashalgo); +static gpg_err_code_t verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, + DSA_public_key *pkey); +static unsigned int dsa_get_nbits (gcry_sexp_t parms); + + +static void (*progress_cb) (void *,const char *, int, int, int ); +static void *progress_cb_data; + + +void +_gcry_register_pk_dsa_progress (void (*cb) (void *, const char *, + int, int, int), + void *cb_data) +{ + progress_cb = cb; + progress_cb_data = cb_data; +} + + +static void +progress (int c) +{ + if (progress_cb) + progress_cb (progress_cb_data, "pk_dsa", c, 0, 0); +} + + +/* Check that a freshly generated key actually works. Returns 0 on success. */ +static int +test_keys (DSA_secret_key *sk, unsigned int qbits) +{ + int result = -1; /* Default to failure. 
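+                       (Annotation: any early exit through the
+                       "leave" label below therefore reports failure.)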
*/ + DSA_public_key pk; + gcry_mpi_t data = mpi_new (qbits); + gcry_mpi_t sig_a = mpi_new (qbits); + gcry_mpi_t sig_b = mpi_new (qbits); + + /* Put the relevant parameters into a public key structure. */ + pk.p = sk->p; + pk.q = sk->q; + pk.g = sk->g; + pk.y = sk->y; + + /* Create a random plaintext. */ + _gcry_mpi_randomize (data, qbits, GCRY_WEAK_RANDOM); + + /* Sign DATA using the secret key. */ + sign (sig_a, sig_b, data, sk, 0, 0); + + /* Verify the signature using the public key. */ + if ( verify (sig_a, sig_b, data, &pk) ) + goto leave; /* Signature does not match. */ + + /* Modify the data and check that the signing fails. */ + mpi_add_ui (data, data, 1); + if ( !verify (sig_a, sig_b, data, &pk) ) + goto leave; /* Signature matches but should not. */ + + result = 0; /* The test succeeded. */ + + leave: + _gcry_mpi_release (sig_b); + _gcry_mpi_release (sig_a); + _gcry_mpi_release (data); + return result; +} + + + +/* + Generate a DSA key pair with a key of size NBITS. If transient_key + is true the key is generated using the standard RNG and not the + very secure one. + + Returns: 2 structures filled with all needed values + and an array with the n-1 factors of (p-1) + */ +static gpg_err_code_t +generate (DSA_secret_key *sk, unsigned int nbits, unsigned int qbits, + int transient_key, dsa_domain_t *domain, gcry_mpi_t **ret_factors ) +{ + gpg_err_code_t rc; + gcry_mpi_t p; /* the prime */ + gcry_mpi_t q; /* the 160 bit prime factor */ + gcry_mpi_t g; /* the generator */ + gcry_mpi_t y; /* g^x mod p */ + gcry_mpi_t x; /* the secret exponent */ + gcry_mpi_t h, e; /* helper */ + unsigned char *rndbuf; + gcry_random_level_t random_level; + + if (qbits) + ; /* Caller supplied qbits. Use this value. */ + else if ( nbits >= 512 && nbits <= 1024 ) + qbits = 160; + else if ( nbits == 2048 ) + qbits = 224; + else if ( nbits == 3072 ) + qbits = 256; + else if ( nbits == 7680 ) + qbits = 384; + else if ( nbits == 15360 ) + qbits = 512; + else + return GPG_ERR_INV_VALUE; + + if (qbits < 160 || qbits > 512 || (qbits%8) ) + return GPG_ERR_INV_VALUE; + if (nbits < 2*qbits || nbits > 15360) + return GPG_ERR_INV_VALUE; + + if (fips_mode ()) + { + if (nbits < 1024) + return GPG_ERR_INV_VALUE; + if (transient_key) + return GPG_ERR_INV_VALUE; + } + + if (domain->p && domain->q && domain->g) + { + /* Domain parameters are given; use them. */ + p = mpi_copy (domain->p); + q = mpi_copy (domain->q); + g = mpi_copy (domain->g); + gcry_assert (mpi_get_nbits (p) == nbits); + gcry_assert (mpi_get_nbits (q) == qbits); + h = mpi_alloc (0); + e = NULL; + } + else + { + /* Generate new domain parameters. */ + rc = _gcry_generate_elg_prime (1, nbits, qbits, NULL, &p, ret_factors); + if (rc) + return rc; + + /* Get q out of factors. */ + q = mpi_copy ((*ret_factors)[0]); + gcry_assert (mpi_get_nbits (q) == qbits); + + /* Find a generator g (h and e are helpers). + e = (p-1)/q */ + e = mpi_alloc (mpi_get_nlimbs (p)); + mpi_sub_ui (e, p, 1); + mpi_fdiv_q (e, e, q); + g = mpi_alloc (mpi_get_nlimbs (p)); + h = mpi_alloc_set_ui (1); /* (We start with 2.) */ + do + { + mpi_add_ui (h, h, 1); + /* g = h^e mod p */ + mpi_powm (g, h, e, p); + } + while (!mpi_cmp_ui (g, 1)); /* Continue until g != 1. */ + } + + /* Select a random number X with the property: + * 0 < x < q-1 + * + * FIXME: Why do we use the requirement x < q-1 ? It should be + * sufficient to test for x < q. 
FIPS-186-3 check x < q-1 but it + * does not check for 0 < x because it makes sure that Q is unsigned + * and finally adds one to the result so that 0 will never be + * returned. We should replace the code below with _gcry_dsa_gen_k. + * + * This must be a very good random number because this is the secret + * part. The random quality depends on the transient_key flag. */ + random_level = transient_key ? GCRY_STRONG_RANDOM : GCRY_VERY_STRONG_RANDOM; + if (DBG_CIPHER) + log_debug("choosing a random x%s\n", transient_key? " (transient-key)":""); + gcry_assert( qbits >= 160 ); + x = mpi_alloc_secure( mpi_get_nlimbs(q) ); + mpi_sub_ui( h, q, 1 ); /* put q-1 into h */ + rndbuf = NULL; + do + { + if( DBG_CIPHER ) + progress('.'); + if( !rndbuf ) + rndbuf = _gcry_random_bytes_secure ((qbits+7)/8, random_level); + else + { /* Change only some of the higher bits (= 2 bytes)*/ + char *r = _gcry_random_bytes_secure (2, random_level); + memcpy(rndbuf, r, 2 ); + xfree(r); + } + + _gcry_mpi_set_buffer( x, rndbuf, (qbits+7)/8, 0 ); + mpi_clear_highbit( x, qbits+1 ); + } + while ( !( mpi_cmp_ui( x, 0 )>0 && mpi_cmp( x, h )<0 ) ); + xfree(rndbuf); + mpi_free( e ); + mpi_free( h ); + + /* y = g^x mod p */ + y = mpi_alloc( mpi_get_nlimbs(p) ); + mpi_powm (y, g, x, p); + + if( DBG_CIPHER ) + { + progress('\n'); + log_mpidump("dsa p", p ); + log_mpidump("dsa q", q ); + log_mpidump("dsa g", g ); + log_mpidump("dsa y", y ); + log_mpidump("dsa x", x ); + } + + /* Copy the stuff to the key structures. */ + sk->p = p; + sk->q = q; + sk->g = g; + sk->y = y; + sk->x = x; + + /* Now we can test our keys (this should never fail!). */ + if ( test_keys (sk, qbits) ) + { + _gcry_mpi_release (sk->p); sk->p = NULL; + _gcry_mpi_release (sk->q); sk->q = NULL; + _gcry_mpi_release (sk->g); sk->g = NULL; + _gcry_mpi_release (sk->y); sk->y = NULL; + _gcry_mpi_release (sk->x); sk->x = NULL; + fips_signal_error ("self-test after key generation failed"); + return GPG_ERR_SELFTEST_FAILED; + } + return 0; +} + + +/* Generate a DSA key pair with a key of size NBITS using the + algorithm given in FIPS-186-3. If USE_FIPS186_2 is true, + FIPS-186-2 is used and thus the length is restricted to 1024/160. + If DERIVEPARMS is not NULL it may contain a seed value. If domain + parameters are specified in DOMAIN, DERIVEPARMS may not be given + and NBITS and QBITS must match the specified domain parameters. */ +static gpg_err_code_t +generate_fips186 (DSA_secret_key *sk, unsigned int nbits, unsigned int qbits, + gcry_sexp_t deriveparms, int use_fips186_2, + dsa_domain_t *domain, + int *r_counter, void **r_seed, size_t *r_seedlen, + gcry_mpi_t *r_h) +{ + gpg_err_code_t ec; + struct { + gcry_sexp_t sexp; + const void *seed; + size_t seedlen; + } initial_seed = { NULL, NULL, 0 }; + gcry_mpi_t prime_q = NULL; + gcry_mpi_t prime_p = NULL; + gcry_mpi_t value_g = NULL; /* The generator. */ + gcry_mpi_t value_y = NULL; /* g^x mod p */ + gcry_mpi_t value_x = NULL; /* The secret exponent. */ + gcry_mpi_t value_h = NULL; /* Helper. */ + gcry_mpi_t value_e = NULL; /* Helper. */ + gcry_mpi_t value_c = NULL; /* helper for x */ + gcry_mpi_t value_qm2 = NULL; /* q - 2 */ + + /* Preset return values. */ + *r_counter = 0; + *r_seed = NULL; + *r_seedlen = 0; + *r_h = NULL; + + /* Derive QBITS from NBITS if requested */ + if (!qbits) + { + if (nbits == 1024) + qbits = 160; + else if (nbits == 2048) + qbits = 224; + else if (nbits == 3072) + qbits = 256; + } + + /* Check that QBITS and NBITS match the standard. Note that FIPS + 186-3 uses N for QBITS and L for NBITS. 
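+     As a concrete example, the only (L, N) pairs accepted by the
+     checks below are (1024, 160) in FIPS 186-2 mode, (2048, 224),
+     (2048, 256) and (3072, 256).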
*/ + if (nbits == 1024 && qbits == 160 && use_fips186_2) + ; /* Allowed in FIPS 186-2 mode. */ + else if (nbits == 2048 && qbits == 224) + ; + else if (nbits == 2048 && qbits == 256) + ; + else if (nbits == 3072 && qbits == 256) + ; + else + return GPG_ERR_INV_VALUE; + + if (domain->p && domain->q && domain->g) + { + /* Domain parameters are given; use them. */ + prime_p = mpi_copy (domain->p); + prime_q = mpi_copy (domain->q); + value_g = mpi_copy (domain->g); + gcry_assert (mpi_get_nbits (prime_p) == nbits); + gcry_assert (mpi_get_nbits (prime_q) == qbits); + gcry_assert (!deriveparms); + ec = 0; + } + else + { + /* Generate new domain parameters. */ + + /* Get an initial seed value. */ + if (deriveparms) + { + initial_seed.sexp = sexp_find_token (deriveparms, "seed", 0); + if (initial_seed.sexp) + initial_seed.seed = sexp_nth_data (initial_seed.sexp, 1, + &initial_seed.seedlen); + } + + if (use_fips186_2) + ec = _gcry_generate_fips186_2_prime (nbits, qbits, + initial_seed.seed, + initial_seed.seedlen, + &prime_q, &prime_p, + r_counter, + r_seed, r_seedlen); + else + ec = _gcry_generate_fips186_3_prime (nbits, qbits, + initial_seed.seed, + initial_seed.seedlen, + &prime_q, &prime_p, + r_counter, + r_seed, r_seedlen, NULL); + sexp_release (initial_seed.sexp); + if (ec) + goto leave; + + /* Find a generator g (h and e are helpers). + * e = (p-1)/q + */ + value_e = mpi_alloc_like (prime_p); + mpi_sub_ui (value_e, prime_p, 1); + mpi_fdiv_q (value_e, value_e, prime_q ); + value_g = mpi_alloc_like (prime_p); + value_h = mpi_alloc_set_ui (1); + do + { + mpi_add_ui (value_h, value_h, 1); + /* g = h^e mod p */ + mpi_powm (value_g, value_h, value_e, prime_p); + } + while (!mpi_cmp_ui (value_g, 1)); /* Continue until g != 1. */ + } + + value_c = mpi_snew (qbits); + value_x = mpi_snew (qbits); + value_qm2 = mpi_snew (qbits); + mpi_sub_ui (value_qm2, prime_q, 2); + + /* FIPS 186-4 B.1.2 steps 4-6 */ + do + { + if( DBG_CIPHER ) + progress('.'); + _gcry_mpi_randomize (value_c, qbits, GCRY_VERY_STRONG_RANDOM); + mpi_clear_highbit (value_c, qbits+1); + } + while (!(mpi_cmp_ui (value_c, 0) > 0 && mpi_cmp (value_c, value_qm2) < 0)); + /* while (mpi_cmp (value_c, value_qm2) > 0); */ + + /* x = c + 1 */ + mpi_add_ui(value_x, value_c, 1); + + /* y = g^x mod p */ + value_y = mpi_alloc_like (prime_p); + mpi_powm (value_y, value_g, value_x, prime_p); + + if (DBG_CIPHER) + { + progress('\n'); + log_mpidump("dsa p", prime_p ); + log_mpidump("dsa q", prime_q ); + log_mpidump("dsa g", value_g ); + log_mpidump("dsa y", value_y ); + log_mpidump("dsa x", value_x ); + log_mpidump("dsa h", value_h ); + } + + /* Copy the stuff to the key structures. */ + sk->p = prime_p; prime_p = NULL; + sk->q = prime_q; prime_q = NULL; + sk->g = value_g; value_g = NULL; + sk->y = value_y; value_y = NULL; + sk->x = value_x; value_x = NULL; + *r_h = value_h; value_h = NULL; + + leave: + _gcry_mpi_release (prime_p); + _gcry_mpi_release (prime_q); + _gcry_mpi_release (value_g); + _gcry_mpi_release (value_y); + _gcry_mpi_release (value_x); + _gcry_mpi_release (value_h); + _gcry_mpi_release (value_e); + _gcry_mpi_release (value_c); + _gcry_mpi_release (value_qm2); + + /* As a last step test this keys (this should never fail of course). 
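+     (test_keys signs a random QBITS sized value and then checks both
+     that the signature verifies and that a modified value does not.)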
*/ + if (!ec && test_keys (sk, qbits) ) + { + _gcry_mpi_release (sk->p); sk->p = NULL; + _gcry_mpi_release (sk->q); sk->q = NULL; + _gcry_mpi_release (sk->g); sk->g = NULL; + _gcry_mpi_release (sk->y); sk->y = NULL; + _gcry_mpi_release (sk->x); sk->x = NULL; + fips_signal_error ("self-test after key generation failed"); + ec = GPG_ERR_SELFTEST_FAILED; + } + + if (ec) + { + *r_counter = 0; + xfree (*r_seed); *r_seed = NULL; + *r_seedlen = 0; + _gcry_mpi_release (*r_h); *r_h = NULL; + } + + return ec; +} + + + +/* + Test whether the secret key is valid. + Returns: if this is a valid key. + */ +static int +check_secret_key( DSA_secret_key *sk ) +{ + int rc; + gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs(sk->y) ); + + mpi_powm( y, sk->g, sk->x, sk->p ); + rc = !mpi_cmp( y, sk->y ); + mpi_free( y ); + return rc; +} + + + +/* + Make a DSA signature from INPUT and put it into r and s. + + INPUT may either be a plain MPI or an opaque MPI which is then + internally converted to a plain MPI. FLAGS and HASHALGO may both + be 0 for standard operation mode. + + The return value is 0 on success or an error code. Note that for + backward compatibility the function will not return any error if + FLAGS and HASHALGO are both 0 and INPUT is a plain MPI. + */ +static gpg_err_code_t +sign (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, DSA_secret_key *skey, + int flags, int hashalgo) +{ + gpg_err_code_t rc; + gcry_mpi_t hash; + gcry_mpi_t k; + gcry_mpi_t kinv; + gcry_mpi_t tmp; + const void *abuf; + unsigned int abits, qbits; + int extraloops = 0; + + qbits = mpi_get_nbits (skey->q); + + /* Convert the INPUT into an MPI. */ + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; + + again: + /* Create the K value. */ + if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo) + { + /* Use Pornin's method for deterministic DSA. If this flag is + set, it is expected that HASH is an opaque MPI with the to be + signed hash. That hash is also used as h1 from 3.2.a. */ + if (!mpi_is_opaque (input)) + { + rc = GPG_ERR_CONFLICT; + goto leave; + } + + abuf = mpi_get_opaque (input, &abits); + rc = _gcry_dsa_gen_rfc6979_k (&k, skey->q, skey->x, + abuf, (abits+7)/8, hashalgo, extraloops); + if (rc) + goto leave; + } + else + { + /* Select a random k with 0 < k < q */ + k = _gcry_dsa_gen_k (skey->q, GCRY_STRONG_RANDOM); + } + + /* kinv = k^(-1) mod q */ + kinv = mpi_alloc( mpi_get_nlimbs(k) ); + mpi_invm(kinv, k, skey->q ); + + _gcry_dsa_modify_k (k, skey->q, qbits); + + /* r = (a^k mod p) mod q */ + mpi_powm( r, skey->g, k, skey->p ); + mpi_fdiv_r( r, r, skey->q ); + + /* s = (kinv * ( hash + x * r)) mod q */ + tmp = mpi_alloc( mpi_get_nlimbs(skey->p) ); + mpi_mul( tmp, skey->x, r ); + mpi_add( tmp, tmp, hash ); + mpi_mulm( s , kinv, tmp, skey->q ); + + mpi_free(k); + mpi_free(kinv); + mpi_free(tmp); + + if (!mpi_cmp_ui (r, 0)) + { + /* This is a highly unlikely code path. */ + extraloops++; + goto again; + } + + rc = 0; + + leave: + if (hash != input) + mpi_free (hash); + + return rc; +} + + +/* + Returns true if the signature composed from R and S is valid. + */ +static gpg_err_code_t +verify (gcry_mpi_t r, gcry_mpi_t s, gcry_mpi_t input, DSA_public_key *pkey ) +{ + gpg_err_code_t rc = 0; + gcry_mpi_t w, u1, u2, v; + gcry_mpi_t base[3]; + gcry_mpi_t ex[3]; + gcry_mpi_t hash; + unsigned int nbits; + + if( !(mpi_cmp_ui( r, 0 ) > 0 && mpi_cmp( r, pkey->q ) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. 
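+                                  (N here denotes the group order Q.)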
*/
+  if( !(mpi_cmp_ui( s, 0 ) > 0 && mpi_cmp( s, pkey->q ) < 0) )
+    return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed.  */
+
+  nbits = mpi_get_nbits (pkey->q);
+  rc = _gcry_dsa_normalize_hash (input, &hash, nbits);
+  if (rc)
+    return rc;
+
+  w  = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+  u1 = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+  u2 = mpi_alloc( mpi_get_nlimbs(pkey->q) );
+  v  = mpi_alloc( mpi_get_nlimbs(pkey->p) );
+
+  /* w = s^(-1) mod q */
+  mpi_invm( w, s, pkey->q );
+
+  /* u1 = (hash * w) mod q */
+  mpi_mulm( u1, hash, w, pkey->q );
+
+  /* u2 = r * w mod q */
+  mpi_mulm( u2, r, w, pkey->q );
+
+  /* v = g^u1 * y^u2 mod p mod q */
+  base[0] = pkey->g; ex[0] = u1;
+  base[1] = pkey->y; ex[1] = u2;
+  base[2] = NULL;    ex[2] = NULL;
+  mpi_mulpowm( v, base, ex, pkey->p );
+  mpi_fdiv_r( v, v, pkey->q );
+
+  if (mpi_cmp( v, r ))
+    {
+      if (DBG_CIPHER)
+        {
+          log_mpidump ("     i", input);
+          log_mpidump ("     h", hash);
+          log_mpidump ("     v", v);
+          log_mpidump ("     r", r);
+          log_mpidump ("     s", s);
+        }
+      rc = GPG_ERR_BAD_SIGNATURE;
+    }
+
+  mpi_free(w);
+  mpi_free(u1);
+  mpi_free(u2);
+  mpi_free(v);
+  if (hash != input)
+    mpi_free (hash);
+
+  return rc;
+}
+
+
+/*********************************************
+ **************  interface  ******************
+ *********************************************/
+
+static gcry_err_code_t
+dsa_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey)
+{
+  gpg_err_code_t rc;
+  unsigned int nbits;
+  gcry_sexp_t domainsexp;
+  DSA_secret_key sk;
+  gcry_sexp_t l1;
+  unsigned int qbits = 0;
+  gcry_sexp_t deriveparms = NULL;
+  gcry_sexp_t seedinfo = NULL;
+  gcry_sexp_t misc_info = NULL;
+  int flags = 0;
+  dsa_domain_t domain;
+  gcry_mpi_t *factors = NULL;
+
+  memset (&sk, 0, sizeof sk);
+  memset (&domain, 0, sizeof domain);
+
+  rc = _gcry_pk_util_get_nbits (genparms, &nbits);
+  if (rc)
+    return rc;
+
+  /* Parse the optional flags list.  */
+  l1 = sexp_find_token (genparms, "flags", 0);
+  if (l1)
+    {
+      rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL);
+      sexp_release (l1);
+      if (rc)
+        return rc;
+    }
+
+  /* Parse the optional qbits element.  */
+  l1 = sexp_find_token (genparms, "qbits", 0);
+  if (l1)
+    {
+      char buf[50];
+      const char *s;
+      size_t n;
+
+      s = sexp_nth_data (l1, 1, &n);
+      if (!s || n >= DIM (buf) - 1 )
+        {
+          sexp_release (l1);
+          return GPG_ERR_INV_OBJ; /* No value or value too large.  */
+        }
+      memcpy (buf, s, n);
+      buf[n] = 0;
+      qbits = (unsigned int)strtoul (buf, NULL, 0);
+      sexp_release (l1);
+    }
+
+  /* Parse the optional transient-key flag.  */
+  if (!(flags & PUBKEY_FLAG_TRANSIENT_KEY))
+    {
+      l1 = sexp_find_token (genparms, "transient-key", 0);
+      if (l1)
+        {
+          flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+          sexp_release (l1);
+        }
+    }
+
+  /* Get the optional derive parameters.  */
+  deriveparms = sexp_find_token (genparms, "derive-parms", 0);
+
+  /* Parse the optional "use-fips186" flags.  */
+  if (!(flags & PUBKEY_FLAG_USE_FIPS186))
+    {
+      l1 = sexp_find_token (genparms, "use-fips186", 0);
+      if (l1)
+        {
+          flags |= PUBKEY_FLAG_USE_FIPS186;
+          sexp_release (l1);
+        }
+    }
+  if (!(flags & PUBKEY_FLAG_USE_FIPS186_2))
+    {
+      l1 = sexp_find_token (genparms, "use-fips186-2", 0);
+      if (l1)
+        {
+          flags |= PUBKEY_FLAG_USE_FIPS186_2;
+          sexp_release (l1);
+        }
+    }
+
+  /* Check whether domain parameters are given.  */
+  domainsexp = sexp_find_token (genparms, "domain", 0);
+  if (domainsexp)
+    {
+      /* DERIVEPARMS can't be used together with domain parameters.
+         NBITS and QBITS may not be specified because their values
+         are derived from the domain parameters. 
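+         As an illustration (assumed shape, inferred from the token
+         lookups below), such a domain specification looks like
+         (domain (p <mpi>)(q <mpi>)(g <mpi>)).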
*/ + if (deriveparms || qbits || nbits) + { + sexp_release (domainsexp); + sexp_release (deriveparms); + return GPG_ERR_INV_VALUE; + } + + /* Put all domain parameters into the domain object. */ + l1 = sexp_find_token (domainsexp, "p", 0); + domain.p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + l1 = sexp_find_token (domainsexp, "q", 0); + domain.q = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + l1 = sexp_find_token (domainsexp, "g", 0); + domain.g = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + sexp_release (domainsexp); + + /* Check that all domain parameters are available. */ + if (!domain.p || !domain.q || !domain.g) + { + _gcry_mpi_release (domain.p); + _gcry_mpi_release (domain.q); + _gcry_mpi_release (domain.g); + sexp_release (deriveparms); + return GPG_ERR_MISSING_VALUE; + } + + /* Get NBITS and QBITS from the domain parameters. */ + nbits = mpi_get_nbits (domain.p); + qbits = mpi_get_nbits (domain.q); + } + + if (deriveparms + || (flags & PUBKEY_FLAG_USE_FIPS186) + || (flags & PUBKEY_FLAG_USE_FIPS186_2) + || fips_mode ()) + { + int counter; + void *seed; + size_t seedlen; + gcry_mpi_t h_value; + + rc = generate_fips186 (&sk, nbits, qbits, deriveparms, + !!(flags & PUBKEY_FLAG_USE_FIPS186_2), + &domain, + &counter, &seed, &seedlen, &h_value); + if (!rc && h_value) + { + /* Format the seed-values unless domain parameters are used + for which a H_VALUE of NULL is an indication. */ + rc = sexp_build (&seedinfo, NULL, + "(seed-values(counter %d)(seed %b)(h %m))", + counter, (int)seedlen, seed, h_value); + xfree (seed); + _gcry_mpi_release (h_value); + } + } + else + { + rc = generate (&sk, nbits, qbits, + !!(flags & PUBKEY_FLAG_TRANSIENT_KEY), + &domain, &factors); + } + + if (!rc) + { + /* Put the factors into MISC_INFO. Note that the factors are + not confidential thus we can store them in standard memory. */ + int nfactors, i, j; + char *p; + char *format = NULL; + void **arg_list = NULL; + + for (nfactors=0; factors && factors[nfactors]; nfactors++) + ; + /* Allocate space for the format string: + "(misc-key-info%S(pm1-factors%m))" + with one "%m" for each factor and construct it. */ + format = xtrymalloc (50 + 2*nfactors); + if (!format) + rc = gpg_err_code_from_syserror (); + else + { + p = stpcpy (format, "(misc-key-info"); + if (seedinfo) + p = stpcpy (p, "%S"); + if (nfactors) + { + p = stpcpy (p, "(pm1-factors"); + for (i=0; i < nfactors; i++) + p = stpcpy (p, "%m"); + p = stpcpy (p, ")"); + } + p = stpcpy (p, ")"); + + /* Allocate space for the list of factors plus one for the + seedinfo s-exp plus an extra NULL entry for safety and + fill it with the factors. 
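+             For example, with a SEEDINFO s-expression and two factors
+             the format string built above is
+             "(misc-key-info%S(pm1-factors%m%m))" and ARG_LIST holds
+             &seedinfo followed by the two factor MPIs.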
*/ + arg_list = xtrycalloc (nfactors+1+1, sizeof *arg_list); + if (!arg_list) + rc = gpg_err_code_from_syserror (); + else + { + i = 0; + if (seedinfo) + arg_list[i++] = &seedinfo; + for (j=0; j < nfactors; j++) + arg_list[i++] = factors + j; + arg_list[i] = NULL; + + rc = sexp_build_array (&misc_info, NULL, format, arg_list); + } + } + + xfree (arg_list); + xfree (format); + } + + if (!rc) + rc = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (dsa(p%m)(q%m)(g%m)(y%m)))" + " (private-key" + " (dsa(p%m)(q%m)(g%m)(y%m)(x%m)))" + " %S)", + sk.p, sk.q, sk.g, sk.y, + sk.p, sk.q, sk.g, sk.y, sk.x, + misc_info); + + + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + + _gcry_mpi_release (domain.p); + _gcry_mpi_release (domain.q); + _gcry_mpi_release (domain.g); + + sexp_release (seedinfo); + sexp_release (misc_info); + sexp_release (deriveparms); + if (factors) + { + gcry_mpi_t *mp; + for (mp = factors; *mp; mp++) + mpi_free (*mp); + xfree (factors); + } + return rc; +} + + + +static gcry_err_code_t +dsa_check_secret_key (gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + DSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL}; + + rc = _gcry_sexp_extract_param (keyparms, NULL, "pqgyx", + &sk.p, &sk.q, &sk.g, &sk.y, &sk.x, + NULL); + if (rc) + goto leave; + + if (!check_secret_key (&sk)) + rc = GPG_ERR_BAD_SECKEY; + + leave: + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + if (DBG_CIPHER) + log_debug ("dsa_testkey => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +dsa_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t data = NULL; + DSA_secret_key sk = {NULL, NULL, NULL, NULL, NULL}; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, + dsa_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("dsa_sign data", data); + + /* Extract the key. 
*/ + rc = _gcry_sexp_extract_param (keyparms, NULL, "pqgyx", + &sk.p, &sk.q, &sk.g, &sk.y, &sk.x, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("dsa_sign p", sk.p); + log_mpidump ("dsa_sign q", sk.q); + log_mpidump ("dsa_sign g", sk.g); + log_mpidump ("dsa_sign y", sk.y); + if (!fips_mode ()) + log_mpidump ("dsa_sign x", sk.x); + } + + sig_r = mpi_new (0); + sig_s = mpi_new (0); + rc = sign (sig_r, sig_s, data, &sk, ctx.flags, ctx.hash_algo); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("dsa_sign sig_r", sig_r); + log_mpidump ("dsa_sign sig_s", sig_s); + } + rc = sexp_build (r_sig, NULL, "(sig-val(dsa(r%M)(s%M)))", sig_r, sig_s); + + leave: + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.q); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + _gcry_mpi_release (data); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("dsa_sign => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +dsa_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + gcry_mpi_t data = NULL; + DSA_public_key pk = { NULL, NULL, NULL, NULL }; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, + dsa_get_nbits (s_keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("dsa_verify data", data); + + /* Extract the signature value. */ + rc = _gcry_pk_util_preparse_sigval (s_sig, dsa_names, &l1, NULL); + if (rc) + goto leave; + rc = _gcry_sexp_extract_param (l1, NULL, "rs", &sig_r, &sig_s, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("dsa_verify s_r", sig_r); + log_mpidump ("dsa_verify s_s", sig_s); + } + + /* Extract the key. */ + rc = _gcry_sexp_extract_param (s_keyparms, NULL, "pqgy", + &pk.p, &pk.q, &pk.g, &pk.y, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("dsa_verify p", pk.p); + log_mpidump ("dsa_verify q", pk.q); + log_mpidump ("dsa_verify g", pk.g); + log_mpidump ("dsa_verify y", pk.y); + } + + /* Verify the signature. */ + rc = verify (sig_r, sig_s, data, &pk); + + leave: + _gcry_mpi_release (pk.p); + _gcry_mpi_release (pk.q); + _gcry_mpi_release (pk.g); + _gcry_mpi_release (pk.y); + _gcry_mpi_release (data); + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("dsa_verify => %s\n", rc?gpg_strerror (rc):"Good"); + return rc; +} + + +/* Return the number of bits for the key described by PARMS. On error + * 0 is returned. The format of PARMS starts with the algorithm name; + * for example: + * + * (dsa + * (p ) + * (q ) + * (g ) + * (y )) + * + * More parameters may be given but we only need P here. + */ +static unsigned int +dsa_get_nbits (gcry_sexp_t parms) +{ + gcry_sexp_t l1; + gcry_mpi_t p; + unsigned int nbits; + + l1 = sexp_find_token (parms, "p", 1); + if (!l1) + return 0; /* Parameter P not found. */ + + p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + nbits = p? mpi_get_nbits (p) : 0; + _gcry_mpi_release (p); + return nbits; +} + + + +/* + Self-test section. 
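+
+   The sign self-test below uses an RFC 6979 deterministic nonce, so
+   the computed (r,s) pair can first be compared against fixed
+   reference values before the signature is also run through verify.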
+ */ + +static const char * +selftest_sign (gcry_sexp_t pkey, gcry_sexp_t skey) +{ + /* Sample data from RFC 6979 section A.2.2, hash is of message "sample" */ + static const char sample_data[] = + "(data (flags rfc6979)" + " (hash sha256 #af2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e9891562113d8a62add1bf#))"; + static const char sample_data_bad[] = + "(data (flags rfc6979)" + " (hash sha256 #bf2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e9891562113d8a62add1bf#))"; + static const char signature_r[] = + "eace8bdbbe353c432a795d9ec556c6d021f7a03f42c36e9bc87e4ac7932cc809"; + static const char signature_s[] = + "7081e175455f9247b812b74583e9e94f9ea79bd640dc962533b0680793a38d53"; + + const char *errtxt = NULL; + gcry_error_t err; + gcry_sexp_t data = NULL; + gcry_sexp_t data_bad = NULL; + gcry_sexp_t sig = NULL; + gcry_sexp_t l1 = NULL; + gcry_sexp_t l2 = NULL; + gcry_mpi_t r = NULL; + gcry_mpi_t s = NULL; + gcry_mpi_t calculated_r = NULL; + gcry_mpi_t calculated_s = NULL; + int cmp; + + err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data)); + if (!err) + err = sexp_sscan (&data_bad, NULL, + sample_data_bad, strlen (sample_data_bad)); + if (!err) + err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL); + if (!err) + err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL); + + if (err) + { + errtxt = "converting data failed"; + goto leave; + } + + err = _gcry_pk_sign (&sig, data, skey); + if (err) + { + errtxt = "signing failed"; + goto leave; + } + + /* check against known signature */ + errtxt = "signature validity failed"; + l1 = _gcry_sexp_find_token (sig, "sig-val", 0); + if (!l1) + goto leave; + l2 = _gcry_sexp_find_token (l1, "dsa", 0); + if (!l2) + goto leave; + + sexp_release (l1); + l1 = l2; + + l2 = _gcry_sexp_find_token (l1, "r", 0); + if (!l2) + goto leave; + calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG); + if (!calculated_r) + goto leave; + + sexp_release (l2); + l2 = _gcry_sexp_find_token (l1, "s", 0); + if (!l2) + goto leave; + calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG); + if (!calculated_s) + goto leave; + + errtxt = "known sig check failed"; + + cmp = _gcry_mpi_cmp (r, calculated_r); + if (cmp) + goto leave; + cmp = _gcry_mpi_cmp (s, calculated_s); + if (cmp) + goto leave; + + errtxt = NULL; + + + err = _gcry_pk_verify (sig, data, pkey); + if (err) + { + errtxt = "verify failed"; + goto leave; + } + err = _gcry_pk_verify (sig, data_bad, pkey); + if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE) + { + errtxt = "bad signature not detected"; + goto leave; + } + + + leave: + _gcry_mpi_release (calculated_s); + _gcry_mpi_release (calculated_r); + _gcry_mpi_release (s); + _gcry_mpi_release (r); + sexp_release (l2); + sexp_release (l1); + sexp_release (sig); + sexp_release (data_bad); + sexp_release (data); + return errtxt; +} + + +static gpg_err_code_t +selftests_dsa_2048 (selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + gcry_error_t err; + gcry_sexp_t skey = NULL; + gcry_sexp_t pkey = NULL; + + /* Convert the S-expressions into the internal representation. 
*/ + what = "convert"; + err = sexp_sscan (&skey, NULL, sample_secret_key_2048, strlen (sample_secret_key_2048)); + if (!err) + err = sexp_sscan (&pkey, NULL, + sample_public_key_2048, strlen (sample_public_key_2048)); + if (err) + { + errtxt = _gcry_strerror (err); + goto failed; + } + + what = "key consistency"; + err = _gcry_pk_testkey (skey); + if (err) + { + errtxt = _gcry_strerror (err); + goto failed; + } + + what = "sign"; + errtxt = selftest_sign (pkey, skey); + if (errtxt) + goto failed; + + sexp_release (pkey); + sexp_release (skey); + return 0; /* Succeeded. */ + + failed: + sexp_release (pkey); + sexp_release (skey); + if (report) + report ("pubkey", GCRY_PK_DSA, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + (void)extended; + + switch (algo) + { + case GCRY_PK_DSA: + ec = selftests_dsa_2048 (report); + break; + default: + ec = GPG_ERR_PUBKEY_ALGO; + break; + + } + return ec; +} + + + +gcry_pk_spec_t _gcry_pubkey_spec_dsa = + { + GCRY_PK_DSA, { 0, 1 }, + GCRY_PK_USAGE_SIGN, + "DSA", dsa_names, + "pqgy", "pqgyx", "", "rs", "pqgy", + dsa_generate, + dsa_check_secret_key, + NULL, + NULL, + dsa_sign, + dsa_verify, + dsa_get_nbits, + run_selftests + }; diff --git a/comm/third_party/libgcrypt/cipher/ecc-common.h b/comm/third_party/libgcrypt/cipher/ecc-common.h new file mode 100644 index 0000000000..25c3111263 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/ecc-common.h @@ -0,0 +1,140 @@ +/* ecc-common.h - Declarations of common ECC code + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_ECC_COMMON_H +#define GCRY_ECC_COMMON_H + + +/* Definition of a curve. */ +typedef struct +{ + enum gcry_mpi_ec_models model;/* The model descrinbing this curve. */ + enum ecc_dialects dialect; /* The dialect used with the curve. */ + gcry_mpi_t p; /* Prime specifying the field GF(p). */ + gcry_mpi_t a; /* First coefficient of the Weierstrass equation. */ + gcry_mpi_t b; /* Second coefficient of the Weierstrass equation. + or d as used by Twisted Edwards curves. */ + mpi_point_struct G; /* Base point (generator). */ + gcry_mpi_t n; /* Order of G. */ + unsigned int h; /* Cofactor. */ + const char *name; /* Name of the curve or NULL. */ +} elliptic_curve_t; + + + +/* Set the value from S into D. 
*/ +static inline void +point_set (mpi_point_t d, mpi_point_t s) +{ + mpi_set (d->x, s->x); + mpi_set (d->y, s->y); + mpi_set (d->z, s->z); +} + +#define point_init(a) _gcry_mpi_point_init ((a)) +#define point_free(a) _gcry_mpi_point_free_parts ((a)) + + +/*-- ecc-curves.c --*/ +gpg_err_code_t _gcry_ecc_fill_in_curve (unsigned int nbits, + const char *name, + elliptic_curve_t *curve, + unsigned int *r_nbits); +gpg_err_code_t _gcry_ecc_update_curve_param (const char *name, + enum gcry_mpi_ec_models *model, + enum ecc_dialects *dialect, + gcry_mpi_t *p, gcry_mpi_t *a, + gcry_mpi_t *b, gcry_mpi_t *g, + gcry_mpi_t *n); + +const char *_gcry_ecc_get_curve (gcry_sexp_t keyparms, + int iterator, + unsigned int *r_nbits); +gcry_sexp_t _gcry_ecc_get_param_sexp (const char *name); + +/*-- ecc-misc.c --*/ +void _gcry_ecc_curve_free (elliptic_curve_t *E); +elliptic_curve_t _gcry_ecc_curve_copy (elliptic_curve_t E); +const char *_gcry_ecc_model2str (enum gcry_mpi_ec_models model); +const char *_gcry_ecc_dialect2str (enum ecc_dialects dialect); +gcry_mpi_t _gcry_ecc_ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p); + +mpi_point_t _gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_mont_encodepoint (gcry_mpi_t x, unsigned int nbits, + int with_prefix, + unsigned char **r_buffer, + unsigned int *r_buflen); + + +/*-- ecc.c --*/ + +/*-- ecc-ecdsa.c --*/ +gpg_err_code_t _gcry_ecc_ecdsa_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo); +gpg_err_code_t _gcry_ecc_ecdsa_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s); + +/*-- ecc-eddsa.c --*/ +gpg_err_code_t _gcry_ecc_eddsa_recover_x (gcry_mpi_t x, gcry_mpi_t y, int sign, + mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_eddsa_encodepoint (mpi_point_t point, mpi_ec_t ctx, + gcry_mpi_t x, gcry_mpi_t y, + int with_prefix, + unsigned char **r_buffer, + unsigned int *r_buflen); +gpg_err_code_t _gcry_ecc_eddsa_ensure_compact (gcry_mpi_t value, + unsigned int nbits); + + +gpg_err_code_t _gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest, + mpi_ec_t ec); + +gpg_err_code_t _gcry_ecc_eddsa_genkey (mpi_ec_t ec, int flags); +gpg_err_code_t _gcry_ecc_eddsa_sign (gcry_mpi_t input, + mpi_ec_t ec, + gcry_mpi_t r_r, gcry_mpi_t s, + struct pk_encoding_ctx *ctx); +gpg_err_code_t _gcry_ecc_eddsa_verify (gcry_mpi_t input, + mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + struct pk_encoding_ctx *ctx); +void reverse_buffer (unsigned char *buffer, unsigned int length); + + +/*-- ecc-gost.c --*/ +gpg_err_code_t _gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s); +gpg_err_code_t _gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s); + + +/*-- ecc-sm2.c --*/ +gpg_err_code_t _gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, + gcry_mpi_t input, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, + gcry_sexp_t data_list, mpi_ec_t ec); +gpg_err_code_t _gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo); +gpg_err_code_t _gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s); + + +#endif /*GCRY_ECC_COMMON_H*/ diff --git a/comm/third_party/libgcrypt/cipher/ecc-curves.c b/comm/third_party/libgcrypt/cipher/ecc-curves.c new file mode 100644 index 0000000000..900b668aac --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/ecc-curves.c @@ -0,0 +1,1603 @@ +/* ecc-curves.c - Elliptic Curve parameter mangement + * Copyright (C) 2007, 2008, 2010, 2011 Free 
Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "mpi-internal.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "pubkey-internal.h"
+#include "ecc-common.h"
+
+
+static gpg_err_code_t
+point_from_keyparam (gcry_mpi_point_t *r_a,
+                     gcry_sexp_t keyparam, const char *name, mpi_ec_t ec);
+
+/* This table defines aliases for curve names.  */
+static const struct
+{
+  const char *name;  /* Our name.  */
+  const char *other; /* Other name.  */
+} curve_aliases[] =
+  {
+    { "Ed25519",    "1.3.6.1.4.1.11591.15.1" }, /* OpenPGP */
+    { "Ed25519",    "1.3.101.112" },         /* rfc8410 */
+
+    { "Curve25519", "1.3.6.1.4.1.3029.1.5.1" }, /* OpenPGP */
+    { "Curve25519", "1.3.101.110" },         /* rfc8410 */
+    { "Curve25519", "X25519" },              /* rfc8410 */
+
+    { "Ed448",      "1.3.101.113" },         /* rfc8410 */
+    { "X448",       "1.3.101.111" },         /* rfc8410 */
+
+    { "NIST P-192", "1.2.840.10045.3.1.1" }, /* X9.62 OID  */
+    { "NIST P-192", "prime192v1" },          /* X9.62 name.  */
+    { "NIST P-192", "secp192r1" },           /* SECP name.  */
+    { "NIST P-192", "nistp192" },            /* rfc5656.  */
+
+    { "NIST P-224", "secp224r1" },
+    { "NIST P-224", "1.3.132.0.33" },        /* SECP OID.  */
+    { "NIST P-224", "nistp224" },            /* rfc5656.  */
+
+    { "NIST P-256", "1.2.840.10045.3.1.7" }, /* From NIST SP 800-78-1.  */
+    { "NIST P-256", "prime256v1" },
+    { "NIST P-256", "secp256r1" },
+    { "NIST P-256", "nistp256" },            /* rfc5656.  */
+
+    { "NIST P-384", "secp384r1" },
+    { "NIST P-384", "1.3.132.0.34" },
+    { "NIST P-384", "nistp384" },            /* rfc5656.  */
+
+    { "NIST P-521", "secp521r1" },
+    { "NIST P-521", "1.3.132.0.35" },
+    { "NIST P-521", "nistp521" },            /* rfc5656. 
*/ + + { "brainpoolP160r1", "1.3.36.3.3.2.8.1.1.1" }, + { "brainpoolP192r1", "1.3.36.3.3.2.8.1.1.3" }, + { "brainpoolP224r1", "1.3.36.3.3.2.8.1.1.5" }, + { "brainpoolP256r1", "1.3.36.3.3.2.8.1.1.7" }, + { "brainpoolP320r1", "1.3.36.3.3.2.8.1.1.9" }, + { "brainpoolP384r1", "1.3.36.3.3.2.8.1.1.11"}, + { "brainpoolP512r1", "1.3.36.3.3.2.8.1.1.13"}, + + { "GOST2001-test", "1.2.643.2.2.35.0" }, + { "GOST2001-CryptoPro-A", "1.2.643.2.2.35.1" }, + { "GOST2001-CryptoPro-B", "1.2.643.2.2.35.2" }, + { "GOST2001-CryptoPro-C", "1.2.643.2.2.35.3" }, + { "GOST2001-CryptoPro-A", "GOST2001-CryptoPro-XchA" }, + { "GOST2001-CryptoPro-C", "GOST2001-CryptoPro-XchB" }, + { "GOST2001-CryptoPro-A", "1.2.643.2.2.36.0" }, + { "GOST2001-CryptoPro-C", "1.2.643.2.2.36.1" }, + + { "GOST2012-256-tc26-A", "1.2.643.7.1.2.1.1.1" }, + { "GOST2001-CryptoPro-A", "1.2.643.7.1.2.1.1.2" }, + { "GOST2001-CryptoPro-A", "GOST2012-256-tc26-B" }, + { "GOST2001-CryptoPro-B", "1.2.643.7.1.2.1.1.3" }, + { "GOST2001-CryptoPro-B", "GOST2012-256-tc26-C" }, + { "GOST2001-CryptoPro-C", "1.2.643.7.1.2.1.1.4" }, + { "GOST2001-CryptoPro-C", "GOST2012-256-tc26-D" }, + + { "GOST2012-512-test", "GOST2012-test" }, + { "GOST2012-512-test", "1.2.643.7.1.2.1.2.0" }, + { "GOST2012-512-tc26-A", "GOST2012-tc26-A" }, + { "GOST2012-512-tc26-B", "GOST2012-tc26-B" }, + { "GOST2012-512-tc26-A", "1.2.643.7.1.2.1.2.1" }, + { "GOST2012-512-tc26-B", "1.2.643.7.1.2.1.2.2" }, + { "GOST2012-512-tc26-C", "1.2.643.7.1.2.1.2.3" }, + + { "secp256k1", "1.3.132.0.10" }, + + { "sm2p256v1", "1.2.156.10197.1.301" }, + + { NULL, NULL} + }; + + +typedef struct +{ + const char *desc; /* Description of the curve. */ + unsigned int nbits; /* Number of bits. */ + unsigned int fips:1; /* True if this is a FIPS140-2 approved curve. */ + + /* The model describing this curve. This is mainly used to select + the group equation. */ + enum gcry_mpi_ec_models model; + + /* The actual ECC dialect used. This is used for curve specific + optimizations and to select encodings etc. */ + enum ecc_dialects dialect; + + const char *p; /* The prime defining the field. */ + const char *a, *b; /* The coefficients. For Twisted Edwards + Curves b is used for d. For Montgomery + Curves (a,b) has ((A-2)/4,B^-1). */ + const char *n; /* The order of the base point. */ + const char *g_x, *g_y; /* Base point. */ + unsigned int h; /* Cofactor. */ +} ecc_domain_parms_t; + + +/* This static table defines all available curves. 
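+   All numeric fields are given as hexadecimal strings; they are
+   converted to MPIs with scanval() when a curve is filled in.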
*/ +static const ecc_domain_parms_t domain_parms[] = + { + { + /* (-x^2 + y^2 = 1 + dx^2y^2) */ + "Ed25519", 255, 0, + MPI_EC_EDWARDS, ECC_DIALECT_ED25519, + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + "-0x01", + "-0x2DFC9311D490018C7338BF8688861767FF8FF5B2BEBE27548A14B235ECA6874A", + "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED", + "0x216936D3CD6E53FEC0A4E231FDD6DC5C692CC7609525A7B2C9562D608F25D51A", + "0x6666666666666666666666666666666666666666666666666666666666666658", + 8 + }, + { + /* (y^2 = x^3 + 486662*x^2 + x) */ + "Curve25519", 255, 0, + MPI_EC_MONTGOMERY, ECC_DIALECT_STANDARD, + "0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED", + "0x01DB41", + "0x01", + "0x1000000000000000000000000000000014DEF9DEA2F79CD65812631A5CF5D3ED", + "0x0000000000000000000000000000000000000000000000000000000000000009", + "0x20AE19A1B8A086B4E01EDD2C7748D14C923D4D7E6D7C61B229E9C5A27ECED3D9", + 8 + /* Note: As per RFC-7748 errata eid4730 the g_y value should be + * "0x5F51E65E475F794B1FE122D388B72EB36DC2B28192839E4DD6163A5D81312C14" + * but that breaks the keygrip. The new value is recovered in + * the function _gcry_ecc_fill_in_curve. See bug #4712. + */ + }, + { + /* (x^2 + y^2 = 1 + dx^2y^2) */ + "Ed448", 448, 0, + MPI_EC_EDWARDS, ECC_DIALECT_SAFECURVE, + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + "0x01", + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF6756", + "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "7CCA23E9C44EDB49AED63690216CC2728DC58F552378C292AB5844F3", + "0x4F1970C66BED0DED221D15A622BF36DA9E146570470F1767EA6DE324" + "A3D3A46412AE1AF72AB66511433B80E18B00938E2626A82BC70CC05E", + "0x693F46716EB6BC248876203756C9C7624BEA73736CA3984087789C1E" + "05A0C2D73AD3FF1CE67C39C4FDBD132C4ED7C8AD9808795BF230FA14", + 4, + }, + { + /* (y^2 = x^3 + 156326*x^2 + x) */ + "X448", 448, 0, + MPI_EC_MONTGOMERY, ECC_DIALECT_SAFECURVE, + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", + "0x98A9", + "0x01", + "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "7CCA23E9C44EDB49AED63690216CC2728DC58F552378C292AB5844F3", + "0x00000000000000000000000000000000000000000000000000000000" + "00000000000000000000000000000000000000000000000000000005", + "0x7D235D1295F5B1F66C98AB6E58326FCECBAE5D34F55545D060F75DC2" + "8DF3F6EDB8027E2346430D211312C4B150677AF76FD7223D457B5B1A", + 4, + }, +#if 0 /* No real specs yet found. 
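+        (The Curve3617 parameters below are therefore compiled out.)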
*/ + { + /* x^2 + y^2 = 1 + 3617x^2y^2 mod 2^414 - 17 */ + "Curve3617", + "0x3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEF", + MPI_EC_EDWARDS, 0, + "0x01", + "0x0e21", + "0x07FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEB3CC92414CF" + "706022B36F1C0338AD63CF181B0E71A5E106AF79", + "0x1A334905141443300218C0631C326E5FCD46369F44C03EC7F57FF35498A4AB4D" + "6D6BA111301A73FAA8537C64C4FD3812F3CBC595", + "0x22", + 8 + }, +#endif /*0*/ + { + "NIST P-192", 192, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffffffffffffffffffffffffffeffffffffffffffff", + "0xfffffffffffffffffffffffffffffffefffffffffffffffc", + "0x64210519e59c80e70fa7e9ab72243049feb8deecc146b9b1", + "0xffffffffffffffffffffffff99def836146bc9b1b4d22831", + + "0x188da80eb03090f67cbf20eb43a18800f4ff0afd82ff1012", + "0x07192b95ffc8da78631011ed6b24cdd573f977a11e794811", + 1 + }, + { + "NIST P-224", 224, 1, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xffffffffffffffffffffffffffffffff000000000000000000000001", + "0xfffffffffffffffffffffffffffffffefffffffffffffffffffffffe", + "0xb4050a850c04b3abf54132565044b0b7d7bfd8ba270b39432355ffb4", + "0xffffffffffffffffffffffffffff16a2e0b8f03e13dd29455c5c2a3d" , + + "0xb70e0cbd6bb4bf7f321390b94a03c1d356c21122343280d6115c1d21", + "0xbd376388b5f723fb4c22dfe6cd4375a05a07476444d5819985007e34", + 1 + }, + { + "NIST P-256", 256, 1, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xffffffff00000001000000000000000000000000ffffffffffffffffffffffff", + "0xffffffff00000001000000000000000000000000fffffffffffffffffffffffc", + "0x5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", + "0xffffffff00000000ffffffffffffffffbce6faada7179e84f3b9cac2fc632551", + + "0x6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", + "0x4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", + 1 + }, + { + "NIST P-384", 384, 1, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe" + "ffffffff0000000000000000ffffffff", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffe" + "ffffffff0000000000000000fffffffc", + "0xb3312fa7e23ee7e4988e056be3f82d19181d9c6efe8141120314088f5013875a" + "c656398d8a2ed19d2a85c8edd3ec2aef", + "0xffffffffffffffffffffffffffffffffffffffffffffffffc7634d81f4372ddf" + "581a0db248b0a77aecec196accc52973", + + "0xaa87ca22be8b05378eb1c71ef320ad746e1d3b628ba79b9859f741e082542a38" + "5502f25dbf55296c3a545e3872760ab7", + "0x3617de4a96262c6f5d9e98bf9292dc29f8f41dbd289a147ce9da3113b5f0b8c0" + "0a60b1ce1d7e819d7a431d7c90ea0e5f", + 1 + }, + { + "NIST P-521", 521, 1, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff", + "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffc", + "0x051953eb9618e1c9a1f929a21a0b68540eea2da725b99b315f3b8b489918ef10" + "9e156193951ec7e937b1652c0bd3bb1bf073573df883d2c34f1ef451fd46b503f00", + "0x01ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "fffa51868783bf2f966b7fcc0148f709a5d03bb5c9b8899c47aebb6fb71e91386409", + + "0x00c6858e06b70404e9cd9e3ecb662395b4429c648139053fb521f828af606b4d" + "3dbaa14b5e77efe75928fe1dc127a2ffa8de3348b3c1856a429bf97e7e31c2e5bd66", + "0x011839296a789a3bc0045c8a5fb42c7d1bd998f54449579b446817afbd17273e" + 
"662c97ee72995ef42640c550b9013fad0761353c7086a272c24088be94769fd16650", + 1 + }, + + { "brainpoolP160r1", 160, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xe95e4a5f737059dc60dfc7ad95b3d8139515620f", + "0x340e7be2a280eb74e2be61bada745d97e8f7c300", + "0x1e589a8595423412134faa2dbdec95c8d8675e58", + "0xe95e4a5f737059dc60df5991d45029409e60fc09", + "0xbed5af16ea3f6a4f62938c4631eb5af7bdbcdbc3", + "0x1667cb477a1a8ec338f94741669c976316da6321", + 1 + }, + + { "brainpoolP192r1", 192, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xc302f41d932a36cda7a3463093d18db78fce476de1a86297", + "0x6a91174076b1e0e19c39c031fe8685c1cae040e5c69a28ef", + "0x469a28ef7c28cca3dc721d044f4496bcca7ef4146fbf25c9", + "0xc302f41d932a36cda7a3462f9e9e916b5be8f1029ac4acc1", + "0xc0a0647eaab6a48753b033c56cb0f0900a2f5c4853375fd6", + "0x14b690866abd5bb88b5f4828c1490002e6773fa2fa299b8f", + 1 + }, + + { "brainpoolP224r1", 224, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xd7c134aa264366862a18302575d1d787b09f075797da89f57ec8c0ff", + "0x68a5e62ca9ce6c1c299803a6c1530b514e182ad8b0042a59cad29f43", + "0x2580f63ccfe44138870713b1a92369e33e2135d266dbb372386c400b", + "0xd7c134aa264366862a18302575d0fb98d116bc4b6ddebca3a5a7939f", + "0x0d9029ad2c7e5cf4340823b2a87dc68c9e4ce3174c1e6efdee12c07d", + "0x58aa56f772c0726f24c6b89e4ecdac24354b9e99caa3f6d3761402cd", + 1 + }, + + { "brainpoolP256r1", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xa9fb57dba1eea9bc3e660a909d838d726e3bf623d52620282013481d1f6e5377", + "0x7d5a0975fc2c3057eef67530417affe7fb8055c126dc5c6ce94a4b44f330b5d9", + "0x26dc5c6ce94a4b44f330b5d9bbd77cbf958416295cf7e1ce6bccdc18ff8c07b6", + "0xa9fb57dba1eea9bc3e660a909d838d718c397aa3b561a6f7901e0e82974856a7", + "0x8bd2aeb9cb7e57cb2c4b482ffc81b7afb9de27e1e3bd23c23a4453bd9ace3262", + "0x547ef835c3dac4fd97f8461a14611dc9c27745132ded8e545c1d54c72f046997", + 1 + }, + + { "brainpoolP320r1", 320, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xd35e472036bc4fb7e13c785ed201e065f98fcfa6f6f40def4f92b9ec7893ec28" + "fcd412b1f1b32e27", + "0x3ee30b568fbab0f883ccebd46d3f3bb8a2a73513f5eb79da66190eb085ffa9f4" + "92f375a97d860eb4", + "0x520883949dfdbc42d3ad198640688a6fe13f41349554b49acc31dccd88453981" + "6f5eb4ac8fb1f1a6", + "0xd35e472036bc4fb7e13c785ed201e065f98fcfa5b68f12a32d482ec7ee8658e9" + "8691555b44c59311", + "0x43bd7e9afb53d8b85289bcc48ee5bfe6f20137d10a087eb6e7871e2a10a599c7" + "10af8d0d39e20611", + "0x14fdd05545ec1cc8ab4093247f77275e0743ffed117182eaa9c77877aaac6ac7" + "d35245d1692e8ee1", + 1 + }, + + { "brainpoolP384r1", 384, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b412b1da197fb71123" + "acd3a729901d1a71874700133107ec53", + "0x7bc382c63d8c150c3c72080ace05afa0c2bea28e4fb22787139165efba91f90f" + "8aa5814a503ad4eb04a8c7dd22ce2826", + "0x04a8c7dd22ce28268b39b55416f0447c2fb77de107dcd2a62e880ea53eeb62d5" + "7cb4390295dbc9943ab78696fa504c11", + "0x8cb91e82a3386d280f5d6f7e50e641df152f7109ed5456b31f166e6cac0425a7" + "cf3ab6af6b7fc3103b883202e9046565", + "0x1d1c64f068cf45ffa2a63a81b7c13f6b8847a3e77ef14fe3db7fcafe0cbd10e8" + "e826e03436d646aaef87b2e247d4af1e", + "0x8abe1d7520f9c2a45cb1eb8e95cfd55262b70b29feec5864e19c054ff9912928" + "0e4646217791811142820341263c5315", + 1 + }, + + { "brainpoolP512r1", 512, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330871" + "7d4d9b009bc66842aecda12ae6a380e62881ff2f2d82c68528aa6056583a48f3", + "0x7830a3318b603b89e2327145ac234cc594cbdd8d3df91610a83441caea9863bc" + 
"2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a72bf2c7b9e7c1ac4d77fc94ca", + "0x3df91610a83441caea9863bc2ded5d5aa8253aa10a2ef1c98b9ac8b57f1117a7" + "2bf2c7b9e7c1ac4d77fc94cadc083e67984050b75ebae5dd2809bd638016f723", + "0xaadd9db8dbe9c48b3fd4e6ae33c9fc07cb308db3b3c9d20ed6639cca70330870" + "553e5c414ca92619418661197fac10471db1d381085ddaddb58796829ca90069", + "0x81aee4bdd82ed9645a21322e9c4c6a9385ed9f70b5d916c1b43b62eef4d0098e" + "ff3b1f78e2d0d48d50d1687b93b97d5f7c6d5047406a5e688b352209bcb9f822", + "0x7dde385d566332ecc0eabfa9cf7822fdf209f70024a57b1aa000c55b881f8111" + "b2dcde494a5f485e5bca4bd88a2763aed1ca2b2fa8f0540678cd1e0f3ad80892", + 1 + }, + { + "GOST2001-test", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x8000000000000000000000000000000000000000000000000000000000000431", + "0x0000000000000000000000000000000000000000000000000000000000000007", + "0x5fbff498aa938ce739b8e022fbafef40563f6e6a3472fc2a514c0ce9dae23b7e", + "0x8000000000000000000000000000000150fe8a1892976154c59cfc193accf5b3", + + "0x0000000000000000000000000000000000000000000000000000000000000002", + "0x08e2a8a0e65147d4bd6316030e16d19c85c97f0a9ca267122b96abbcea7e8fc8", + 1 + }, + { + "GOST2001-CryptoPro-A", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd97", + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd94", + "0x00000000000000000000000000000000000000000000000000000000000000a6", + "0xffffffffffffffffffffffffffffffff6c611070995ad10045841b09b761b893", + "0x0000000000000000000000000000000000000000000000000000000000000001", + "0x8d91e471e0989cda27df505a453f2b7635294f2ddf23e3b122acc99c9e9f1e14", + 1 + }, + { + "GOST2001-CryptoPro-B", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x8000000000000000000000000000000000000000000000000000000000000c99", + "0x8000000000000000000000000000000000000000000000000000000000000c96", + "0x3e1af419a269a5f866a7d3c25c3df80ae979259373ff2b182f49d4ce7e1bbc8b", + "0x800000000000000000000000000000015f700cfff1a624e5e497161bcc8a198f", + "0x0000000000000000000000000000000000000000000000000000000000000001", + "0x3fa8124359f96680b83d1c3eb2c070e5c545c9858d03ecfb744bf8d717717efc", + 1 + }, + { + "GOST2001-CryptoPro-C", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x9b9f605f5a858107ab1ec85e6b41c8aacf846e86789051d37998f7b9022d759b", + "0x9b9f605f5a858107ab1ec85e6b41c8aacf846e86789051d37998f7b9022d7598", + "0x000000000000000000000000000000000000000000000000000000000000805a", + "0x9b9f605f5a858107ab1ec85e6b41c8aa582ca3511eddfb74f02f3a6598980bb9", + "0x0000000000000000000000000000000000000000000000000000000000000000", + "0x41ece55743711a8c3cbf3783cd08c0ee4d4dc440d4641a8f366e550dfdb3bb67", + 1 + }, + { + "GOST2012-256-A", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffd97", + "0xc2173f1513981673af4892c23035a27ce25e2013bf95aa33b22c656f277e7335", + "0x295f9bae7428ed9ccc20e7c359a9d41a22fccd9108e17bf7ba9337a6f8ae9513", + "0x400000000000000000000000000000000fd8cddfc87b6635c115af556c360c67", + "0x91e38443a5e82c0d880923425712b2bb658b9196932e02c78b2582fe742daa28", + "0x32879423ab1a0375895786c4bb46e9565fde0b5344766740af268adb32322e5c", + 4 + }, + { + "GOST2012-512-test", 511, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x4531acd1fe0023c7550d267b6b2fee80922b14b2ffb90f04d4eb7c09b5d2d15d" + "f1d852741af4704a0458047e80e4546d35b8336fac224dd81664bbf528be6373", + "0x0000000000000000000000000000000000000000000000000000000000000007", + 
"0x1cff0806a31116da29d8cfa54e57eb748bc5f377e49400fdd788b649eca1ac4" + "361834013b2ad7322480a89ca58e0cf74bc9e540c2add6897fad0a3084f302adc", + "0x4531acd1fe0023c7550d267b6b2fee80922b14b2ffb90f04d4eb7c09b5d2d15d" + "a82f2d7ecb1dbac719905c5eecc423f1d86e25edbe23c595d644aaf187e6e6df", + + "0x24d19cc64572ee30f396bf6ebbfd7a6c5213b3b3d7057cc825f91093a68cd762" + "fd60611262cd838dc6b60aa7eee804e28bc849977fac33b4b530f1b120248a9a", + "0x2bb312a43bd2ce6e0d020613c857acddcfbf061e91e5f2c3f32447c259f39b2" + "c83ab156d77f1496bf7eb3351e1ee4e43dc1a18b91b24640b6dbb92cb1add371e", + 1 + }, + { + "GOST2012-512-tc26-A", 512, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc7", + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc4", + "0xe8c2505dedfc86ddc1bd0b2b6667f1da34b82574761cb0e879bd081cfd0b6265" + "ee3cb090f30d27614cb4574010da90dd862ef9d4ebee4761503190785a71c760", + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "27e69532f48d89116ff22b8d4e0560609b4b38abfad2b85dcacdb1411f10b275", + "0x0000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000003", + "0x7503cfe87a836ae3a61b8816e25450e6ce5e1c93acf1abc1778064fdcbefa921" + "df1626be4fd036e93d75e6a50e3a41e98028fe5fc235f5b889a589cb5215f2a4", + 1 + }, + { + "GOST2012-512-tc26-B", 512, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0x8000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000006f", + "0x8000000000000000000000000000000000000000000000000000000000000000" + "000000000000000000000000000000000000000000000000000000000000006c", + "0x687d1b459dc841457e3e06cf6f5e2517b97c7d614af138bcbf85dc806c4b289f" + "3e965d2db1416d217f8b276fad1ab69c50f78bee1fa3106efb8ccbc7c5140116", + "0x8000000000000000000000000000000000000000000000000000000000000001" + "49a1ec142565a545acfdb77bd9d40cfa8b996712101bea0ec6346c54374f25bd", + "0x0000000000000000000000000000000000000000000000000000000000000000" + "0000000000000000000000000000000000000000000000000000000000000002", + "0x1a8f7eda389b094c2c071e3647a8940f3c123b697578c213be6dd9e6c8ec7335" + "dcb228fd1edf4a39152cbcaaf8c0398828041055f94ceeec7e21340780fe41bd", + 1 + }, + { + "GOST2012-512-tc26-C", 512, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffdc7", + "0xdc9203e514a721875485a529d2c722fb187bc8980eb866644de41c68e1430645" + "46e861c0e2c9edd92ade71f46fcf50ff2ad97f951fda9f2a2eb6546f39689bd3", + "0xb4c4ee28cebc6c2c8ac12952cf37f16ac7efb6a9f69f4b57ffda2e4f0de5ade0" + "38cbc2fff719d2c18de0284b8bfef3b52b8cc7a5f5bf0a3c8d2319a5312557e1", + "0x3fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff" + "c98cdba46506ab004c33a9ff5147502cc8eda9e7a769a12694623cef47f023ed", + "0xe2e31edfc23de7bdebe241ce593ef5de2295b7a9cbaef021d385f7074cea043a" + "a27272a7ae602bf2a7b9033db9ed3610c6fb85487eae97aac5bc7928c1950148", + "0xf5ce40d95b5eb899abbccff5911cb8577939804d6527378b8c108c3d2090ff9be" + "18e2d33e3021ed2ef32d85822423b6304f726aa854bae07d0396e9a9addc40f", + 4 + }, + + { + "secp256k1", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEFFFFFC2F", + 
"0x0000000000000000000000000000000000000000000000000000000000000000", + "0x0000000000000000000000000000000000000000000000000000000000000007", + "0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFEBAAEDCE6AF48A03BBFD25E8CD0364141", + "0x79BE667EF9DCBBAC55A06295CE870B07029BFCDB2DCE28D959F2815B16F81798", + "0x483ADA7726A3C4655DA4FBFC0E1108A8FD17B448A68554199C47D08FFB10D4B8", + 1 + }, + + { + "sm2p256v1", 256, 0, + MPI_EC_WEIERSTRASS, ECC_DIALECT_STANDARD, + "0xfffffffeffffffffffffffffffffffffffffffff00000000ffffffffffffffff", + "0xfffffffeffffffffffffffffffffffffffffffff00000000fffffffffffffffc", + "0x28e9fa9e9d9f5e344d5a9e4bcf6509a7f39789f515ab8f92ddbcbd414d940e93", + "0xfffffffeffffffffffffffffffffffff7203df6b21c6052b53bbf40939d54123", + "0x32c4ae2c1f1981195f9904466a39c9948fe30bbff2660be1715a4589334c74c7", + "0xbc3736a2f4f6779c59bdcee36b692153d0a9877cc62a474002df32e52139f0a0", + 1 + }, + + { NULL, 0, 0, 0, 0, NULL, NULL, NULL, NULL, NULL } + }; + + + + +/* Return a copy of POINT. */ +static gcry_mpi_point_t +point_copy (gcry_mpi_point_t point) +{ + gcry_mpi_point_t newpoint; + + if (point) + { + newpoint = mpi_point_new (0); + point_set (newpoint, point); + } + else + newpoint = NULL; + return newpoint; +} + + +/* Helper to scan a hex string. */ +static gcry_mpi_t +scanval (const char *string) +{ + gpg_err_code_t rc; + gcry_mpi_t val; + + rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL); + if (rc) + log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc)); + return val; +} + + +/* Return the index of the domain_parms table for a curve with NAME. + Return -1 if not found. */ +static int +find_domain_parms_idx (const char *name) +{ + int idx, aliasno; + + /* First check our native curves. */ + for (idx = 0; domain_parms[idx].desc; idx++) + if (!strcmp (name, domain_parms[idx].desc)) + return idx; + + /* If not found consult the alias table. */ + if (!domain_parms[idx].desc) + { + for (aliasno = 0; curve_aliases[aliasno].name; aliasno++) + if (!strcmp (name, curve_aliases[aliasno].other)) + break; + if (curve_aliases[aliasno].name) + { + for (idx = 0; domain_parms[idx].desc; idx++) + if (!strcmp (curve_aliases[aliasno].name, domain_parms[idx].desc)) + return idx; + } + } + + return -1; +} + + +/* Generate the crypto system setup. This function takes the NAME of + a curve or the desired number of bits and stores at R_CURVE the + parameters of the named curve or those of a suitable curve. If + R_NBITS is not NULL, the chosen number of bits is stored there. + NULL may be given for R_CURVE, if the value is not required and for + example only a quick test for availability is desired. Note that + the curve fields should be initialized to zero because fields which + are not NULL are skipped. */ +gpg_err_code_t +_gcry_ecc_fill_in_curve (unsigned int nbits, const char *name, + elliptic_curve_t *curve, unsigned int *r_nbits) +{ + int idx; + const char *resname = NULL; /* Set to a found curve name. */ + + if (name) + idx = find_domain_parms_idx (name); + else + { + for (idx = 0; domain_parms[idx].desc; idx++) + if (nbits == domain_parms[idx].nbits + && domain_parms[idx].model == MPI_EC_WEIERSTRASS) + break; + if (!domain_parms[idx].desc) + idx = -1; + } + if (idx < 0) + return GPG_ERR_UNKNOWN_CURVE; + + resname = domain_parms[idx].desc; + + /* In fips mode we only support NIST curves. Note that it is + possible to bypass this check by specifying the curve parameters + directly. 
*/ + if (fips_mode () && !domain_parms[idx].fips ) + return GPG_ERR_NOT_SUPPORTED; + + switch (domain_parms[idx].model) + { + case MPI_EC_WEIERSTRASS: + case MPI_EC_EDWARDS: + case MPI_EC_MONTGOMERY: + break; + default: + return GPG_ERR_BUG; + } + + + if (r_nbits) + *r_nbits = domain_parms[idx].nbits; + + if (curve) + { + curve->model = domain_parms[idx].model; + curve->dialect = domain_parms[idx].dialect; + if (!curve->p) + curve->p = scanval (domain_parms[idx].p); + if (!curve->a) + { + curve->a = scanval (domain_parms[idx].a); + if (curve->a->sign) + { + mpi_resize (curve->a, curve->p->nlimbs); + _gcry_mpih_sub_n (curve->a->d, curve->p->d, + curve->a->d, curve->p->nlimbs); + curve->a->nlimbs = curve->p->nlimbs; + curve->a->sign = 0; + } + } + if (!curve->b) + { + curve->b = scanval (domain_parms[idx].b); + if (curve->b->sign) + { + mpi_resize (curve->b, curve->p->nlimbs); + _gcry_mpih_sub_n (curve->b->d, curve->p->d, + curve->b->d, curve->p->nlimbs); + curve->b->nlimbs = curve->p->nlimbs; + curve->b->sign = 0; + } + } + if (!curve->n) + curve->n = scanval (domain_parms[idx].n); + if (!curve->G.x) + curve->G.x = scanval (domain_parms[idx].g_x); + if (!curve->G.y) + curve->G.y = scanval (domain_parms[idx].g_y); + curve->h = domain_parms[idx].h; + + /* + * In the constants of domain_parms, we defined Curve25519 + * domain parameters as the ones in RFC-7748 before the errata + * (eid4730). To keep the computation having exact same values, + * we recover the new value of g_y, here. + */ + if (!strcmp (resname, "Curve25519")) + mpi_sub (curve->G.y, curve->p, curve->G.y); + + if (!curve->G.z) + curve->G.z = mpi_alloc_set_ui (1); + if (!curve->name) + curve->name = resname; + } + + return 0; +} + + +/* Give the name of the curve NAME, store the curve parameters into P, + A, B, G, and N if they point to NULL value. Note that G is + returned in standard uncompressed format. Also update MODEL and + DIALECT if they are not NULL. */ +gpg_err_code_t +_gcry_ecc_update_curve_param (const char *name, + enum gcry_mpi_ec_models *model, + enum ecc_dialects *dialect, + gcry_mpi_t *p, gcry_mpi_t *a, gcry_mpi_t *b, + gcry_mpi_t *g, gcry_mpi_t *n) +{ + int idx; + + idx = find_domain_parms_idx (name); + if (idx < 0) + return GPG_ERR_UNKNOWN_CURVE; + + if (g) + { + char *buf; + size_t len; + + len = 4; + len += strlen (domain_parms[idx].g_x+2); + len += strlen (domain_parms[idx].g_y+2); + len++; + buf = xtrymalloc (len); + if (!buf) + return gpg_err_code_from_syserror (); + strcpy (stpcpy (stpcpy (buf, "0x04"), domain_parms[idx].g_x+2), + domain_parms[idx].g_y+2); + _gcry_mpi_release (*g); + *g = scanval (buf); + xfree (buf); + } + if (model) + *model = domain_parms[idx].model; + if (dialect) + *dialect = domain_parms[idx].dialect; + if (p) + { + _gcry_mpi_release (*p); + *p = scanval (domain_parms[idx].p); + } + if (a) + { + _gcry_mpi_release (*a); + *a = scanval (domain_parms[idx].a); + } + if (b) + { + _gcry_mpi_release (*b); + *b = scanval (domain_parms[idx].b); + } + if (n) + { + _gcry_mpi_release (*n); + *n = scanval (domain_parms[idx].n); + } + return 0; +} + + +/* Return the name matching the parameters in PKEY. This works only + with curves described by the Weierstrass equation. 
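+   If KEYPARMS is NULL, the function enumerates the supported curves
+   instead: ITERATOR is taken as an index into the internal parameter
+   table and NULL is returned once it is out of range.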
*/ +const char * +_gcry_ecc_get_curve (gcry_sexp_t keyparms, int iterator, unsigned int *r_nbits) +{ + gpg_err_code_t rc; + const char *result = NULL; + elliptic_curve_t E; + gcry_mpi_point_t G = NULL; + gcry_mpi_t tmp = NULL; + int idx; + + memset (&E, 0, sizeof E); + + if (r_nbits) + *r_nbits = 0; + + if (!keyparms) + { + idx = iterator; + if (idx >= 0 && idx < DIM (domain_parms)) + { + result = domain_parms[idx].desc; + if (r_nbits) + *r_nbits = domain_parms[idx].nbits; + } + return result; + } + + + /* + * Extract the curve parameters.. + */ + rc = gpg_err_code (sexp_extract_param (keyparms, NULL, "pabn", + &E.p, &E.a, &E.b, &E.n, NULL)); + if (rc == GPG_ERR_NO_OBJ) + { + /* This might be the second use case of checking whether a + specific curve given by name is supported. */ + gcry_sexp_t l1; + char *name; + + l1 = sexp_find_token (keyparms, "curve", 5); + if (!l1) + goto leave; /* No curve name parameter. */ + + name = sexp_nth_string (l1, 1); + sexp_release (l1); + if (!name) + goto leave; /* Name missing or out of core. */ + + idx = find_domain_parms_idx (name); + xfree (name); + if (idx >= 0) /* Curve found. */ + { + result = domain_parms[idx].desc; + if (r_nbits) + *r_nbits = domain_parms[idx].nbits; + } + return result; + } + + if (rc) + goto leave; + + rc = point_from_keyparam (&G, keyparms, "g", NULL); + if (rc) + goto leave; + + _gcry_mpi_point_init (&E.G); + _gcry_mpi_point_set (&E.G, G->x, G->y, G->z); + + for (idx = 0; domain_parms[idx].desc; idx++) + { + mpi_free (tmp); + tmp = scanval (domain_parms[idx].p); + if (mpi_cmp (tmp, E.p)) + continue; + + mpi_free (tmp); + tmp = scanval (domain_parms[idx].a); + if (tmp->sign) + { + if (!mpi_cmpabs (tmp, E.a)) + /* For backward compatibility to <= libgcrypt 1.8, we + allow this match to support existing keys in SEXP. */ + ; + else + { + mpi_resize (tmp, E.p->nlimbs); + _gcry_mpih_sub_n (tmp->d, E.p->d, + tmp->d, E.p->nlimbs); + tmp->nlimbs = E.p->nlimbs; + tmp->sign = 0; + if (mpi_cmp (tmp, E.a)) + continue; + } + } + else if (mpi_cmp (tmp, E.a)) + continue; + + mpi_free (tmp); + tmp = scanval (domain_parms[idx].b); + if (tmp->sign) + { + if (!mpi_cmpabs (tmp, E.b)) + /* Same for backward compatibility, see above. */ + ; + else + { + mpi_resize (tmp, E.p->nlimbs); + _gcry_mpih_sub_n (tmp->d, E.p->d, + tmp->d, E.p->nlimbs); + tmp->nlimbs = E.p->nlimbs; + tmp->sign = 0; + if (mpi_cmp (tmp, E.b)) + continue; + } + } + else if (mpi_cmp (tmp, E.b)) + continue; + + mpi_free (tmp); + tmp = scanval (domain_parms[idx].n); + if (mpi_cmp (tmp, E.n)) + continue; + + mpi_free (tmp); + tmp = scanval (domain_parms[idx].g_x); + if (mpi_cmp (tmp, E.G.x)) + continue; + + mpi_free (tmp); + tmp = scanval (domain_parms[idx].g_y); + if (mpi_cmp (tmp, E.G.y)) + continue; + + result = domain_parms[idx].desc; + if (r_nbits) + *r_nbits = domain_parms[idx].nbits; + break; + } + + leave: + _gcry_mpi_point_release (G); + _gcry_mpi_release (tmp); + _gcry_mpi_release (E.p); + _gcry_mpi_release (E.a); + _gcry_mpi_release (E.b); + _gcry_mpi_point_free_parts (&E.G); + _gcry_mpi_release (E.n); + return result; +} + + +/* Helper to extract an MPI from key parameters. */ +static gpg_err_code_t +mpi_from_keyparam (gcry_mpi_t *r_a, gcry_sexp_t keyparam, const char *name, + int opaque) +{ + gcry_err_code_t ec = 0; + gcry_sexp_t l1; + + l1 = sexp_find_token (keyparam, name, 0); + if (l1) + { + *r_a = sexp_nth_mpi (l1, 1, opaque? 
GCRYMPI_FMT_OPAQUE : GCRYMPI_FMT_USG); + sexp_release (l1); + if (!*r_a) + ec = GPG_ERR_INV_OBJ; + } + return ec; +} + +/* Helper to extract a point from key parameters. If no parameter + with NAME is found, the functions tries to find a non-encoded point + by appending ".x", ".y" and ".z" to NAME. ".z" is in this case + optional and defaults to 1. EC is the context which at this point + may not be fully initialized. */ +static gpg_err_code_t +point_from_keyparam (gcry_mpi_point_t *r_a, + gcry_sexp_t keyparam, const char *name, mpi_ec_t ec) +{ + gcry_err_code_t rc; + gcry_sexp_t l1; + gcry_mpi_point_t point; + + l1 = sexp_find_token (keyparam, name, 0); + if (l1) + { + gcry_mpi_t a; + + a = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_OPAQUE); + sexp_release (l1); + if (!a) + return GPG_ERR_INV_OBJ; + + point = mpi_point_new (0); + rc = _gcry_mpi_ec_decode_point (point, a, ec); + mpi_free (a); + if (rc) + { + mpi_point_release (point); + return rc; + } + } + else + { + char *tmpname; + gcry_mpi_t x = NULL; + gcry_mpi_t y = NULL; + gcry_mpi_t z = NULL; + + tmpname = xtrymalloc (strlen (name) + 2 + 1); + if (!tmpname) + return gpg_err_code_from_syserror (); + strcpy (stpcpy (tmpname, name), ".x"); + rc = mpi_from_keyparam (&x, keyparam, tmpname, 0); + if (rc) + { + xfree (tmpname); + return rc; + } + strcpy (stpcpy (tmpname, name), ".y"); + rc = mpi_from_keyparam (&y, keyparam, tmpname, 0); + if (rc) + { + mpi_free (x); + xfree (tmpname); + return rc; + } + strcpy (stpcpy (tmpname, name), ".z"); + rc = mpi_from_keyparam (&z, keyparam, tmpname, 0); + if (rc) + { + mpi_free (y); + mpi_free (x); + xfree (tmpname); + return rc; + } + if (!z) + z = mpi_set_ui (NULL, 1); + if (x && y) + point = mpi_point_snatch_set (NULL, x, y, z); + else + { + mpi_free (x); + mpi_free (y); + mpi_free (z); + point = NULL; + } + xfree (tmpname); + } + + if (point) + *r_a = point; + return 0; +} + + + +static gpg_err_code_t +mpi_ec_get_elliptic_curve (elliptic_curve_t *E, int *r_flags, + gcry_sexp_t keyparam, const char *curvename) +{ + gpg_err_code_t errc; + unsigned int nbits; + gcry_sexp_t l1; + + errc = _gcry_pk_util_get_nbits (keyparam, &nbits); + if (errc) + return errc; + + E->model = MPI_EC_WEIERSTRASS; + E->dialect = ECC_DIALECT_STANDARD; + E->h = 1; + + if (keyparam) + { + /* Parse an optional flags list. */ + l1 = sexp_find_token (keyparam, "flags", 0); + if (l1) + { + int flags = 0; + + errc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + sexp_release (l1); + l1 = NULL; + if (errc) + goto leave; + + *r_flags |= flags; + } + + /* Parse the deprecated optional transient-key flag. */ + l1 = sexp_find_token (keyparam, "transient-key", 0); + if (l1) + { + *r_flags |= PUBKEY_FLAG_TRANSIENT_KEY; + sexp_release (l1); + } + + /* Check whether a curve name was given. */ + l1 = sexp_find_token (keyparam, "curve", 5); + + /* If we don't have a curve name or if override parameters have + explicitly been requested, parse them. 
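+         This reads the field prime P, the coefficients A and B, the
+         base point G, the order N and the cofactor H from KEYPARAM.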
*/ + if (!l1 || (*r_flags & PUBKEY_FLAG_PARAM)) + { + gcry_mpi_point_t G = NULL; + gcry_mpi_t cofactor = NULL; + + errc = mpi_from_keyparam (&E->p, keyparam, "p", 0); + if (errc) + goto leave; + errc = mpi_from_keyparam (&E->a, keyparam, "a", 0); + if (errc) + goto leave; + errc = mpi_from_keyparam (&E->b, keyparam, "b", 0); + if (errc) + goto leave; + errc = point_from_keyparam (&G, keyparam, "g", NULL); + if (errc) + goto leave; + if (G) + { + _gcry_mpi_point_init (&E->G); + mpi_point_set (&E->G, G->x, G->y, G->z); + mpi_point_set (G, NULL, NULL, NULL); + mpi_point_release (G); + } + errc = mpi_from_keyparam (&E->n, keyparam, "n", 0); + if (errc) + goto leave; + errc = mpi_from_keyparam (&cofactor, keyparam, "h", 0); + if (errc) + goto leave; + if (cofactor) + { + mpi_get_ui (&E->h, cofactor); + mpi_free (cofactor); + } + } + } + else + l1 = NULL; /* No curvename. */ + + /* Check whether a curve parameter is available and use that to fill + in missing values. If no curve parameter is available try an + optional provided curvename. If only the curvename has been + given use that one. */ + if (l1 || curvename || nbits) + { + char *name; + + if (l1) + { + name = sexp_nth_string (l1, 1); + sexp_release (l1); + if (!name) + { + errc = GPG_ERR_INV_OBJ; /* Name missing or out of core. */ + goto leave; + } + } + else + name = NULL; + + errc = _gcry_ecc_fill_in_curve (nbits, name? name : curvename, E, NULL); + xfree (name); + if (errc) + goto leave; + } + + leave: + return errc; +} + +static gpg_err_code_t +mpi_ec_setup_elliptic_curve (mpi_ec_t ec, int flags, + elliptic_curve_t *E, gcry_sexp_t keyparam) +{ + gpg_err_code_t errc = 0; + + ec->G = mpi_point_snatch_set (NULL, E->G.x, E->G.y, E->G.z); + E->G.x = NULL; + E->G.y = NULL; + E->G.z = NULL; + ec->n = E->n; + E->n = NULL; + ec->h = E->h; + ec->name = E->name; + + /* Now that we know the curve name we can look for the public key + Q. point_from_keyparam needs to know the curve parameters so + that it is able to use the correct decompression. Parsing + the private key D could have been done earlier but it is less + surprising if we do it here as well. */ + if (keyparam) + { + int is_opaque_bytes = ((ec->dialect == ECC_DIALECT_ED25519 + && (flags & PUBKEY_FLAG_EDDSA)) + || (ec->dialect == ECC_DIALECT_SAFECURVE)); + + errc = point_from_keyparam (&ec->Q, keyparam, "q", ec); + if (errc) + return errc; + errc = mpi_from_keyparam (&ec->d, keyparam, "d", is_opaque_bytes); + + /* Size of opaque bytes should match size of P. */ + if (!errc && ec->d && is_opaque_bytes) + { + unsigned int n = mpi_get_nbits (ec->d); + unsigned int len; + + len = (ec->nbits+7)/8; + /* EdDSA requires additional bit for sign. */ + if ((ec->nbits%8) == 0 && ec->model == MPI_EC_EDWARDS) + len++; + + if ((n+7)/8 != len) + { + if (ec->dialect == ECC_DIALECT_ED25519) + { + /* + * GnuPG (<= 2.2) or OpenPGP implementations with no + * SOS support may remove zeros at the beginning. + * Recover those zeros. + */ + /* + * Also, GnuPG (<= 2.2) may add additional zero at + * the beginning, when private key is moved from + * OpenPGP to gpg-agent. Remove such a zero-prefix. + */ + const unsigned char *buf; + unsigned char *value; + + buf = mpi_get_opaque (ec->d, &n); + if (!buf) + return GPG_ERR_INV_OBJ; + + value = xtrymalloc_secure (len); + if (!value) + return gpg_err_code_from_syserror (); + + if ((n+7)/8 < len) + /* Recover zeros. */ + { + memset (value, 0, len - (n+7)/8); + memcpy (value + len - (n+7)/8, buf, (n+7)/8); + } + else if ((n+7)/8 == len + 1) + /* Remove a zero. 
*/ + memcpy (value, buf+1, len); + else + { + xfree (value); + return GPG_ERR_INV_OBJ; + } + + mpi_set_opaque (ec->d, value, len*8); + } + else + { + if (DBG_CIPHER) + log_debug ("scalar size (%d) != prime size (%d)", + (n+7)/8, len); + + errc = GPG_ERR_INV_OBJ; + } + } + } + } + + return errc; +} + +gpg_err_code_t +_gcry_mpi_ec_internal_new (mpi_ec_t *r_ec, int *r_flags, const char *name_op, + gcry_sexp_t keyparam, const char *curvename) +{ + gpg_err_code_t errc; + elliptic_curve_t E; + mpi_ec_t ec; + + *r_ec = NULL; + + memset (&E, 0, sizeof E); + errc = mpi_ec_get_elliptic_curve (&E, r_flags, keyparam, curvename); + if (errc) + goto leave; + + ec = _gcry_mpi_ec_p_internal_new (E.model, E.dialect, *r_flags, + E.p, E.a, E.b); + if (!ec) + goto leave; + + errc = mpi_ec_setup_elliptic_curve (ec, *r_flags, &E, keyparam); + if (errc) + { + _gcry_mpi_ec_free (ec); + goto leave; + } + else + *r_ec = ec; + + if (!errc && DBG_CIPHER) + { + gcry_mpi_t mpi_q = NULL; + gcry_sexp_t l1; + char msg[80]; + + l1 = sexp_find_token (keyparam, "q", 0); + if (l1) + { + mpi_q = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_OPAQUE); + sexp_release (l1); + } + + log_debug ("%s info: %s/%s%s\n", name_op, + _gcry_ecc_model2str (ec->model), + _gcry_ecc_dialect2str (ec->dialect), + (*r_flags & PUBKEY_FLAG_EDDSA)? "+EdDSA" : ""); + if (ec->name) + log_debug ("%s name: %s\n", name_op, ec->name); + snprintf (msg, sizeof msg, "%s p", name_op); + log_printmpi (msg, ec->p); + snprintf (msg, sizeof msg, "%s a", name_op); + log_printmpi (msg, ec->a); + snprintf (msg, sizeof msg, "%s b", name_op); + log_printmpi (msg, ec->b); + snprintf (msg, sizeof msg, "%s g", name_op); + log_printpnt (msg, ec->G, NULL); + snprintf (msg, sizeof msg, "%s n", name_op); + log_printmpi (msg, ec->n); + log_debug ("%s h:+%02x\n", name_op, ec->h); + if (mpi_q) + { + snprintf (msg, sizeof msg, "%s q", name_op); + log_printmpi (msg, mpi_q); + mpi_free (mpi_q); + } + if (!fips_mode () && ec->d) + { + snprintf (msg, sizeof msg, "%s d", name_op); + log_printmpi (msg, ec->d); + } + } + + leave: + _gcry_ecc_curve_free (&E); + return errc; +} + +/* This function creates a new context for elliptic curve operations. + Either KEYPARAM or CURVENAME must be given. If both are given and + KEYPARAM has no curve parameter, CURVENAME is used to add missing + parameters. On success 0 is returned and the new context stored at + R_CTX. On error NULL is stored at R_CTX and an error code is + returned. The context needs to be released using + gcry_ctx_release. */ +gpg_err_code_t +_gcry_mpi_ec_new (gcry_ctx_t *r_ctx, + gcry_sexp_t keyparam, const char *curvename) +{ + gpg_err_code_t errc; + elliptic_curve_t E; + gcry_ctx_t ctx = NULL; + int flags = 0; + mpi_ec_t ec; + + *r_ctx = NULL; + + memset (&E, 0, sizeof E); + errc = mpi_ec_get_elliptic_curve (&E, &flags, keyparam, curvename); + if (errc) + goto leave; + + errc = _gcry_mpi_ec_p_new (&ctx, E.model, E.dialect, flags, E.p, E.a, E.b); + if (errc) + goto leave; + + ec = _gcry_ctx_get_pointer (ctx, CONTEXT_TYPE_EC); + errc = mpi_ec_setup_elliptic_curve (ec, flags, &E, keyparam); + if (errc) + goto leave; + + *r_ctx = ctx; + ctx = NULL; + + leave: + _gcry_ecc_curve_free (&E); + _gcry_ctx_release (ctx); + return errc; +} + + +/* Return the parameters of the curve NAME as an S-expression. 
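+   The returned S-expression has the form
+     (public-key (ecc (p ...) (a ...) (b ...) (g ...) (n ...) (h ...)))
+   with G given as a standard uncompressed point.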
*/ +gcry_sexp_t +_gcry_ecc_get_param_sexp (const char *name) +{ + unsigned int nbits; + elliptic_curve_t E; + mpi_ec_t ctx; + gcry_mpi_t g_x, g_y; + gcry_mpi_t pkey[5]; + gcry_sexp_t result; + int i; + + memset (&E, 0, sizeof E); + if (_gcry_ecc_fill_in_curve (0, name, &E, &nbits)) + return NULL; + + g_x = mpi_new (0); + g_y = mpi_new (0); + ctx = _gcry_mpi_ec_p_internal_new (E.model, + E.dialect, + 0, + E.p, E.a, E.b); + if (_gcry_mpi_ec_get_affine (g_x, g_y, &E.G, ctx)) + log_fatal ("ecc get param: Failed to get affine coordinates\n"); + _gcry_mpi_ec_free (ctx); + _gcry_mpi_point_free_parts (&E.G); + + pkey[0] = E.p; + pkey[1] = E.a; + pkey[2] = E.b; + pkey[3] = _gcry_ecc_ec2os (g_x, g_y, E.p); + pkey[4] = E.n; + + mpi_free (g_x); + mpi_free (g_y); + + if (sexp_build (&result, NULL, + "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)))", + pkey[0], pkey[1], pkey[2], pkey[3], pkey[4], E.h)) + result = NULL; + + for (i=0; i < DIM (pkey); i++) + _gcry_mpi_release (pkey[i]); + + return result; +} + + +/* Return an MPI (or opaque MPI) described by NAME and the context EC. + If COPY is true a copy is returned, if not a const MPI may be + returned. In any case mpi_free must be used. */ +gcry_mpi_t +_gcry_ecc_get_mpi (const char *name, mpi_ec_t ec, int copy) +{ + if (!*name) + return NULL; + + if (!strcmp (name, "p") && ec->p) + return mpi_is_const (ec->p) && !copy? ec->p : mpi_copy (ec->p); + if (!strcmp (name, "a") && ec->a) + return mpi_is_const (ec->a) && !copy? ec->a : mpi_copy (ec->a); + if (!strcmp (name, "b") && ec->b) + return mpi_is_const (ec->b) && !copy? ec->b : mpi_copy (ec->b); + if (!strcmp (name, "n") && ec->n) + return mpi_is_const (ec->n) && !copy? ec->n : mpi_copy (ec->n); + if (!strcmp (name, "h")) + { + gcry_mpi_t h = _gcry_mpi_get_const (ec->h); + + return !copy? h : mpi_set (NULL, h); + } + if (!strcmp (name, "d") && ec->d) + return mpi_is_const (ec->d) && !copy? ec->d : mpi_copy (ec->d); + + /* Return a requested point coordinate. */ + if (!strcmp (name, "g.x") && ec->G && ec->G->x) + return mpi_is_const (ec->G->x) && !copy? ec->G->x : mpi_copy (ec->G->x); + if (!strcmp (name, "g.y") && ec->G && ec->G->y) + return mpi_is_const (ec->G->y) && !copy? ec->G->y : mpi_copy (ec->G->y); + if (!strcmp (name, "q.x") && ec->Q && ec->Q->x) + return mpi_is_const (ec->Q->x) && !copy? ec->Q->x : mpi_copy (ec->Q->x); + if (!strcmp (name, "q.y") && ec->Q && ec->Q->y) + return mpi_is_const (ec->Q->y) && !copy? ec->Q->y : mpi_copy (ec->Q->y); + + /* If the base point has been requested, return it in standard + encoding. */ + if (!strcmp (name, "g") && ec->G) + return _gcry_mpi_ec_ec2os (ec->G, ec); + + /* If the public key has been requested, return it by default in + standard uncompressed encoding or if requested in other + encodings. */ + if (*name == 'q' && (!name[1] || name[1] == '@')) + { + /* If only the private key is given, compute the public key. */ + if (!ec->Q) + ec->Q = _gcry_ecc_compute_public (NULL, ec); + + if (!ec->Q) + return NULL; + + if (name[1] != '@') + return _gcry_mpi_ec_ec2os (ec->Q, ec); + + if (!strcmp (name+2, "eddsa") && ec->model == MPI_EC_EDWARDS) + { + unsigned char *encpk; + unsigned int encpklen; + + if (!_gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0, + &encpk, &encpklen)) + return mpi_set_opaque (NULL, encpk, encpklen*8); + } + } + + return NULL; +} + + +/* Return a point described by NAME and the context EC. 
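+   NAME is either "g" for the base point or "q" for the public key;
+   for "q" the public key is computed from the secret D if it is not
+   yet available.  NULL is returned for any other name.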
*/ +gcry_mpi_point_t +_gcry_ecc_get_point (const char *name, mpi_ec_t ec) +{ + if (!strcmp (name, "g") && ec->G) + return point_copy (ec->G); + if (!strcmp (name, "q")) + { + /* If only the private key is given, compute the public key. */ + if (!ec->Q) + ec->Q = _gcry_ecc_compute_public (NULL, ec); + + if (ec->Q) + return point_copy (ec->Q); + } + + return NULL; +} + + +/* Store the MPI NEWVALUE into the context EC under NAME. */ +gpg_err_code_t +_gcry_ecc_set_mpi (const char *name, gcry_mpi_t newvalue, mpi_ec_t ec) +{ + gpg_err_code_t rc = 0; + + if (!*name) + ; + else if (!strcmp (name, "p")) + { + mpi_free (ec->p); + ec->p = mpi_copy (newvalue); + _gcry_mpi_ec_get_reset (ec); + } + else if (!strcmp (name, "a")) + { + mpi_free (ec->a); + ec->a = mpi_copy (newvalue); + _gcry_mpi_ec_get_reset (ec); + } + else if (!strcmp (name, "b")) + { + mpi_free (ec->b); + ec->b = mpi_copy (newvalue); + } + else if (!strcmp (name, "n")) + { + mpi_free (ec->n); + ec->n = mpi_copy (newvalue); + } + else if (!strcmp (name, "h")) + { + mpi_get_ui (&ec->h, newvalue); + } + else if (*name == 'q' && (!name[1] || name[1] == '@')) + { + if (newvalue) + { + if (!ec->Q) + ec->Q = mpi_point_new (0); + rc = _gcry_mpi_ec_decode_point (ec->Q, newvalue, ec); + } + if (rc || !newvalue) + { + _gcry_mpi_point_release (ec->Q); + ec->Q = NULL; + } + /* Note: We assume that Q matches d and thus do not reset d. */ + } + else if (!strcmp (name, "d")) + { + mpi_free (ec->d); + ec->d = mpi_copy (newvalue); + if (ec->d) + { + /* We need to reset the public key because it may not + anymore match. */ + _gcry_mpi_point_release (ec->Q); + ec->Q = NULL; + } + } + else + rc = GPG_ERR_UNKNOWN_NAME; + + return rc; +} + + +/* Store the point NEWVALUE into the context EC under NAME. */ +gpg_err_code_t +_gcry_ecc_set_point (const char *name, gcry_mpi_point_t newvalue, mpi_ec_t ec) +{ + if (!strcmp (name, "g")) + { + _gcry_mpi_point_release (ec->G); + ec->G = point_copy (newvalue); + } + else if (!strcmp (name, "q")) + { + _gcry_mpi_point_release (ec->Q); + ec->Q = point_copy (newvalue); + } + else + return GPG_ERR_UNKNOWN_NAME; + + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/ecc-ecdh.c b/comm/third_party/libgcrypt/cipher/ecc-ecdh.c new file mode 100644 index 0000000000..d6b8991af6 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/ecc-ecdh.c @@ -0,0 +1,127 @@ +/* ecc-ecdh.c - Elliptic Curve Diffie-Hellman key agreement + * Copyright (C) 2019 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, see . 
+ * SPDX-License-Identifier: LGPL-2.1+
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "context.h"
+#include "ec-context.h"
+#include "ecc-common.h"
+
+#define ECC_CURVE25519_BYTES 32
+#define ECC_CURVE448_BYTES 56
+
+static gpg_err_code_t
+prepare_ec (mpi_ec_t *r_ec, const char *name)
+{
+  int flags = 0;
+
+  if (!strcmp (name, "Curve25519"))
+    flags = PUBKEY_FLAG_DJB_TWEAK;
+
+  return _gcry_mpi_ec_internal_new (r_ec, &flags, "ecc_mul_point", NULL, name);
+}
+
+unsigned int
+_gcry_ecc_get_algo_keylen (int curveid)
+{
+  unsigned int len = 0;
+
+  if (curveid == GCRY_ECC_CURVE25519)
+    len = ECC_CURVE25519_BYTES;
+  else if (curveid == GCRY_ECC_CURVE448)
+    len = ECC_CURVE448_BYTES;
+
+  return len;
+}
+
+gpg_error_t
+_gcry_ecc_mul_point (int curveid, unsigned char *result,
+                     const unsigned char *scalar, const unsigned char *point)
+{
+  unsigned int nbits;
+  unsigned int nbytes;
+  const char *curve;
+  gpg_err_code_t err;
+  gcry_mpi_t mpi_k;
+  mpi_ec_t ec;
+  mpi_point_struct Q;
+  gcry_mpi_t x;
+  unsigned int len;
+  unsigned char *buf;
+
+  if (curveid == GCRY_ECC_CURVE25519)
+    curve = "Curve25519";
+  else if (curveid == GCRY_ECC_CURVE448)
+    curve = "X448";
+  else
+    return gpg_error (GPG_ERR_UNKNOWN_CURVE);
+
+  err = prepare_ec (&ec, curve);
+  if (err)
+    return err;
+
+  nbits = ec->nbits;
+  nbytes = (nbits + 7)/8;
+
+  mpi_k = _gcry_mpi_set_opaque_copy (NULL, scalar, nbytes*8);
+  x = mpi_new (nbits);
+  point_init (&Q);
+
+  if (point)
+    {
+      gcry_mpi_t mpi_u = _gcry_mpi_set_opaque_copy (NULL, point, nbytes*8);
+      mpi_point_struct P;
+
+      point_init (&P);
+      err = _gcry_ecc_mont_decodepoint (mpi_u, ec, &P);
+      _gcry_mpi_release (mpi_u);
+      if (err)
+        goto leave;
+      _gcry_mpi_ec_mul_point (&Q, mpi_k, &P, ec);
+      point_free (&P);
+    }
+  else
+    _gcry_mpi_ec_mul_point (&Q, mpi_k, ec->G, ec);
+
+  _gcry_mpi_ec_get_affine (x, NULL, &Q, ec);
+
+  buf = _gcry_mpi_get_buffer (x, nbytes, &len, NULL);
+  if (!buf)
+    err = gpg_error_from_syserror ();
+  else
+    {
+      memcpy (result, buf, nbytes);
+      xfree (buf);
+    }
+
+ leave:
+  _gcry_mpi_release (x);
+  point_free (&Q);
+  _gcry_mpi_release (mpi_k);
+  _gcry_mpi_ec_free (ec);
+  return err;
+}
diff --git a/comm/third_party/libgcrypt/cipher/ecc-ecdsa.c b/comm/third_party/libgcrypt/cipher/ecc-ecdsa.c
new file mode 100644
index 0000000000..30103f1417
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/ecc-ecdsa.c
@@ -0,0 +1,248 @@
+/* ecc-ecdsa.c - Elliptic Curve ECDSA signatures
+ * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "pubkey-internal.h" +#include "ecc-common.h" + + +/* Compute an ECDSA signature. + * Return the signature struct (r,s) from the message hash. The caller + * must have allocated R and S. + */ +gpg_err_code_t +_gcry_ecc_ecdsa_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo) +{ + gpg_err_code_t rc = 0; + int extraloops = 0; + gcry_mpi_t k, dr, sum, k_1, x; + mpi_point_struct I; + gcry_mpi_t hash; + const void *abuf; + unsigned int abits, qbits; + gcry_mpi_t b; /* Random number needed for blinding. */ + gcry_mpi_t bi; /* multiplicative inverse of B. */ + + if (DBG_CIPHER) + log_mpidump ("ecdsa sign hash ", input ); + + qbits = mpi_get_nbits (ec->n); + + /* Convert the INPUT into an MPI if needed. */ + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; + + b = mpi_snew (qbits); + bi = mpi_snew (qbits); + do + { + _gcry_mpi_randomize (b, qbits, GCRY_WEAK_RANDOM); + mpi_mod (b, b, ec->n); + } + while (!mpi_invm (bi, b, ec->n)); + + k = NULL; + dr = mpi_alloc (0); + sum = mpi_alloc (0); + k_1 = mpi_alloc (0); + x = mpi_alloc (0); + point_init (&I); + + /* Two loops to avoid R or S are zero. This is more of a joke than + a real demand because the probability of them being zero is less + than any hardware failure. Some specs however require it. */ + do + { + do + { + mpi_free (k); + k = NULL; + if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo) + { + /* Use Pornin's method for deterministic DSA. If this + flag is set, it is expected that HASH is an opaque + MPI with the to be signed hash. That hash is also + used as h1 from 3.2.a. */ + if (!mpi_is_opaque (input)) + { + rc = GPG_ERR_CONFLICT; + goto leave; + } + + abuf = mpi_get_opaque (input, &abits); + rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d, + abuf, (abits+7)/8, + hashalgo, extraloops); + if (rc) + goto leave; + extraloops++; + } + else + k = _gcry_dsa_gen_k (ec->n, GCRY_STRONG_RANDOM); + + mpi_invm (k_1, k, ec->n); /* k_1 = k^(-1) mod n */ + + _gcry_dsa_modify_k (k, ec->n, qbits); + + _gcry_mpi_ec_mul_point (&I, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x, NULL, &I, ec)) + { + if (DBG_CIPHER) + log_debug ("ecc sign: Failed to get affine coordinates\n"); + rc = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + mpi_mod (r, x, ec->n); /* r = x mod n */ + } + while (!mpi_cmp_ui (r, 0)); + + /* Computation of dr, sum, and s are blinded with b. */ + mpi_mulm (dr, b, ec->d, ec->n); + mpi_mulm (dr, dr, r, ec->n); /* dr = d*r mod n */ + mpi_mulm (sum, b, hash, ec->n); + mpi_addm (sum, sum, dr, ec->n); /* sum = hash + (d*r) mod n */ + mpi_mulm (s, k_1, sum, ec->n); /* s = k^(-1)*(hash+(d*r)) mod n */ + /* Undo blinding by b^-1 */ + mpi_mulm (s, bi, s, ec->n); + } + while (!mpi_cmp_ui (s, 0)); + + if (DBG_CIPHER) + { + log_mpidump ("ecdsa sign result r ", r); + log_mpidump ("ecdsa sign result s ", s); + } + + leave: + mpi_free (b); + mpi_free (bi); + point_free (&I); + mpi_free (x); + mpi_free (k_1); + mpi_free (sum); + mpi_free (dr); + mpi_free (k); + + if (hash != input) + mpi_free (hash); + + return rc; +} + + +/* Verify an ECDSA signature. + * Check if R and S verifies INPUT. 
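+ *
+ * With h = s^(-1) mod n this computes Q' = [hash*h]G + [r*h]Q and
+ * accepts the signature if the affine x coordinate of Q', reduced
+ * mod n, equals R.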
+ */ +gpg_err_code_t +_gcry_ecc_ecdsa_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s) +{ + gpg_err_code_t err = 0; + gcry_mpi_t hash, h, h1, h2, x; + mpi_point_struct Q, Q1, Q2; + unsigned int nbits; + + if (!_gcry_mpi_ec_curve_point (ec->Q, ec)) + return GPG_ERR_BROKEN_PUBKEY; + + if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, ec->n) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. */ + if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, ec->n) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed. */ + + nbits = mpi_get_nbits (ec->n); + err = _gcry_dsa_normalize_hash (input, &hash, nbits); + if (err) + return err; + + h = mpi_alloc (0); + h1 = mpi_alloc (0); + h2 = mpi_alloc (0); + x = mpi_alloc (0); + point_init (&Q); + point_init (&Q1); + point_init (&Q2); + + /* h = s^(-1) (mod n) */ + mpi_invm (h, s, ec->n); + /* h1 = hash * s^(-1) (mod n) */ + mpi_mulm (h1, hash, h, ec->n); + /* Q1 = [ hash * s^(-1) ]G */ + _gcry_mpi_ec_mul_point (&Q1, h1, ec->G, ec); + /* h2 = r * s^(-1) (mod n) */ + mpi_mulm (h2, r, h, ec->n); + /* Q2 = [ r * s^(-1) ]Q */ + _gcry_mpi_ec_mul_point (&Q2, h2, ec->Q, ec); + /* Q = ([hash * s^(-1)]G) + ([r * s^(-1)]Q) */ + _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ec); + + if (!mpi_cmp_ui (Q.z, 0)) + { + if (DBG_CIPHER) + log_debug ("ecc verify: Rejected\n"); + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + if (_gcry_mpi_ec_get_affine (x, NULL, &Q, ec)) + { + if (DBG_CIPHER) + log_debug ("ecc verify: Failed to get affine coordinates\n"); + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + mpi_mod (x, x, ec->n); /* x = x mod E_n */ + if (mpi_cmp (x, r)) /* x != r */ + { + if (DBG_CIPHER) + { + log_mpidump (" x", x); + log_mpidump (" r", r); + log_mpidump (" s", s); + } + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + leave: + point_free (&Q2); + point_free (&Q1); + point_free (&Q); + mpi_free (x); + mpi_free (h2); + mpi_free (h1); + mpi_free (h); + if (hash != input) + mpi_free (hash); + + return err; +} diff --git a/comm/third_party/libgcrypt/cipher/ecc-eddsa.c b/comm/third_party/libgcrypt/cipher/ecc-eddsa.c new file mode 100644 index 0000000000..2a1a89073c --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/ecc-eddsa.c @@ -0,0 +1,1182 @@ +/* ecc-eddsa.c - Elliptic Curve EdDSA signatures + * Copyright (C) 2013, 2014 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "ecc-common.h" + + + +void +reverse_buffer (unsigned char *buffer, unsigned int length) +{ + unsigned int tmp, i; + + for (i=0; i < length/2; i++) + { + tmp = buffer[i]; + buffer[i] = buffer[length-1-i]; + buffer[length-1-i] = tmp; + } +} + + +/* Helper to scan a hex string. 
*/ +static gcry_mpi_t +scanval (const char *string) +{ + gpg_err_code_t rc; + gcry_mpi_t val; + + rc = _gcry_mpi_scan (&val, GCRYMPI_FMT_HEX, string, 0, NULL); + if (rc) + log_fatal ("scanning ECC parameter failed: %s\n", gpg_strerror (rc)); + return val; +} + + + +/* Encode MPI using the EdDSA scheme. MINLEN specifies the required + length of the buffer in bytes. On success 0 is returned an a + malloced buffer with the encoded point is stored at R_BUFFER; the + length of this buffer is stored at R_BUFLEN. */ +static gpg_err_code_t +eddsa_encodempi (gcry_mpi_t mpi, unsigned int nbits, + unsigned char **r_buffer, unsigned int *r_buflen) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + unsigned int minlen = (nbits%8) == 0 ? (nbits/8 + 1): (nbits+7)/8; + + rawmpi = _gcry_mpi_get_buffer (mpi, minlen, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + + *r_buffer = rawmpi; + *r_buflen = rawmpilen; + return 0; +} + + +/* Encode (X,Y) using the EdDSA scheme. NBITS is the number of bits + of the field of the curve. If WITH_PREFIX is set the returned + buffer is prefixed with a 0x40 byte. On success 0 is returned and + a malloced buffer with the encoded point is stored at R_BUFFER; the + length of this buffer is stored at R_BUFLEN. */ +static gpg_err_code_t +eddsa_encode_x_y (gcry_mpi_t x, gcry_mpi_t y, unsigned int nbits, + int with_prefix, + unsigned char **r_buffer, unsigned int *r_buflen) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + int off = with_prefix? 1:0; + unsigned int minlen = (nbits%8) == 0 ? (nbits/8 + 1): (nbits+7)/8; + + rawmpi = _gcry_mpi_get_buffer_extra (y, minlen, off?-1:0, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + if (mpi_test_bit (x, 0) && rawmpilen) + rawmpi[off + rawmpilen - 1] |= 0x80; /* Set sign bit. */ + if (off) + rawmpi[0] = 0x40; + + *r_buffer = rawmpi; + *r_buflen = rawmpilen + off; + return 0; +} + +/* Encode POINT using the EdDSA scheme. X and Y are either scratch + variables supplied by the caller or NULL. CTX is the usual + context. If WITH_PREFIX is set the returned buffer is prefixed + with a 0x40 byte. On success 0 is returned and a malloced buffer + with the encoded point is stored at R_BUFFER; the length of this + buffer is stored at R_BUFLEN. */ +gpg_err_code_t +_gcry_ecc_eddsa_encodepoint (mpi_point_t point, mpi_ec_t ec, + gcry_mpi_t x_in, gcry_mpi_t y_in, + int with_prefix, + unsigned char **r_buffer, unsigned int *r_buflen) +{ + gpg_err_code_t rc; + gcry_mpi_t x, y; + + x = x_in? x_in : mpi_new (0); + y = y_in? y_in : mpi_new (0); + + if (_gcry_mpi_ec_get_affine (x, y, point, ec)) + { + log_error ("eddsa_encodepoint: Failed to get affine coordinates\n"); + rc = GPG_ERR_INTERNAL; + } + else + rc = eddsa_encode_x_y (x, y, ec->nbits, with_prefix, r_buffer, r_buflen); + + if (!x_in) + mpi_free (x); + if (!y_in) + mpi_free (y); + return rc; +} + + +/* Make sure that the opaque MPI VALUE is in compact EdDSA format. + This function updates MPI if needed. */ +gpg_err_code_t +_gcry_ecc_eddsa_ensure_compact (gcry_mpi_t value, unsigned int nbits) +{ + gpg_err_code_t rc; + const unsigned char *buf; + unsigned int rawmpilen; + gcry_mpi_t x, y; + unsigned char *enc; + unsigned int enclen; + + if (!mpi_is_opaque (value)) + return GPG_ERR_INV_OBJ; + buf = mpi_get_opaque (value, &rawmpilen); + if (!buf) + return GPG_ERR_INV_OBJ; + rawmpilen = (rawmpilen + 7)/8; + + if (rawmpilen > 1 && (rawmpilen%2)) + { + if (buf[0] == 0x04) + { + /* Buffer is in SEC1 uncompressed format. 
Extract y and + compress. */ + rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, + buf+1, (rawmpilen-1)/2, NULL); + if (rc) + return rc; + rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG, + buf+1+(rawmpilen-1)/2, (rawmpilen-1)/2, NULL); + if (rc) + { + mpi_free (x); + return rc; + } + + rc = eddsa_encode_x_y (x, y, nbits, 0, &enc, &enclen); + mpi_free (x); + mpi_free (y); + if (rc) + return rc; + + mpi_set_opaque (value, enc, 8*enclen); + } + else if (buf[0] == 0x40) + { + /* Buffer is compressed but with our SEC1 alike compression + indicator. Remove that byte. FIXME: We should write and + use a function to manipulate an opaque MPI in place. */ + if (!_gcry_mpi_set_opaque_copy (value, buf + 1, (rawmpilen - 1)*8)) + return gpg_err_code_from_syserror (); + } + } + + return 0; +} + + +static gpg_err_code_t +ecc_ed448_recover_x (gcry_mpi_t x, gcry_mpi_t y, int x_0, mpi_ec_t ec) +{ + gpg_err_code_t rc = 0; + gcry_mpi_t u, v, u3, v3, t; + static gcry_mpi_t p34; /* Hard coded (P-3)/4 */ + + if (mpi_cmp (y, ec->p) >= 0) + rc = GPG_ERR_INV_OBJ; + + if (!p34) + p34 = scanval ("3FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "BFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF"); + + u = mpi_new (0); + v = mpi_new (0); + u3 = mpi_new (0); + v3 = mpi_new (0); + t = mpi_new (0); + + /* Compute u and v */ + /* u = y^2 */ + mpi_mulm (u, y, y, ec->p); + /* v = b*y^2 */ + mpi_mulm (v, ec->b, u, ec->p); + /* u = y^2-1 */ + mpi_sub_ui (u, u, 1); + /* v = b*y^2-1 */ + mpi_sub_ui (v, v, 1); + + /* Compute sqrt(u/v) */ + /* u3 = u^3 */ + mpi_powm (u3, u, mpi_const (MPI_C_THREE), ec->p); + mpi_powm (v3, v, mpi_const (MPI_C_THREE), ec->p); + /* t = u^4 * u * v3 = u^5 * v^3 */ + mpi_powm (t, u, mpi_const (MPI_C_FOUR), ec->p); + mpi_mulm (t, t, u, ec->p); + mpi_mulm (t, t, v3, ec->p); + /* t = t^((p-3)/4) = (u^5 * v^3)^((p-3)/4) */ + mpi_powm (t, t, p34, ec->p); + /* x = t * u^3 * v = (u^3 * v) * (u^5 * v^3)^((p-3)/4) */ + mpi_mulm (t, t, u3, ec->p); + mpi_mulm (x, t, v, ec->p); + + /* t = v * x^2 */ + mpi_mulm (t, x, x, ec->p); + mpi_mulm (t, t, v, ec->p); + + if (mpi_cmp (t, u) != 0) + rc = GPG_ERR_INV_OBJ; + else + { + if (!mpi_cmp_ui (x, 0) && x_0) + rc = GPG_ERR_INV_OBJ; + + /* Choose the desired square root according to parity */ + if (mpi_test_bit (x, 0) != !!x_0) + mpi_sub (x, ec->p, x); + } + + mpi_free (t); + mpi_free (u3); + mpi_free (v3); + mpi_free (v); + mpi_free (u); + + return rc; +} + + +/* Recover X from Y and SIGN (which actually is a parity bit). */ +gpg_err_code_t +_gcry_ecc_eddsa_recover_x (gcry_mpi_t x, gcry_mpi_t y, int sign, mpi_ec_t ec) +{ + gpg_err_code_t rc = 0; + gcry_mpi_t u, v, v3, t; + static gcry_mpi_t p58, seven; + + /* + * This routine is actually curve specific. Now, only supports + * Ed25519 and Ed448. + */ + + if (ec->dialect != ECC_DIALECT_ED25519) + /* For now, it's only Ed448. */ + return ecc_ed448_recover_x (x, y, sign, ec); + + /* It's Ed25519. 
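+
+     Per RFC 8032 the square root x of u/v is recovered via the
+     candidate x = u*v^3 * (u*v^7)^((p-5)/8); if v*x^2 == -u (mod p),
+     the candidate is multiplied by sqrt(-1), and finally the root
+     with the requested parity is selected.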
*/ + + if (!p58) + p58 = scanval ("0FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF" + "FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFD"); + if (!seven) + seven = mpi_set_ui (NULL, 7); + + u = mpi_new (0); + v = mpi_new (0); + v3 = mpi_new (0); + t = mpi_new (0); + + /* Compute u and v */ + /* u = y^2 */ + mpi_mulm (u, y, y, ec->p); + /* v = b*y^2 */ + mpi_mulm (v, ec->b, u, ec->p); + /* u = y^2-1 */ + mpi_sub_ui (u, u, 1); + /* v = b*y^2+1 */ + mpi_add_ui (v, v, 1); + + /* Compute sqrt(u/v) */ + /* v3 = v^3 */ + mpi_powm (v3, v, mpi_const (MPI_C_THREE), ec->p); + /* t = v3 * v3 * u * v = u * v^7 */ + mpi_powm (t, v, seven, ec->p); + mpi_mulm (t, t, u, ec->p); + /* t = t^((p-5)/8) = (u * v^7)^((p-5)/8) */ + mpi_powm (t, t, p58, ec->p); + /* x = t * u * v^3 = (u * v^3) * (u * v^7)^((p-5)/8) */ + mpi_mulm (t, t, u, ec->p); + mpi_mulm (x, t, v3, ec->p); + + /* Adjust if needed. */ + /* t = v * x^2 */ + mpi_mulm (t, x, x, ec->p); + mpi_mulm (t, t, v, ec->p); + /* -t == u ? x = x * sqrt(-1) */ + mpi_sub (t, ec->p, t); + if (!mpi_cmp (t, u)) + { + static gcry_mpi_t m1; /* Fixme: this is not thread-safe. */ + if (!m1) + m1 = scanval ("2B8324804FC1DF0B2B4D00993DFBD7A7" + "2F431806AD2FE478C4EE1B274A0EA0B0"); + mpi_mulm (x, x, m1, ec->p); + /* t = v * x^2 */ + mpi_mulm (t, x, x, ec->p); + mpi_mulm (t, t, v, ec->p); + /* -t == u ? x = x * sqrt(-1) */ + mpi_sub (t, ec->p, t); + if (!mpi_cmp (t, u)) + rc = GPG_ERR_INV_OBJ; + } + + /* Choose the desired square root according to parity */ + if (mpi_test_bit (x, 0) != !!sign) + mpi_sub (x, ec->p, x); + + mpi_free (t); + mpi_free (v3); + mpi_free (v); + mpi_free (u); + + return rc; +} + + +/* Decode the EdDSA style encoded PK and set it into RESULT. CTX is + the usual curve context. If R_ENCPK is not NULL, the encoded PK is + stored at that address; this is a new copy to be released by the + caller. In contrast to the supplied PK, this is not an MPI and + thus guaranteed to be properly padded. R_ENCPKLEN receives the + length of that encoded key. */ +gpg_err_code_t +_gcry_ecc_eddsa_decodepoint (gcry_mpi_t pk, mpi_ec_t ctx, mpi_point_t result, + unsigned char **r_encpk, unsigned int *r_encpklen) +{ + gpg_err_code_t rc; + unsigned char *rawmpi; + unsigned int rawmpilen; + int sign; + + if (mpi_is_opaque (pk)) + { + const unsigned char *buf; + unsigned int len; + + len = (ctx->nbits%8) == 0 ? (ctx->nbits/8 + 1): (ctx->nbits+7)/8; + + buf = mpi_get_opaque (pk, &rawmpilen); + if (!buf) + return GPG_ERR_INV_OBJ; + rawmpilen = (rawmpilen + 7)/8; + + if (!(rawmpilen == len + || rawmpilen == len + 1 + || rawmpilen == len * 2 + 1)) + return GPG_ERR_INV_OBJ; + + /* Handle compression prefixes. The size of the buffer will be + odd in this case. */ + if (rawmpilen > 1 && (rawmpilen == len + 1 || rawmpilen == len * 2 + 1)) + { + /* First check whether the public key has been given in + standard uncompressed format (SEC1). No need to recover + x in this case. 
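+             (A SEC1 uncompressed point is the octet 0x04 followed by
+             the X and Y coordinates in two halves of equal length.)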
*/ + if (buf[0] == 0x04) + { + gcry_mpi_t x, y; + + rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, + buf+1, (rawmpilen-1)/2, NULL); + if (rc) + return rc; + rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG, + buf+1+(rawmpilen-1)/2, (rawmpilen-1)/2,NULL); + if (rc) + { + mpi_free (x); + return rc; + } + + if (r_encpk) + { + rc = eddsa_encode_x_y (x, y, ctx->nbits, 0, + r_encpk, r_encpklen); + if (rc) + { + mpi_free (x); + mpi_free (y); + return rc; + } + } + mpi_snatch (result->x, x); + mpi_snatch (result->y, y); + mpi_set_ui (result->z, 1); + return 0; + } + + /* Check whether the public key has been prefixed with a 0x40 + byte to explicitly indicate compressed format using a SEC1 + alike prefix byte. This is a Libgcrypt extension. */ + if (buf[0] == 0x40) + { + rawmpilen--; + buf++; + } + } + + /* EdDSA compressed point. */ + rawmpi = xtrymalloc (rawmpilen); + if (!rawmpi) + return gpg_err_code_from_syserror (); + memcpy (rawmpi, buf, rawmpilen); + reverse_buffer (rawmpi, rawmpilen); + } + else + { + /* Note: Without using an opaque MPI it is not reliable possible + to find out whether the public key has been given in + uncompressed format. Thus we expect native EdDSA format. */ + rawmpi = _gcry_mpi_get_buffer (pk, (ctx->nbits+7)/8, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + } + + if (rawmpilen) + { + sign = !!(rawmpi[0] & 0x80); + rawmpi[0] &= 0x7f; + } + else + sign = 0; + _gcry_mpi_set_buffer (result->y, rawmpi, rawmpilen, 0); + if (r_encpk) + { + /* Revert to little endian. */ + if (sign && rawmpilen) + rawmpi[0] |= 0x80; + reverse_buffer (rawmpi, rawmpilen); + *r_encpk = rawmpi; + if (r_encpklen) + *r_encpklen = rawmpilen; + } + else + xfree (rawmpi); + + rc = _gcry_ecc_eddsa_recover_x (result->x, result->y, sign, ctx); + mpi_set_ui (result->z, 1); + + return rc; +} + + +/* Compute the A value as used by EdDSA. The caller needs to provide + the context EC and the actual secret D as an MPI. The function + returns a newly allocated 64 byte buffer at r_digest; the first 32 + bytes represent the A value. NULL is returned on error and NULL + stored at R_DIGEST. */ +gpg_err_code_t +_gcry_ecc_eddsa_compute_h_d (unsigned char **r_digest, mpi_ec_t ec) +{ + gpg_err_code_t rc; + unsigned char *rawmpi = NULL; + unsigned int rawmpilen; + unsigned char *digest; + int hashalgo, b; + + *r_digest = NULL; + + b = (ec->nbits+7)/8; + + /* + * Choice of hashalgo is curve specific. + * For now, it's determine by the bit size of the field. + */ + if (ec->nbits == 255) + hashalgo = GCRY_MD_SHA512; + else if (ec->nbits == 448) + { + b++; + hashalgo = GCRY_MD_SHAKE256; + } + else + return GPG_ERR_NOT_IMPLEMENTED; + + /* Note that we clear DIGEST so we can use it as input to left pad + the key with zeroes for hashing. */ + digest = xtrycalloc_secure (2, b); + if (!digest) + return gpg_err_code_from_syserror (); + + rawmpi = _gcry_mpi_get_buffer (ec->d, 0, &rawmpilen, NULL); + if (!rawmpi) + { + xfree (digest); + return gpg_err_code_from_syserror (); + } + + if (hashalgo == GCRY_MD_SHAKE256) + { + gcry_error_t err; + gcry_md_hd_t hd; + + err = _gcry_md_open (&hd, hashalgo, 0); + if (err) + rc = gcry_err_code (err); + else + { + _gcry_md_write (hd, rawmpi, rawmpilen); + _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0); + _gcry_md_extract (hd, GCRY_MD_SHAKE256, digest, 2*b); + _gcry_md_close (hd); + rc = 0; + } + } + else + { + gcry_buffer_t hvec[2]; + + memset (hvec, 0, sizeof hvec); + + hvec[0].data = digest; + hvec[0].len = b > rawmpilen? 
b - rawmpilen : 0;
+      hvec[1].data = rawmpi;
+      hvec[1].len = rawmpilen;
+      rc = _gcry_md_hash_buffers (hashalgo, 0, digest, hvec, 2);
+    }
+
+  xfree (rawmpi);
+  if (rc)
+    {
+      xfree (digest);
+      return rc;
+    }
+
+  /* Compute the A value. */
+  reverse_buffer (digest, b);  /* Only the first half of the hash. */
+
+  /* Field specific handling of clearing/setting bits. */
+  if (ec->nbits == 255)
+    {
+      digest[0] = (digest[0] & 0x7f) | 0x40;
+      digest[31] &= 0xf8;
+    }
+  else
+    {
+      digest[0] = 0;
+      digest[1] |= 0x80;
+      digest[56] &= 0xfc;
+    }
+
+  *r_digest = digest;
+  return 0;
+}
+
+
+/**
+ * _gcry_ecc_eddsa_genkey - EdDSA version of the key generation.
+ *
+ * @ec: Elliptic curve computation context.
+ * @flags: Flags controlling aspects of the creation.
+ *
+ * Return: An error code.
+ *
+ * The only @flags bit used by this function is %PUBKEY_FLAG_TRANSIENT_KEY
+ * to use a faster RNG.
+ */
+gpg_err_code_t
+_gcry_ecc_eddsa_genkey (mpi_ec_t ec, int flags)
+{
+  gpg_err_code_t rc;
+  int b;
+  gcry_mpi_t a, x, y;
+  mpi_point_struct Q;
+  gcry_random_level_t random_level;
+  char *dbuf;
+  size_t dlen;
+  unsigned char *hash_d = NULL;
+
+  point_init (&Q);
+
+  if ((flags & PUBKEY_FLAG_TRANSIENT_KEY))
+    random_level = GCRY_STRONG_RANDOM;
+  else
+    random_level = GCRY_VERY_STRONG_RANDOM;
+
+  b = (ec->nbits+7)/8;
+
+  if (ec->nbits == 255)
+    ;
+  else if (ec->nbits == 448)
+    b++;
+  else
+    return GPG_ERR_NOT_IMPLEMENTED;
+
+  dlen = b;
+
+  a = mpi_snew (0);
+  x = mpi_new (0);
+  y = mpi_new (0);
+
+  /* Generate a secret. */
+  dbuf = _gcry_random_bytes_secure (dlen, random_level);
+  ec->d = _gcry_mpi_set_opaque (NULL, dbuf, dlen*8);
+  rc = _gcry_ecc_eddsa_compute_h_d (&hash_d, ec);
+  if (rc)
+    goto leave;
+
+  _gcry_mpi_set_buffer (a, hash_d, b, 0);
+  xfree (hash_d);
+  /* log_printmpi ("ecgen a", a); */
+
+  /* Compute Q. */
+  _gcry_mpi_ec_mul_point (&Q, a, ec->G, ec);
+  if (DBG_CIPHER)
+    log_printpnt ("ecgen pk", &Q, ec);
+
+  ec->Q = mpi_point_snatch_set (NULL, Q.x, Q.y, Q.z);
+  Q.x = NULL;
+  Q.y = NULL;
+  Q.z = NULL;
+
+ leave:
+  _gcry_mpi_release (a);
+  _gcry_mpi_release (x);
+  _gcry_mpi_release (y);
+  return rc;
+}
+
+
+/* Compute an EdDSA signature. See:
+ *   [ed25519] 23pp. (PDF) Daniel J. Bernstein, Niels Duif, Tanja
+ *   Lange, Peter Schwabe, Bo-Yin Yang. High-speed high-security
+ *   signatures. Journal of Cryptographic Engineering 2 (2012), 77-89.
+ *   Document ID: a1a62a2f76d23f65d622484ddd09caf8.
+ *   URL: http://cr.yp.to/papers.html#ed25519. Date: 2011.09.26.
+ *
+ * Although this function requires the specification of a hash
+ * algorithm, we only support what has been specified by the paper.
+ * This may change in the future.
+ *
+ * Return the signature struct (r,s) from the message hash. The caller
+ * must have allocated R_R and S.
+ */
+
+/* Strings to be used with Ed25519 and Ed448. */
+#define DOM25519 "SigEd25519 no Ed25519 collisions"
+#define DOM25519_LEN 32
+#define DOM448 "SigEd448"
+#define DOM448_LEN 8
+
+gpg_err_code_t
+_gcry_ecc_eddsa_sign (gcry_mpi_t input, mpi_ec_t ec,
+                      gcry_mpi_t r_r, gcry_mpi_t s,
+                      struct pk_encoding_ctx *ctx)
+{
+  int rc;
+  unsigned int tmp;
+  unsigned char *digest = NULL;
+  const void *mbuf;
+  size_t mlen;
+  unsigned char *rawmpi = NULL;
+  unsigned int rawmpilen;
+  unsigned char *encpk = NULL; /* Encoded public key. */
+  unsigned int encpklen;
+  mpi_point_struct I; /* Intermediate value.
*/ + gcry_mpi_t a, x, y, r; + int b; + unsigned char x_olen[2]; + unsigned char prehashed_msg[64]; + + b = (ec->nbits+7)/8; + + if (ec->nbits == 255) + ; + else if (ec->nbits == 448) + b++; + else + return GPG_ERR_NOT_IMPLEMENTED; + + if (!mpi_is_opaque (input)) + return GPG_ERR_INV_DATA; + + /* Initialize some helpers. */ + point_init (&I); + a = mpi_snew (0); + x = mpi_new (0); + y = mpi_new (0); + r = mpi_snew (0); + + rc = _gcry_ecc_eddsa_compute_h_d (&digest, ec); + if (rc) + goto leave; + _gcry_mpi_set_buffer (a, digest, b, 0); + + /* Compute the public key if it's not available (only secret part). */ + if (ec->Q == NULL) + { + mpi_point_struct Q; + + point_init (&Q); + _gcry_mpi_ec_mul_point (&Q, a, ec->G, ec); + ec->Q = mpi_point_snatch_set (NULL, Q.x, Q.y, Q.z); + } + rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, x, y, 0, &encpk, &encpklen); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printhex (" e_pk", encpk, encpklen); + + /* Compute R. */ + mbuf = mpi_get_opaque (input, &tmp); + mlen = (tmp +7)/8; + if (DBG_CIPHER) + log_printhex (" m", mbuf, mlen); + + if (ctx->hash_algo == GCRY_MD_SHAKE256) + { + gcry_error_t err; + gcry_md_hd_t hd; + + err = _gcry_md_open (&hd, ctx->hash_algo, 0); + if (err) + rc = gcry_err_code (err); + else + { + _gcry_md_write (hd, DOM448, DOM448_LEN); + x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH); + x_olen[1] = ctx->labellen; + _gcry_md_write (hd, x_olen, 2); + if (ctx->labellen) + _gcry_md_write (hd, ctx->label, ctx->labellen); + _gcry_md_write (hd, digest+b, b); + if ((ctx->flags & PUBKEY_FLAG_PREHASH)) + { + gcry_md_hd_t hd2; + + err = _gcry_md_open (&hd2, ctx->hash_algo, 0); + if (err) + { + rc = gcry_err_code (err); + _gcry_md_close (hd); + goto leave; + } + _gcry_md_write (hd2, mbuf, mlen); + _gcry_md_ctl (hd2, GCRYCTL_FINALIZE, NULL, 0); + _gcry_md_extract (hd2, GCRY_MD_SHAKE256, prehashed_msg, 64); + _gcry_md_close (hd2); + _gcry_md_write (hd, prehashed_msg, 64); + } + else + _gcry_md_write (hd, mbuf, mlen); + _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0); + _gcry_md_extract (hd, GCRY_MD_SHAKE256, digest, 2*b); + _gcry_md_close (hd); + rc = 0; + } + } + else + { + gcry_buffer_t hvec[6]; + int i = 0; + + memset (hvec, 0, sizeof hvec); + + if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen) + { + hvec[i].data = (void *)DOM25519; + hvec[i].len = DOM25519_LEN; + i++; + x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH); + x_olen[1] = ctx->labellen; + hvec[i].data = x_olen; + hvec[i].len = 2; + i++; + if (ctx->labellen) + { + hvec[i].data = ctx->label; + hvec[i].len = ctx->labellen; + i++; + } + } + + hvec[i].data = digest; + hvec[i].off = b; + hvec[i].len = b; + i++; + if ((ctx->flags & PUBKEY_FLAG_PREHASH)) + { + _gcry_md_hash_buffer (ctx->hash_algo, prehashed_msg, mbuf, mlen); + hvec[i].data = (char*)prehashed_msg; + hvec[i].len = 64; + } + else + { + hvec[i].data = (char*)mbuf; + hvec[i].len = mlen; + } + i++; + rc = _gcry_md_hash_buffers (ctx->hash_algo, 0, digest, hvec, i); + } + + if (rc) + goto leave; + reverse_buffer (digest, 2*b); + if (DBG_CIPHER) + log_printhex (" r", digest, 2*b); + _gcry_mpi_set_buffer (r, digest, 2*b, 0); + mpi_mod (r, r, ec->n); + _gcry_mpi_ec_mul_point (&I, r, ec->G, ec); + if (DBG_CIPHER) + log_printpnt (" r", &I, ec); + + /* Convert R into affine coordinates and apply encoding. 
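+     The encoded R enters the second hash below and is finally
+     transferred to R_R as an opaque MPI.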
*/ + rc = _gcry_ecc_eddsa_encodepoint (&I, ec, x, y, 0, &rawmpi, &rawmpilen); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printhex (" e_r", rawmpi, rawmpilen); + + if (ctx->hash_algo == GCRY_MD_SHAKE256) + { + gcry_error_t err; + gcry_md_hd_t hd; + + err = _gcry_md_open (&hd, ctx->hash_algo, 0); + if (err) + rc = gcry_err_code (err); + else + { + _gcry_md_write (hd, DOM448, DOM448_LEN); + x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH); + x_olen[1] = ctx->labellen; + _gcry_md_write (hd, x_olen, 2); + if (ctx->labellen) + _gcry_md_write (hd, ctx->label, ctx->labellen); + _gcry_md_write (hd, rawmpi, rawmpilen); + _gcry_md_write (hd, encpk, encpklen); + if ((ctx->flags & PUBKEY_FLAG_PREHASH)) + _gcry_md_write (hd, prehashed_msg, 64); + else + _gcry_md_write (hd, mbuf, mlen); + _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0); + _gcry_md_extract (hd, GCRY_MD_SHAKE256, digest, 2*b); + _gcry_md_close (hd); + rc = 0; + } + } + else + { + gcry_buffer_t hvec[6]; + int i = 0; + + memset (hvec, 0, sizeof hvec); + + if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen) + { + hvec[i].data = (void *)DOM25519; + hvec[i].len = DOM25519_LEN; + i++; + x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH); + x_olen[1] = ctx->labellen; + hvec[i].data = x_olen; + hvec[i].len = 2; + i++; + if (ctx->labellen) + { + hvec[i].data = ctx->label; + hvec[i].len = ctx->labellen; + i++; + } + } + + /* S = r + a * H(dom2(F,C)+encodepoint(R)+encodepoint(pk)+m) mod n */ + hvec[i].data = rawmpi; /* (this is R) */ + hvec[i].len = rawmpilen; + i++; + hvec[i].data = encpk; + hvec[i].len = encpklen; + i++; + if ((ctx->flags & PUBKEY_FLAG_PREHASH)) + { + hvec[i].data = (char*)prehashed_msg; + hvec[i].len = 64; + } + else + { + hvec[i].data = (char*)mbuf; + hvec[i].len = mlen; + } + i++; + rc = _gcry_md_hash_buffers (ctx->hash_algo, 0, digest, hvec, i); + } + + if (rc) + goto leave; + + /* No more need for RAWMPI thus we now transfer it to R_R. */ + mpi_set_opaque (r_r, rawmpi, rawmpilen*8); + rawmpi = NULL; + + reverse_buffer (digest, 2*b); + if (DBG_CIPHER) + log_printhex (" H(R+)", digest, 2*b); + _gcry_mpi_set_buffer (s, digest, 2*b, 0); + mpi_mulm (s, s, a, ec->n); + mpi_addm (s, s, r, ec->n); + rc = eddsa_encodempi (s, ec->nbits, &rawmpi, &rawmpilen); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printhex (" e_s", rawmpi, rawmpilen); + mpi_set_opaque (s, rawmpi, rawmpilen*8); + rawmpi = NULL; + + rc = 0; + + leave: + _gcry_mpi_release (a); + _gcry_mpi_release (x); + _gcry_mpi_release (y); + _gcry_mpi_release (r); + xfree (digest); + point_free (&I); + xfree (encpk); + xfree (rawmpi); + return rc; +} + + +/* Verify an EdDSA signature. See sign_eddsa for the reference. + * Check if R_IN and S_IN verifies INPUT. + */ +gpg_err_code_t +_gcry_ecc_eddsa_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r_in, gcry_mpi_t s_in, + struct pk_encoding_ctx *ctx) +{ + int rc; + int b; + unsigned int tmp; + unsigned char *encpk = NULL; /* Encoded public key. 
*/ + unsigned int encpklen; + const void *mbuf, *rbuf; + unsigned char *tbuf = NULL; + size_t mlen, rlen; + unsigned int tlen; + unsigned char digest[114]; + gcry_mpi_t h, s; + mpi_point_struct Ia, Ib; + unsigned char x_olen[2]; + unsigned char prehashed_msg[64]; + + if (!mpi_is_opaque (input) || !mpi_is_opaque (r_in) || !mpi_is_opaque (s_in)) + return GPG_ERR_INV_DATA; + + point_init (&Ia); + point_init (&Ib); + h = mpi_new (0); + s = mpi_new (0); + + b = (ec->nbits+7)/8; + + if (ec->nbits == 255) + ; + else if (ec->nbits == 448) + b++; + else + return GPG_ERR_NOT_IMPLEMENTED; + + /* Encode and check the public key. */ + rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0, + &encpk, &encpklen); + if (rc) + goto leave; + if (!_gcry_mpi_ec_curve_point (ec->Q, ec)) + { + rc = GPG_ERR_BROKEN_PUBKEY; + goto leave; + } + if (DBG_CIPHER) + log_printhex (" e_pk", encpk, encpklen); + if (encpklen != b) + { + rc = GPG_ERR_INV_LENGTH; + goto leave; + } + + /* Convert the other input parameters. */ + mbuf = mpi_get_opaque (input, &tmp); + mlen = (tmp +7)/8; + if (DBG_CIPHER) + log_printhex (" m", mbuf, mlen); + rbuf = mpi_get_opaque (r_in, &tmp); + rlen = (tmp +7)/8; + if (DBG_CIPHER) + log_printhex (" r", rbuf, rlen); + if (rlen != b) + { + rc = GPG_ERR_INV_LENGTH; + goto leave; + } + + if (ctx->hash_algo == GCRY_MD_SHAKE256) + { + gcry_error_t err; + gcry_md_hd_t hd; + + err = _gcry_md_open (&hd, ctx->hash_algo, 0); + if (err) + rc = gcry_err_code (err); + else + { + _gcry_md_write (hd, DOM448, DOM448_LEN); + x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH); + x_olen[1] = ctx->labellen; + _gcry_md_write (hd, x_olen, 2); + if (ctx->labellen) + _gcry_md_write (hd, ctx->label, ctx->labellen); + _gcry_md_write (hd, rbuf, rlen); + _gcry_md_write (hd, encpk, encpklen); + if ((ctx->flags & PUBKEY_FLAG_PREHASH)) + { + gcry_md_hd_t hd2; + + err = _gcry_md_open (&hd2, ctx->hash_algo, 0); + if (err) + { + rc = gcry_err_code (err); + _gcry_md_close (hd); + goto leave; + } + _gcry_md_write (hd2, mbuf, mlen); + _gcry_md_ctl (hd2, GCRYCTL_FINALIZE, NULL, 0); + _gcry_md_extract (hd2, GCRY_MD_SHAKE256, prehashed_msg, 64); + _gcry_md_close (hd2); + _gcry_md_write (hd, prehashed_msg, 64); + } + else + _gcry_md_write (hd, mbuf, mlen); + _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0); + _gcry_md_extract (hd, GCRY_MD_SHAKE256, digest, 2*b); + _gcry_md_close (hd); + rc = 0; + } + } + else + { + gcry_buffer_t hvec[6]; + int i = 0; + + memset (hvec, 0, sizeof hvec); + + /* h = H(dom2(F,C)+encodepoint(R)+encodepoint(pk)+m) */ + if ((ctx->flags & PUBKEY_FLAG_PREHASH) || ctx->labellen) + { + hvec[i].data = (void *)DOM25519; + hvec[i].len = DOM25519_LEN; + i++; + x_olen[0] = !!(ctx->flags & PUBKEY_FLAG_PREHASH); + x_olen[1] = ctx->labellen; + hvec[i].data = x_olen; + hvec[i].len = 2; + i++; + if (ctx->labellen) + { + hvec[i].data = ctx->label; + hvec[i].len = ctx->labellen; + i++; + } + } + + hvec[i].data = (char*)rbuf; + hvec[i].len = rlen; + i++; + hvec[i].data = encpk; + hvec[i].len = encpklen; + i++; + if ((ctx->flags & PUBKEY_FLAG_PREHASH)) + { + _gcry_md_hash_buffer (ctx->hash_algo, prehashed_msg, mbuf, mlen); + hvec[i].data = (char*)prehashed_msg; + hvec[i].len = 64; + } + else + { + hvec[i].data = (char*)mbuf; + hvec[i].len = mlen; + } + i++; + rc = _gcry_md_hash_buffers (ctx->hash_algo, 0, digest, hvec, i); + } + + if (rc) + goto leave; + reverse_buffer (digest, 2*b); + if (DBG_CIPHER) + log_printhex (" H(R+)", digest, 2*b); + _gcry_mpi_set_buffer (h, digest, 2*b, 0); + + /* According to the paper the best way 
for verification is: + encodepoint(sG - h·Q) = encodepoint(r) + because we don't need to decode R. */ + { + void *sbuf; + unsigned int slen; + + sbuf = _gcry_mpi_get_opaque_copy (s_in, &tmp); + slen = (tmp +7)/8; + reverse_buffer (sbuf, slen); + if (DBG_CIPHER) + log_printhex (" s", sbuf, slen); + _gcry_mpi_set_buffer (s, sbuf, slen, 0); + xfree (sbuf); + if (slen != b) + { + rc = GPG_ERR_INV_LENGTH; + goto leave; + } + } + + _gcry_mpi_ec_mul_point (&Ia, s, ec->G, ec); + _gcry_mpi_ec_mul_point (&Ib, h, ec->Q, ec); + _gcry_mpi_sub (Ib.x, ec->p, Ib.x); + _gcry_mpi_ec_add_points (&Ia, &Ia, &Ib, ec); + rc = _gcry_ecc_eddsa_encodepoint (&Ia, ec, s, h, 0, &tbuf, &tlen); + if (rc) + goto leave; + if (tlen != rlen || memcmp (tbuf, rbuf, tlen)) + { + rc = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + rc = 0; + + leave: + xfree (encpk); + xfree (tbuf); + _gcry_mpi_release (s); + _gcry_mpi_release (h); + point_free (&Ia); + point_free (&Ib); + return rc; +} diff --git a/comm/third_party/libgcrypt/cipher/ecc-gost.c b/comm/third_party/libgcrypt/cipher/ecc-gost.c new file mode 100644 index 0000000000..36230f8a32 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/ecc-gost.c @@ -0,0 +1,218 @@ +/* ecc-gots.c - Elliptic Curve GOST signatures + * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 Dmitry Eremin-Solenikov + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "ecc-common.h" +#include "pubkey-internal.h" + + +/* Compute an GOST R 34.10-01/-12 signature. + * Return the signature struct (r,s) from the message hash. The caller + * must have allocated R and S. + */ +gpg_err_code_t +_gcry_ecc_gost_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s) +{ + gpg_err_code_t rc = 0; + gcry_mpi_t k, dr, sum, ke, x, e; + mpi_point_struct I; + gcry_mpi_t hash; + unsigned int qbits; + + if (DBG_CIPHER) + log_mpidump ("gost sign hash ", input ); + + qbits = mpi_get_nbits (ec->n); + + /* Convert the INPUT into an MPI if needed. */ + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; + + k = NULL; + dr = mpi_alloc (0); + sum = mpi_alloc (0); + ke = mpi_alloc (0); + e = mpi_alloc (0); + x = mpi_alloc (0); + point_init (&I); + + mpi_mod (e, input, ec->n); /* e = hash mod n */ + + if (!mpi_cmp_ui (e, 0)) + mpi_set_ui (e, 1); + + /* Two loops to avoid R or S are zero. This is more of a joke than + a real demand because the probability of them being zero is less + than any hardware failure. Some specs however require it. 
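+ *
+ * The loop bodies below are again plain modular arithmetic; assuming
+ * D, R, K, E and N are already set up, the computation of S can be
+ * sketched with the public MPI API as:
+ *
+ *   gcry_mpi_t dr = gcry_mpi_new (0);
+ *   gcry_mpi_t ke = gcry_mpi_new (0);
+ *   gcry_mpi_mulm (dr, d, r, n);    // dr = d*r mod n
+ *   gcry_mpi_mulm (ke, k, e, n);    // ke = k*e mod n
+ *   gcry_mpi_addm (s, ke, dr, n);   // s = (k*e + d*r) mod n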
*/ + do + { + do + { + mpi_free (k); + k = _gcry_dsa_gen_k (ec->n, GCRY_STRONG_RANDOM); + + _gcry_dsa_modify_k (k, ec->n, qbits); + + _gcry_mpi_ec_mul_point (&I, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x, NULL, &I, ec)) + { + if (DBG_CIPHER) + log_debug ("ecc sign: Failed to get affine coordinates\n"); + rc = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + mpi_mod (r, x, ec->n); /* r = x mod n */ + } + while (!mpi_cmp_ui (r, 0)); + mpi_mulm (dr, ec->d, r, ec->n); /* dr = d*r mod n */ + mpi_mulm (ke, k, e, ec->n); /* ke = k*e mod n */ + mpi_addm (s, ke, dr, ec->n); /* sum = (k*e+ d*r) mod n */ + } + while (!mpi_cmp_ui (s, 0)); + + if (DBG_CIPHER) + { + log_mpidump ("gost sign result r ", r); + log_mpidump ("gost sign result s ", s); + } + + leave: + point_free (&I); + mpi_free (x); + mpi_free (e); + mpi_free (ke); + mpi_free (sum); + mpi_free (dr); + mpi_free (k); + + if (hash != input) + mpi_free (hash); + + return rc; +} + + +/* Verify a GOST R 34.10-01/-12 signature. + * Check if R and S verifies INPUT. + */ +gpg_err_code_t +_gcry_ecc_gost_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s) +{ + gpg_err_code_t err = 0; + gcry_mpi_t e, x, z1, z2, v, rv, zero; + mpi_point_struct Q, Q1, Q2; + + if (!_gcry_mpi_ec_curve_point (ec->Q, ec)) + return GPG_ERR_BROKEN_PUBKEY; + + if( !(mpi_cmp_ui (r, 0) > 0 && mpi_cmp (r, ec->n) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < r < n failed. */ + if( !(mpi_cmp_ui (s, 0) > 0 && mpi_cmp (s, ec->n) < 0) ) + return GPG_ERR_BAD_SIGNATURE; /* Assertion 0 < s < n failed. */ + + x = mpi_alloc (0); + e = mpi_alloc (0); + z1 = mpi_alloc (0); + z2 = mpi_alloc (0); + v = mpi_alloc (0); + rv = mpi_alloc (0); + zero = mpi_alloc (0); + + point_init (&Q); + point_init (&Q1); + point_init (&Q2); + + mpi_mod (e, input, ec->n); /* e = hash mod n */ + if (!mpi_cmp_ui (e, 0)) + mpi_set_ui (e, 1); + mpi_invm (v, e, ec->n); /* v = e^(-1) (mod n) */ + mpi_mulm (z1, s, v, ec->n); /* z1 = s*v (mod n) */ + mpi_mulm (rv, r, v, ec->n); /* rv = r*v (mod n) */ + mpi_subm (z2, zero, rv, ec->n); /* z2 = -r*v (mod n) */ + + _gcry_mpi_ec_mul_point (&Q1, z1, ec->G, ec); +/* log_mpidump ("Q1.x", Q1.x); */ +/* log_mpidump ("Q1.y", Q1.y); */ +/* log_mpidump ("Q1.z", Q1.z); */ + _gcry_mpi_ec_mul_point (&Q2, z2, ec->Q, ec); +/* log_mpidump ("Q2.x", Q2.x); */ +/* log_mpidump ("Q2.y", Q2.y); */ +/* log_mpidump ("Q2.z", Q2.z); */ + _gcry_mpi_ec_add_points (&Q, &Q1, &Q2, ec); +/* log_mpidump (" Q.x", Q.x); */ +/* log_mpidump (" Q.y", Q.y); */ +/* log_mpidump (" Q.z", Q.z); */ + + if (!mpi_cmp_ui (Q.z, 0)) + { + if (DBG_CIPHER) + log_debug ("ecc verify: Rejected\n"); + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + if (_gcry_mpi_ec_get_affine (x, NULL, &Q, ec)) + { + if (DBG_CIPHER) + log_debug ("ecc verify: Failed to get affine coordinates\n"); + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + mpi_mod (x, x, ec->n); /* x = x mod E_n */ + if (mpi_cmp (x, r)) /* x != r */ + { + if (DBG_CIPHER) + { + log_mpidump (" x", x); + log_mpidump (" r", r); + log_mpidump (" s", s); + log_debug ("ecc verify: Not verified\n"); + } + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + if (DBG_CIPHER) + log_debug ("ecc verify: Accepted\n"); + + leave: + point_free (&Q2); + point_free (&Q1); + point_free (&Q); + mpi_free (zero); + mpi_free (rv); + mpi_free (v); + mpi_free (z2); + mpi_free (z1); + mpi_free (x); + mpi_free (e); + return err; +} diff --git a/comm/third_party/libgcrypt/cipher/ecc-misc.c b/comm/third_party/libgcrypt/cipher/ecc-misc.c new file mode 100644 index 
0000000000..6470a83bf4 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/ecc-misc.c @@ -0,0 +1,438 @@ +/* ecc-misc.c - Elliptic Curve miscellaneous functions + * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "ecc-common.h" + + +/* + * Release a curve object. + */ +void +_gcry_ecc_curve_free (elliptic_curve_t *E) +{ + mpi_free (E->p); E->p = NULL; + mpi_free (E->a); E->a = NULL; + mpi_free (E->b); E->b = NULL; + _gcry_mpi_point_free_parts (&E->G); + mpi_free (E->n); E->n = NULL; +} + + +/* + * Return a copy of a curve object. + */ +elliptic_curve_t +_gcry_ecc_curve_copy (elliptic_curve_t E) +{ + elliptic_curve_t R; + + R.model = E.model; + R.dialect = E.dialect; + R.name = E.name; + R.p = mpi_copy (E.p); + R.a = mpi_copy (E.a); + R.b = mpi_copy (E.b); + _gcry_mpi_point_init (&R.G); + point_set (&R.G, &E.G); + R.n = mpi_copy (E.n); + R.h = E.h; + + return R; +} + + +/* + * Return a description of the curve model. + */ +const char * +_gcry_ecc_model2str (enum gcry_mpi_ec_models model) +{ + const char *str = "?"; + switch (model) + { + case MPI_EC_WEIERSTRASS: str = "Weierstrass"; break; + case MPI_EC_MONTGOMERY: str = "Montgomery"; break; + case MPI_EC_EDWARDS: str = "Edwards"; break; + } + return str; +} + + +/* + * Return a description of the curve dialect. + */ +const char * +_gcry_ecc_dialect2str (enum ecc_dialects dialect) +{ + const char *str = "?"; + switch (dialect) + { + case ECC_DIALECT_STANDARD: str = "Standard"; break; + case ECC_DIALECT_ED25519: str = "Ed25519"; break; + case ECC_DIALECT_SAFECURVE: str = "SafeCurve"; break; + } + return str; +} + + +gcry_mpi_t +_gcry_ecc_ec2os (gcry_mpi_t x, gcry_mpi_t y, gcry_mpi_t p) +{ + gpg_err_code_t rc; + int pbytes = (mpi_get_nbits (p)+7)/8; + size_t n; + unsigned char *buf, *ptr; + + buf = xmalloc ( 1 + 2*pbytes ); + *buf = 04; /* Uncompressed point. */ + ptr = buf+1; + rc = _gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, x); + if (rc) + log_fatal ("mpi_print failed: %s\n", gpg_strerror (rc)); + if (n < pbytes) + { + memmove (ptr+(pbytes-n), ptr, n); + memset (ptr, 0, (pbytes-n)); + } + ptr += pbytes; + rc = _gcry_mpi_print (GCRYMPI_FMT_USG, ptr, pbytes, &n, y); + if (rc) + log_fatal ("mpi_print failed: %s\n", gpg_strerror (rc)); + if (n < pbytes) + { + memmove (ptr+(pbytes-n), ptr, n); + memset (ptr, 0, (pbytes-n)); + } + + return mpi_set_opaque (NULL, buf, (1+2*pbytes)*8); +} + + +/* Convert POINT into affine coordinates using the context CTX and + return a newly allocated MPI. If the conversion is not possible + NULL is returned. This function won't print an error message. 
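+ *
+ * (The _gcry_ecc_ec2os helper above left-pads each coordinate to the
+ * field size before concatenating 0x04 || X || Y.  The same padding
+ * with the public API, sketched for a 32-byte field:
+ *
+ *   unsigned char buf[1 + 64];
+ *   size_t n;
+ *   buf[0] = 0x04;                  // uncompressed point
+ *   gcry_mpi_print (GCRYMPI_FMT_USG, buf + 1, 32, &n, x);
+ *   if (n < 32)                     // shift right and zero-pad
+ *     {
+ *       memmove (buf + 1 + (32 - n), buf + 1, n);
+ *       memset (buf + 1, 0, 32 - n);
+ *     }
+ * )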
*/ +gcry_mpi_t +_gcry_mpi_ec_ec2os (gcry_mpi_point_t point, mpi_ec_t ec) +{ + gcry_mpi_t g_x, g_y, result; + + g_x = mpi_new (0); + g_y = mpi_new (0); + if (_gcry_mpi_ec_get_affine (g_x, g_y, point, ec)) + result = NULL; + else + result = _gcry_ecc_ec2os (g_x, g_y, ec->p); + mpi_free (g_x); + mpi_free (g_y); + + return result; +} + + +/* Decode octet string in VALUE into RESULT, in the format defined by SEC 1. + RESULT must have been initialized and is set on success to the + point given by VALUE. */ +gpg_err_code_t +_gcry_ecc_sec_decodepoint (gcry_mpi_t value, mpi_ec_t ec, mpi_point_t result) +{ + gpg_err_code_t rc; + size_t n; + const unsigned char *buf; + unsigned char *buf_memory; + gcry_mpi_t x, y; + + if (mpi_is_opaque (value)) + { + unsigned int nbits; + + buf = mpi_get_opaque (value, &nbits); + if (!buf) + return GPG_ERR_INV_OBJ; + n = (nbits + 7)/8; + buf_memory = NULL; + } + else + { + n = (mpi_get_nbits (value)+7)/8; + buf_memory = xmalloc (n); + rc = _gcry_mpi_print (GCRYMPI_FMT_USG, buf_memory, n, &n, value); + if (rc) + { + xfree (buf_memory); + return rc; + } + buf = buf_memory; + } + + if (n < 1) + { + xfree (buf_memory); + return GPG_ERR_INV_OBJ; + } + + if (*buf == 2 || *buf == 3) + { + gcry_mpi_t x3; + gcry_mpi_t t; + gcry_mpi_t p1_4; + int y_bit = (*buf == 3); + + if (!mpi_test_bit (ec->p, 1)) + { + xfree (buf_memory); + return GPG_ERR_NOT_IMPLEMENTED; /* No support for point compression. */ + } + + n = n - 1; + rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, buf+1, n, NULL); + xfree (buf_memory); + if (rc) + return rc; + + /* + * Recover Y. The Weierstrass curve: y^2 = x^3 + a*x + b + */ + + x3 = mpi_new (0); + t = mpi_new (0); + p1_4 = mpi_new (0); + y = mpi_new (0); + + /* Compute right hand side. */ + mpi_powm (x3, x, mpi_const (MPI_C_THREE), ec->p); + mpi_mul (t, ec->a, x); + mpi_mod (t, t, ec->p); + mpi_add (t, t, ec->b); + mpi_mod (t, t, ec->p); + mpi_add (t, t, x3); + mpi_mod (t, t, ec->p); + + /* + * When p mod 4 = 3, modular square root of A can be computed by + * A^((p+1)/4) mod p + */ + + /* Compute (p+1)/4 into p1_4 */ + mpi_rshift (p1_4, ec->p, 2); + _gcry_mpi_add_ui (p1_4, p1_4, 1); + + mpi_powm (y, t, p1_4, ec->p); + + if (y_bit != mpi_test_bit (y, 0)) + mpi_sub (y, ec->p, y); + + mpi_free (p1_4); + mpi_free (t); + mpi_free (x3); + } + else if (*buf == 4) + { + if ( ((n-1)%2) ) + { + xfree (buf_memory); + return GPG_ERR_INV_OBJ; + } + n = (n-1)/2; + rc = _gcry_mpi_scan (&x, GCRYMPI_FMT_USG, buf+1, n, NULL); + if (rc) + { + xfree (buf_memory); + return rc; + } + rc = _gcry_mpi_scan (&y, GCRYMPI_FMT_USG, buf+1+n, n, NULL); + xfree (buf_memory); + if (rc) + { + mpi_free (x); + return rc; + } + } + else + { + xfree (buf_memory); + return GPG_ERR_INV_OBJ; + } + + mpi_set (result->x, x); + mpi_set (result->y, y); + mpi_set_ui (result->z, 1); + + mpi_free (x); + mpi_free (y); + + return 0; +} + + +/* Compute the public key from the the context EC. Obviously a + requirement is that the secret key is available in EC. On success + Q is returned; on error NULL. If Q is NULL a newly allocated point + is returned. If G or D are given they override the values taken + from EC. 
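+ *
+ * (The point decompression above relies on p = 3 (mod 4), where a
+ * modular square root of T is T^((p+1)/4) mod p.  Just that step,
+ * sketched with the public MPI API, with T the right hand side
+ * already reduced mod p and YBIT the stored sign bit:
+ *
+ *   gcry_mpi_t e = gcry_mpi_new (0);
+ *   gcry_mpi_t y = gcry_mpi_new (0);
+ *   gcry_mpi_rshift (e, p, 2);      // e = floor(p/4)
+ *   gcry_mpi_add_ui (e, e, 1);      // e = (p+1)/4 since p = 3 mod 4
+ *   gcry_mpi_powm (y, t, e, p);     // y = t^((p+1)/4) mod p
+ *   if (ybit != gcry_mpi_test_bit (y, 0))
+ *     gcry_mpi_sub (y, p, y);       // take the root with right parity
+ * )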
*/ +mpi_point_t +_gcry_ecc_compute_public (mpi_point_t Q, mpi_ec_t ec) +{ + if (!ec->d || !ec->G || !ec->p || !ec->a) + return NULL; + if (ec->model == MPI_EC_EDWARDS && !ec->b) + return NULL; + + if ((ec->dialect == ECC_DIALECT_ED25519 && (ec->flags & PUBKEY_FLAG_EDDSA)) + || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE)) + { + gcry_mpi_t a; + unsigned char *digest; + + if (_gcry_ecc_eddsa_compute_h_d (&digest, ec)) + return NULL; + + a = mpi_snew (0); + _gcry_mpi_set_buffer (a, digest, 32, 0); + xfree (digest); + + /* And finally the public key. */ + if (!Q) + Q = mpi_point_new (0); + if (Q) + _gcry_mpi_ec_mul_point (Q, a, ec->G, ec); + mpi_free (a); + } + else + { + if (!Q) + Q = mpi_point_new (0); + if (Q) + _gcry_mpi_ec_mul_point (Q, ec->d, ec->G, ec); + } + + return Q; +} + + +gpg_err_code_t +_gcry_ecc_mont_encodepoint (gcry_mpi_t x, unsigned int nbits, + int with_prefix, + unsigned char **r_buffer, unsigned int *r_buflen) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + + rawmpi = _gcry_mpi_get_buffer_extra (x, (nbits+7)/8, + with_prefix? -1 : 0, &rawmpilen, NULL); + if (rawmpi == NULL) + return gpg_err_code_from_syserror (); + + if (with_prefix) + { + rawmpi[0] = 0x40; + rawmpilen++; + } + + *r_buffer = rawmpi; + *r_buflen = rawmpilen; + return 0; +} + + +gpg_err_code_t +_gcry_ecc_mont_decodepoint (gcry_mpi_t pk, mpi_ec_t ec, mpi_point_t result) +{ + unsigned char *rawmpi; + unsigned int rawmpilen; + unsigned int nbytes = (ec->nbits+7)/8; + + /* + * It is not reliable to assume that the first byte of 0x40 + * means the prefix. + * + * For newer implementation, it is reliable since we always put + * 0x40 for x-only coordinate. + * + * For data by older implementation (non-released development + * version in 2015), there is no 0x40 prefix added. + * + * So, it is possible to have shorter length of data when it was + * handled as MPI, removing preceding zeros. + * + * Besides, when data was parsed as MPI, we might have 0x00 + * prefix (when the MSB in the first byte is set). + */ + + if (mpi_is_opaque (pk)) + { + const unsigned char *buf; + unsigned char *p; + + buf = mpi_get_opaque (pk, &rawmpilen); + if (!buf) + return GPG_ERR_INV_OBJ; + rawmpilen = (rawmpilen + 7)/8; + + if (rawmpilen > nbytes + && (buf[0] == 0x00 || buf[0] == 0x40)) + { + rawmpilen--; + buf++; + } + + rawmpi = xtrymalloc (nbytes); + if (!rawmpi) + return gpg_err_code_from_syserror (); + + p = rawmpi + rawmpilen; + while (p > rawmpi) + *--p = *buf++; + + if (rawmpilen < nbytes) + memset (rawmpi + nbytes - rawmpilen, 0, nbytes - rawmpilen); + } + else + { + rawmpi = _gcry_mpi_get_buffer (pk, nbytes, &rawmpilen, NULL); + if (!rawmpi) + return gpg_err_code_from_syserror (); + /* + * When we have the prefix (0x40 or 0x00), it comes at the end, + * since it is taken by _gcry_mpi_get_buffer with little endian. + * Just setting RAWMPILEN to NBYTES is enough in this case. + * Othewise, RAWMPILEN is NBYTES already. + */ + rawmpilen = nbytes; + } + + if ((ec->nbits % 8)) + rawmpi[0] &= (1 << (ec->nbits % 8)) - 1; + _gcry_mpi_set_buffer (result->x, rawmpi, rawmpilen, 0); + xfree (rawmpi); + mpi_set_ui (result->z, 1); + + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/ecc-sm2.c b/comm/third_party/libgcrypt/cipher/ecc-sm2.c new file mode 100644 index 0000000000..c52629fd3f --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/ecc-sm2.c @@ -0,0 +1,569 @@ +/* ecc-sm2.c - Elliptic Curve SM2 implementation + * Copyright (C) 2020 Tianjia Zhang + * + * This file is part of Libgcrypt. 
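+ *
+ * (A note on the Montgomery decoder just above: the wire format is
+ * little endian with the excess high bits masked off.  In plain C,
+ * for a 255-bit field and a hypothetical 32-byte WIRE buffer:
+ *
+ *   unsigned char u[32];
+ *   int i;
+ *   for (i = 0; i < 32; i++)
+ *     u[i] = wire[31 - i];          // little endian -> big endian
+ *   u[0] &= 0x7f;                   // keep only ec->nbits = 255 bits
+ *
+ * which mirrors the final masking step of that function.)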
+ * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "bithelp.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "pubkey-internal.h" +#include "ecc-common.h" + +#define MPI_NBYTES(m) ((mpi_get_nbits(m) + 7) / 8) + + +/* Key derivation function from X9.63/SECG */ +static gpg_err_code_t +kdf_x9_63 (int algo, const void *in, size_t inlen, void *out, size_t outlen) +{ + gpg_err_code_t rc; + gcry_md_hd_t hd; + int mdlen; + u32 counter = 1; + u32 counter_be; + unsigned char *dgst; + unsigned char *pout = out; + size_t rlen = outlen; + size_t len; + + rc = _gcry_md_open (&hd, algo, 0); + if (rc) + return rc; + + mdlen = _gcry_md_get_algo_dlen (algo); + + while (rlen > 0) + { + counter_be = be_bswap32 (counter); /* cpu_to_be32 */ + counter++; + + _gcry_md_write (hd, in, inlen); + _gcry_md_write (hd, &counter_be, sizeof(counter_be)); + + dgst = _gcry_md_read (hd, algo); + if (dgst == NULL) + { + rc = GPG_ERR_DIGEST_ALGO; + break; + } + + len = mdlen < rlen ? mdlen : rlen; /* min(mdlen, rlen) */ + memcpy (pout, dgst, len); + rlen -= len; + pout += len; + + _gcry_md_reset (hd); + } + + _gcry_md_close (hd); + return rc; +} + + +/* _gcry_ecc_sm2_encrypt description: + * input: + * data[0] : octet string + * output: A new S-expression with the parameters: + * a: c1 : generated ephemeral public key (kG) + * b: c3 : Hash(x2 || IN || y2) + * c: c2 : cipher + * + * sm2_decrypt description: + * in contrast to encrypt + */ +gpg_err_code_t +_gcry_ecc_sm2_encrypt (gcry_sexp_t *r_ciph, gcry_mpi_t input, mpi_ec_t ec) +{ + gpg_err_code_t rc; + const int algo = GCRY_MD_SM3; + gcry_md_hd_t md = NULL; + int mdlen; + unsigned char *dgst; + gcry_mpi_t k = NULL; + mpi_point_struct kG, kP; + gcry_mpi_t x1, y1; + gcry_mpi_t x2, y2; + gcry_mpi_t x2y2 = NULL; + unsigned char *in = NULL; + unsigned int inlen; + unsigned char *raw; + unsigned int rawlen; + unsigned char *cipher = NULL; + int i; + + point_init (&kG); + point_init (&kP); + x1 = mpi_new (0); + y1 = mpi_new (0); + x2 = mpi_new (0); + y2 = mpi_new (0); + + in = _gcry_mpi_get_buffer (input, 0, &inlen, NULL); + if (!in) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + cipher = xtrymalloc (inlen); + if (!cipher) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + + /* rand k in [1, n-1] */ + k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM); + + /* [k]G = (x1, y1) */ + _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x1, y1, &kG, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: kG can not be a Point at Infinity!\n"); + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* [k]P = (x2, y2) */ + _gcry_mpi_ec_mul_point (&kP, k, ec->Q, ec); + if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* t = KDF(x2 || y2, klen) */ + x2y2 = _gcry_mpi_ec_ec2os (&kP, 
ec); + raw = mpi_get_opaque (x2y2, &rawlen); + rawlen = (rawlen + 7) / 8; + + /* skip the prefix '0x04' */ + raw += 1; + rawlen -= 1; + rc = kdf_x9_63 (algo, raw, rawlen, cipher, inlen); + if (rc) + goto leave; + + /* cipher = t xor in */ + for (i = 0; i < inlen; i++) + cipher[i] ^= in[i]; + + /* hash(x2 || IN || y2) */ + mdlen = _gcry_md_get_algo_dlen (algo); + rc = _gcry_md_open (&md, algo, 0); + if (rc) + goto leave; + _gcry_md_write (md, raw, MPI_NBYTES(x2)); + _gcry_md_write (md, in, inlen); + _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2)); + dgst = _gcry_md_read (md, algo); + if (dgst == NULL) + { + rc = GPG_ERR_DIGEST_ALGO; + goto leave; + } + + if (!rc) + { + gcry_mpi_t c1; + gcry_mpi_t c3; + gcry_mpi_t c2; + + c3 = mpi_new (0); + c2 = mpi_new (0); + + c1 = _gcry_ecc_ec2os (x1, y1, ec->p); + _gcry_mpi_set_opaque_copy (c3, dgst, mdlen * 8); + _gcry_mpi_set_opaque_copy (c2, cipher, inlen * 8); + + rc = sexp_build (r_ciph, NULL, + "(enc-val(flags sm2)(sm2(a%M)(b%M)(c%M)))", + c1, c3, c2); + + mpi_free (c1); + mpi_free (c3); + mpi_free (c2); + } + +leave: + _gcry_md_close (md); + mpi_free (x2y2); + mpi_free (k); + + point_free (&kG); + point_free (&kP); + mpi_free (x1); + mpi_free (y1); + mpi_free (x2); + mpi_free (y2); + + xfree (cipher); + xfree (in); + + return rc; +} + + +gpg_err_code_t +_gcry_ecc_sm2_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t data_list, mpi_ec_t ec) +{ + gpg_err_code_t rc; + gcry_mpi_t data_c1 = NULL; + gcry_mpi_t data_c3 = NULL; + gcry_mpi_t data_c2 = NULL; + + /* + * Extract the data. + */ + rc = sexp_extract_param (data_list, NULL, "/a/b/c", + &data_c1, &data_c3, &data_c2, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("ecc_decrypt d_c1", data_c1); + log_printmpi ("ecc_decrypt d_c3", data_c3); + log_printmpi ("ecc_decrypt d_c2", data_c2); + } + + { + const int algo = GCRY_MD_SM3; + gcry_md_hd_t md = NULL; + int mdlen; + unsigned char *dgst; + mpi_point_struct c1; + mpi_point_struct kP; + gcry_mpi_t x2, y2; + gcry_mpi_t x2y2 = NULL; + unsigned char *in = NULL; + unsigned int inlen; + unsigned char *plain = NULL; + unsigned char *raw; + unsigned int rawlen; + unsigned char *c3 = NULL; + unsigned int c3_len; + int i; + + point_init (&c1); + point_init (&kP); + x2 = mpi_new (0); + y2 = mpi_new (0); + + in = mpi_get_opaque (data_c2, &inlen); + inlen = (inlen + 7) / 8; + plain = xtrymalloc (inlen); + if (!plain) + { + rc = gpg_err_code_from_syserror (); + goto leave_main; + } + + rc = _gcry_ecc_sec_decodepoint (data_c1, ec, &c1); + if (rc) + goto leave_main; + + if (!_gcry_mpi_ec_curve_point (&c1, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + /* [d]C1 = (x2, y2), C1 = [k]G */ + _gcry_mpi_ec_mul_point (&kP, ec->d, &c1, ec); + if (_gcry_mpi_ec_get_affine (x2, y2, &kP, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + /* t = KDF(x2 || y2, inlen) */ + x2y2 = _gcry_mpi_ec_ec2os (&kP, ec); + raw = mpi_get_opaque (x2y2, &rawlen); + rawlen = (rawlen + 7) / 8; + /* skip the prefix '0x04' */ + raw += 1; + rawlen -= 1; + rc = kdf_x9_63 (algo, raw, rawlen, plain, inlen); + if (rc) + goto leave_main; + + /* plain = C2 xor t */ + for (i = 0; i < inlen; i++) + plain[i] ^= in[i]; + + /* Hash(x2 || IN || y2) == C3 */ + mdlen = _gcry_md_get_algo_dlen (algo); + rc = _gcry_md_open (&md, algo, 0); + if (rc) + goto leave_main; + _gcry_md_write (md, raw, MPI_NBYTES(x2)); + _gcry_md_write (md, plain, inlen); + _gcry_md_write (md, raw + MPI_NBYTES(x2), MPI_NBYTES(y2)); + dgst = _gcry_md_read (md, algo); + if (dgst == NULL) + { + 
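+ /* For reference, kdf_x9_63 used here is the counter-mode KDF from
+ * X9.63: out = Hash(Z||1) || Hash(Z||2) || ...  A standalone
+ * sketch with the public MD API and hypothetical Z/OUT buffers:
+ *
+ *   gcry_md_hd_t hd;
+ *   u32 ctr;
+ *   size_t off, n, mdlen = gcry_md_get_algo_dlen (GCRY_MD_SM3);
+ *   gcry_md_open (&hd, GCRY_MD_SM3, 0);
+ *   for (ctr = 1, off = 0; off < outlen; ctr++)
+ *     {
+ *       unsigned char be[4] = { ctr >> 24, ctr >> 16, ctr >> 8, ctr };
+ *       gcry_md_write (hd, z, zlen);
+ *       gcry_md_write (hd, be, 4);
+ *       n = (outlen - off < mdlen) ? outlen - off : mdlen;
+ *       memcpy (out + off, gcry_md_read (hd, GCRY_MD_SM3), n);
+ *       off += n;
+ *       gcry_md_reset (hd);
+ *     }
+ *   gcry_md_close (hd);
+ */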
memset (plain, 0, inlen); + rc = GPG_ERR_DIGEST_ALGO; + goto leave_main; + } + c3 = mpi_get_opaque (data_c3, &c3_len); + c3_len = (c3_len + 7) / 8; + if (c3_len != mdlen || memcmp (dgst, c3, c3_len) != 0) + { + memset (plain, 0, inlen); + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + + if (!rc) + { + gcry_mpi_t r; + + r = mpi_new (inlen * 8); + _gcry_mpi_set_buffer (r, plain, inlen, 0); + + rc = sexp_build (r_plain, NULL, "(value %m)", r); + + mpi_free (r); + } + + leave_main: + _gcry_md_close (md); + mpi_free (x2y2); + xfree (plain); + + point_free (&c1); + point_free (&kP); + mpi_free (x2); + mpi_free (y2); + } + + leave: + _gcry_mpi_release (data_c1); + _gcry_mpi_release (data_c3); + _gcry_mpi_release (data_c2); + + return rc; +} + + +/* Compute an SM2 signature. + * Return the signature struct (r,s) from the message hash. The caller + * must have allocated R and S. + */ +gpg_err_code_t +_gcry_ecc_sm2_sign (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s, + int flags, int hashalgo) +{ + gpg_err_code_t rc = 0; + int extraloops = 0; + gcry_mpi_t hash; + const void *abuf; + unsigned int abits, qbits; + gcry_mpi_t tmp = NULL; + gcry_mpi_t k = NULL; + gcry_mpi_t rk = NULL; + mpi_point_struct kG; + gcry_mpi_t x1; + + if (DBG_CIPHER) + log_mpidump ("sm2 sign hash ", input); + + qbits = mpi_get_nbits (ec->n); + + /* Convert the INPUT into an MPI if needed. */ + rc = _gcry_dsa_normalize_hash (input, &hash, qbits); + if (rc) + return rc; + + point_init (&kG); + x1 = mpi_new (0); + rk = mpi_new (0); + tmp = mpi_new (0); + + for (;;) + { + /* rand k in [1, n-1] */ + if ((flags & PUBKEY_FLAG_RFC6979) && hashalgo) + { + /* Use Pornin's method for deterministic DSA. If this + flag is set, it is expected that HASH is an opaque + MPI with the to be signed hash. That hash is also + used as h1 from 3.2.a. */ + if (!mpi_is_opaque (input)) + { + rc = GPG_ERR_CONFLICT; + goto leave; + } + + abuf = mpi_get_opaque (input, &abits); + rc = _gcry_dsa_gen_rfc6979_k (&k, ec->n, ec->d, + abuf, (abits+7)/8, + hashalgo, extraloops); + if (rc) + goto leave; + extraloops++; + } + else + k = _gcry_dsa_gen_k (ec->n, GCRY_VERY_STRONG_RANDOM); + + _gcry_dsa_modify_k (k, ec->n, qbits); + + /* [k]G = (x1, y1) */ + _gcry_mpi_ec_mul_point (&kG, k, ec->G, ec); + if (_gcry_mpi_ec_get_affine (x1, NULL, &kG, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* r = (e + x1) % n */ + mpi_addm (r, hash, x1, ec->n); + + /* r != 0 && r + k != n */ + if (mpi_cmp_ui (r, 0) == 0) + continue; + mpi_add (rk, r, k); + if (mpi_cmp (rk, ec->n) == 0) + continue; + + /* s = ((d + 1)^-1 * (k - rd)) % n */ + mpi_addm (s, ec->d, GCRYMPI_CONST_ONE, ec->n); + mpi_invm (s, s, ec->n); + mpi_mulm (tmp, r, ec->d, ec->n); + mpi_subm (tmp, k, tmp, ec->n); + mpi_mulm (s, s, tmp, ec->n); + + /* s != 0 */ + if (mpi_cmp_ui (s, 0) == 0) + continue; + + break; /* Okay */ + } + + if (DBG_CIPHER) + { + log_mpidump ("sm2 sign result r ", r); + log_mpidump ("sm2 sign result s ", s); + } + +leave: + point_free (&kG); + mpi_free (k); + mpi_free (x1); + mpi_free (rk); + mpi_free (tmp); + + if (hash != input) + mpi_free (hash); + + return rc; +} + + +/* Verify an SM2 signature. + * Check if R and S verifies INPUT. 
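+ *
+ * The signing equations above reduce, once X1 is known, to modular
+ * arithmetic only; the S computation can be sketched with the public
+ * MPI API (T being a scratch MPI):
+ *
+ *   gcry_mpi_addm (r, e, x1, n);    // r = (e + x1) mod n
+ *   gcry_mpi_add_ui (t, d, 1);
+ *   gcry_mpi_mod (t, t, n);
+ *   gcry_mpi_invm (t, t, n);        // t = (1 + d)^-1 mod n
+ *   gcry_mpi_mulm (s, r, d, n);     // s = r*d mod n
+ *   gcry_mpi_subm (s, k, s, n);     // s = (k - r*d) mod n
+ *   gcry_mpi_mulm (s, t, s, n);     // s = (1+d)^-1 * (k - r*d) mod n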
+ */ +gpg_err_code_t +_gcry_ecc_sm2_verify (gcry_mpi_t input, mpi_ec_t ec, + gcry_mpi_t r, gcry_mpi_t s) +{ + gpg_err_code_t err = 0; + gcry_mpi_t hash = NULL; + gcry_mpi_t t = NULL; + mpi_point_struct sG, tP; + gcry_mpi_t x1, y1; + unsigned int nbits; + + if (!_gcry_mpi_ec_curve_point (ec->Q, ec)) + return GPG_ERR_BROKEN_PUBKEY; + + /* r, s within [1, n-1] */ + if (mpi_cmp_ui (r, 1) < 0 || mpi_cmp (r, ec->n) > 0) + return GPG_ERR_BAD_SIGNATURE; + if (mpi_cmp_ui (s, 1) < 0 || mpi_cmp (s, ec->n) > 0) + return GPG_ERR_BAD_SIGNATURE; + + nbits = mpi_get_nbits (ec->n); + err = _gcry_dsa_normalize_hash (input, &hash, nbits); + if (err) + return err; + + point_init (&sG); + point_init (&tP); + x1 = mpi_new (0); + y1 = mpi_new (0); + t = mpi_new (0); + + /* t = (r + s) % n, t != 0 */ + mpi_addm (t, r, s, ec->n); + if (mpi_cmp_ui (t, 0) == 0) + { + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + + /* sG + tP = (x1, y1) */ + _gcry_mpi_ec_mul_point (&sG, s, ec->G, ec); + _gcry_mpi_ec_mul_point (&tP, t, ec->Q, ec); + _gcry_mpi_ec_add_points (&sG, &sG, &tP, ec); + if (_gcry_mpi_ec_get_affine (x1, y1, &sG, ec)) + { + err = GPG_ERR_INV_DATA; + goto leave; + } + + /* R = (e + x1) % n */ + mpi_addm (t, hash, x1, ec->n); + + /* check R == r */ + if (mpi_cmp (t, r)) + { + if (DBG_CIPHER) + { + log_mpidump (" R", t); + log_mpidump (" r", r); + log_mpidump (" s", s); + } + err = GPG_ERR_BAD_SIGNATURE; + goto leave; + } + if (DBG_CIPHER) + log_debug ("sm2 verify: Accepted\n"); + + leave: + point_free (&sG); + point_free (&tP); + mpi_free (x1); + mpi_free (y1); + mpi_free (t); + if (hash != input) + mpi_free (hash); + + return err; +} diff --git a/comm/third_party/libgcrypt/cipher/ecc.c b/comm/third_party/libgcrypt/cipher/ecc.c new file mode 100644 index 0000000000..5d8c7607ab --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/ecc.c @@ -0,0 +1,1779 @@ +/* ecc.c - Elliptic Curve Cryptography + * Copyright (C) 2007, 2008, 2010, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013, 2015 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +/* This code is originally based on the Patch 0.1.6 for the gnupg + 1.4.x branch as retrieved on 2007-03-21 from + http://www.calcurco.cat/eccGnuPG/src/gnupg-1.4.6-ecc0.2.0beta1.diff.bz2 + The original authors are: + Written by + Sergi Blanch i Torne , + Ramiro Moreno Chiral + Maintainers + Sergi Blanch i Torne + Ramiro Moreno Chiral + Mikael Mylnikov (mmr) + For use in Libgcrypt the code has been heavily modified and cleaned + up. In fact there is not much left of the originally code except for + some variable names and the text book implementaion of the sign and + verification algorithms. The arithmetic functions have entirely + been rewritten and moved to mpi/ec.c. + + ECDH encrypt and decrypt code written by Andrey Jivsov. 
+*/ + + +/* TODO: + + - In mpi/ec.c we use mpi_powm for x^2 mod p: Either implement a + special case in mpi_powm or check whether mpi_mulm is faster. + +*/ + + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "context.h" +#include "ec-context.h" +#include "pubkey-internal.h" +#include "ecc-common.h" + + +static const char *ecc_names[] = + { + "ecc", + "ecdsa", + "ecdh", + "eddsa", + "gost", + "sm2", + NULL, + }; + + +/* Sample NIST P-256 key from RFC 6979 A.2.5 */ +static const char sample_public_key_secp256[] = + "(public-key" + " (ecc" + " (curve secp256r1)" + " (q #04" + /**/ "60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6" + /**/ "7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299#)))"; + +static const char sample_secret_key_secp256[] = + "(private-key" + " (ecc" + " (curve secp256r1)" + " (d #C9AFA9D845BA75166B5C215767B1D6934E50C3DB36E89B127B8A622B120F6721#)" + " (q #04" + /**/ "60FED4BA255A9D31C961EB74C6356D68C049B8923B61FA6CE669622E60F29FB6" + /**/ "7903FE1008B8BC99A41AE9E95628BC64F2F1B20C2D7E9F5177A3C294D4462299#)))"; + + +/* Registered progress function and its callback value. */ +static void (*progress_cb) (void *, const char*, int, int, int); +static void *progress_cb_data; + + + +/* Local prototypes. */ +static void test_keys (mpi_ec_t ec, unsigned int nbits); +static void test_ecdh_only_keys (mpi_ec_t ec, unsigned int nbits, int flags); +static unsigned int ecc_get_nbits (gcry_sexp_t parms); + + + + +void +_gcry_register_pk_ecc_progress (void (*cb) (void *, const char *, + int, int, int), + void *cb_data) +{ + progress_cb = cb; + progress_cb_data = cb_data; +} + +/* static void */ +/* progress (int c) */ +/* { */ +/* if (progress_cb) */ +/* progress_cb (progress_cb_data, "pk_ecc", c, 0, 0); */ +/* } */ + + + +/** + * nist_generate_key - Standard version of the ECC key generation. + * @ec: Elliptic curve computation context. + * @flags: Flags controlling aspects of the creation. + * @r_x: On success this receives an allocated MPI with the affine + * x-coordinate of the poblic key. On error NULL is stored. + * @r_y: Ditto for the y-coordinate. + * + * Return: An error code. + * + * The @flags bits used by this function are %PUBKEY_FLAG_TRANSIENT to + * use a faster RNG, and %PUBKEY_FLAG_NO_KEYTEST to skip the assertion + * that the key works as expected. + * + * FIXME: Check whether N is needed. + */ +static gpg_err_code_t +nist_generate_key (mpi_ec_t ec, int flags, + gcry_mpi_t *r_x, gcry_mpi_t *r_y) +{ + mpi_point_struct Q; + gcry_random_level_t random_level; + gcry_mpi_t x, y; + const unsigned int pbits = ec->nbits; + + point_init (&Q); + + if ((flags & PUBKEY_FLAG_TRANSIENT_KEY)) + random_level = GCRY_STRONG_RANDOM; + else + random_level = GCRY_VERY_STRONG_RANDOM; + + /* Generate a secret. */ + if (ec->dialect == ECC_DIALECT_ED25519 + || ec->dialect == ECC_DIALECT_SAFECURVE + || (flags & PUBKEY_FLAG_DJB_TWEAK)) + { + char *rndbuf; + int len = (pbits+7)/8; + + rndbuf = _gcry_random_bytes_secure (len, random_level); + if (ec->dialect == ECC_DIALECT_SAFECURVE) + ec->d = mpi_set_opaque (NULL, rndbuf, len*8); + else + { + ec->d = mpi_snew (pbits); + if ((pbits % 8)) + rndbuf[0] &= (1 << (pbits % 8)) - 1; + rndbuf[0] |= (1 << ((pbits + 7) % 8)); + rndbuf[len-1] &= (256 - ec->h); + _gcry_mpi_set_buffer (ec->d, rndbuf, len, 0); + xfree (rndbuf); + } + } + else + ec->d = _gcry_dsa_gen_k (ec->n, random_level); + + /* Compute Q. 
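+ * (For Curve25519 the byte tweaks above are the usual clamping; on
+ * the big-endian buffer used here (pbits = 255, h = 8) they come
+ * down to:
+ *
+ *   rndbuf[0]  &= 0x7f;   // (1 << (255 % 8)) - 1: clear bit 255
+ *   rndbuf[0]  |= 0x40;   // 1 << ((255 + 7) % 8): set bit 254
+ *   rndbuf[31] &= 0xf8;   // 256 - h: clear the three cofactor bits
+ * )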
*/ + _gcry_mpi_ec_mul_point (&Q, ec->d, ec->G, ec); + + x = mpi_new (pbits); + if (r_y == NULL) + y = NULL; + else + y = mpi_new (pbits); + if (_gcry_mpi_ec_get_affine (x, y, &Q, ec)) + log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q"); + + /* We want the Q=(x,y) be a "compliant key" in terms of the + * http://tools.ietf.org/html/draft-jivsov-ecc-compact, which simply + * means that we choose either Q=(x,y) or -Q=(x,p-y) such that we + * end up with the min(y,p-y) as the y coordinate. Such a public + * key allows the most efficient compression: y can simply be + * dropped because we know that it's a minimum of the two + * possibilities without any loss of security. Note that we don't + * do that for Ed25519 so that we do not violate the special + * construction of the secret key. */ + if (r_y == NULL || ec->dialect == ECC_DIALECT_ED25519) + ec->Q = mpi_point_set (NULL, Q.x, Q.y, Q.z); + else + { + gcry_mpi_t negative; + + negative = mpi_new (pbits); + + if (ec->model == MPI_EC_WEIERSTRASS) + mpi_sub (negative, ec->p, y); /* negative = p - y */ + else + mpi_sub (negative, ec->p, x); /* negative = p - x */ + + if (mpi_cmp (negative, y) < 0) /* p - y < p */ + { + /* We need to end up with -Q; this assures that new Q's y is + the smallest one */ + if (ec->model == MPI_EC_WEIERSTRASS) + { + mpi_free (y); + y = negative; + } + else + { + mpi_free (x); + x = negative; + } + mpi_sub (ec->d, ec->n, ec->d); /* d = order - d */ + ec->Q = mpi_point_set (NULL, x, y, mpi_const (MPI_C_ONE)); + + if (DBG_CIPHER) + log_debug ("ecgen converted Q to a compliant point\n"); + } + else /* p - y >= p */ + { + /* No change is needed exactly 50% of the time: just copy. */ + mpi_free (negative); + ec->Q = mpi_point_set (NULL, Q.x, Q.y, Q.z); + if (DBG_CIPHER) + log_debug ("ecgen didn't need to convert Q to a compliant point\n"); + } + } + + *r_x = x; + if (r_y) + *r_y = y; + + point_free (&Q); + /* Now we can test our keys (this should never fail!). */ + if ((flags & PUBKEY_FLAG_NO_KEYTEST)) + ; /* User requested to skip the test. */ + else if (ec->model == MPI_EC_MONTGOMERY) + test_ecdh_only_keys (ec, ec->nbits - 63, flags); + else + test_keys (ec, ec->nbits - 64); + + return 0; +} + + +/* + * To verify correct skey it use a random information. + * First, encrypt and decrypt this dummy value, + * test if the information is recuperated. + * Second, test with the sign and verify functions. 
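+ *
+ * (The compact-key convention used by the generator above keeps
+ * min(y, p-y) and negates d accordingly; with the public MPI API,
+ * as a sketch:
+ *
+ *   gcry_mpi_t neg = gcry_mpi_new (0);
+ *   gcry_mpi_sub (neg, p, y);       // the other candidate, p - y
+ *   if (gcry_mpi_cmp (neg, y) < 0)  // p - y is the smaller root
+ *     {
+ *       gcry_mpi_sub (d, n, d);     // switch to -Q by negating d
+ *       // ... and publish (x, p-y) instead of (x, y)
+ *     }
+ * )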
+ */ +static void +test_keys (mpi_ec_t ec, unsigned int nbits) +{ + gcry_mpi_t test = mpi_new (nbits); + mpi_point_struct R_; + gcry_mpi_t c = mpi_new (nbits); + gcry_mpi_t out = mpi_new (nbits); + gcry_mpi_t r = mpi_new (nbits); + gcry_mpi_t s = mpi_new (nbits); + + if (DBG_CIPHER) + log_debug ("Testing key.\n"); + + point_init (&R_); + + _gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM); + + if (_gcry_ecc_ecdsa_sign (test, ec, r, s, 0, 0) ) + log_fatal ("ECDSA operation: sign failed\n"); + + if (_gcry_ecc_ecdsa_verify (test, ec, r, s)) + { + log_fatal ("ECDSA operation: sign, verify failed\n"); + } + + if (DBG_CIPHER) + log_debug ("ECDSA operation: sign, verify ok.\n"); + + point_free (&R_); + mpi_free (s); + mpi_free (r); + mpi_free (out); + mpi_free (c); + mpi_free (test); +} + + +static void +test_ecdh_only_keys (mpi_ec_t ec, unsigned int nbits, int flags) +{ + gcry_mpi_t test; + mpi_point_struct R_; + gcry_mpi_t x0, x1; + + if (DBG_CIPHER) + log_debug ("Testing ECDH only key.\n"); + + point_init (&R_); + + if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK)) + { + char *rndbuf; + const unsigned int pbits = ec->nbits; + int len = (pbits+7)/8; + + rndbuf = _gcry_random_bytes (len, GCRY_WEAK_RANDOM); + if (ec->dialect == ECC_DIALECT_SAFECURVE) + test = mpi_set_opaque (NULL, rndbuf, len*8); + else + { + test = mpi_new (pbits); + if ((pbits % 8)) + rndbuf[0] &= (1 << (pbits % 8)) - 1; + rndbuf[0] |= (1 << ((pbits + 7) % 8)); + rndbuf[len-1] &= (256 - ec->h); + _gcry_mpi_set_buffer (test, rndbuf, len, 0); + xfree (rndbuf); + } + } + else + { + test = mpi_new (nbits); + _gcry_mpi_randomize (test, nbits, GCRY_WEAK_RANDOM); + } + + x0 = mpi_new (0); + x1 = mpi_new (0); + + /* R_ = hkQ <=> R_ = hkdG */ + _gcry_mpi_ec_mul_point (&R_, test, ec->Q, ec); + if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK)) + _gcry_mpi_ec_mul_point (&R_, _gcry_mpi_get_const (ec->h), &R_, ec); + if (_gcry_mpi_ec_get_affine (x0, NULL, &R_, ec)) + log_fatal ("ecdh: Failed to get affine coordinates for hkQ\n"); + + _gcry_mpi_ec_mul_point (&R_, test, ec->G, ec); + _gcry_mpi_ec_mul_point (&R_, ec->d, &R_, ec); + /* R_ = hdkG */ + if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK)) + _gcry_mpi_ec_mul_point (&R_, _gcry_mpi_get_const (ec->h), &R_, ec); + + if (_gcry_mpi_ec_get_affine (x1, NULL, &R_, ec)) + log_fatal ("ecdh: Failed to get affine coordinates for hdkG\n"); + + if (mpi_cmp (x0, x1)) + { + log_fatal ("ECDH test failed.\n"); + } + + mpi_free (x0); + mpi_free (x1); + + point_free (&R_); + mpi_free (test); +} + + +/* + * To check the validity of the value, recalculate the correspondence + * between the public value and the secret one. + */ +static int +check_secret_key (mpi_ec_t ec, int flags) +{ + int rc = 1; + mpi_point_struct Q; + gcry_mpi_t x1, y1; + gcry_mpi_t x2 = NULL; + gcry_mpi_t y2 = NULL; + + point_init (&Q); + x1 = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y1 = NULL; + else + y1 = mpi_new (0); + + /* G in E(F_p) */ + if (!_gcry_mpi_ec_curve_point (ec->G, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: Point 'G' does not belong to curve 'E'!\n"); + goto leave; + } + + /* G != PaI */ + if (!mpi_cmp_ui (ec->G->z, 0)) + { + if (DBG_CIPHER) + log_debug ("Bad check: 'G' cannot be Point at Infinity!\n"); + goto leave; + } + + /* Check order of curve. 
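+ * The same check can be reproduced with the public EC API; a short,
+ * self-contained sketch for a named curve:
+ *
+ *   gcry_ctx_t ctx;
+ *   gcry_mpi_point_t G, R = gcry_mpi_point_new (0);
+ *   gcry_mpi_t n, x = gcry_mpi_new (0);
+ *   gcry_mpi_ec_new (&ctx, NULL, "NIST P-256");
+ *   G = gcry_mpi_ec_get_point ("g", ctx, 1);
+ *   n = gcry_mpi_ec_get_mpi ("n", ctx, 1);
+ *   gcry_mpi_ec_mul (R, n, G, ctx);
+ *   // [n]G is the point at infinity, so this must return non-zero:
+ *   if (!gcry_mpi_ec_get_affine (x, NULL, R, ctx))
+ *     ;  // broken domain parameters
+ *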
*/ + if (ec->dialect == ECC_DIALECT_STANDARD && !(flags & PUBKEY_FLAG_DJB_TWEAK)) + { + _gcry_mpi_ec_mul_point (&Q, ec->n, ec->G, ec); + if (mpi_cmp_ui (Q.z, 0)) + { + if (DBG_CIPHER) + log_debug ("check_secret_key: E is not a curve of order n\n"); + goto leave; + } + } + + /* Pubkey cannot be PaI */ + if (!mpi_cmp_ui (ec->Q->z, 0)) + { + if (DBG_CIPHER) + log_debug ("Bad check: Q can not be a Point at Infinity!\n"); + goto leave; + } + + /* pubkey = [d]G over E */ + if (!_gcry_ecc_compute_public (&Q, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: computation of dG failed\n"); + goto leave; + } + if (_gcry_mpi_ec_get_affine (x1, y1, &Q, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: Q can not be a Point at Infinity!\n"); + goto leave; + } + + if ((flags & PUBKEY_FLAG_EDDSA) + || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE)) + ; /* Fixme: EdDSA is special. */ + else if (!mpi_cmp_ui (ec->Q->z, 1)) + { + /* Fast path if Q is already in affine coordinates. */ + if (mpi_cmp (x1, ec->Q->x) || (y1 && mpi_cmp (y1, ec->Q->y))) + { + if (DBG_CIPHER) + log_debug + ("Bad check: There is NO correspondence between 'd' and 'Q'!\n"); + goto leave; + } + } + else + { + x2 = mpi_new (0); + y2 = mpi_new (0); + if (_gcry_mpi_ec_get_affine (x2, y2, ec->Q, ec)) + { + if (DBG_CIPHER) + log_debug ("Bad check: Q can not be a Point at Infinity!\n"); + goto leave; + } + + if (mpi_cmp (x1, x2) || mpi_cmp (y1, y2)) + { + if (DBG_CIPHER) + log_debug + ("Bad check: There is NO correspondence between 'd' and 'Q'!\n"); + goto leave; + } + } + rc = 0; /* Okay. */ + + leave: + mpi_free (x2); + mpi_free (x1); + mpi_free (y1); + mpi_free (y2); + point_free (&Q); + return rc; +} + + + +/********************************************* + ************** interface ****************** + *********************************************/ + +static gcry_err_code_t +ecc_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) +{ + gpg_err_code_t rc; + gcry_mpi_t Gx = NULL; + gcry_mpi_t Gy = NULL; + gcry_mpi_t Qx = NULL; + gcry_mpi_t Qy = NULL; + mpi_ec_t ec = NULL; + gcry_sexp_t curve_info = NULL; + gcry_sexp_t curve_flags = NULL; + gcry_mpi_t base = NULL; + gcry_mpi_t public = NULL; + int flags = 0; + + rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecgen curve", genparms, NULL); + if (rc) + goto leave; + + if ((flags & PUBKEY_FLAG_EDDSA) + || (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE)) + rc = _gcry_ecc_eddsa_genkey (ec, flags); + else if (ec->model == MPI_EC_MONTGOMERY) + rc = nist_generate_key (ec, flags, &Qx, NULL); + else + rc = nist_generate_key (ec, flags, &Qx, &Qy); + if (rc) + goto leave; + + /* Copy data to the result. 
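+ * From the caller's side all of the above is reached through
+ * gcry_pk_genkey; a minimal sketch:
+ *
+ *   gcry_sexp_t parms, keypair;
+ *   gcry_sexp_build (&parms, NULL,
+ *                    "(genkey (ecc (curve \"NIST P-256\")))");
+ *   if (!gcry_pk_genkey (&keypair, parms))
+ *     ;  // keypair: (key-data (public-key ...) (private-key ...))
+ *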
*/ + Gx = mpi_new (0); + Gy = mpi_new (0); + if (ec->model != MPI_EC_MONTGOMERY) + { + if (_gcry_mpi_ec_get_affine (Gx, Gy, ec->G, ec)) + log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "G"); + base = _gcry_ecc_ec2os (Gx, Gy, ec->p); + } + if (((ec->dialect == ECC_DIALECT_SAFECURVE && ec->model == MPI_EC_EDWARDS) + || ec->dialect == ECC_DIALECT_ED25519 || ec->model == MPI_EC_MONTGOMERY) + && !(flags & PUBKEY_FLAG_NOCOMP)) + { + unsigned char *encpk; + unsigned int encpklen; + + if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_encodepoint (Qx, ec->nbits, + ec->dialect != ECC_DIALECT_SAFECURVE, + &encpk, &encpklen); + else + /* (Gx and Gy are used as scratch variables) */ + rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, Gx, Gy, + (ec->dialect != ECC_DIALECT_SAFECURVE + && !!(flags & PUBKEY_FLAG_COMP)), + &encpk, &encpklen); + if (rc) + goto leave; + public = mpi_new (0); + mpi_set_opaque (public, encpk, encpklen*8); + } + else + { + if (!Qx) + { + /* This is the case for a key from _gcry_ecc_eddsa_generate + with no compression. */ + Qx = mpi_new (0); + Qy = mpi_new (0); + if (_gcry_mpi_ec_get_affine (Qx, Qy, ec->Q, ec)) + log_fatal ("ecgen: Failed to get affine coordinates for %s\n", "Q"); + } + public = _gcry_ecc_ec2os (Qx, Qy, ec->p); + } + if (ec->name) + { + rc = sexp_build (&curve_info, NULL, "(curve %s)", ec->name); + if (rc) + goto leave; + } + + if ((flags & PUBKEY_FLAG_PARAM) || (flags & PUBKEY_FLAG_EDDSA) + || (flags & PUBKEY_FLAG_DJB_TWEAK)) + { + rc = sexp_build + (&curve_flags, NULL, + ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_EDDSA))? + "(flags param eddsa)" : + ((flags & PUBKEY_FLAG_PARAM) && (flags & PUBKEY_FLAG_DJB_TWEAK))? + "(flags param djb-tweak)" : + ((flags & PUBKEY_FLAG_PARAM))? + "(flags param)" : ((flags & PUBKEY_FLAG_EDDSA))? + "(flags eddsa)" : "(flags djb-tweak)" ); + if (rc) + goto leave; + } + + if ((flags & PUBKEY_FLAG_PARAM) && ec->name) + rc = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (ecc%S%S(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)))" + " (private-key" + " (ecc%S%S(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)(d%m)))" + " )", + curve_info, curve_flags, + ec->p, ec->a, ec->b, base, ec->n, ec->h, public, + curve_info, curve_flags, + ec->p, ec->a, ec->b, base, ec->n, ec->h, public, + ec->d); + else + rc = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (ecc%S%S(q%m)))" + " (private-key" + " (ecc%S%S(q%m)(d%m)))" + " )", + curve_info, curve_flags, + public, + curve_info, curve_flags, + public, ec->d); + if (rc) + goto leave; + + if (DBG_CIPHER) + { + log_printmpi ("ecgen result p", ec->p); + log_printmpi ("ecgen result a", ec->a); + log_printmpi ("ecgen result b", ec->b); + log_printmpi ("ecgen result G", base); + log_printmpi ("ecgen result n", ec->n); + log_debug ("ecgen result h:+%02x\n", ec->h); + log_printmpi ("ecgen result Q", public); + log_printmpi ("ecgen result d", ec->d); + if ((flags & PUBKEY_FLAG_EDDSA)) + log_debug ("ecgen result using Ed25519+EdDSA\n"); + } + + leave: + mpi_free (public); + mpi_free (base); + mpi_free (Gx); + mpi_free (Gy); + mpi_free (Qx); + mpi_free (Qy); + _gcry_mpi_ec_free (ec); + sexp_release (curve_flags); + sexp_release (curve_info); + return rc; +} + + +static gcry_err_code_t +ecc_check_secret_key (gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + int flags = 0; + mpi_ec_t ec = NULL; + + /* + * Extract the key. 
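+ *
+ * (For the EdDSA variant of the generation above the caller asks for
+ * the flag explicitly, e.g.
+ *
+ *   gcry_sexp_build (&parms, NULL,
+ *                    "(genkey (ecc (curve Ed25519) (flags eddsa)))");
+ *
+ * and the returned key material carries "(flags eddsa)" as built by
+ * ecc_generate.)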
+ */ + rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_testkey", keyparms, NULL); + if (rc) + goto leave; + if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q || !ec->d) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + if (check_secret_key (ec, flags)) + rc = GPG_ERR_BAD_SECKEY; + + leave: + _gcry_mpi_ec_free (ec); + if (DBG_CIPHER) + log_debug ("ecc_testkey => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +ecc_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t data = NULL; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + mpi_ec_t ec = NULL; + int flags = 0; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, 0); + + /* + * Extract the key. + */ + rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_sign", keyparms, NULL); + if (rc) + goto leave; + if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->d) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + ctx.flags |= flags; + if (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE) + ctx.flags |= PUBKEY_FLAG_EDDSA; + /* Clear hash algo for EdDSA. */ + if ((ctx.flags & PUBKEY_FLAG_EDDSA)) + ctx.hash_algo = GCRY_MD_NONE; + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("ecc_sign data", data); + + /* Hash algo is determined by curve in EdDSA. Fill it if not specified. */ + if ((ctx.flags & PUBKEY_FLAG_EDDSA) && !ctx.hash_algo) + { + if (ec->dialect == ECC_DIALECT_ED25519) + ctx.hash_algo = GCRY_MD_SHA512; + else if (ec->dialect == ECC_DIALECT_SAFECURVE) + ctx.hash_algo = GCRY_MD_SHAKE256; + } + + sig_r = mpi_new (0); + sig_s = mpi_new (0); + if ((ctx.flags & PUBKEY_FLAG_EDDSA)) + { + /* EdDSA requires the public key. */ + rc = _gcry_ecc_eddsa_sign (data, ec, sig_r, sig_s, &ctx); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(eddsa(r%M)(s%M)))", sig_r, sig_s); + } + else if ((ctx.flags & PUBKEY_FLAG_GOST)) + { + rc = _gcry_ecc_gost_sign (data, ec, sig_r, sig_s); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(gost(r%M)(s%M)))", sig_r, sig_s); + } + else if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + rc = _gcry_ecc_sm2_sign (data, ec, sig_r, sig_s, + ctx.flags, ctx.hash_algo); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(sm2(r%M)(s%M)))", sig_r, sig_s); + } + else + { + rc = _gcry_ecc_ecdsa_sign (data, ec, sig_r, sig_s, + ctx.flags, ctx.hash_algo); + if (!rc) + rc = sexp_build (r_sig, NULL, + "(sig-val(ecdsa(r%M)(s%M)))", sig_r, sig_s); + } + + leave: + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + _gcry_mpi_release (data); + _gcry_mpi_ec_free (ec); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("ecc_sign => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +ecc_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + gcry_mpi_t data = NULL; + int sigflags; + mpi_ec_t ec = NULL; + int flags = 0; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, + ecc_get_nbits (s_keyparms)); + + /* + * Extract the key. 
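+ *
+ * (The signing entry point above is reached via gcry_pk_sign; for
+ * plain ECDSA the caller side can be sketched as follows, with H a
+ * 32-byte SHA-256 digest:
+ *
+ *   gcry_sexp_t data, sig;
+ *   gcry_sexp_build (&data, NULL,
+ *                    "(data (flags rfc6979) (hash sha256 %b))", 32, h);
+ *   gcry_pk_sign (&sig, data, seckey);
+ *   // sig: (sig-val (ecdsa (r ...) (s ...)))
+ * )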
+ */ + rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_verify", + s_keyparms, NULL); + if (rc) + goto leave; + if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + if (ec->model == MPI_EC_MONTGOMERY) + { + if (DBG_CIPHER) + log_debug ("ecc_verify: Can't use a Montgomery curve\n"); + rc = GPG_ERR_INTERNAL; + goto leave; + } + + ctx.flags |= flags; + if (ec->model == MPI_EC_EDWARDS && ec->dialect == ECC_DIALECT_SAFECURVE) + ctx.flags |= PUBKEY_FLAG_EDDSA; + /* Clear hash algo for EdDSA. */ + if ((ctx.flags & PUBKEY_FLAG_EDDSA)) + ctx.hash_algo = GCRY_MD_NONE; + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("ecc_verify data", data); + + /* Hash algo is determined by curve in EdDSA. Fill it if not specified. */ + if ((ctx.flags & PUBKEY_FLAG_EDDSA) && !ctx.hash_algo) + { + if (ec->dialect == ECC_DIALECT_ED25519) + ctx.hash_algo = GCRY_MD_SHA512; + else if (ec->dialect == ECC_DIALECT_SAFECURVE) + ctx.hash_algo = GCRY_MD_SHAKE256; + } + + /* + * Extract the signature value. + */ + rc = _gcry_pk_util_preparse_sigval (s_sig, ecc_names, &l1, &sigflags); + if (rc) + goto leave; + rc = sexp_extract_param (l1, NULL, (sigflags & PUBKEY_FLAG_EDDSA)? "/rs":"rs", + &sig_r, &sig_s, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("ecc_verify s_r", sig_r); + log_mpidump ("ecc_verify s_s", sig_s); + } + if ((ctx.flags & PUBKEY_FLAG_EDDSA) ^ (sigflags & PUBKEY_FLAG_EDDSA)) + { + rc = GPG_ERR_CONFLICT; /* Inconsistent use of flag/algoname. */ + goto leave; + } + + /* + * Verify the signature. + */ + if ((sigflags & PUBKEY_FLAG_EDDSA)) + { + rc = _gcry_ecc_eddsa_verify (data, ec, sig_r, sig_s, &ctx); + } + else if ((sigflags & PUBKEY_FLAG_GOST)) + { + rc = _gcry_ecc_gost_verify (data, ec, sig_r, sig_s); + } + else if ((sigflags & PUBKEY_FLAG_SM2)) + { + rc = _gcry_ecc_sm2_verify (data, ec, sig_r, sig_s); + } + else + { + rc = _gcry_ecc_ecdsa_verify (data, ec, sig_r, sig_s); + } + + leave: + _gcry_mpi_release (data); + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + _gcry_mpi_ec_free (ec); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("ecc_verify => %s\n", rc?gpg_strerror (rc):"Good"); + return rc; +} + + +/* ecdh raw is classic 2-round DH protocol published in 1976. + * + * Overview of ecc_encrypt_raw and ecc_decrypt_raw. + * + * As with any PK operation, encrypt version uses a public key and + * decrypt -- private. 
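+ *
+ * The verification path just shown is driven the same way: the caller
+ * passes the data s-expression together with the signature to
+ * gcry_pk_verify, e.g. (sketch):
+ *
+ *   gcry_error_t err = gcry_pk_verify (sig, data, pubkey);
+ *   if (gcry_err_code (err) == GPG_ERR_BAD_SIGNATURE)
+ *     ;  // reject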
+ * + * Symbols used below: + * G - field generator point + * d - private long-term scalar + * dG - public long-term key + * k - ephemeral scalar + * kG - ephemeral public key + * dkG - shared secret + * + * ecc_encrypt_raw description: + * input: + * data[0] : private scalar (k) + * output: A new S-expression with the parameters: + * s : shared point (kdG) + * e : generated ephemeral public key (kG) + * + * ecc_decrypt_raw description: + * input: + * data[0] : a point kG (ephemeral public key) + * output: + * result[0] : shared point (kdG) + */ +static gcry_err_code_t +ecc_encrypt_raw (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + unsigned int nbits; + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t mpi_s = NULL; + gcry_mpi_t mpi_e = NULL; + gcry_mpi_t data = NULL; + mpi_ec_t ec = NULL; + int flags = 0; + int no_error_on_infinity; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, + (nbits = ecc_get_nbits (keyparms))); + + /* + * Extract the key. + */ + rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_encrypt", keyparms, NULL); + if (rc) + goto leave; + + if (ec->dialect == ECC_DIALECT_SAFECURVE) + { + ctx.flags |= PUBKEY_FLAG_RAW_FLAG; + no_error_on_infinity = 1; + } + else if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + no_error_on_infinity = 1; + else + no_error_on_infinity = 0; + + /* + * Extract the data. + */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + + /* + * Tweak the scalar bits by cofactor and number of bits of the field. + * It assumes the cofactor is a power of 2. + */ + if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + { + int i; + + for (i = 0; (ec->h & (1 << i)) == 0; i++) + mpi_clear_bit (data, i); + mpi_set_highbit (data, ec->nbits - 1); + } + if (DBG_CIPHER) + log_mpidump ("ecc_encrypt data", data); + + if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->Q) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + /* All encryption will be done, return it. */ + rc = _gcry_ecc_sm2_encrypt (r_ciph, data, ec); + goto leave; + } + + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so */ + { + mpi_point_struct R; /* Result that we return. */ + gcry_mpi_t x, y; + unsigned char *rawmpi; + unsigned int rawmpilen; + + rc = 0; + x = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y = NULL; + else + y = mpi_new (0); + + point_init (&R); + + /* R = kQ <=> R = kdG */ + _gcry_mpi_ec_mul_point (&R, data, ec->Q, ec); + + if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) + { + /* + * Here, X is 0. In the X25519 computation on Curve25519, X0 + * function maps infinity to zero. So, when PUBKEY_FLAG_DJB_TWEAK + * is enabled, return the result of 0 not raising an error. + * + * This is a corner case. It never occurs with properly + * generated public keys, but it might happen with blindly + * imported public key which might not follow the key + * generation procedure. + */ + if (!no_error_on_infinity) + { /* It's not for X25519, then, the input data was simply wrong. 
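+ * (As an aside, the scalar tweak applied earlier in this function
+ * uses only public MPI bit operations; for a cofactor h that is a
+ * power of two, as a sketch:
+ *
+ *   for (i = 0; (h & (1 << i)) == 0; i++)
+ *     gcry_mpi_clear_bit (k, i);          // clear log2(h) low bits
+ *   gcry_mpi_set_highbit (k, nbits - 1);  // pin the top bit
+ *
+ * mirroring the PUBKEY_FLAG_DJB_TWEAK branch above.)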
*/ + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + } + if (y) + mpi_s = _gcry_ecc_ec2os (x, y, ec->p); + else + { + rc = _gcry_ecc_mont_encodepoint (x, nbits, + ec->dialect != ECC_DIALECT_SAFECURVE, + &rawmpi, &rawmpilen); + if (rc) + goto leave_main; + mpi_s = mpi_new (0); + mpi_set_opaque (mpi_s, rawmpi, rawmpilen*8); + } + + /* R = kG */ + _gcry_mpi_ec_mul_point (&R, data, ec->G, ec); + + if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave_main; + } + if (y) + mpi_e = _gcry_ecc_ec2os (x, y, ec->p); + else + { + rc = _gcry_ecc_mont_encodepoint (x, nbits, + ec->dialect != ECC_DIALECT_SAFECURVE, + &rawmpi, &rawmpilen); + if (!rc) + { + mpi_e = mpi_new (0); + mpi_set_opaque (mpi_e, rawmpi, rawmpilen*8); + } + } + + leave_main: + mpi_free (x); + mpi_free (y); + point_free (&R); + if (rc) + goto leave; + } + + if (!rc) + rc = sexp_build (r_ciph, NULL, "(enc-val(ecdh(s%m)(e%m)))", mpi_s, mpi_e); + + leave: + _gcry_mpi_release (data); + _gcry_mpi_release (mpi_s); + _gcry_mpi_release (mpi_e); + _gcry_mpi_ec_free (ec); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("ecc_encrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +/* input: + * data[0] : a point kG (ephemeral public key) + * output: + * resaddr[0] : shared point kdG + * + * see ecc_encrypt_raw for details. + */ +static gcry_err_code_t +ecc_decrypt_raw (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + unsigned int nbits; + gpg_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t data_e = NULL; + mpi_ec_t ec = NULL; + mpi_point_struct kG; + mpi_point_struct R; + gcry_mpi_t r = NULL; + int flags = 0; + int enable_specific_point_validation; + + point_init (&kG); + point_init (&R); + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, + (nbits = ecc_get_nbits (keyparms))); + + /* + * Extract the key. + */ + rc = _gcry_mpi_ec_internal_new (&ec, &flags, "ecc_decrypt", keyparms, NULL); + if (rc) + goto leave; + + if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n || !ec->d) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + + /* + * Extract the data. + */ + rc = _gcry_pk_util_preparse_encval (s_data, ecc_names, &l1, &ctx); + if (rc) + goto leave; + if ((ctx.flags & PUBKEY_FLAG_SM2)) + { + /* All decryption will be done, return it. */ + rc = _gcry_ecc_sm2_decrypt (r_plain, l1, ec); + goto leave; + } + else + { + rc = sexp_extract_param (l1, NULL, "/e", &data_e, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt d_e", data_e); + } + + if (ec->dialect == ECC_DIALECT_SAFECURVE || (flags & PUBKEY_FLAG_DJB_TWEAK)) + enable_specific_point_validation = 1; + else + enable_specific_point_validation = 0; + + /* + * Compute the plaintext. + */ + if (ec->model == MPI_EC_MONTGOMERY) + rc = _gcry_ecc_mont_decodepoint (data_e, ec, &kG); + else + rc = _gcry_ecc_sec_decodepoint (data_e, ec, &kG); + if (rc) + goto leave; + + if (DBG_CIPHER) + log_printpnt ("ecc_decrypt kG", &kG, NULL); + + if (enable_specific_point_validation) + { + /* For X25519, by its definition, validation should not be done. */ + /* (Instead, we do output check.) + * + * However, to mitigate secret key leak from our implementation, + * we also do input validation here. For constant-time + * implementation, we can remove this input validation. 
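+ * From the application side this function is reached through
+ * gcry_pk_decrypt, feeding back the ephemeral part produced by the
+ * encrypt step; a sketch reusing the s-expression layout parsed
+ * above:
+ *
+ *   gcry_sexp_t enc, plain;
+ *   gcry_sexp_build (&enc, NULL, "(enc-val (ecdh (e %m)))", kG);
+ *   gcry_pk_decrypt (&plain, enc, seckey);
+ *   // plain: (value kdG), the shared point
+ *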
+ */ + if (_gcry_mpi_ec_bad_point (&kG, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + } + else if (!_gcry_mpi_ec_curve_point (&kG, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* R = dkG */ + _gcry_mpi_ec_mul_point (&R, ec->d, &kG, ec); + + /* The following is false: assert( mpi_cmp_ui( R.x, 1 )==0 );, so: */ + { + gcry_mpi_t x, y; + + x = mpi_new (0); + if (ec->model == MPI_EC_MONTGOMERY) + y = NULL; + else + y = mpi_new (0); + + if (_gcry_mpi_ec_get_affine (x, y, &R, ec)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + /* + * Note for X25519. + * + * By the definition of X25519, this is the case where X25519 + * returns 0, mapping infinity to zero. However, we + * deliberately let it return an error. + * + * For X25519 ECDH, comming here means that it might be + * decrypted by anyone with the shared secret of 0 (the result + * of this function could be always 0 by other scalar values, + * other than the private key of D). + * + * So, it looks like an encrypted message but it can be + * decrypted by anyone, or at least something wrong + * happens. Recipient should not proceed as if it were + * properly encrypted message. + * + * This handling is needed for our major usage of GnuPG, + * where it does the One-Pass Diffie-Hellman method, + * C(1, 1, ECC CDH), with an ephemeral key. + */ + } + + if (y) + r = _gcry_ecc_ec2os (x, y, ec->p); + else + { + + unsigned char *rawmpi; + unsigned int rawmpilen; + + rc = _gcry_ecc_mont_encodepoint (x, nbits, + ec->dialect != ECC_DIALECT_SAFECURVE, + &rawmpi, &rawmpilen); + if (rc) + goto leave; + + r = mpi_new (0); + mpi_set_opaque (r, rawmpi, rawmpilen*8); + } + if (!r) + rc = gpg_err_code_from_syserror (); + else + rc = 0; + mpi_free (x); + mpi_free (y); + } + if (DBG_CIPHER) + log_printmpi ("ecc_decrypt res", r); + + if (!rc) + rc = sexp_build (r_plain, NULL, "(value %m)", r); + + leave: + point_free (&R); + point_free (&kG); + _gcry_mpi_release (r); + _gcry_mpi_release (data_e); + sexp_release (l1); + _gcry_mpi_ec_free (ec); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("ecc_decrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +/* Return the number of bits for the key described by PARMS. On error + * 0 is returned. The format of PARMS starts with the algorithm name; + * for example: + * + * (ecc + * (curve ) + * (p ) + * (a ) + * (b ) + * (g ) + * (n ) + * (q )) + * + * More parameters may be given. Either P or CURVE is needed. + */ +static unsigned int +ecc_get_nbits (gcry_sexp_t parms) +{ + gcry_sexp_t l1; + gcry_mpi_t p; + unsigned int nbits = 0; + char *curve; + + l1 = sexp_find_token (parms, "p", 1); + if (!l1) + { /* Parameter P not found - check whether we have "curve". */ + l1 = sexp_find_token (parms, "curve", 5); + if (!l1) + return 0; /* Neither P nor CURVE found. */ + + curve = sexp_nth_string (l1, 1); + sexp_release (l1); + if (!curve) + return 0; /* No curve name given (or out of core). */ + + if (_gcry_ecc_fill_in_curve (0, curve, NULL, &nbits)) + nbits = 0; + xfree (curve); + } + else + { + p = sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + if (p) + { + nbits = mpi_get_nbits (p); + _gcry_mpi_release (p); + } + } + return nbits; +} + + +/* See rsa.c for a description of this function. 
*/ +static gpg_err_code_t +compute_keygrip (gcry_md_hd_t md, gcry_sexp_t keyparms) +{ +#define N_COMPONENTS 6 + static const char names[N_COMPONENTS] = "pabgnq"; + gpg_err_code_t rc; + gcry_sexp_t l1; + gcry_mpi_t values[N_COMPONENTS]; + int idx; + char *curvename = NULL; + int flags = 0; + enum gcry_mpi_ec_models model = 0; + enum ecc_dialects dialect = 0; + const unsigned char *raw; + unsigned int n; + + /* Clear the values first. */ + for (idx=0; idx < N_COMPONENTS; idx++) + values[idx] = NULL; + + + /* Look for flags. */ + l1 = sexp_find_token (keyparms, "flags", 0); + if (l1) + { + rc = _gcry_pk_util_parse_flaglist (l1, &flags, NULL); + if (rc) + goto leave; + } + + /* Extract the parameters. */ + if ((flags & PUBKEY_FLAG_PARAM)) + rc = sexp_extract_param (keyparms, NULL, "p?a?b?g?n?/q", + &values[0], &values[1], &values[2], + &values[3], &values[4], &values[5], + NULL); + else + rc = sexp_extract_param (keyparms, NULL, "/q", &values[5], NULL); + if (rc) + goto leave; + + /* Check whether a curve parameter is available and use that to fill + in missing values. */ + sexp_release (l1); + l1 = sexp_find_token (keyparms, "curve", 5); + if (l1) + { + curvename = sexp_nth_string (l1, 1); + if (curvename) + { + rc = _gcry_ecc_update_curve_param (curvename, + &model, &dialect, + &values[0], &values[1], &values[2], + &values[3], &values[4]); + if (rc) + goto leave; + } + } + + /* Guess required fields if a curve parameter has not been given. + FIXME: This is a crude hacks. We need to fix that. */ + if (!curvename) + { + model = ((flags & PUBKEY_FLAG_EDDSA) + ? MPI_EC_EDWARDS + : MPI_EC_WEIERSTRASS); + dialect = ((flags & PUBKEY_FLAG_EDDSA) + ? ECC_DIALECT_ED25519 + : ECC_DIALECT_STANDARD); + } + + /* Check that all parameters are known and normalize all MPIs (that + should not be required but we use an internal function later and + thus we better make 100% sure that they are normalized). */ + for (idx = 0; idx < N_COMPONENTS; idx++) + if (!values[idx]) + { + rc = GPG_ERR_NO_OBJ; + goto leave; + } + else + _gcry_mpi_normalize (values[idx]); + + /* Uncompress the public key with the exception of EdDSA where + compression is the default and we thus compute the keygrip using + the compressed version. Because we don't support any non-eddsa + compression, the only thing we need to do is to compress + EdDSA. */ + if ((flags & PUBKEY_FLAG_EDDSA) && dialect == ECC_DIALECT_ED25519) + { + const unsigned int pbits = mpi_get_nbits (values[0]); + + rc = _gcry_ecc_eddsa_ensure_compact (values[5], pbits); + if (rc) + goto leave; + } + else if ((flags & PUBKEY_FLAG_DJB_TWEAK)) + { + /* Remove the prefix 0x40 for keygrip computation. */ + raw = mpi_get_opaque (values[5], &n); + if (raw) + { + n = (n + 7)/8; + + if (n > 1 && (n%2) && raw[0] == 0x40) + if (!_gcry_mpi_set_opaque_copy (values[5], raw + 1, (n - 1)*8)) + rc = gpg_err_code_from_syserror (); + } + else + { + rc = GPG_ERR_INV_OBJ; + goto leave; + } + } + + /* Hash them all. 
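/* [Editor's note, not part of the upstream patch: the loop that follows
 * hashes each key component in the canonical "(1:<name><len>:<bytes>)"
 * framing; the keygrip is the SHA-1 digest of that concatenation. The same
 * framing expressed against the public API, with an illustrative helper
 * name:]
 */
#include <stdio.h>
#include <string.h>
#include <gcrypt.h>

static void
hash_component (gcry_md_hd_t md, char name,
                const unsigned char *raw, unsigned int rawlen)
{
  char buf[30];

  snprintf (buf, sizeof buf, "(1:%c%u:", name, rawlen);
  gcry_md_write (md, buf, strlen (buf));   /* e.g. "(1:p32:" */
  gcry_md_write (md, raw, rawlen);
  gcry_md_write (md, ")", 1);
}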
*/ + for (idx = 0; idx < N_COMPONENTS; idx++) + { + char buf[30]; + + if (mpi_is_opaque (values[idx])) + { + raw = mpi_get_opaque (values[idx], &n); + n = (n + 7)/8; + snprintf (buf, sizeof buf, "(1:%c%u:", names[idx], n); + _gcry_md_write (md, buf, strlen (buf)); + _gcry_md_write (md, raw, n); + _gcry_md_write (md, ")", 1); + } + else + { + unsigned char *rawmpi; + unsigned int rawmpilen; + + rawmpi = _gcry_mpi_get_buffer (values[idx], 0, &rawmpilen, NULL); + if (!rawmpi) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + snprintf (buf, sizeof buf, "(1:%c%u:", names[idx], rawmpilen); + _gcry_md_write (md, buf, strlen (buf)); + _gcry_md_write (md, rawmpi, rawmpilen); + _gcry_md_write (md, ")", 1); + xfree (rawmpi); + } + } + + leave: + xfree (curvename); + sexp_release (l1); + for (idx = 0; idx < N_COMPONENTS; idx++) + _gcry_mpi_release (values[idx]); + + return rc; +#undef N_COMPONENTS +} + + + +/* + Low-level API helper functions. + */ + +/* This is the worker function for gcry_pubkey_get_sexp for ECC + algorithms. Note that the caller has already stored NULL at + R_SEXP. */ +gpg_err_code_t +_gcry_pk_ecc_get_sexp (gcry_sexp_t *r_sexp, int mode, mpi_ec_t ec) +{ + gpg_err_code_t rc; + gcry_mpi_t mpi_G = NULL; + gcry_mpi_t mpi_Q = NULL; + + if (!ec->p || !ec->a || !ec->b || !ec->G || !ec->n) + return GPG_ERR_BAD_CRYPT_CTX; + + if (mode == GCRY_PK_GET_SECKEY && !ec->d) + return GPG_ERR_NO_SECKEY; + + /* Compute the public point if it is missing. */ + if (!ec->Q && ec->d) + ec->Q = _gcry_ecc_compute_public (NULL, ec); + + /* Encode G and Q. */ + mpi_G = _gcry_mpi_ec_ec2os (ec->G, ec); + if (!mpi_G) + { + rc = GPG_ERR_BROKEN_PUBKEY; + goto leave; + } + if (!ec->Q) + { + rc = GPG_ERR_BAD_CRYPT_CTX; + goto leave; + } + + if (ec->dialect == ECC_DIALECT_ED25519) + { + unsigned char *encpk; + unsigned int encpklen; + + rc = _gcry_ecc_eddsa_encodepoint (ec->Q, ec, NULL, NULL, 0, + &encpk, &encpklen); + if (rc) + goto leave; + mpi_Q = mpi_set_opaque (NULL, encpk, encpklen*8); + encpk = NULL; + } + else if (ec->model == MPI_EC_MONTGOMERY) + { + unsigned char *encpk; + unsigned int encpklen; + + rc = _gcry_ecc_mont_encodepoint (ec->Q->x, ec->nbits, + ec->dialect != ECC_DIALECT_SAFECURVE, + &encpk, &encpklen); + if (rc) + goto leave; + mpi_Q = mpi_set_opaque (NULL, encpk, encpklen*8); + } + else + { + mpi_Q = _gcry_mpi_ec_ec2os (ec->Q, ec); + } + if (!mpi_Q) + { + rc = GPG_ERR_BROKEN_PUBKEY; + goto leave; + } + + /* Fixme: We should return a curve name instead of the parameters if + if know that they match a curve. */ + + if (ec->d && (!mode || mode == GCRY_PK_GET_SECKEY)) + { + /* Let's return a private key. */ + rc = sexp_build (r_sexp, NULL, + "(private-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)(d%m)))", + ec->p, ec->a, ec->b, mpi_G, ec->n, ec->h, mpi_Q, ec->d); + } + else if (ec->Q) + { + /* Let's return a public key. */ + rc = sexp_build (r_sexp, NULL, + "(public-key(ecc(p%m)(a%m)(b%m)(g%m)(n%m)(h%u)(q%m)))", + ec->p, ec->a, ec->b, mpi_G, ec->n, ec->h, mpi_Q); + } + else + rc = GPG_ERR_BAD_CRYPT_CTX; + + leave: + mpi_free (mpi_Q); + mpi_free (mpi_G); + return rc; +} + + + +/* + Self-test section. 
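/* [Editor's note, not part of the upstream patch: _gcry_pk_ecc_get_sexp
 * above backs the public gcry_pubkey_get_sexp function. A usage sketch,
 * assuming CTX was obtained e.g. via gcry_mpi_ec_new; the wrapper name is
 * illustrative:]
 */
#include <gcrypt.h>

static gcry_error_t
dump_public_key (gcry_ctx_t ctx)
{
  gcry_sexp_t pub = NULL;
  gcry_error_t err;

  err = gcry_pubkey_get_sexp (&pub, GCRY_PK_GET_PUBKEY, ctx);
  if (!err)
    {
      gcry_sexp_dump (pub);        /* debug-print the s-expression */
      gcry_sexp_release (pub);
    }
  return err;
}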
+ */ + +static const char * +selftest_sign (gcry_sexp_t pkey, gcry_sexp_t skey) +{ + /* Sample data from RFC 6979 section A.2.5, hash is of message "sample" */ + static const char sample_data[] = + "(data (flags rfc6979)" + " (hash sha256 #af2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e98915" + /**/ "62113d8a62add1bf#))"; + static const char sample_data_bad[] = + "(data (flags rfc6979)" + " (hash sha256 #bf2bdbe1aa9b6ec1e2ade1d694f41fc71a831d0268e98915" + /**/ "62113d8a62add1bf#))"; + static const char signature_r[] = + "efd48b2aacb6a8fd1140dd9cd45e81d69d2c877b56aaf991c34d0ea84eaf3716"; + static const char signature_s[] = + "f7cb1c942d657c41d436c7a1b6e29f65f3e900dbb9aff4064dc4ab2f843acda8"; + + const char *errtxt = NULL; + gcry_error_t err; + gcry_sexp_t data = NULL; + gcry_sexp_t data_bad = NULL; + gcry_sexp_t sig = NULL; + gcry_sexp_t l1 = NULL; + gcry_sexp_t l2 = NULL; + gcry_mpi_t r = NULL; + gcry_mpi_t s = NULL; + gcry_mpi_t calculated_r = NULL; + gcry_mpi_t calculated_s = NULL; + int cmp; + + err = sexp_sscan (&data, NULL, sample_data, strlen (sample_data)); + if (!err) + err = sexp_sscan (&data_bad, NULL, + sample_data_bad, strlen (sample_data_bad)); + if (!err) + err = _gcry_mpi_scan (&r, GCRYMPI_FMT_HEX, signature_r, 0, NULL); + if (!err) + err = _gcry_mpi_scan (&s, GCRYMPI_FMT_HEX, signature_s, 0, NULL); + + if (err) + { + errtxt = "converting data failed"; + goto leave; + } + + err = _gcry_pk_sign (&sig, data, skey); + if (err) + { + errtxt = "signing failed"; + goto leave; + } + + /* check against known signature */ + errtxt = "signature validity failed"; + l1 = _gcry_sexp_find_token (sig, "sig-val", 0); + if (!l1) + goto leave; + l2 = _gcry_sexp_find_token (l1, "ecdsa", 0); + if (!l2) + goto leave; + + sexp_release (l1); + l1 = l2; + + l2 = _gcry_sexp_find_token (l1, "r", 0); + if (!l2) + goto leave; + calculated_r = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG); + if (!calculated_r) + goto leave; + + sexp_release (l2); + l2 = _gcry_sexp_find_token (l1, "s", 0); + if (!l2) + goto leave; + calculated_s = _gcry_sexp_nth_mpi (l2, 1, GCRYMPI_FMT_USG); + if (!calculated_s) + goto leave; + + errtxt = "known sig check failed"; + + cmp = _gcry_mpi_cmp (r, calculated_r); + if (cmp) + goto leave; + cmp = _gcry_mpi_cmp (s, calculated_s); + if (cmp) + goto leave; + + errtxt = NULL; + + /* verify generated signature */ + err = _gcry_pk_verify (sig, data, pkey); + if (err) + { + errtxt = "verify failed"; + goto leave; + } + err = _gcry_pk_verify (sig, data_bad, pkey); + if (gcry_err_code (err) != GPG_ERR_BAD_SIGNATURE) + { + errtxt = "bad signature not detected"; + goto leave; + } + + + leave: + sexp_release (sig); + sexp_release (data_bad); + sexp_release (data); + sexp_release (l1); + sexp_release (l2); + mpi_release (r); + mpi_release (s); + mpi_release (calculated_r); + mpi_release (calculated_s); + return errtxt; +} + + +static gpg_err_code_t +selftests_ecdsa (selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + gcry_error_t err; + gcry_sexp_t skey = NULL; + gcry_sexp_t pkey = NULL; + + what = "convert"; + err = sexp_sscan (&skey, NULL, sample_secret_key_secp256, + strlen (sample_secret_key_secp256)); + if (!err) + err = sexp_sscan (&pkey, NULL, sample_public_key_secp256, + strlen (sample_public_key_secp256)); + if (err) + { + errtxt = _gcry_strerror (err); + goto failed; + } + + what = "key consistency"; + err = ecc_check_secret_key(skey); + if (err) + { + errtxt = _gcry_strerror (err); + goto failed; + } + + what = "sign"; + errtxt = selftest_sign (pkey, skey); + 
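/* [Editor's note, not part of the upstream patch: as I understand the
 * control interface, the known-answer test above is reachable from
 * application code through gcry_control; a minimal sketch:]
 */
#include <stdio.h>
#include <gcrypt.h>

int
main (void)
{
  if (gcry_control (GCRYCTL_SELFTEST))   /* runs the algorithm self-tests */
    {
      fputs ("libgcrypt self-tests failed\n", stderr);
      return 1;
    }
  return 0;
}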
if (errtxt) + goto failed; + + sexp_release(pkey); + sexp_release(skey); + return 0; /* Succeeded. */ + + failed: + sexp_release(pkey); + sexp_release(skey); + if (report) + report ("pubkey", GCRY_PK_ECC, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + (void)extended; + + if (algo != GCRY_PK_ECC) + return GPG_ERR_PUBKEY_ALGO; + + return selftests_ecdsa (report); +} + + + + +gcry_pk_spec_t _gcry_pubkey_spec_ecc = + { + GCRY_PK_ECC, { 0, 1 }, + (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR), + "ECC", ecc_names, + "pabgnhq", "pabgnhqd", "se", "rs", "pabgnhq", + ecc_generate, + ecc_check_secret_key, + ecc_encrypt_raw, + ecc_decrypt_raw, + ecc_sign, + ecc_verify, + ecc_get_nbits, + run_selftests, + compute_keygrip, + _gcry_ecc_get_curve, + _gcry_ecc_get_param_sexp + }; diff --git a/comm/third_party/libgcrypt/cipher/elgamal.c b/comm/third_party/libgcrypt/cipher/elgamal.c new file mode 100644 index 0000000000..4eb52d620b --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/elgamal.c @@ -0,0 +1,1149 @@ +/* Elgamal.c - Elgamal Public Key encryption + * Copyright (C) 1998, 2000, 2001, 2002, 2003, + * 2008 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + * + * For a description of the algorithm, see: + * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996. + * ISBN 0-471-11709-9. Pages 476 ff. + */ + +#include +#include +#include +#include +#include "g10lib.h" +#include "mpi.h" +#include "cipher.h" +#include "pubkey-internal.h" + + +/* Blinding is used to mitigate side-channel attacks. You may undef + this to speed up the operation in case the system is secured + against physical and network mounted side-channel attacks. 
*/ +#define USE_BLINDING 1 + + +typedef struct +{ + gcry_mpi_t p; /* prime */ + gcry_mpi_t g; /* group generator */ + gcry_mpi_t y; /* g^x mod p */ +} ELG_public_key; + + +typedef struct +{ + gcry_mpi_t p; /* prime */ + gcry_mpi_t g; /* group generator */ + gcry_mpi_t y; /* g^x mod p */ + gcry_mpi_t x; /* secret exponent */ +} ELG_secret_key; + + +static const char *elg_names[] = + { + "elg", + "openpgp-elg", + "openpgp-elg-sig", + NULL, + }; + + +static int test_keys (ELG_secret_key *sk, unsigned int nbits, int nodie); +static gcry_mpi_t gen_k (gcry_mpi_t p, int small_k); +static gcry_err_code_t generate (ELG_secret_key *sk, unsigned nbits, + gcry_mpi_t **factors); +static int check_secret_key (ELG_secret_key *sk); +static void do_encrypt (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, + ELG_public_key *pkey); +static void decrypt (gcry_mpi_t output, gcry_mpi_t a, gcry_mpi_t b, + ELG_secret_key *skey); +static void sign (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, + ELG_secret_key *skey); +static int verify (gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, + ELG_public_key *pkey); +static unsigned int elg_get_nbits (gcry_sexp_t parms); + + +static void (*progress_cb) (void *, const char *, int, int, int); +static void *progress_cb_data; + +void +_gcry_register_pk_elg_progress (void (*cb) (void *, const char *, + int, int, int), + void *cb_data) +{ + progress_cb = cb; + progress_cb_data = cb_data; +} + + +static void +progress (int c) +{ + if (progress_cb) + progress_cb (progress_cb_data, "pk_elg", c, 0, 0); +} + + +/**************** + * Michael Wiener's table on subgroup sizes to match field sizes. + * (floating around somewhere, probably based on the paper from + * Eurocrypt 96, page 332) + */ +static unsigned int +wiener_map( unsigned int n ) +{ + static struct { unsigned int p_n, q_n; } t[] = + { /* p q attack cost */ + { 512, 119 }, /* 9 x 10^17 */ + { 768, 145 }, /* 6 x 10^21 */ + { 1024, 165 }, /* 7 x 10^24 */ + { 1280, 183 }, /* 3 x 10^27 */ + { 1536, 198 }, /* 7 x 10^29 */ + { 1792, 212 }, /* 9 x 10^31 */ + { 2048, 225 }, /* 8 x 10^33 */ + { 2304, 237 }, /* 5 x 10^35 */ + { 2560, 249 }, /* 3 x 10^37 */ + { 2816, 259 }, /* 1 x 10^39 */ + { 3072, 269 }, /* 3 x 10^40 */ + { 3328, 279 }, /* 8 x 10^41 */ + { 3584, 288 }, /* 2 x 10^43 */ + { 3840, 296 }, /* 4 x 10^44 */ + { 4096, 305 }, /* 7 x 10^45 */ + { 4352, 313 }, /* 1 x 10^47 */ + { 4608, 320 }, /* 2 x 10^48 */ + { 4864, 328 }, /* 2 x 10^49 */ + { 5120, 335 }, /* 3 x 10^50 */ + { 0, 0 } + }; + int i; + + for(i=0; t[i].p_n; i++ ) + { + if( n <= t[i].p_n ) + return t[i].q_n; + } + /* Not in table - use an arbitrary high number. */ + return n / 8 + 200; +} + +static int +test_keys ( ELG_secret_key *sk, unsigned int nbits, int nodie ) +{ + ELG_public_key pk; + gcry_mpi_t test = mpi_new ( 0 ); + gcry_mpi_t out1_a = mpi_new ( nbits ); + gcry_mpi_t out1_b = mpi_new ( nbits ); + gcry_mpi_t out2 = mpi_new ( nbits ); + int failed = 0; + + pk.p = sk->p; + pk.g = sk->g; + pk.y = sk->y; + + _gcry_mpi_randomize ( test, nbits, GCRY_WEAK_RANDOM ); + + do_encrypt ( out1_a, out1_b, test, &pk ); + decrypt ( out2, out1_a, out1_b, sk ); + if ( mpi_cmp( test, out2 ) ) + failed |= 1; + + sign ( out1_a, out1_b, test, sk ); + if ( !verify( out1_a, out1_b, test, &pk ) ) + failed |= 2; + + _gcry_mpi_release ( test ); + _gcry_mpi_release ( out1_a ); + _gcry_mpi_release ( out1_b ); + _gcry_mpi_release ( out2 ); + + if (failed && !nodie) + log_fatal ("Elgamal test key for %s %s failed\n", + (failed & 1)? "encrypt+decrypt":"", + (failed & 2)? 
"sign+verify":""); + if (failed && DBG_CIPHER) + log_debug ("Elgamal test key for %s %s failed\n", + (failed & 1)? "encrypt+decrypt":"", + (failed & 2)? "sign+verify":""); + + return failed; +} + + +/**************** + * Generate a random secret exponent k from prime p, so that k is + * relatively prime to p-1. With SMALL_K set, k will be selected for + * better encryption performance - this must never be used signing! + */ +static gcry_mpi_t +gen_k( gcry_mpi_t p, int small_k ) +{ + gcry_mpi_t k = mpi_alloc_secure( 0 ); + gcry_mpi_t temp = mpi_alloc( mpi_get_nlimbs(p) ); + gcry_mpi_t p_1 = mpi_copy(p); + unsigned int orig_nbits = mpi_get_nbits(p); + unsigned int nbits, nbytes; + char *rndbuf = NULL; + + if (small_k) + { + /* Using a k much lesser than p is sufficient for encryption and + * it greatly improves the encryption performance. We use + * Wiener's table and add a large safety margin. */ + nbits = wiener_map( orig_nbits ) * 3 / 2; + if( nbits >= orig_nbits ) + BUG(); + } + else + nbits = orig_nbits; + + + nbytes = (nbits+7)/8; + if( DBG_CIPHER ) + log_debug("choosing a random k\n"); + mpi_sub_ui( p_1, p, 1); + for(;;) + { + if( !rndbuf || nbits < 32 ) + { + xfree(rndbuf); + rndbuf = _gcry_random_bytes_secure( nbytes, GCRY_STRONG_RANDOM ); + } + else + { + /* Change only some of the higher bits. We could improve + this by directly requesting more memory at the first call + to get_random_bytes() and use this the here maybe it is + easier to do this directly in random.c Anyway, it is + highly inlikely that we will ever reach this code. */ + char *pp = _gcry_random_bytes_secure( 4, GCRY_STRONG_RANDOM ); + memcpy( rndbuf, pp, 4 ); + xfree(pp); + } + _gcry_mpi_set_buffer( k, rndbuf, nbytes, 0 ); + + for(;;) + { + if( !(mpi_cmp( k, p_1 ) < 0) ) /* check: k < (p-1) */ + { + if( DBG_CIPHER ) + progress('+'); + break; /* no */ + } + if( !(mpi_cmp_ui( k, 0 ) > 0) ) /* check: k > 0 */ + { + if( DBG_CIPHER ) + progress('-'); + break; /* no */ + } + if (mpi_gcd( temp, k, p_1 )) + goto found; /* okay, k is relative prime to (p-1) */ + mpi_add_ui( k, k, 1 ); + if( DBG_CIPHER ) + progress('.'); + } + } + found: + xfree (rndbuf); + if( DBG_CIPHER ) + progress('\n'); + mpi_free(p_1); + mpi_free(temp); + + return k; +} + +/**************** + * Generate a key pair with a key of size NBITS + * Returns: 2 structures filled with all needed values + * and an array with n-1 factors of (p-1) + */ +static gcry_err_code_t +generate ( ELG_secret_key *sk, unsigned int nbits, gcry_mpi_t **ret_factors ) +{ + gcry_err_code_t rc; + gcry_mpi_t p; /* the prime */ + gcry_mpi_t p_min1; + gcry_mpi_t g; + gcry_mpi_t x; /* the secret exponent */ + gcry_mpi_t y; + unsigned int qbits; + unsigned int xbits; + byte *rndbuf; + + p_min1 = mpi_new ( nbits ); + qbits = wiener_map( nbits ); + if( qbits & 1 ) /* better have a even one */ + qbits++; + g = mpi_alloc(1); + rc = _gcry_generate_elg_prime (0, nbits, qbits, g, &p, ret_factors); + if (rc) + { + mpi_free (p_min1); + mpi_free (g); + return rc; + } + mpi_sub_ui(p_min1, p, 1); + + + /* Select a random number which has these properties: + * 0 < x < p-1 + * This must be a very good random number because this is the + * secret part. The prime is public and may be shared anyway, + * so a random generator level of 1 is used for the prime. + * + * I don't see a reason to have a x of about the same size + * as the p. It should be sufficient to have one about the size + * of q or the later used k plus a large safety margin. Decryption + * will be much faster with such an x. 
+ */ + xbits = qbits * 3 / 2; + if( xbits >= nbits ) + BUG(); + x = mpi_snew ( xbits ); + if( DBG_CIPHER ) + log_debug("choosing a random x of size %u\n", xbits ); + rndbuf = NULL; + do + { + if( DBG_CIPHER ) + progress('.'); + if( rndbuf ) + { /* Change only some of the higher bits */ + if( xbits < 16 ) /* should never happen ... */ + { + xfree(rndbuf); + rndbuf = _gcry_random_bytes_secure ((xbits+7)/8, + GCRY_VERY_STRONG_RANDOM); + } + else + { + char *r = _gcry_random_bytes_secure (2, GCRY_VERY_STRONG_RANDOM); + memcpy(rndbuf, r, 2 ); + xfree (r); + } + } + else + { + rndbuf = _gcry_random_bytes_secure ((xbits+7)/8, + GCRY_VERY_STRONG_RANDOM ); + } + _gcry_mpi_set_buffer( x, rndbuf, (xbits+7)/8, 0 ); + mpi_clear_highbit( x, xbits+1 ); + } + while( !( mpi_cmp_ui( x, 0 )>0 && mpi_cmp( x, p_min1 )<0 ) ); + xfree(rndbuf); + + y = mpi_new (nbits); + mpi_powm( y, g, x, p ); + + if( DBG_CIPHER ) + { + progress ('\n'); + log_mpidump ("elg p", p ); + log_mpidump ("elg g", g ); + log_mpidump ("elg y", y ); + log_mpidump ("elg x", x ); + } + + /* Copy the stuff to the key structures */ + sk->p = p; + sk->g = g; + sk->y = y; + sk->x = x; + + _gcry_mpi_release ( p_min1 ); + + /* Now we can test our keys (this should never fail!) */ + test_keys ( sk, nbits - 64, 0 ); + + return 0; +} + + +/* Generate a key pair with a key of size NBITS not using a random + value for the secret key but the one given as X. This is useful to + implement a passphrase based decryption for a public key based + encryption. It has appliactions in backup systems. + + Returns: A structure filled with all needed values and an array + with n-1 factors of (p-1). */ +static gcry_err_code_t +generate_using_x (ELG_secret_key *sk, unsigned int nbits, gcry_mpi_t x, + gcry_mpi_t **ret_factors ) +{ + gcry_err_code_t rc; + gcry_mpi_t p; /* The prime. */ + gcry_mpi_t p_min1; /* The prime minus 1. */ + gcry_mpi_t g; /* The generator. */ + gcry_mpi_t y; /* g^x mod p. */ + unsigned int qbits; + unsigned int xbits; + + sk->p = NULL; + sk->g = NULL; + sk->y = NULL; + sk->x = NULL; + + /* Do a quick check to see whether X is suitable. */ + xbits = mpi_get_nbits (x); + if ( xbits < 64 || xbits >= nbits ) + return GPG_ERR_INV_VALUE; + + p_min1 = mpi_new ( nbits ); + qbits = wiener_map ( nbits ); + if ( (qbits & 1) ) /* Better have an even one. */ + qbits++; + g = mpi_alloc (1); + rc = _gcry_generate_elg_prime (0, nbits, qbits, g, &p, ret_factors ); + if (rc) + { + mpi_free (p_min1); + mpi_free (g); + return rc; + } + mpi_sub_ui (p_min1, p, 1); + + if (DBG_CIPHER) + log_debug ("using a supplied x of size %u", xbits ); + if ( !(mpi_cmp_ui ( x, 0 ) > 0 && mpi_cmp ( x, p_min1 ) <0 ) ) + { + _gcry_mpi_release ( p_min1 ); + _gcry_mpi_release ( p ); + _gcry_mpi_release ( g ); + return GPG_ERR_INV_VALUE; + } + + y = mpi_new (nbits); + mpi_powm ( y, g, x, p ); + + if ( DBG_CIPHER ) + { + progress ('\n'); + log_mpidump ("elg p", p ); + log_mpidump ("elg g", g ); + log_mpidump ("elg y", y ); + log_mpidump ("elg x", x ); + } + + /* Copy the stuff to the key structures */ + sk->p = p; + sk->g = g; + sk->y = y; + sk->x = mpi_copy (x); + + _gcry_mpi_release ( p_min1 ); + + /* Now we can test our keys. */ + if ( test_keys ( sk, nbits - 64, 1 ) ) + { + _gcry_mpi_release ( sk->p ); sk->p = NULL; + _gcry_mpi_release ( sk->g ); sk->g = NULL; + _gcry_mpi_release ( sk->y ); sk->y = NULL; + _gcry_mpi_release ( sk->x ); sk->x = NULL; + return GPG_ERR_BAD_SECKEY; + } + + return 0; +} + + +/**************** + * Test whether the secret key is valid. 
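/* [Editor's note, not part of the upstream patch: check_secret_key below
 * simply recomputes y' = g^x mod p and compares it with the stored y. The
 * same check in terms of the public MPI API; the function name is
 * illustrative:]
 */
#include <gcrypt.h>

static int
elg_key_is_consistent (gcry_mpi_t p, gcry_mpi_t g, gcry_mpi_t y, gcry_mpi_t x)
{
  gcry_mpi_t t = gcry_mpi_new (0);
  int ok;

  gcry_mpi_powm (t, g, x, p);        /* t = g^x mod p */
  ok = (gcry_mpi_cmp (t, y) == 0);
  gcry_mpi_release (t);
  return ok;
}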
+ * Returns: if this is a valid key. + */ +static int +check_secret_key( ELG_secret_key *sk ) +{ + int rc; + gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs(sk->y) ); + + mpi_powm (y, sk->g, sk->x, sk->p); + rc = !mpi_cmp( y, sk->y ); + mpi_free( y ); + return rc; +} + + +static void +do_encrypt(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_public_key *pkey ) +{ + gcry_mpi_t k; + + /* Note: maybe we should change the interface, so that it + * is possible to check that input is < p and return an + * error code. + */ + + k = gen_k( pkey->p, 1 ); + mpi_powm (a, pkey->g, k, pkey->p); + + /* b = (y^k * input) mod p + * = ((y^k mod p) * (input mod p)) mod p + * and because input is < p + * = ((y^k mod p) * input) mod p + */ + mpi_powm (b, pkey->y, k, pkey->p); + mpi_mulm (b, b, input, pkey->p); +#if 0 + if( DBG_CIPHER ) + { + log_mpidump("elg encrypted y", pkey->y); + log_mpidump("elg encrypted p", pkey->p); + log_mpidump("elg encrypted k", k); + log_mpidump("elg encrypted M", input); + log_mpidump("elg encrypted a", a); + log_mpidump("elg encrypted b", b); + } +#endif + mpi_free(k); +} + + + + +static void +decrypt (gcry_mpi_t output, gcry_mpi_t a, gcry_mpi_t b, ELG_secret_key *skey ) +{ + gcry_mpi_t t1, t2, r; + unsigned int nbits = mpi_get_nbits (skey->p); + + mpi_normalize (a); + mpi_normalize (b); + + t1 = mpi_snew (nbits); + +#ifdef USE_BLINDING + + t2 = mpi_snew (nbits); + r = mpi_new (nbits); + + /* We need a random number of about the prime size. The random + number merely needs to be unpredictable; thus we use level 0. */ + _gcry_mpi_randomize (r, nbits, GCRY_WEAK_RANDOM); + + /* t1 = r^x mod p */ + mpi_powm (t1, r, skey->x, skey->p); + /* t2 = (a * r)^-x mod p */ + mpi_mulm (t2, a, r, skey->p); + mpi_powm (t2, t2, skey->x, skey->p); + mpi_invm (t2, t2, skey->p); + /* t1 = (t1 * t2) mod p*/ + mpi_mulm (t1, t1, t2, skey->p); + + mpi_free (r); + mpi_free (t2); + +#else /*!USE_BLINDING*/ + + /* output = b/(a^x) mod p */ + mpi_powm (t1, a, skey->x, skey->p); + mpi_invm (t1, t1, skey->p); + +#endif /*!USE_BLINDING*/ + + mpi_mulm (output, b, t1, skey->p); + +#if 0 + if( DBG_CIPHER ) + { + log_mpidump ("elg decrypted x", skey->x); + log_mpidump ("elg decrypted p", skey->p); + log_mpidump ("elg decrypted a", a); + log_mpidump ("elg decrypted b", b); + log_mpidump ("elg decrypted M", output); + } +#endif + mpi_free (t1); +} + + +/**************** + * Make an Elgamal signature out of INPUT + */ + +static void +sign(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_secret_key *skey ) +{ + gcry_mpi_t k; + gcry_mpi_t t = mpi_alloc( mpi_get_nlimbs(a) ); + gcry_mpi_t inv = mpi_alloc( mpi_get_nlimbs(a) ); + gcry_mpi_t p_1 = mpi_copy(skey->p); + + /* + * b = (t * inv) mod (p-1) + * b = (t * inv(k,(p-1),(p-1)) mod (p-1) + * b = (((M-x*a) mod (p-1)) * inv(k,(p-1),(p-1))) mod (p-1) + * + */ + mpi_sub_ui(p_1, p_1, 1); + k = gen_k( skey->p, 0 /* no small K ! */ ); + mpi_powm( a, skey->g, k, skey->p ); + mpi_mul(t, skey->x, a ); + mpi_subm(t, input, t, p_1 ); + mpi_invm(inv, k, p_1 ); + mpi_mulm(b, t, inv, p_1 ); + +#if 0 + if( DBG_CIPHER ) + { + log_mpidump ("elg sign p", skey->p); + log_mpidump ("elg sign g", skey->g); + log_mpidump ("elg sign y", skey->y); + log_mpidump ("elg sign x", skey->x); + log_mpidump ("elg sign k", k); + log_mpidump ("elg sign M", input); + log_mpidump ("elg sign a", a); + log_mpidump ("elg sign b", b); + } +#endif + mpi_free(k); + mpi_free(t); + mpi_free(inv); + mpi_free(p_1); +} + + +/**************** + * Returns true if the signature composed of A and B is valid. 
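/* [Editor's note, not part of the upstream patch: the blinding in decrypt()
 * above works because, for a random r,
 *
 *   t1 * t2 = r^x * ((a*r)^x)^-1 = r^x * a^-x * r^-x = a^-x  (mod p),
 *
 * so output = b * a^-x = m exactly as in the unblinded path, while the
 * exponentiation never operates on the attacker-controlled a directly.
 * A toy end-to-end check with hypothetical parameters (word-sized
 * arithmetic only; the real code uses MPIs):]
 */
#include <stdio.h>

static unsigned long
pw (unsigned long b, unsigned long e, unsigned long m)   /* b^e mod m */
{
  unsigned long r = 1;
  for (b %= m; e; e >>= 1)
    {
      if (e & 1)
        r = r * b % m;
      b = b * b % m;
    }
  return r;
}

int
main (void)
{
  const unsigned long p = 23, g = 5, x = 6, k = 3, m = 20, r = 7;
  unsigned long y  = pw (g, x, p);                         /* public key  */
  unsigned long a  = pw (g, k, p);                         /* g^k         */
  unsigned long b  = pw (y, k, p) * m % p;                 /* y^k * m     */
  unsigned long t1 = pw (r, x, p);                         /* r^x         */
  unsigned long t2 = pw (pw (a * r % p, x, p), p - 2, p);  /* ((a*r)^x)^-1
                                                              via Fermat  */
  printf ("%lu == %lu\n", b * (t1 * t2 % p) % p, m);       /* 20 == 20    */
  return 0;
}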
+ */ +static int +verify(gcry_mpi_t a, gcry_mpi_t b, gcry_mpi_t input, ELG_public_key *pkey ) +{ + int rc; + gcry_mpi_t t1; + gcry_mpi_t t2; + gcry_mpi_t base[4]; + gcry_mpi_t ex[4]; + + if( !(mpi_cmp_ui( a, 0 ) > 0 && mpi_cmp( a, pkey->p ) < 0) ) + return 0; /* assertion 0 < a < p failed */ + + t1 = mpi_alloc( mpi_get_nlimbs(a) ); + t2 = mpi_alloc( mpi_get_nlimbs(a) ); + +#if 0 + /* t1 = (y^a mod p) * (a^b mod p) mod p */ + gcry_mpi_powm( t1, pkey->y, a, pkey->p ); + gcry_mpi_powm( t2, a, b, pkey->p ); + mpi_mulm( t1, t1, t2, pkey->p ); + + /* t2 = g ^ input mod p */ + gcry_mpi_powm( t2, pkey->g, input, pkey->p ); + + rc = !mpi_cmp( t1, t2 ); +#elif 0 + /* t1 = (y^a mod p) * (a^b mod p) mod p */ + base[0] = pkey->y; ex[0] = a; + base[1] = a; ex[1] = b; + base[2] = NULL; ex[2] = NULL; + mpi_mulpowm( t1, base, ex, pkey->p ); + + /* t2 = g ^ input mod p */ + gcry_mpi_powm( t2, pkey->g, input, pkey->p ); + + rc = !mpi_cmp( t1, t2 ); +#else + /* t1 = g ^ - input * y ^ a * a ^ b mod p */ + mpi_invm(t2, pkey->g, pkey->p ); + base[0] = t2 ; ex[0] = input; + base[1] = pkey->y; ex[1] = a; + base[2] = a; ex[2] = b; + base[3] = NULL; ex[3] = NULL; + mpi_mulpowm( t1, base, ex, pkey->p ); + rc = !mpi_cmp_ui( t1, 1 ); + +#endif + + mpi_free(t1); + mpi_free(t2); + return rc; +} + +/********************************************* + ************** interface ****************** + *********************************************/ + +static gpg_err_code_t +elg_generate (const gcry_sexp_t genparms, gcry_sexp_t *r_skey) +{ + gpg_err_code_t rc; + unsigned int nbits; + ELG_secret_key sk; + gcry_mpi_t xvalue = NULL; + gcry_sexp_t l1; + gcry_mpi_t *factors = NULL; + gcry_sexp_t misc_info = NULL; + + memset (&sk, 0, sizeof sk); + + rc = _gcry_pk_util_get_nbits (genparms, &nbits); + if (rc) + return rc; + + /* Parse the optional xvalue element. 
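/* [Editor's note, not part of the upstream patch: why the single-equation
 * form used in verify() above is sound. With a = g^k and
 * b = (M - x*a) * k^-1 (mod p-1),
 *
 *   a^b = g^(k*b) = g^(M - x*a)     and     y^a = g^(x*a),
 *
 * hence g^-M * y^a * a^b = g^(-M + x*a + M - x*a) = 1 (mod p), which is
 * exactly the t1 == 1 test evaluated with one mpi_mulpowm call.]
 */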
*/ + l1 = sexp_find_token (genparms, "xvalue", 0); + if (l1) + { + xvalue = sexp_nth_mpi (l1, 1, 0); + sexp_release (l1); + if (!xvalue) + return GPG_ERR_BAD_MPI; + } + + if (xvalue) + { + rc = generate_using_x (&sk, nbits, xvalue, &factors); + mpi_free (xvalue); + } + else + { + rc = generate (&sk, nbits, &factors); + } + if (rc) + goto leave; + + if (factors && factors[0]) + { + int nfac; + void **arg_list; + char *buffer, *p; + + for (nfac = 0; factors[nfac]; nfac++) + ; + arg_list = xtrycalloc (nfac+1, sizeof *arg_list); + if (!arg_list) + { + rc = gpg_err_code_from_syserror (); + goto leave; + } + buffer = xtrymalloc (30 + nfac*2 + 2 + 1); + if (!buffer) + { + rc = gpg_err_code_from_syserror (); + xfree (arg_list); + goto leave; + } + p = stpcpy (buffer, "(misc-key-info(pm1-factors"); + for(nfac = 0; factors[nfac]; nfac++) + { + p = stpcpy (p, "%m"); + arg_list[nfac] = factors + nfac; + } + p = stpcpy (p, "))"); + rc = sexp_build_array (&misc_info, NULL, buffer, arg_list); + xfree (arg_list); + xfree (buffer); + if (rc) + goto leave; + } + + rc = sexp_build (r_skey, NULL, + "(key-data" + " (public-key" + " (elg(p%m)(g%m)(y%m)))" + " (private-key" + " (elg(p%m)(g%m)(y%m)(x%m)))" + " %S)", + sk.p, sk.g, sk.y, + sk.p, sk.g, sk.y, sk.x, + misc_info); + + leave: + mpi_free (sk.p); + mpi_free (sk.g); + mpi_free (sk.y); + mpi_free (sk.x); + sexp_release (misc_info); + if (factors) + { + gcry_mpi_t *mp; + for (mp = factors; *mp; mp++) + mpi_free (*mp); + xfree (factors); + } + + return rc; +} + + +static gcry_err_code_t +elg_check_secret_key (gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + ELG_secret_key sk = {NULL, NULL, NULL, NULL}; + + rc = sexp_extract_param (keyparms, NULL, "pgyx", + &sk.p, &sk.g, &sk.y, &sk.x, + NULL); + if (rc) + goto leave; + + if (!check_secret_key (&sk)) + rc = GPG_ERR_BAD_SECKEY; + + leave: + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + if (DBG_CIPHER) + log_debug ("elg_testkey => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +elg_encrypt (gcry_sexp_t *r_ciph, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t mpi_a = NULL; + gcry_mpi_t mpi_b = NULL; + gcry_mpi_t data = NULL; + ELG_public_key pk = { NULL, NULL, NULL }; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_ENCRYPT, + elg_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("elg_encrypt data", data); + if (mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "pgy", + &pk.p, &pk.g, &pk.y, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("elg_encrypt p", pk.p); + log_mpidump ("elg_encrypt g", pk.g); + log_mpidump ("elg_encrypt y", pk.y); + } + + /* Do Elgamal computation and build result. 
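/* [Editor's note, not part of the upstream patch: elg_generate above is
 * driven through the generic gcry_pk_genkey entry point. A usage sketch;
 * the wrapper name is illustrative:]
 */
#include <gcrypt.h>

static gcry_error_t
make_elg_key (gcry_sexp_t *r_keypair)
{
  gcry_sexp_t parms;
  gcry_error_t err;

  err = gcry_sexp_build (&parms, NULL, "(genkey (elg (nbits 4:2048)))");
  if (err)
    return err;
  err = gcry_pk_genkey (r_keypair, parms);   /* yields the (key-data ...)
                                                s-expression built above */
  gcry_sexp_release (parms);
  return err;
}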
*/ + mpi_a = mpi_new (0); + mpi_b = mpi_new (0); + do_encrypt (mpi_a, mpi_b, data, &pk); + rc = sexp_build (r_ciph, NULL, "(enc-val(elg(a%m)(b%m)))", mpi_a, mpi_b); + + leave: + _gcry_mpi_release (mpi_a); + _gcry_mpi_release (mpi_b); + _gcry_mpi_release (pk.p); + _gcry_mpi_release (pk.g); + _gcry_mpi_release (pk.y); + _gcry_mpi_release (data); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("elg_encrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +elg_decrypt (gcry_sexp_t *r_plain, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gpg_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t data_a = NULL; + gcry_mpi_t data_b = NULL; + ELG_secret_key sk = {NULL, NULL, NULL, NULL}; + gcry_mpi_t plain = NULL; + unsigned char *unpad = NULL; + size_t unpadlen = 0; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_DECRYPT, + elg_get_nbits (keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_preparse_encval (s_data, elg_names, &l1, &ctx); + if (rc) + goto leave; + rc = sexp_extract_param (l1, NULL, "ab", &data_a, &data_b, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("elg_decrypt d_a", data_a); + log_printmpi ("elg_decrypt d_b", data_b); + } + if (mpi_is_opaque (data_a) || mpi_is_opaque (data_b)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "pgyx", + &sk.p, &sk.g, &sk.y, &sk.x, + NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_printmpi ("elg_decrypt p", sk.p); + log_printmpi ("elg_decrypt g", sk.g); + log_printmpi ("elg_decrypt y", sk.y); + if (!fips_mode ()) + log_printmpi ("elg_decrypt x", sk.x); + } + + plain = mpi_snew (ctx.nbits); + decrypt (plain, data_a, data_b, &sk); + if (DBG_CIPHER) + log_printmpi ("elg_decrypt res", plain); + + /* Reverse the encoding and build the s-expression. */ + switch (ctx.encoding) + { + case PUBKEY_ENC_PKCS1: + rc = _gcry_rsa_pkcs1_decode_for_enc (&unpad, &unpadlen, ctx.nbits, plain); + mpi_free (plain); plain = NULL; + if (!rc) + rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad); + break; + + case PUBKEY_ENC_OAEP: + rc = _gcry_rsa_oaep_decode (&unpad, &unpadlen, + ctx.nbits, ctx.hash_algo, plain, + ctx.label, ctx.labellen); + mpi_free (plain); plain = NULL; + if (!rc) + rc = sexp_build (r_plain, NULL, "(value %b)", (int)unpadlen, unpad); + break; + + default: + /* Raw format. For backward compatibility we need to assume a + signed mpi by using the sexp format string "%m". */ + rc = sexp_build (r_plain, NULL, + (ctx.flags & PUBKEY_FLAG_LEGACYRESULT) + ? "%m" : "(value %m)", + plain); + break; + } + + + leave: + xfree (unpad); + _gcry_mpi_release (plain); + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + _gcry_mpi_release (data_a); + _gcry_mpi_release (data_b); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("elg_decrypt => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +elg_sign (gcry_sexp_t *r_sig, gcry_sexp_t s_data, gcry_sexp_t keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_mpi_t data = NULL; + ELG_secret_key sk = {NULL, NULL, NULL, NULL}; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_SIGN, + elg_get_nbits (keyparms)); + + /* Extract the data. 
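/* [Editor's note, not part of the upstream patch: a round trip through
 * elg_encrypt and elg_decrypt via the public API, using the raw encoding
 * handled above; the wrapper name and sample value are illustrative:]
 */
#include <gcrypt.h>

static gcry_error_t
elg_round_trip (gcry_sexp_t pubkey, gcry_sexp_t seckey)
{
  gcry_sexp_t data = NULL, ciph = NULL, plain = NULL;
  gcry_mpi_t m = gcry_mpi_set_ui (NULL, 42);
  gcry_error_t err;

  err = gcry_sexp_build (&data, NULL, "(data (flags raw) (value %m))", m);
  if (!err)
    err = gcry_pk_encrypt (&ciph, data, pubkey);  /* (enc-val (elg (a ..)(b ..))) */
  if (!err)
    err = gcry_pk_decrypt (&plain, ciph, seckey); /* (value ...) */

  gcry_mpi_release (m);
  gcry_sexp_release (data);
  gcry_sexp_release (ciph);
  gcry_sexp_release (plain);
  return err;
}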
*/ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("elg_sign data", data); + if (mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the key. */ + rc = sexp_extract_param (keyparms, NULL, "pgyx", + &sk.p, &sk.g, &sk.y, &sk.x, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("elg_sign p", sk.p); + log_mpidump ("elg_sign g", sk.g); + log_mpidump ("elg_sign y", sk.y); + if (!fips_mode ()) + log_mpidump ("elg_sign x", sk.x); + } + + sig_r = mpi_new (0); + sig_s = mpi_new (0); + sign (sig_r, sig_s, data, &sk); + if (DBG_CIPHER) + { + log_mpidump ("elg_sign sig_r", sig_r); + log_mpidump ("elg_sign sig_s", sig_s); + } + rc = sexp_build (r_sig, NULL, "(sig-val(elg(r%M)(s%M)))", sig_r, sig_s); + + leave: + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + _gcry_mpi_release (sk.p); + _gcry_mpi_release (sk.g); + _gcry_mpi_release (sk.y); + _gcry_mpi_release (sk.x); + _gcry_mpi_release (data); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("elg_sign => %s\n", gpg_strerror (rc)); + return rc; +} + + +static gcry_err_code_t +elg_verify (gcry_sexp_t s_sig, gcry_sexp_t s_data, gcry_sexp_t s_keyparms) +{ + gcry_err_code_t rc; + struct pk_encoding_ctx ctx; + gcry_sexp_t l1 = NULL; + gcry_mpi_t sig_r = NULL; + gcry_mpi_t sig_s = NULL; + gcry_mpi_t data = NULL; + ELG_public_key pk = { NULL, NULL, NULL }; + + _gcry_pk_util_init_encoding_ctx (&ctx, PUBKEY_OP_VERIFY, + elg_get_nbits (s_keyparms)); + + /* Extract the data. */ + rc = _gcry_pk_util_data_to_mpi (s_data, &data, &ctx); + if (rc) + goto leave; + if (DBG_CIPHER) + log_mpidump ("elg_verify data", data); + if (mpi_is_opaque (data)) + { + rc = GPG_ERR_INV_DATA; + goto leave; + } + + /* Extract the signature value. */ + rc = _gcry_pk_util_preparse_sigval (s_sig, elg_names, &l1, NULL); + if (rc) + goto leave; + rc = sexp_extract_param (l1, NULL, "rs", &sig_r, &sig_s, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("elg_verify s_r", sig_r); + log_mpidump ("elg_verify s_s", sig_s); + } + + /* Extract the key. */ + rc = sexp_extract_param (s_keyparms, NULL, "pgy", + &pk.p, &pk.g, &pk.y, NULL); + if (rc) + goto leave; + if (DBG_CIPHER) + { + log_mpidump ("elg_verify p", pk.p); + log_mpidump ("elg_verify g", pk.g); + log_mpidump ("elg_verify y", pk.y); + } + + /* Verify the signature. */ + if (!verify (sig_r, sig_s, data, &pk)) + rc = GPG_ERR_BAD_SIGNATURE; + + leave: + _gcry_mpi_release (pk.p); + _gcry_mpi_release (pk.g); + _gcry_mpi_release (pk.y); + _gcry_mpi_release (data); + _gcry_mpi_release (sig_r); + _gcry_mpi_release (sig_s); + sexp_release (l1); + _gcry_pk_util_free_encoding_ctx (&ctx); + if (DBG_CIPHER) + log_debug ("elg_verify => %s\n", rc?gpg_strerror (rc):"Good"); + return rc; +} + + +/* Return the number of bits for the key described by PARMS. On error + * 0 is returned. The format of PARMS starts with the algorithm name; + * for example: + * + * (dsa + * (p ) + * (g ) + * (y )) + * + * More parameters may be given but we only need P here. + */ +static unsigned int +elg_get_nbits (gcry_sexp_t parms) +{ + gcry_sexp_t l1; + gcry_mpi_t p; + unsigned int nbits; + + l1 = sexp_find_token (parms, "p", 1); + if (!l1) + return 0; /* Parameter P not found. */ + + p= sexp_nth_mpi (l1, 1, GCRYMPI_FMT_USG); + sexp_release (l1); + nbits = p? 
mpi_get_nbits (p) : 0; + _gcry_mpi_release (p); + return nbits; +} + + + +gcry_pk_spec_t _gcry_pubkey_spec_elg = + { + GCRY_PK_ELG, { 0, 0 }, + (GCRY_PK_USAGE_SIGN | GCRY_PK_USAGE_ENCR), + "ELG", elg_names, + "pgy", "pgyx", "ab", "rs", "pgy", + elg_generate, + elg_check_secret_key, + elg_encrypt, + elg_decrypt, + elg_sign, + elg_verify, + elg_get_nbits, + }; diff --git a/comm/third_party/libgcrypt/cipher/gost-s-box.c b/comm/third_party/libgcrypt/cipher/gost-s-box.c new file mode 100644 index 0000000000..5d5ed7dc44 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/gost-s-box.c @@ -0,0 +1,266 @@ +/* gost-s-box.c - GOST 28147-89 S-Box expander + * Copyright (C) 2013 Dmitry Eremin-Solenikov + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include + +#define DIM(v) (sizeof(v)/sizeof((v)[0])) + +struct gost_sbox +{ + const char *name; + const char *oid; + unsigned int keymeshing; + unsigned char sbox[16*8]; +} gost_sboxes[] = { + { "test_3411", "1.2.643.2.2.30.0", 0, + { + 0x4, 0xE, 0x5, 0x7, 0x6, 0x4, 0xD, 0x1, + 0xA, 0xB, 0x8, 0xD, 0xC, 0xB, 0xB, 0xF, + 0x9, 0x4, 0x1, 0xA, 0x7, 0xA, 0x4, 0xD, + 0x2, 0xC, 0xD, 0x1, 0x1, 0x0, 0x1, 0x0, + + 0xD, 0x6, 0xA, 0x0, 0x5, 0x7, 0x3, 0x5, + 0x8, 0xD, 0x3, 0x8, 0xF, 0x2, 0xF, 0x7, + 0x0, 0xF, 0x4, 0x9, 0xD, 0x1, 0x5, 0xA, + 0xE, 0xA, 0x2, 0xF, 0x8, 0xD, 0x9, 0x4, + + 0x6, 0x2, 0xE, 0xE, 0x4, 0x3, 0x0, 0x9, + 0xB, 0x3, 0xF, 0x4, 0xA, 0x6, 0xA, 0x2, + 0x1, 0x8, 0xC, 0x6, 0x9, 0x8, 0xE, 0x3, + 0xC, 0x1, 0x7, 0xC, 0xE, 0x5, 0x7, 0xE, + + 0x7, 0x0, 0x6, 0xB, 0x0, 0x9, 0x6, 0x6, + 0xF, 0x7, 0x0, 0x2, 0x3, 0xC, 0x8, 0xB, + 0x5, 0x5, 0x9, 0x5, 0xB, 0xF, 0x2, 0x8, + 0x3, 0x9, 0xB, 0x3, 0x2, 0xE, 0xC, 0xC, + } + }, + { "CryptoPro_3411", "1.2.643.2.2.30.1", 0, + { + 0xA, 0x5, 0x7, 0x4, 0x7, 0x7, 0xD, 0x1, + 0x4, 0xF, 0xF, 0xA, 0x6, 0x6, 0xE, 0x3, + 0x5, 0x4, 0xC, 0x7, 0x4, 0x2, 0x4, 0xA, + 0x6, 0x0, 0xE, 0xC, 0xB, 0x4, 0x1, 0x9, + + 0x8, 0x2, 0x9, 0x0, 0x9, 0xD, 0x7, 0x5, + 0x1, 0xD, 0x4, 0xF, 0xC, 0x9, 0x0, 0xB, + 0x3, 0xB, 0x1, 0x2, 0x2, 0xF, 0x5, 0x4, + 0x7, 0x9, 0x0, 0x8, 0xA, 0x0, 0xA, 0xF, + + 0xD, 0x1, 0x3, 0xE, 0x1, 0xA, 0x3, 0x8, + 0xC, 0x7, 0xB, 0x1, 0x8, 0x1, 0xC, 0x6, + 0xE, 0x6, 0x5, 0x6, 0x0, 0x5, 0x8, 0x7, + 0x0, 0x3, 0x2, 0x5, 0xE, 0xB, 0xF, 0xE, + + 0x9, 0xC, 0x6, 0xD, 0xF, 0x8, 0x6, 0xD, + 0x2, 0xE, 0xA, 0xB, 0xD, 0xE, 0x2, 0x0, + 0xB, 0xA, 0x8, 0x9, 0x3, 0xC, 0x9, 0x2, + 0xF, 0x8, 0xD, 0x3, 0x5, 0x3, 0xB, 0xC, + } + }, + { "Test_89", "1.2.643.2.2.31.0", 0, + { + 0x4, 0xC, 0xD, 0xE, 0x3, 0x8, 0x9, 0xC, + 0x2, 0x9, 0x8, 0x9, 0xE, 0xF, 0xB, 0x6, + 0xF, 0xF, 0xE, 0xB, 0x5, 0x6, 0xC, 0x5, + 0x5, 0xE, 0xC, 0x2, 0x9, 0xB, 0x0, 0x2, + + 0x9, 0x8, 0x7, 0x5, 0x6, 0x1, 0x3, 0xB, + 0x1, 0x1, 0x3, 0xF, 0x8, 0x9, 0x6, 0x0, + 0x0, 0x3, 0x9, 0x7, 0x0, 0xC, 0x7, 0x9, + 0x8, 0xA, 0xA, 0x1, 0xD, 0x5, 0x5, 0xD, + + 0xE, 0x2, 0x1, 0x0, 0xA, 0xD, 0x4, 0x3, + 0x3, 0x7, 0x5, 0xD, 0xB, 0x3, 0x8, 0xE, + 0xB, 0x4, 0x2, 0xC, 
0x7, 0x7, 0xE, 0x7, + 0xC, 0xD, 0x4, 0x6, 0xC, 0xA, 0xF, 0xA, + + 0xD, 0x6, 0x6, 0xA, 0x2, 0x0, 0x1, 0xF, + 0x7, 0x0, 0xF, 0x4, 0x1, 0xE, 0xA, 0x4, + 0xA, 0xB, 0x0, 0x3, 0xF, 0x2, 0x2, 0x1, + 0x6, 0x5, 0xB, 0x8, 0x4, 0x4, 0xD, 0x8, + } + }, + { "CryptoPro_A", "1.2.643.2.2.31.1", 1, + { + 0x9, 0x3, 0xE, 0xE, 0xB, 0x3, 0x1, 0xB, + 0x6, 0x7, 0x4, 0x7, 0x5, 0xA, 0xD, 0xA, + 0x3, 0xE, 0x6, 0xA, 0x1, 0xD, 0x2, 0xF, + 0x2, 0x9, 0x2, 0xC, 0x9, 0xC, 0x9, 0x5, + + 0x8, 0x8, 0xB, 0xD, 0x8, 0x1, 0x7, 0x0, + 0xB, 0xA, 0x3, 0x1, 0xD, 0x2, 0xA, 0xC, + 0x1, 0xF, 0xD, 0x3, 0xF, 0x0, 0x6, 0xE, + 0x7, 0x0, 0x8, 0x9, 0x0, 0xB, 0x0, 0x8, + + 0xA, 0x5, 0xC, 0x0, 0xE, 0x7, 0x8, 0x6, + 0x4, 0x2, 0xF, 0x2, 0x4, 0x5, 0xC, 0x2, + 0xE, 0x6, 0x5, 0xB, 0x2, 0x9, 0x4, 0x3, + 0xF, 0xC, 0xA, 0x4, 0x3, 0x4, 0x5, 0x9, + + 0xC, 0xB, 0x0, 0xF, 0xC, 0x8, 0xF, 0x1, + 0x0, 0x4, 0x7, 0x8, 0x7, 0xF, 0x3, 0x7, + 0xD, 0xD, 0x1, 0x5, 0xA, 0xE, 0xB, 0xD, + 0x5, 0x1, 0x9, 0x6, 0x6, 0x6, 0xE, 0x4, + } + }, + { "CryptoPro_B", "1.2.643.2.2.31.2", 1, + { + 0x8, 0x0, 0xE, 0x7, 0x2, 0x8, 0x5, 0x0, + 0x4, 0x1, 0xC, 0x5, 0x7, 0x3, 0x2, 0x4, + 0xB, 0x2, 0x0, 0x0, 0xC, 0x2, 0xA, 0xB, + 0x1, 0xA, 0xA, 0xD, 0xF, 0x6, 0xB, 0xE, + + 0x3, 0x4, 0x9, 0xB, 0x9, 0x4, 0x9, 0x8, + 0x5, 0xD, 0x2, 0x6, 0x5, 0xD, 0x1, 0x3, + 0x0, 0x5, 0xD, 0x1, 0xA, 0xE, 0xC, 0x7, + 0x9, 0xC, 0xB, 0x2, 0xB, 0xB, 0x3, 0x1, + + 0x2, 0x9, 0x7, 0x3, 0x1, 0xC, 0x7, 0xA, + 0xE, 0x7, 0x5, 0xA, 0x4, 0x1, 0x4, 0x2, + 0xA, 0x3, 0x8, 0xC, 0x0, 0x7, 0xD, 0x9, + 0xC, 0xF, 0xF, 0xF, 0xD, 0xF, 0x0, 0x6, + + 0xD, 0xB, 0x3, 0x4, 0x6, 0xA, 0x6, 0xF, + 0x6, 0x8, 0x6, 0xE, 0x8, 0x0, 0xF, 0xD, + 0x7, 0x6, 0x1, 0x9, 0xE, 0x9, 0x8, 0x5, + 0xF, 0xE, 0x4, 0x8, 0x3, 0x5, 0xE, 0xC, + } + }, + { "CryptoPro_C", "1.2.643.2.2.31.3", 1, + { + 0x1, 0x0, 0x8, 0x3, 0x8, 0xC, 0xA, 0x7, + 0xB, 0x1, 0x2, 0x6, 0xD, 0x9, 0x9, 0x4, + 0xC, 0x7, 0x5, 0x0, 0xB, 0xB, 0x6, 0x0, + 0x2, 0xD, 0x0, 0x1, 0x0, 0x1, 0x8, 0x5, + + 0x9, 0xB, 0x4, 0x5, 0x4, 0x8, 0xD, 0xA, + 0xD, 0x4, 0x9, 0xD, 0x5, 0xE, 0xE, 0x2, + 0x0, 0x5, 0xF, 0xA, 0x1, 0x2, 0x2, 0xF, + 0xF, 0x2, 0xA, 0x8, 0x2, 0x4, 0x0, 0xE, + + 0x4, 0x8, 0x3, 0xB, 0x9, 0x7, 0xF, 0xC, + 0x5, 0xE, 0x7, 0x2, 0x3, 0x3, 0x3, 0x6, + 0x8, 0xF, 0xC, 0x9, 0xC, 0x6, 0x5, 0x1, + 0xE, 0xC, 0xD, 0x7, 0xE, 0x5, 0xB, 0xB, + + 0xA, 0x9, 0x6, 0xE, 0x6, 0xA, 0x4, 0xD, + 0x7, 0xA, 0xE, 0xF, 0xF, 0x0, 0x1, 0x9, + 0x6, 0x6, 0x1, 0xC, 0xA, 0xF, 0xC, 0x3, + 0x3, 0x3, 0xB, 0x4, 0x7, 0xD, 0x7, 0x8, + } + }, + { "CryptoPro_D", "1.2.643.2.2.31.4", 1, + { + 0xF, 0xB, 0x1, 0x1, 0x0, 0x8, 0x3, 0x1, + 0xC, 0x6, 0xC, 0x5, 0xC, 0x0, 0x0, 0xA, + 0x2, 0x3, 0xB, 0xE, 0x8, 0xF, 0x6, 0x6, + 0xA, 0x4, 0x0, 0xC, 0x9, 0x3, 0xF, 0x8, + + 0x6, 0xC, 0xF, 0xA, 0xD, 0x2, 0x1, 0xF, + 0x4, 0xF, 0xE, 0x7, 0x2, 0x5, 0xE, 0xB, + 0x5, 0xE, 0x6, 0x0, 0xA, 0xE, 0x9, 0x0, + 0x0, 0x2, 0x5, 0xD, 0xB, 0xB, 0x2, 0x4, + + 0x7, 0x7, 0xA, 0x6, 0x7, 0x1, 0xD, 0xC, + 0x9, 0xD, 0xD, 0x2, 0x3, 0xA, 0x8, 0x3, + 0xE, 0x8, 0x4, 0xB, 0x6, 0x4, 0xC, 0x5, + 0xD, 0x0, 0x8, 0x4, 0x5, 0x7, 0x4, 0x9, + + 0x1, 0x5, 0x9, 0x9, 0x4, 0xC, 0xB, 0x7, + 0xB, 0xA, 0x3, 0x3, 0xE, 0x9, 0xA, 0xD, + 0x8, 0x9, 0x7, 0xF, 0xF, 0xD, 0x5, 0x2, + 0x3, 0x1, 0x2, 0x8, 0x1, 0x6, 0x7, 0xE, + } + }, + { "TC26_Z", "1.2.643.7.1.2.5.1.1", 1, + { + 0xc, 0x6, 0xb, 0xc, 0x7, 0x5, 0x8, 0x1, + 0x4, 0x8, 0x3, 0x8, 0xf, 0xd, 0xe, 0x7, + 0x6, 0x2, 0x5, 0x2, 0x5, 0xf, 0x2, 0xe, + 0x2, 0x3, 0x8, 0x1, 0xa, 0x6, 0x5, 0xd, + + 0xa, 0x9, 0x2, 0xd, 0x8, 0x9, 0x6, 0x0, + 0x5, 0xa, 0xf, 0x4, 0x1, 0x2, 0x9, 0x5, + 0xb, 0x5, 0xa, 0xf, 0x6, 0xc, 0x1, 0x8, + 0x9, 0xc, 0xd, 0x6, 0xd, 0xa, 0xc, 0x3, 
+ + 0xe, 0x1, 0xe, 0x7, 0x0, 0xb, 0xf, 0x4, + 0x8, 0xe, 0x1, 0x0, 0x9, 0x7, 0x4, 0xf, + 0xd, 0x4, 0x7, 0xa, 0x3, 0x8, 0xb, 0xa, + 0x7, 0x7, 0x4, 0x5, 0xe, 0x1, 0x0, 0x6, + + 0x0, 0xb, 0xc, 0x3, 0xb, 0x4, 0xd, 0x9, + 0x3, 0xd, 0x9, 0xe, 0x4, 0x3, 0xa, 0xc, + 0xf, 0x0, 0x6, 0x9, 0x2, 0xe, 0x3, 0xb, + 0x1, 0xf, 0x0, 0xb, 0xc, 0x0, 0x7, 0x2, + } + }, +}; + +int main(int argc, char **argv) +{ + unsigned int i, j, s; + FILE *f; + + if (argc == 1) + f = stdin; + else + f = fopen(argv[1], "w"); + + if (!f) + { + perror("fopen"); + exit(1); + } + + for (s = 0; s < DIM(gost_sboxes); s++) + { + unsigned char *sbox = gost_sboxes[s].sbox; + fprintf (f, "static const u32 sbox_%s[4*256] =\n {", gost_sboxes[s].name); + for (i = 0; i < 4; i++) { + fprintf (f, "\n /* %d */\n ", i); + for (j = 0; j < 256; j++) { + unsigned int val; + if (j % 4 == 0 && j != 0) + fprintf (f, "\n "); + val = sbox[ (j & 0xf) * 8 + 2 * i + 0] | + (sbox[ (j >> 4) * 8 + 2 * i + 1] << 4); + val <<= (8*i); + val = (val << 11) | (val >> 21); + fprintf (f, " 0x%08x,", val); + } + } + fprintf (f, "\n };\n\n"); + } + + fprintf (f, "static struct\n{\n const char *oid;\n const u32 *sbox;\n const int keymeshing;\n} gost_oid_map[] = {\n"); + + for (s = 0; s < DIM(gost_sboxes); s++) + { + fprintf (f, " { \"%s\", sbox_%s, %d },\n", gost_sboxes[s].oid, gost_sboxes[s].name, gost_sboxes[s].keymeshing ); + } + + fprintf(f, " { NULL, NULL, 0 }\n};\n"); + + fclose (f); + + return 0; +} diff --git a/comm/third_party/libgcrypt/cipher/gost.h b/comm/third_party/libgcrypt/cipher/gost.h new file mode 100644 index 0000000000..53a4050503 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/gost.h @@ -0,0 +1,34 @@ +/* gost.h - GOST 28147-89 implementation + * Copyright (C) 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef _GCRY_GOST_H +#define _GCRY_GOST_H + +typedef struct { + u32 key[8]; + const u32 *sbox; + unsigned int mesh_counter; + unsigned int mesh_limit; +} GOST28147_context; + +/* This is a simple interface that will be used by GOST R 34.11-94 */ +unsigned int _gcry_gost_enc_data (const u32 *key, + u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro); + +#endif diff --git a/comm/third_party/libgcrypt/cipher/gost28147.c b/comm/third_party/libgcrypt/cipher/gost28147.c new file mode 100644 index 0000000000..9445b378c4 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/gost28147.c @@ -0,0 +1,553 @@ +/* gost28147.c - GOST 28147-89 implementation for Libgcrypt + * Copyright (C) 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. 
+ * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +/* GOST 28147-89 defines several modes of encryption: + * - ECB which should be used only for key transfer + * - CFB mode + * - OFB-like mode with additional transformation on keystream + * RFC 5830 names this 'counter encryption' mode + * Original GOST text uses the term 'gammirovanie' + * - MAC mode ('imitovstavka') + * + * This implementation handles ECB and CFB modes via usual libgcrypt handling. + * OFB-like modes are unsupported. + */ + +#include +#include "types.h" +#include "g10lib.h" +#include "cipher.h" +#include "mac-internal.h" +#include "bufhelp.h" +#include "cipher-internal.h" + +#include "gost.h" +#include "gost-sb.h" + +static void +gost_do_set_sbox (GOST28147_context *ctx, unsigned int index) +{ + ctx->sbox = gost_oid_map[index].sbox; + ctx->mesh_limit = gost_oid_map[index].keymeshing ? 1024 : 0; +} + +static gcry_err_code_t +gost_setkey (void *c, const byte *key, unsigned keylen, + cipher_bulk_ops_t *bulk_ops) +{ + int i; + GOST28147_context *ctx = c; + + (void)bulk_ops; + + if (keylen != 256 / 8) + return GPG_ERR_INV_KEYLEN; + + if (!ctx->sbox) + gost_do_set_sbox (ctx, 0); + + for (i = 0; i < 8; i++) + { + ctx->key[i] = buf_get_le32(&key[4*i]); + } + + ctx->mesh_counter = 0; + + return GPG_ERR_NO_ERROR; +} + +static inline u32 +gost_val (u32 subkey, u32 cm1, const u32 *sbox) +{ + cm1 += subkey; + cm1 = sbox[0*256 + ((cm1 >> 0) & 0xff)] | + sbox[1*256 + ((cm1 >> 8) & 0xff)] | + sbox[2*256 + ((cm1 >> 16) & 0xff)] | + sbox[3*256 + ((cm1 >> 24) & 0xff)]; + return cm1; +} + +static unsigned int +_gost_encrypt_data (const u32 *sbox, const u32 *key, u32 *o1, u32 *o2, u32 n1, u32 n2) +{ + n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox); + n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox); + n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox); + n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox); + + n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox); + n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox); + n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox); + n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox); + + n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox); + n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox); + n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox); + n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox); + + n2 ^= gost_val (key[7], n1, sbox); n1 ^= gost_val (key[6], n2, sbox); + n2 ^= gost_val (key[5], n1, sbox); n1 ^= gost_val (key[4], n2, sbox); + n2 ^= gost_val (key[3], n1, sbox); n1 ^= gost_val (key[2], n2, sbox); + n2 ^= gost_val (key[1], n1, sbox); n1 ^= gost_val (key[0], n2, sbox); + + *o1 = n2; + *o2 = n1; + + return /* burn_stack */ 4*sizeof(void*) /* func call */ + + 3*sizeof(void*) /* stack */ + + 4*sizeof(void*) /* gost_val call */; +} + +static unsigned int +gost_encrypt_block (void *c, byte *outbuf, const byte *inbuf) +{ + GOST28147_context *ctx = c; + u32 n1, n2; + unsigned int burn; + + n1 = buf_get_le32 (inbuf); + n2 = buf_get_le32 (inbuf+4); + + 
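/* [Editor's note, not part of the upstream patch: gost_val above folds one
 * GOST round step through four precomputed 256-entry tables (built by the
 * gost-s-box.c generator earlier in this patch). The unfolded reference
 * form of the same step on the raw 4-bit s-boxes; the function name is
 * illustrative:]
 */
#include <stdint.h>

static uint32_t
gost_round_ref (uint32_t n1, uint32_t k, const unsigned char s[8][16])
{
  uint32_t x = n1 + k;             /* addition mod 2^32 (CM1) */
  uint32_t y = 0;
  int i;

  for (i = 0; i < 8; i++)          /* substitute the eight 4-bit nibbles */
    y |= (uint32_t) s[i][(x >> (4 * i)) & 0xf] << (4 * i);
  return (y << 11) | (y >> 21);    /* rotate left by 11 bits */
}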
burn = _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2); + + buf_put_le32 (outbuf+0, n1); + buf_put_le32 (outbuf+4, n2); + + return /* burn_stack */ burn + 6*sizeof(void*) /* func call */; +} + +unsigned int _gcry_gost_enc_data (const u32 *key, + u32 *o1, u32 *o2, u32 n1, u32 n2, int cryptopro) +{ + const u32 *sbox; + if (cryptopro) + sbox = sbox_CryptoPro_3411; + else + sbox = sbox_test_3411; + return _gost_encrypt_data (sbox, key, o1, o2, n1, n2) + 7 * sizeof(void *); +} + +static unsigned int +gost_decrypt_block (void *c, byte *outbuf, const byte *inbuf) +{ + GOST28147_context *ctx = c; + u32 n1, n2; + const u32 *sbox = ctx->sbox; + + n1 = buf_get_le32 (inbuf); + n2 = buf_get_le32 (inbuf+4); + + n2 ^= gost_val (ctx->key[0], n1, sbox); n1 ^= gost_val (ctx->key[1], n2, sbox); + n2 ^= gost_val (ctx->key[2], n1, sbox); n1 ^= gost_val (ctx->key[3], n2, sbox); + n2 ^= gost_val (ctx->key[4], n1, sbox); n1 ^= gost_val (ctx->key[5], n2, sbox); + n2 ^= gost_val (ctx->key[6], n1, sbox); n1 ^= gost_val (ctx->key[7], n2, sbox); + + n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, sbox); + n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, sbox); + n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, sbox); + n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, sbox); + + n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, sbox); + n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, sbox); + n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, sbox); + n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, sbox); + + n2 ^= gost_val (ctx->key[7], n1, sbox); n1 ^= gost_val (ctx->key[6], n2, sbox); + n2 ^= gost_val (ctx->key[5], n1, sbox); n1 ^= gost_val (ctx->key[4], n2, sbox); + n2 ^= gost_val (ctx->key[3], n1, sbox); n1 ^= gost_val (ctx->key[2], n2, sbox); + n2 ^= gost_val (ctx->key[1], n1, sbox); n1 ^= gost_val (ctx->key[0], n2, sbox); + + buf_put_le32 (outbuf+0, n2); + buf_put_le32 (outbuf+4, n1); + + return /* burn_stack */ 4*sizeof(void*) /* func call */ + + 3*sizeof(void*) /* stack */ + + 4*sizeof(void*) /* gost_val call */; +} + +static gpg_err_code_t +gost_set_sbox (GOST28147_context *ctx, const char *oid) +{ + int i; + + for (i = 0; gost_oid_map[i].oid; i++) + { + if (!strcmp(gost_oid_map[i].oid, oid)) + { + gost_do_set_sbox (ctx, i); + return 0; + } + } + return GPG_ERR_VALUE_NOT_FOUND; +} + +static gpg_err_code_t +gost_set_extra_info (void *c, int what, const void *buffer, size_t buflen) +{ + GOST28147_context *ctx = c; + gpg_err_code_t ec = 0; + + (void)buffer; + (void)buflen; + + switch (what) + { + case GCRYCTL_SET_SBOX: + ec = gost_set_sbox (ctx, buffer); + break; + + default: + ec = GPG_ERR_INV_OP; + break; + } + return ec; +} + +static const byte CryptoProKeyMeshingKey[] = { + 0x69, 0x00, 0x72, 0x22, 0x64, 0xC9, 0x04, 0x23, + 0x8D, 0x3A, 0xDB, 0x96, 0x46, 0xE9, 0x2A, 0xC4, + 0x18, 0xFE, 0xAC, 0x94, 0x00, 0xED, 0x07, 0x12, + 0xC0, 0x86, 0xDC, 0xC2, 0xEF, 0x4C, 0xA9, 0x2B +}; + +/* Implements key meshing algorithm by modifing ctx and returning new IV. + Thanks to Dmitry Belyavskiy. 
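/* [Editor's note, not part of the upstream patch: per RFC 4357 ("CryptoPro
 * key meshing"), after every 1024 bytes of processed data (mesh_limit =
 * 1024, i.e. 128 eight-byte blocks) the working key and IV are refreshed as
 *
 *   K'  = Decrypt_K (MeshingKey)      (the constant just above)
 *   IV' = Encrypt_K' (IV)
 *
 * which is why gost_encrypt_block_mesh below encrypts twice on a meshing
 * boundary: once to transform the IV block, once for the data proper.]
 */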
*/ +static void +cryptopro_key_meshing (GOST28147_context *ctx) +{ + unsigned char newkey[32]; + unsigned int i; + + /* "Decrypt" the static keymeshing key */ + for (i = 0; i < 4; i++) + { + gost_decrypt_block (ctx, newkey + i*8, CryptoProKeyMeshingKey + i*8); + } + + /* Set new key */ + for (i = 0; i < 8; i++) + { + ctx->key[i] = buf_get_le32(&newkey[4*i]); + } + + ctx->mesh_counter = 0; +} + +static unsigned int +gost_encrypt_block_mesh (void *c, byte *outbuf, const byte *inbuf) +{ + GOST28147_context *ctx = c; + u32 n1, n2; + unsigned int burn; + + n1 = buf_get_le32 (inbuf); + n2 = buf_get_le32 (inbuf+4); + + if (ctx->mesh_limit && (ctx->mesh_counter == ctx->mesh_limit)) + { + cryptopro_key_meshing (ctx); + /* Yes, encrypt twice: once for KeyMeshing procedure per RFC 4357, + * once for block encryption */ + _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2); + } + + burn = _gost_encrypt_data(ctx->sbox, ctx->key, &n1, &n2, n1, n2); + + ctx->mesh_counter += 8; + + buf_put_le32 (outbuf+0, n1); + buf_put_le32 (outbuf+4, n2); + + return /* burn_stack */ burn + 6*sizeof(void*) /* func call */; +} + +static gcry_cipher_oid_spec_t oids_gost28147_mesh[] = + { + { "1.2.643.2.2.21", GCRY_CIPHER_MODE_CFB }, + /* { "1.2.643.2.2.31.0", GCRY_CIPHER_MODE_CNTGOST }, */ + { "1.2.643.2.2.31.1", GCRY_CIPHER_MODE_CFB }, + { "1.2.643.2.2.31.2", GCRY_CIPHER_MODE_CFB }, + { "1.2.643.2.2.31.3", GCRY_CIPHER_MODE_CFB }, + { "1.2.643.2.2.31.4", GCRY_CIPHER_MODE_CFB }, + { NULL } + }; + +gcry_cipher_spec_t _gcry_cipher_spec_gost28147 = + { + GCRY_CIPHER_GOST28147, {0, 0}, + "GOST28147", NULL, NULL, 8, 256, + sizeof (GOST28147_context), + gost_setkey, + gost_encrypt_block, + gost_decrypt_block, + NULL, NULL, NULL, gost_set_extra_info, + }; + +/* Meshing is used only for CFB, so no need to have separate + * gost_decrypt_block_mesh. + * Moreover key meshing is specified as encrypting the block (IV). Decrypting + * it afterwards would be meaningless. 
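+ * Note that CFB decryption also runs the block cipher in the encryption
+ * direction, so the meshing encryption path above serves both directions.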
*/ +gcry_cipher_spec_t _gcry_cipher_spec_gost28147_mesh = + { + GCRY_CIPHER_GOST28147_MESH, {0, 0}, + "GOST28147_MESH", NULL, oids_gost28147_mesh, 8, 256, + sizeof (GOST28147_context), + gost_setkey, + gost_encrypt_block_mesh, + gost_decrypt_block, + NULL, NULL, NULL, gost_set_extra_info, + }; + +static gcry_err_code_t +gost_imit_open (gcry_mac_hd_t h) +{ + memset(&h->u.imit, 0, sizeof(h->u.imit)); + return 0; +} + +static void +gost_imit_close (gcry_mac_hd_t h) +{ + (void) h; +} + +static gcry_err_code_t +gost_imit_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + int i; + + if (keylen != 256 / 8) + return GPG_ERR_INV_KEYLEN; + + if (!h->u.imit.ctx.sbox) + h->u.imit.ctx.sbox = sbox_CryptoPro_A; + + for (i = 0; i < 8; i++) + { + h->u.imit.ctx.key[i] = buf_get_le32(&key[4*i]); + } + + return 0; +} + +static gcry_err_code_t +gost_imit_setiv (gcry_mac_hd_t h, + const unsigned char *iv, + size_t ivlen) +{ + if (ivlen != 8) + return GPG_ERR_INV_LENGTH; + + h->u.imit.n1 = buf_get_le32 (iv + 0); + h->u.imit.n2 = buf_get_le32 (iv + 4); + + return 0; +} + +static gcry_err_code_t +gost_imit_reset (gcry_mac_hd_t h) +{ + h->u.imit.n1 = h->u.imit.n2 = 0; + h->u.imit.unused = 0; + return 0; +} + +static unsigned int +_gost_imit_block (const u32 *sbox, const u32 *key, u32 *o1, u32 *o2, u32 n1, u32 n2) +{ + n1 ^= *o1; + n2 ^= *o2; + + n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox); + n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox); + n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox); + n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox); + + n2 ^= gost_val (key[0], n1, sbox); n1 ^= gost_val (key[1], n2, sbox); + n2 ^= gost_val (key[2], n1, sbox); n1 ^= gost_val (key[3], n2, sbox); + n2 ^= gost_val (key[4], n1, sbox); n1 ^= gost_val (key[5], n2, sbox); + n2 ^= gost_val (key[6], n1, sbox); n1 ^= gost_val (key[7], n2, sbox); + + *o1 = n1; + *o2 = n2; + + return /* burn_stack */ 4*sizeof(void*) /* func call */ + + 3*sizeof(void*) /* stack */ + + 4*sizeof(void*) /* gost_val call */; +} + +static inline unsigned int +gost_imit_block (GOST28147_context *ctx, u32 *n1, u32 *n2, const unsigned char *buf) +{ + if (ctx->mesh_limit && (ctx->mesh_counter == ctx->mesh_limit)) + cryptopro_key_meshing (ctx); + + return _gost_imit_block (ctx->sbox, ctx->key, + n1, n2, + buf_get_le32 (buf+0), + buf_get_le32 (buf+4)); +} + +static gcry_err_code_t +gost_imit_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + const int blocksize = 8; + unsigned int burn = 0; + if (!buflen || !buf) + return GPG_ERR_NO_ERROR; + + if (h->u.imit.unused) + { + for (; buflen && h->u.imit.unused < blocksize; buflen --) + h->u.imit.lastiv[h->u.imit.unused++] = *buf++; + + if (h->u.imit.unused < blocksize) + return GPG_ERR_NO_ERROR; + + h->u.imit.count ++; + burn = gost_imit_block (&h->u.imit.ctx, + &h->u.imit.n1, &h->u.imit.n2, + h->u.imit.lastiv); + + h->u.imit.unused = 0; + } + + while (buflen >= blocksize) + { + h->u.imit.count ++; + burn = gost_imit_block (&h->u.imit.ctx, + &h->u.imit.n1, &h->u.imit.n2, + buf); + buf += blocksize; + buflen -= blocksize; + } + + for (; buflen; buflen--) + h->u.imit.lastiv[h->u.imit.unused++] = *buf++; + + _gcry_burn_stack (burn); + + return GPG_ERR_NO_ERROR; +} + +static void +gost_imit_finish (gcry_mac_hd_t h) +{ + static const unsigned char zero[8] = {0}; + + /* Fill till full block */ + if (h->u.imit.unused) + gost_imit_write(h, zero, 8 - h->u.imit.unused); + + if (h->u.imit.count == 1) + 
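+    /* The GOST 28147-89 MAC is defined over at least two blocks, so a
+     * one-block message is extended with an additional all-zero block. */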
gost_imit_write(h, zero, 8); +} + +static gcry_err_code_t +gost_imit_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen) +{ + unsigned int dlen = 8; + unsigned char digest[8]; + + gost_imit_finish (h); + + buf_put_le32 (digest+0, h->u.imit.n1); + buf_put_le32 (digest+4, h->u.imit.n2); + + if (*outlen <= dlen) + buf_cpy (outbuf, digest, *outlen); + else + { + buf_cpy (outbuf, digest, dlen); + *outlen = dlen; + } + return 0; +} + +static gcry_err_code_t +gost_imit_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + unsigned char tbuf[8]; + + gost_imit_finish (h); + + buf_put_le32 (tbuf+0, h->u.imit.n1); + buf_put_le32 (tbuf+4, h->u.imit.n2); + + return buf_eq_const(tbuf, buf, buflen) ? + GPG_ERR_NO_ERROR : GPG_ERR_CHECKSUM; +} + +static unsigned int +gost_imit_get_maclen (int algo) +{ + (void) algo; + return 4; /* or 8 */ +} + + +static unsigned int +gost_imit_get_keylen (int algo) +{ + (void) algo; + return 256 / 8; +} + +static gpg_err_code_t +gost_imit_set_extra_info (gcry_mac_hd_t hd, int what, const void *buffer, size_t buflen) +{ + gpg_err_code_t ec = 0; + + (void)buffer; + (void)buflen; + + switch (what) + { + case GCRYCTL_SET_SBOX: + ec = gost_set_sbox (&hd->u.imit.ctx, buffer); + break; + + default: + ec = GPG_ERR_INV_OP; + break; + } + return ec; +} + + +static gcry_mac_spec_ops_t gost_imit_ops = { + gost_imit_open, + gost_imit_close, + gost_imit_setkey, + gost_imit_setiv, + gost_imit_reset, + gost_imit_write, + gost_imit_read, + gost_imit_verify, + gost_imit_get_maclen, + gost_imit_get_keylen, + gost_imit_set_extra_info, + NULL +}; + +gcry_mac_spec_t _gcry_mac_type_spec_gost28147_imit = + { + GCRY_MAC_GOST28147_IMIT, {0, 0}, "GOST28147_IMIT", + &gost_imit_ops + }; diff --git a/comm/third_party/libgcrypt/cipher/gostr3411-94.c b/comm/third_party/libgcrypt/cipher/gostr3411-94.c new file mode 100644 index 0000000000..7cf0637e26 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/gostr3411-94.c @@ -0,0 +1,383 @@ +/* gostr3411-94.c - GOST R 34.11-94 hash function + * Copyright (C) 2012 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + + +#include +#include +#include +#include + +#include "g10lib.h" +#include "bithelp.h" +#include "bufhelp.h" +#include "cipher.h" +#include "hash-common.h" + +#include "gost.h" + +#define max(a, b) (((a) > (b)) ? 
(a) : (b)) + +typedef struct { + gcry_md_block_ctx_t bctx; + union { + u32 h[8]; + byte result[32]; + }; + u32 sigma[8]; + u32 len; + int cryptopro; +} GOSTR3411_CONTEXT; + +static unsigned int +transform (void *c, const unsigned char *data, size_t nblks); + +static void +gost3411_init (void *context, unsigned int flags) +{ + GOSTR3411_CONTEXT *hd = context; + + (void)flags; + + memset (hd->h, 0, 32); + memset (hd->sigma, 0, 32); + + hd->bctx.nblocks = 0; + hd->bctx.count = 0; + hd->bctx.blocksize_shift = _gcry_ctz(32); + hd->bctx.bwrite = transform; + hd->cryptopro = 0; +} + +static void +gost3411_cp_init (void *context, unsigned int flags) +{ + GOSTR3411_CONTEXT *hd = context; + gost3411_init (context, flags); + hd->cryptopro = 1; +} + +static void +do_p (u32 *p, u32 *u, u32 *v) +{ + int k; + u32 t[8]; + + for (k = 0; k < 8; k++) + t[k] = u[k] ^ v[k]; + + k = 0; + p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 | + ((t[2] >> (8*k)) & 0xff) << 8 | + ((t[4] >> (8*k)) & 0xff) << 16 | + ((t[6] >> (8*k)) & 0xff) << 24; + p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 | + ((t[3] >> (8*k)) & 0xff) << 8 | + ((t[5] >> (8*k)) & 0xff) << 16 | + ((t[7] >> (8*k)) & 0xff) << 24; + + k = 1; + p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 | + ((t[2] >> (8*k)) & 0xff) << 8 | + ((t[4] >> (8*k)) & 0xff) << 16 | + ((t[6] >> (8*k)) & 0xff) << 24; + p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 | + ((t[3] >> (8*k)) & 0xff) << 8 | + ((t[5] >> (8*k)) & 0xff) << 16 | + ((t[7] >> (8*k)) & 0xff) << 24; + + k = 2; + p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 | + ((t[2] >> (8*k)) & 0xff) << 8 | + ((t[4] >> (8*k)) & 0xff) << 16 | + ((t[6] >> (8*k)) & 0xff) << 24; + p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 | + ((t[3] >> (8*k)) & 0xff) << 8 | + ((t[5] >> (8*k)) & 0xff) << 16 | + ((t[7] >> (8*k)) & 0xff) << 24; + + k = 3; + p[k+0] = ((t[0] >> (8*k)) & 0xff) << 0 | + ((t[2] >> (8*k)) & 0xff) << 8 | + ((t[4] >> (8*k)) & 0xff) << 16 | + ((t[6] >> (8*k)) & 0xff) << 24; + p[k+4] = ((t[1] >> (8*k)) & 0xff) << 0 | + ((t[3] >> (8*k)) & 0xff) << 8 | + ((t[5] >> (8*k)) & 0xff) << 16 | + ((t[7] >> (8*k)) & 0xff) << 24; +} + +static void +do_a (u32 *u) +{ + u32 t[2]; + int i; + memcpy(t, u, 2*4); + for (i = 0; i < 6; i++) + u[i] = u[i+2]; + u[6] = u[0] ^ t[0]; + u[7] = u[1] ^ t[1]; +} +/* apply do_a twice: 1 2 3 4 -> 3 4 1^2 2^3 */ +static void +do_a2 (u32 *u) +{ + u32 t[4]; + int i; + memcpy (t, u, 16); + memcpy (u, u + 4, 16); + for (i = 0; i < 2; i++) + { + u[4+i] = t[i] ^ t[i + 2]; + u[6+i] = u[i] ^ t[i + 2]; + } +} + +static void +do_apply_c2 (u32 *u) +{ + u[ 0] ^= 0xff00ff00; + u[ 1] ^= 0xff00ff00; + u[ 2] ^= 0x00ff00ff; + u[ 3] ^= 0x00ff00ff; + u[ 4] ^= 0x00ffff00; + u[ 5] ^= 0xff0000ff; + u[ 6] ^= 0x000000ff; + u[ 7] ^= 0xff00ffff; +} + +#define do_chi_step12(e) \ + e[6] ^= ((e[6] >> 16) ^ e[7] ^ (e[7] >> 16) ^ e[4] ^ (e[5] >>16)) & 0xffff; + +#define do_chi_step13(e) \ + e[6] ^= ((e[7] ^ (e[7] >> 16) ^ e[0] ^ (e[4] >> 16) ^ e[6]) & 0xffff) << 16; + +#define do_chi_doublestep(e, i) \ + e[i] ^= (e[i] >> 16) ^ (e[(i+1)%8] << 16) ^ e[(i+1)%8] ^ (e[(i+1)%8] >> 16) ^ (e[(i+2)%8] << 16) ^ e[(i+6)%8] ^ (e[(i+7)%8] >> 16); \ + e[i] ^= (e[i] << 16); + +static void +do_chi_submix12 (u32 *e, u32 *x) +{ + e[6] ^= x[0]; + e[7] ^= x[1]; + e[0] ^= x[2]; + e[1] ^= x[3]; + e[2] ^= x[4]; + e[3] ^= x[5]; + e[4] ^= x[6]; + e[5] ^= x[7]; +} + +static void +do_chi_submix13 (u32 *e, u32 *x) +{ + e[6] ^= (x[0] << 16) | (x[7] >> 16); + e[7] ^= (x[1] << 16) | (x[0] >> 16); + e[0] ^= (x[2] << 16) | (x[1] >> 16); + e[1] ^= (x[3] << 16) | (x[2] >> 16); + e[2] ^= (x[4] << 16) | (x[3] >> 
16); + e[3] ^= (x[5] << 16) | (x[4] >> 16); + e[4] ^= (x[6] << 16) | (x[5] >> 16); + e[5] ^= (x[7] << 16) | (x[6] >> 16); +} + +static void +do_add (u32 *s, u32 *a) +{ + u32 carry = 0; + int i; + + for (i = 0; i < 8; i++) + { + u32 op = carry + a[i]; + s[i] += op; + carry = (a[i] > op) || (op > s[i]); + } +} + +static unsigned int +do_hash_step (GOSTR3411_CONTEXT *hd, u32 *h, u32 *m) +{ + u32 u[8], v[8]; + u32 s[8]; + u32 k[8]; + unsigned int burn; + int i; + + memcpy (u, h, 32); + memcpy (v, m, 32); + + for (i = 0; i < 4; i++) { + do_p (k, u, v); + + burn = _gcry_gost_enc_data (k, &s[2*i], &s[2*i+1], h[2*i], h[2*i+1], hd->cryptopro); + + do_a (u); + if (i == 1) + do_apply_c2 (u); + do_a2 (v); + } + + for (i = 0; i < 5; i++) + { + do_chi_doublestep (s, 0); + do_chi_doublestep (s, 1); + do_chi_doublestep (s, 2); + do_chi_doublestep (s, 3); + do_chi_doublestep (s, 4); + /* That is in total 12 + 1 + 61 = 74 = 16 * 4 + 10 rounds */ + if (i == 4) + break; + do_chi_doublestep (s, 5); + if (i == 0) + do_chi_submix12(s, m); + do_chi_step12 (s); + if (i == 0) + do_chi_submix13(s, h); + do_chi_step13 (s); + do_chi_doublestep (s, 7); + } + + memcpy (h, s+5, 12); + memcpy (h+3, s, 20); + + return /* burn_stack */ 4 * sizeof(void*) /* func call (ret addr + args) */ + + 4 * 32 + 2 * sizeof(int) /* stack */ + + max(burn /* _gcry_gost_enc_one */, + sizeof(void*) * 2 /* do_a2 call */ + + 16 + sizeof(int) /* do_a2 stack */ ); +} + +static unsigned int +transform_blk (void *ctx, const unsigned char *data) +{ + GOSTR3411_CONTEXT *hd = ctx; + u32 m[8]; + unsigned int burn; + int i; + + for (i = 0; i < 8; i++) + m[i] = buf_get_le32(data + i*4); + burn = do_hash_step (hd, hd->h, m); + do_add (hd->sigma, m); + + return /* burn_stack */ burn + 3 * sizeof(void*) + 32 + 2 * sizeof(void*); +} + + +static unsigned int +transform ( void *c, const unsigned char *data, size_t nblks ) +{ + unsigned int burn; + + do + { + burn = transform_blk (c, data); + data += 32; + } + while (--nblks); + + return burn; +} + + +/* + The routine finally terminates the computation and returns the + digest. The handle is prepared for a new cycle, but adding bytes + to the handle will the destroy the returned buffer. Returns: 32 + bytes with the message the digest. 
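+  That is: the last block is padded with zeros, the bit-length block L
+  and the checksum block Sigma are hashed in, and the 32-byte digest is
+  left in the context for gost3411_read.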
*/ +static void +gost3411_final (void *context) +{ + GOSTR3411_CONTEXT *hd = context; + size_t padlen = 0; + u32 l[8]; + int i; + MD_NBLOCKS_TYPE nblocks; + + if (hd->bctx.count > 0) + { + padlen = 32 - hd->bctx.count; + memset (hd->bctx.buf + hd->bctx.count, 0, padlen); + hd->bctx.count += padlen; + _gcry_md_block_write (hd, NULL, 0); /* flush */; + } + + if (hd->bctx.count != 0) + return; /* Something went wrong */ + + memset (l, 0, 32); + + nblocks = hd->bctx.nblocks; + if (padlen) + { + nblocks --; + l[0] = 256 - padlen * 8; + } + l[0] |= nblocks << 8; + nblocks >>= 24; + + for (i = 1; i < 8 && nblocks != 0; i++) + { + l[i] = nblocks; + nblocks >>= 24; + } + + do_hash_step (hd, hd->h, l); + do_hash_step (hd, hd->h, hd->sigma); + for (i = 0; i < 8; i++) + hd->h[i] = le_bswap32(hd->h[i]); +} + +static byte * +gost3411_read (void *context) +{ + GOSTR3411_CONTEXT *hd = context; + + return hd->result; +} + +static unsigned char asn[6] = /* Object ID is 1.2.643.2.2.3 */ + { 0x2a, 0x85, 0x03, 0x02, 0x02, 0x03 }; + +static gcry_md_oid_spec_t oid_spec_gostr3411[] = + { + /* iso.member-body.ru.rans.cryptopro.3 (gostR3411-94-with-gostR3410-2001) */ + { "1.2.643.2.2.3" }, + /* iso.member-body.ru.rans.cryptopro.9 (gostR3411-94) */ + { "1.2.643.2.2.9" }, + {NULL}, + }; + +gcry_md_spec_t _gcry_digest_spec_gost3411_94 = + { + GCRY_MD_GOSTR3411_94, {0, 0}, + "GOSTR3411_94", NULL, 0, NULL, 32, + gost3411_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL, + NULL, NULL, + sizeof (GOSTR3411_CONTEXT) + }; +gcry_md_spec_t _gcry_digest_spec_gost3411_cp = + { + GCRY_MD_GOSTR3411_CP, {0, 0}, + "GOSTR3411_CP", asn, DIM (asn), oid_spec_gostr3411, 32, + gost3411_cp_init, _gcry_md_block_write, gost3411_final, gost3411_read, NULL, + NULL, NULL, + sizeof (GOSTR3411_CONTEXT) + }; diff --git a/comm/third_party/libgcrypt/cipher/hash-common.c b/comm/third_party/libgcrypt/cipher/hash-common.c new file mode 100644 index 0000000000..ed2d7cacd1 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/hash-common.c @@ -0,0 +1,193 @@ +/* hash-common.c - Common code for hash algorithms + * Copyright (C) 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#ifdef HAVE_STDINT_H +# include +#endif + +#include "g10lib.h" +#include "bufhelp.h" +#include "hash-common.h" + + +/* Run a selftest for hash algorithm ALGO. If the resulting digest + matches EXPECT/EXPECTLEN and everything else is fine as well, + return NULL. If an error occurs, return a static text string + describing the error. + + DATAMODE controls what will be hashed according to this table: + + 0 - Hash the supplied DATA of DATALEN. + 1 - Hash one million times a 'a'. DATA and DATALEN are ignored. 
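+   Any other DATAMODE value makes the function fail with the static
+   string "invalid DATAMODE".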
+ +*/ +const char * +_gcry_hash_selftest_check_one (int algo, + int datamode, const void *data, size_t datalen, + const void *expect, size_t expectlen) +{ + const char *result = NULL; + gcry_error_t err = 0; + gcry_md_hd_t hd; + unsigned char *digest; + char aaa[1000]; + int xof = 0; + + if (_gcry_md_get_algo_dlen (algo) == 0) + xof = 1; + else if (_gcry_md_get_algo_dlen (algo) != expectlen) + return "digest size does not match expected size"; + + err = _gcry_md_open (&hd, algo, 0); + if (err) + return "gcry_md_open failed"; + + switch (datamode) + { + case 0: + _gcry_md_write (hd, data, datalen); + break; + + case 1: /* Hash one million times an "a". */ + { + int i; + + /* Write in odd size chunks so that we test the buffering. */ + memset (aaa, 'a', 1000); + for (i = 0; i < 1000; i++) + _gcry_md_write (hd, aaa, 1000); + } + break; + + default: + result = "invalid DATAMODE"; + } + + if (!result) + { + if (!xof) + { + digest = _gcry_md_read (hd, algo); + + if ( memcmp (digest, expect, expectlen) ) + result = "digest mismatch"; + } + else + { + gcry_assert(expectlen <= sizeof(aaa)); + + err = _gcry_md_extract (hd, algo, aaa, expectlen); + if (err) + result = "error extracting output from XOF"; + else if ( memcmp (aaa, expect, expectlen) ) + result = "digest mismatch"; + } + } + + _gcry_md_close (hd); + + return result; +} + + +/* Common function to write a chunk of data to the transform function + of a hash algorithm. Note that the use of the term "block" does + not imply a fixed size block. Note that we explicitly allow to use + this function after the context has been finalized; the result does + not have any meaning but writing after finalize is sometimes + helpful to mitigate timing attacks. */ +void +_gcry_md_block_write (void *context, const void *inbuf_arg, size_t inlen) +{ + const unsigned char *inbuf = inbuf_arg; + gcry_md_block_ctx_t *hd = context; + unsigned int stack_burn = 0; + unsigned int nburn; + const unsigned int blocksize_shift = hd->blocksize_shift; + const unsigned int blocksize = 1 << blocksize_shift; + size_t inblocks; + size_t copylen; + + if (sizeof(hd->buf) < blocksize) + BUG(); + + if (!hd->bwrite) + return; + + if (hd->count > blocksize) + { + /* This happens only when gcry_md_write is called after final. + * Writing after final is used for mitigating timing attacks. */ + hd->count = 0; + } + + while (hd->count) + { + if (hd->count == blocksize) /* Flush the buffer. */ + { + nburn = hd->bwrite (hd, hd->buf, 1); + stack_burn = nburn > stack_burn ? nburn : stack_burn; + hd->count = 0; + if (!++hd->nblocks) + hd->nblocks_high++; + } + else + { + copylen = inlen; + if (copylen > blocksize - hd->count) + copylen = blocksize - hd->count; + + if (copylen == 0) + break; + + buf_cpy (&hd->buf[hd->count], inbuf, copylen); + hd->count += copylen; + inbuf += copylen; + inlen -= copylen; + } + } + + if (inlen == 0) + return; + + if (inlen >= blocksize) + { + inblocks = inlen >> blocksize_shift; + nburn = hd->bwrite (hd, inbuf, inblocks); + stack_burn = nburn > stack_burn ? 
nburn : stack_burn; + hd->count = 0; + hd->nblocks_high += (hd->nblocks + inblocks < inblocks); + hd->nblocks += inblocks; + inlen -= inblocks << blocksize_shift; + inbuf += inblocks << blocksize_shift; + } + + if (inlen) + { + buf_cpy (hd->buf, inbuf, inlen); + hd->count = inlen; + } + + if (stack_burn > 0) + _gcry_burn_stack (stack_burn); +} diff --git a/comm/third_party/libgcrypt/cipher/hash-common.h b/comm/third_party/libgcrypt/cipher/hash-common.h new file mode 100644 index 0000000000..561e77a7e5 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/hash-common.h @@ -0,0 +1,62 @@ +/* hash-common.h - Declarations of common code for hash algorithms. + * Copyright (C) 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#ifndef GCRY_HASH_COMMON_H +#define GCRY_HASH_COMMON_H + +#include "types.h" + + +const char * _gcry_hash_selftest_check_one +/**/ (int algo, + int datamode, const void *data, size_t datalen, + const void *expect, size_t expectlen); + +/* Type for the md_write helper function. */ +typedef unsigned int (*_gcry_md_block_write_t) (void *c, + const unsigned char *blks, + size_t nblks); + +#if (defined(USE_SHA512) || defined(USE_WHIRLPOOL)) +/* SHA-512 and Whirlpool needs u64. SHA-512 needs larger buffer. */ +# define MD_BLOCK_MAX_BLOCKSIZE 128 +# define MD_NBLOCKS_TYPE u64 +#else +# define MD_BLOCK_MAX_BLOCKSIZE 64 +# define MD_NBLOCKS_TYPE u32 +#endif + +/* SHA1 needs 2x64 bytes and SHA-512 needs 128 bytes. */ +#define MD_BLOCK_CTX_BUFFER_SIZE 128 + +typedef struct gcry_md_block_ctx +{ + byte buf[MD_BLOCK_CTX_BUFFER_SIZE]; + MD_NBLOCKS_TYPE nblocks; + MD_NBLOCKS_TYPE nblocks_high; + int count; + unsigned int blocksize_shift; + _gcry_md_block_write_t bwrite; +} gcry_md_block_ctx_t; + + +void +_gcry_md_block_write( void *context, const void *inbuf_arg, size_t inlen); + +#endif /*GCRY_HASH_COMMON_H*/ diff --git a/comm/third_party/libgcrypt/cipher/idea.c b/comm/third_party/libgcrypt/cipher/idea.c new file mode 100644 index 0000000000..0a81081810 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/idea.c @@ -0,0 +1,382 @@ +/* idea.c - IDEA function + * Copyright 1997, 1998, 1999, 2001 Werner Koch (dd9jn) + * Copyright 2013 g10 Code GmbH + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WERNER KOCH BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Werner Koch shall not be + * used in advertising or otherwise to promote the sale, use or other dealings + * in this Software without prior written authorization from Werner Koch. + * + * Patents on IDEA have expired: + * Europe: EP0482154 on 2011-05-16, + * Japan: JP3225440 on 2011-05-16, + * U.S.: 5,214,703 on 2012-01-07. + */ + +/* + * Please see http://www.noepatents.org/ to learn why software patents + * are bad for society and what you can do to fight them. + * + * The code herein is based on the one from: + * Bruce Schneier: Applied Cryptography. John Wiley & Sons, 1996. + * ISBN 0-471-11709-9. + */ + + +#include +#include +#include +#include +#include + +#include "types.h" /* for byte and u32 typedefs */ +#include "g10lib.h" +#include "cipher.h" +#include "cipher-internal.h" + + +#define IDEA_KEYSIZE 16 +#define IDEA_BLOCKSIZE 8 +#define IDEA_ROUNDS 8 +#define IDEA_KEYLEN (6*IDEA_ROUNDS+4) + +typedef struct { + u16 ek[IDEA_KEYLEN]; + u16 dk[IDEA_KEYLEN]; + int have_dk; +} IDEA_context; + +static const char *selftest(void); + + +static u16 +mul_inv( u16 x ) +{ + u16 t0, t1; + u16 q, y; + + if( x < 2 ) + return x; + t1 = 0x10001UL / x; + y = 0x10001UL % x; + if( y == 1 ) + return (1-t1) & 0xffff; + + t0 = 1; + do { + q = x / y; + x = x % y; + t0 += q * t1; + if( x == 1 ) + return t0; + q = y / x; + y = y % x; + t1 += q * t0; + } while( y != 1 ); + return (1-t1) & 0xffff; +} + + + +static void +expand_key( const byte *userkey, u16 *ek ) +{ + int i,j; + + for(j=0; j < 8; j++ ) { + ek[j] = (*userkey << 8) + userkey[1]; + userkey += 2; + } + for(i=0; j < IDEA_KEYLEN; j++ ) { + i++; + ek[i+7] = ek[i&7] << 9 | ek[(i+1)&7] >> 7; + ek += i & 8; + i &= 7; + } +} + + +static void +invert_key( u16 *ek, u16 dk[IDEA_KEYLEN] ) +{ + int i; + u16 t1, t2, t3; + u16 temp[IDEA_KEYLEN]; + u16 *p = temp + IDEA_KEYLEN; + + t1 = mul_inv( *ek++ ); + t2 = -*ek++; + t3 = -*ek++; + *--p = mul_inv( *ek++ ); + *--p = t3; + *--p = t2; + *--p = t1; + + for(i=0; i < IDEA_ROUNDS-1; i++ ) { + t1 = *ek++; + *--p = *ek++; + *--p = t1; + + t1 = mul_inv( *ek++ ); + t2 = -*ek++; + t3 = -*ek++; + *--p = mul_inv( *ek++ ); + *--p = t2; + *--p = t3; + *--p = t1; + } + t1 = *ek++; + *--p = *ek++; + *--p = t1; + + t1 = mul_inv( *ek++ ); + t2 = -*ek++; + t3 = -*ek++; + *--p = mul_inv( *ek++ ); + *--p = t3; + *--p = t2; + *--p = t1; + memcpy(dk, temp, sizeof(temp) ); + wipememory(temp, sizeof(temp)); +} + + +static void +cipher( byte *outbuf, const byte *inbuf, u16 *key ) +{ + u16 s2, s3; + u16 in[4]; + int r = IDEA_ROUNDS; +#define x1 (in[0]) +#define x2 (in[1]) +#define x3 (in[2]) +#define x4 (in[3]) +#define MUL(x,y) \ + do {u16 _t16; u32 _t32; \ + if( (_t16 = (y)) ) { \ + if( (x = (x)&0xffff) ) { \ + _t32 = (u32)x * _t16; \ + x = _t32 & 0xffff; \ + _t16 = _t32 >> 16; \ + x = ((x)-_t16) + (x<_t16?1:0); \ + } \ + else { \ + x = 1 - _t16; \ + } \ + } \ + else { \ + x = 1 - x; \ + } \ + } while(0) + + memcpy (in, inbuf, sizeof in); +#ifndef WORDS_BIGENDIAN + x1 = (x1>>8) | (x1<<8); + x2 = (x2>>8) | (x2<<8); + x3 = 
(x3>>8) | (x3<<8); + x4 = (x4>>8) | (x4<<8); +#endif + do { + MUL(x1, *key++); + x2 += *key++; + x3 += *key++; + MUL(x4, *key++ ); + + s3 = x3; + x3 ^= x1; + MUL(x3, *key++); + s2 = x2; + x2 ^=x4; + x2 += x3; + MUL(x2, *key++); + x3 += x2; + + x1 ^= x2; + x4 ^= x3; + + x2 ^= s3; + x3 ^= s2; + } while( --r ); + MUL(x1, *key++); + x3 += *key++; + x2 += *key++; + MUL(x4, *key); + +#ifndef WORDS_BIGENDIAN + x1 = (x1>>8) | (x1<<8); + x2 = (x2>>8) | (x2<<8); + x3 = (x3>>8) | (x3<<8); + x4 = (x4>>8) | (x4<<8); +#endif + memcpy (outbuf+0, &x1, 2); + memcpy (outbuf+2, &x3, 2); + memcpy (outbuf+4, &x2, 2); + memcpy (outbuf+6, &x4, 2); +#undef MUL +#undef x1 +#undef x2 +#undef x3 +#undef x4 +} + + +static int +do_setkey( IDEA_context *c, const byte *key, unsigned int keylen ) +{ + static int initialized = 0; + static const char *selftest_failed = 0; + + if( !initialized ) { + initialized = 1; + selftest_failed = selftest(); + if( selftest_failed ) + log_error( "%s\n", selftest_failed ); + } + if( selftest_failed ) + return GPG_ERR_SELFTEST_FAILED; + + assert(keylen == 16); + c->have_dk = 0; + expand_key( key, c->ek ); + invert_key( c->ek, c->dk ); + return 0; +} + +static gcry_err_code_t +idea_setkey (void *context, const byte *key, unsigned int keylen, + cipher_bulk_ops_t *bulk_ops) +{ + IDEA_context *ctx = context; + int rc = do_setkey (ctx, key, keylen); + (void)bulk_ops; + _gcry_burn_stack (23+6*sizeof(void*)); + return rc; +} + +static void +encrypt_block( IDEA_context *c, byte *outbuf, const byte *inbuf ) +{ + cipher( outbuf, inbuf, c->ek ); +} + +static unsigned int +idea_encrypt (void *context, byte *out, const byte *in) +{ + IDEA_context *ctx = context; + encrypt_block (ctx, out, in); + return /*burn_stack*/ (24+3*sizeof (void*)); +} + +static void +decrypt_block( IDEA_context *c, byte *outbuf, const byte *inbuf ) +{ + if( !c->have_dk ) { + c->have_dk = 1; + invert_key( c->ek, c->dk ); + } + cipher( outbuf, inbuf, c->dk ); +} + +static unsigned int +idea_decrypt (void *context, byte *out, const byte *in) +{ + IDEA_context *ctx = context; + decrypt_block (ctx, out, in); + return /*burn_stack*/ (24+3*sizeof (void*)); +} + + +static const char * +selftest( void ) +{ +static struct { + byte key[16]; + byte plain[8]; + byte cipher[8]; +} test_vectors[] = { + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x00, 0x00, 0x00, 0x01, 0x00, 0x02, 0x00, 0x03 }, + { 0x11, 0xFB, 0xED, 0x2B, 0x01, 0x98, 0x6D, 0xE5 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, + { 0x54, 0x0E, 0x5F, 0xEA, 0x18, 0xC2, 0xF8, 0xB1 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x00, 0x19, 0x32, 0x4B, 0x64, 0x7D, 0x96, 0xAF }, + { 0x9F, 0x0A, 0x0A, 0xB6, 0xE1, 0x0C, 0xED, 0x78 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0xF5, 0x20, 0x2D, 0x5B, 0x9C, 0x67, 0x1B, 0x08 }, + { 0xCF, 0x18, 0xFD, 0x73, 0x55, 0xE2, 0xC5, 0xC5 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0xFA, 0xE6, 0xD2, 0xBE, 0xAA, 0x96, 0x82, 0x6E }, + { 0x85, 0xDF, 0x52, 0x00, 0x56, 0x08, 0x19, 0x3D } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x0A, 0x14, 0x1E, 0x28, 0x32, 0x3C, 0x46, 0x50 }, + { 0x2F, 0x7D, 0xE7, 
0x50, 0x21, 0x2F, 0xB7, 0x34 } }, + { { 0x00, 0x01, 0x00, 0x02, 0x00, 0x03, 0x00, 0x04, + 0x00, 0x05, 0x00, 0x06, 0x00, 0x07, 0x00, 0x08 }, + { 0x05, 0x0A, 0x0F, 0x14, 0x19, 0x1E, 0x23, 0x28 }, + { 0x7B, 0x73, 0x14, 0x92, 0x5D, 0xE5, 0x9C, 0x09 } }, + { { 0x00, 0x05, 0x00, 0x0A, 0x00, 0x0F, 0x00, 0x14, + 0x00, 0x19, 0x00, 0x1E, 0x00, 0x23, 0x00, 0x28 }, + { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, + { 0x3E, 0xC0, 0x47, 0x80, 0xBE, 0xFF, 0x6E, 0x20 } }, + { { 0x3A, 0x98, 0x4E, 0x20, 0x00, 0x19, 0x5D, 0xB3, + 0x2E, 0xE5, 0x01, 0xC8, 0xC4, 0x7C, 0xEA, 0x60 }, + { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08 }, + { 0x97, 0xBC, 0xD8, 0x20, 0x07, 0x80, 0xDA, 0x86 } }, + { { 0x00, 0x64, 0x00, 0xC8, 0x01, 0x2C, 0x01, 0x90, + 0x01, 0xF4, 0x02, 0x58, 0x02, 0xBC, 0x03, 0x20 }, + { 0x05, 0x32, 0x0A, 0x64, 0x14, 0xC8, 0x19, 0xFA }, + { 0x65, 0xBE, 0x87, 0xE7, 0xA2, 0x53, 0x8A, 0xED } }, + { { 0x9D, 0x40, 0x75, 0xC1, 0x03, 0xBC, 0x32, 0x2A, + 0xFB, 0x03, 0xE7, 0xBE, 0x6A, 0xB3, 0x00, 0x06 }, + { 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08 }, + { 0xF5, 0xDB, 0x1A, 0xC4, 0x5E, 0x5E, 0xF9, 0xF9 } } +}; + IDEA_context c; + byte buffer[8]; + int i; + + for(i=0; i < DIM(test_vectors); i++ ) { + do_setkey( &c, test_vectors[i].key, 16 ); + encrypt_block( &c, buffer, test_vectors[i].plain ); + if( memcmp( buffer, test_vectors[i].cipher, 8 ) ) + return "IDEA test encryption failed."; + decrypt_block( &c, buffer, test_vectors[i].cipher ); + if( memcmp( buffer, test_vectors[i].plain, 8 ) ) + return "IDEA test decryption failed."; + } + + return NULL; +} + + +gcry_cipher_spec_t _gcry_cipher_spec_idea = + { + GCRY_CIPHER_IDEA, {0, 0}, + "IDEA", NULL, NULL, IDEA_BLOCKSIZE, 128, + sizeof (IDEA_context), + idea_setkey, idea_encrypt, idea_decrypt + }; diff --git a/comm/third_party/libgcrypt/cipher/kdf-internal.h b/comm/third_party/libgcrypt/cipher/kdf-internal.h new file mode 100644 index 0000000000..7079860e99 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/kdf-internal.h @@ -0,0 +1,40 @@ +/* kdf-internal.h - Internal defs for kdf.c + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#ifndef GCRY_KDF_INTERNAL_H +#define GCRY_KDF_INTERNAL_H + +/*-- kdf.c --*/ +gpg_err_code_t +_gcry_kdf_pkdf2 (const void *passphrase, size_t passphraselen, + int hashalgo, + const void *salt, size_t saltlen, + unsigned long iterations, + size_t keysize, void *keybuffer); + +/*-- scrypt.c --*/ +gcry_err_code_t +_gcry_kdf_scrypt (const unsigned char *passwd, size_t passwdlen, + int algo, int subalgo, + const unsigned char *salt, size_t saltlen, + unsigned long iterations, + size_t dklen, unsigned char *dk); + + +#endif /*GCRY_KDF_INTERNAL_H*/ diff --git a/comm/third_party/libgcrypt/cipher/kdf.c b/comm/third_party/libgcrypt/cipher/kdf.c new file mode 100644 index 0000000000..93c2c9f65e --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/kdf.c @@ -0,0 +1,503 @@ +/* kdf.c - Key Derivation Functions + * Copyright (C) 1998, 2008, 2011 Free Software Foundation, Inc. + * Copyright (C) 2013 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "kdf-internal.h" + + +/* Transform a passphrase into a suitable key of length KEYSIZE and + store this key in the caller provided buffer KEYBUFFER. The caller + must provide an HASHALGO, a valid ALGO and depending on that algo a + SALT of 8 bytes and the number of ITERATIONS. Code taken from + gnupg/agent/protect.c:hash_passphrase. */ +static gpg_err_code_t +openpgp_s2k (const void *passphrase, size_t passphraselen, + int algo, int hashalgo, + const void *salt, size_t saltlen, + unsigned long iterations, + size_t keysize, void *keybuffer) +{ + gpg_err_code_t ec; + gcry_md_hd_t md; + char *key = keybuffer; + int pass, i; + int used = 0; + int secmode; + + if ((algo == GCRY_KDF_SALTED_S2K || algo == GCRY_KDF_ITERSALTED_S2K) + && (!salt || saltlen != 8)) + return GPG_ERR_INV_VALUE; + + secmode = _gcry_is_secure (passphrase) || _gcry_is_secure (keybuffer); + + ec = _gcry_md_open (&md, hashalgo, secmode? GCRY_MD_FLAG_SECURE : 0); + if (ec) + return ec; + + for (pass=0; used < keysize; pass++) + { + if (pass) + { + _gcry_md_reset (md); + for (i=0; i < pass; i++) /* Preset the hash context. 
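+                                    One extra zero octet per pass, as
+                                    specified for OpenPGP S2K (RFC 4880).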
*/ + _gcry_md_putc (md, 0); + } + + if (algo == GCRY_KDF_SALTED_S2K || algo == GCRY_KDF_ITERSALTED_S2K) + { + int len2 = passphraselen + 8; + unsigned long count = len2; + + if (algo == GCRY_KDF_ITERSALTED_S2K) + { + count = iterations; + if (count < len2) + count = len2; + } + + while (count > len2) + { + _gcry_md_write (md, salt, saltlen); + _gcry_md_write (md, passphrase, passphraselen); + count -= len2; + } + if (count < saltlen) + _gcry_md_write (md, salt, count); + else + { + _gcry_md_write (md, salt, saltlen); + count -= saltlen; + _gcry_md_write (md, passphrase, count); + } + } + else + _gcry_md_write (md, passphrase, passphraselen); + + _gcry_md_final (md); + i = _gcry_md_get_algo_dlen (hashalgo); + if (i > keysize - used) + i = keysize - used; + memcpy (key+used, _gcry_md_read (md, hashalgo), i); + used += i; + } + _gcry_md_close (md); + return 0; +} + + +/* Transform a passphrase into a suitable key of length KEYSIZE and + store this key in the caller provided buffer KEYBUFFER. The caller + must provide PRFALGO which indicates the pseudorandom function to + use: This shall be the algorithms id of a hash algorithm; it is + used in HMAC mode. SALT is a salt of length SALTLEN and ITERATIONS + gives the number of iterations. */ +gpg_err_code_t +_gcry_kdf_pkdf2 (const void *passphrase, size_t passphraselen, + int hashalgo, + const void *salt, size_t saltlen, + unsigned long iterations, + size_t keysize, void *keybuffer) +{ + gpg_err_code_t ec; + gcry_md_hd_t md; + int secmode; + unsigned long dklen = keysize; + char *dk = keybuffer; + unsigned int hlen; /* Output length of the digest function. */ + unsigned int l; /* Rounded up number of blocks. */ + unsigned int r; /* Number of octets in the last block. */ + char *sbuf; /* Malloced buffer to concatenate salt and iter + as well as space to hold TBUF and UBUF. */ + char *tbuf; /* Buffer for T; ptr into SBUF, size is HLEN. */ + char *ubuf; /* Buffer for U; ptr into SBUF, size is HLEN. */ + unsigned int lidx; /* Current block number. */ + unsigned long iter; /* Current iteration number. */ + unsigned int i; + + /* We allow for a saltlen of 0 here to support scrypt. It is not + clear whether rfc2898 allows for this this, thus we do a test on + saltlen > 0 only in gcry_kdf_derive. */ + if (!salt || !iterations || !dklen) + return GPG_ERR_INV_VALUE; + + hlen = _gcry_md_get_algo_dlen (hashalgo); + if (!hlen) + return GPG_ERR_DIGEST_ALGO; + + secmode = _gcry_is_secure (passphrase) || _gcry_is_secure (keybuffer); + + /* Step 1 */ + /* If dkLen > (2^32 - 1) * hLen, output "derived key too long" and + * stop. We use a stronger inequality but only if our type can hold + * a larger value. */ + +#if SIZEOF_UNSIGNED_LONG > 4 + if (dklen > 0xffffffffU) + return GPG_ERR_INV_VALUE; +#endif + + + /* Step 2 */ + l = ((dklen - 1)/ hlen) + 1; + r = dklen - (l - 1) * hlen; + + /* Setup buffers and prepare a hash context. */ + sbuf = (secmode + ? xtrymalloc_secure (saltlen + 4 + hlen + hlen) + : xtrymalloc (saltlen + 4 + hlen + hlen)); + if (!sbuf) + return gpg_err_code_from_syserror (); + tbuf = sbuf + saltlen + 4; + ubuf = tbuf + hlen; + + ec = _gcry_md_open (&md, hashalgo, (GCRY_MD_FLAG_HMAC + | (secmode?GCRY_MD_FLAG_SECURE:0))); + if (ec) + { + xfree (sbuf); + return ec; + } + + ec = _gcry_md_setkey (md, passphrase, passphraselen); + if (ec) + { + _gcry_md_close (md); + xfree (sbuf); + return ec; + } + + /* Step 3 and 4. 
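+     For each block index i: T_i = U_1 xor U_2 xor ... xor U_c with
+     U_1 = PRF (P, S || INT (i)) and U_j = PRF (P, U_(j-1)); the T_i
+     are concatenated to form the derived key (RFC 2898, 5.2).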
*/ + memcpy (sbuf, salt, saltlen); + for (lidx = 1; lidx <= l; lidx++) + { + for (iter = 0; iter < iterations; iter++) + { + _gcry_md_reset (md); + if (!iter) /* Compute U_1: */ + { + sbuf[saltlen] = (lidx >> 24); + sbuf[saltlen + 1] = (lidx >> 16); + sbuf[saltlen + 2] = (lidx >> 8); + sbuf[saltlen + 3] = lidx; + _gcry_md_write (md, sbuf, saltlen + 4); + memcpy (ubuf, _gcry_md_read (md, 0), hlen); + memcpy (tbuf, ubuf, hlen); + } + else /* Compute U_(2..c): */ + { + _gcry_md_write (md, ubuf, hlen); + memcpy (ubuf, _gcry_md_read (md, 0), hlen); + for (i=0; i < hlen; i++) + tbuf[i] ^= ubuf[i]; + } + } + if (lidx == l) /* Last block. */ + memcpy (dk, tbuf, r); + else + { + memcpy (dk, tbuf, hlen); + dk += hlen; + } + } + + _gcry_md_close (md); + xfree (sbuf); + return 0; +} + + +/* Derive a key from a passphrase. KEYSIZE gives the requested size + of the keys in octets. KEYBUFFER is a caller provided buffer + filled on success with the derived key. The input passphrase is + taken from (PASSPHRASE,PASSPHRASELEN) which is an arbitrary memory + buffer. ALGO specifies the KDF algorithm to use; these are the + constants GCRY_KDF_*. SUBALGO specifies an algorithm used + internally by the KDF algorithms; this is usually a hash algorithm + but certain KDF algorithm may use it differently. {SALT,SALTLEN} + is a salt as needed by most KDF algorithms. ITERATIONS is a + positive integer parameter to most KDFs. 0 is returned on success, + or an error code on failure. */ +gpg_err_code_t +_gcry_kdf_derive (const void *passphrase, size_t passphraselen, + int algo, int subalgo, + const void *salt, size_t saltlen, + unsigned long iterations, + size_t keysize, void *keybuffer) +{ + gpg_err_code_t ec; + + if (!passphrase) + { + ec = GPG_ERR_INV_DATA; + goto leave; + } + + if (!keybuffer || !keysize) + { + ec = GPG_ERR_INV_VALUE; + goto leave; + } + + + switch (algo) + { + case GCRY_KDF_SIMPLE_S2K: + case GCRY_KDF_SALTED_S2K: + case GCRY_KDF_ITERSALTED_S2K: + if (!passphraselen) + ec = GPG_ERR_INV_DATA; + else + ec = openpgp_s2k (passphrase, passphraselen, algo, subalgo, + salt, saltlen, iterations, keysize, keybuffer); + break; + + case GCRY_KDF_PBKDF1: + ec = GPG_ERR_UNSUPPORTED_ALGORITHM; + break; + + case GCRY_KDF_PBKDF2: + if (!saltlen) + ec = GPG_ERR_INV_VALUE; + else + ec = _gcry_kdf_pkdf2 (passphrase, passphraselen, subalgo, + salt, saltlen, iterations, keysize, keybuffer); + break; + + case 41: + case GCRY_KDF_SCRYPT: +#if USE_SCRYPT + ec = _gcry_kdf_scrypt (passphrase, passphraselen, algo, subalgo, + salt, saltlen, iterations, keysize, keybuffer); +#else + ec = GPG_ERR_UNSUPPORTED_ALGORITHM; +#endif /*USE_SCRYPT*/ + break; + + default: + ec = GPG_ERR_UNKNOWN_ALGORITHM; + break; + } + + leave: + return ec; +} + + +/* Check one KDF call with ALGO and HASH_ALGO using the regular KDF + * API. (passphrase,passphraselen) is the password to be derived, + * (salt,saltlen) the salt for the key derivation, + * iterations is the number of the kdf iterations, + * and (expect,expectlen) the expected result. Returns NULL on + * success or a string describing the failure. 
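+ * The expected result must fit into the 512 byte scratch buffer used
+ * by the implementation below.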
*/ + +static const char * +check_one (int algo, int hash_algo, + const void *passphrase, size_t passphraselen, + const void *salt, size_t saltlen, + unsigned long iterations, + const void *expect, size_t expectlen) +{ + unsigned char key[512]; /* hardcoded to avoid allocation */ + size_t keysize = expectlen; + + if (keysize > sizeof(key)) + return "invalid tests data"; + + if (_gcry_kdf_derive (passphrase, passphraselen, algo, + hash_algo, salt, saltlen, iterations, + keysize, key)) + return "gcry_kdf_derive failed"; + + if (memcmp (key, expect, expectlen)) + return "does not match"; + + return NULL; +} + + +static gpg_err_code_t +selftest_pbkdf2 (int extended, selftest_report_func_t report) +{ + static const struct { + const char *desc; + const char *p; /* Passphrase. */ + size_t plen; /* Length of P. */ + const char *salt; + size_t saltlen; + int hashalgo; + unsigned long c; /* Iterations. */ + int dklen; /* Requested key length. */ + const char *dk; /* Derived key. */ + int disabled; + } tv[] = { +#if USE_SHA1 +#define NUM_TEST_VECTORS 9 + /* SHA1 test vectors are from RFC-6070. */ + { + "Basic PBKDF2 SHA1 #1", + "password", 8, + "salt", 4, + GCRY_MD_SHA1, + 1, + 20, + "\x0c\x60\xc8\x0f\x96\x1f\x0e\x71\xf3\xa9" + "\xb5\x24\xaf\x60\x12\x06\x2f\xe0\x37\xa6" + }, + { + "Basic PBKDF2 SHA1 #2", + "password", 8, + "salt", 4, + GCRY_MD_SHA1, + 2, + 20, + "\xea\x6c\x01\x4d\xc7\x2d\x6f\x8c\xcd\x1e" + "\xd9\x2a\xce\x1d\x41\xf0\xd8\xde\x89\x57" + }, + { + "Basic PBKDF2 SHA1 #3", + "password", 8, + "salt", 4, + GCRY_MD_SHA1, + 4096, + 20, + "\x4b\x00\x79\x01\xb7\x65\x48\x9a\xbe\xad" + "\x49\xd9\x26\xf7\x21\xd0\x65\xa4\x29\xc1" + }, + { + "Basic PBKDF2 SHA1 #4", + "password", 8, + "salt", 4, + GCRY_MD_SHA1, + 16777216, + 20, + "\xee\xfe\x3d\x61\xcd\x4d\xa4\xe4\xe9\x94" + "\x5b\x3d\x6b\xa2\x15\x8c\x26\x34\xe9\x84", + 1 /* This test takes too long. 
*/ + }, + { + "Basic PBKDF2 SHA1 #5", + "passwordPASSWORDpassword", 24, + "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36, + GCRY_MD_SHA1, + 4096, + 25, + "\x3d\x2e\xec\x4f\xe4\x1c\x84\x9b\x80\xc8" + "\xd8\x36\x62\xc0\xe4\x4a\x8b\x29\x1a\x96" + "\x4c\xf2\xf0\x70\x38" + }, + { + "Basic PBKDF2 SHA1 #6", + "pass\0word", 9, + "sa\0lt", 5, + GCRY_MD_SHA1, + 4096, + 16, + "\x56\xfa\x6a\xa7\x55\x48\x09\x9d\xcc\x37" + "\xd7\xf0\x34\x25\xe0\xc3" + }, + { /* empty password test, not in RFC-6070 */ + "Basic PBKDF2 SHA1 #7", + "", 0, + "salt", 4, + GCRY_MD_SHA1, + 2, + 20, + "\x13\x3a\x4c\xe8\x37\xb4\xd2\x52\x1e\xe2" + "\xbf\x03\xe1\x1c\x71\xca\x79\x4e\x07\x97" + }, +#else +#define NUM_TEST_VECTORS 2 +#endif + { + "Basic PBKDF2 SHA256", + "password", 8, + "salt", 4, + GCRY_MD_SHA256, + 2, + 32, + "\xae\x4d\x0c\x95\xaf\x6b\x46\xd3\x2d\x0a\xdf\xf9\x28\xf0\x6d\xd0" + "\x2a\x30\x3f\x8e\xf3\xc2\x51\xdf\xd6\xe2\xd8\x5a\x95\x47\x4c\x43" + }, + { + "Extended PBKDF2 SHA256", + "passwordPASSWORDpassword", 24, + "saltSALTsaltSALTsaltSALTsaltSALTsalt", 36, + GCRY_MD_SHA256, + 4096, + 40, + "\x34\x8c\x89\xdb\xcb\xd3\x2b\x2f\x32\xd8\x14\xb8\x11\x6e\x84\xcf" + "\x2b\x17\x34\x7e\xbc\x18\x00\x18\x1c\x4e\x2a\x1f\xb8\xdd\x53\xe1" + "\xc6\x35\x51\x8c\x7d\xac\x47\xe9" + }, + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + if (tv[tvidx].disabled) + continue; + errtxt = check_one (GCRY_KDF_PBKDF2, tv[tvidx].hashalgo, + tv[tvidx].p, tv[tvidx].plen, + tv[tvidx].salt, tv[tvidx].saltlen, + tv[tvidx].c, + tv[tvidx].dk, tv[tvidx].dklen); + if (errtxt) + goto failed; + if (tvidx >= NUM_TEST_VECTORS - 1 && !extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("kdf", GCRY_KDF_PBKDF2, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run the selftests for KDF with KDF algorithm ALGO with optional + reporting function REPORT. */ +gpg_error_t +_gcry_kdf_selftest (int algo, int extended, selftest_report_func_t report) +{ + gcry_err_code_t ec = 0; + + if (algo == GCRY_KDF_PBKDF2) + ec = selftest_pbkdf2 (extended, report); + else + { + ec = GPG_ERR_UNSUPPORTED_ALGORITHM; + if (report) + report ("kdf", algo, "module", "algorithm not available"); + } + return gpg_error (ec); +} diff --git a/comm/third_party/libgcrypt/cipher/keccak-armv7-neon.S b/comm/third_party/libgcrypt/cipher/keccak-armv7-neon.S new file mode 100644 index 0000000000..0bec8d50a9 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/keccak-armv7-neon.S @@ -0,0 +1,945 @@ +/* keccak-armv7-neon.S - ARMv7/NEON implementation of Keccak + * + * Copyright (C) 2015 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#include + +#if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) && \ + defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) && \ + defined(HAVE_GCC_INLINE_ASM_NEON) + +/* Based on public-domain/CC0 implementation from SUPERCOP package + * (keccakc1024/inplace-armv7a-neon/keccak2.s) + * + * Original copyright header follows: + */ + +@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, +@ Michaël Peeters and Gilles Van Assche. For more information, feedback or +@ questions, please refer to our website: http://keccak.noekeon.org/ +@ +@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". +@ +@ To the extent possible under law, the implementer has waived all copyright +@ and related or neighboring rights to the source code in this file. +@ http://creativecommons.org/publicdomain/zero/1.0/ + +.text + +.syntax unified +.fpu neon +.arm + + +.extern _gcry_keccak_round_consts_64bit; + +#ifdef __PIC__ +# define GET_DATA_POINTER(reg, name, rtmp) \ + ldr reg, 1f; \ + ldr rtmp, 2f; \ + b 3f; \ + 1: .word _GLOBAL_OFFSET_TABLE_-(3f+8); \ + 2: .word name(GOT); \ + 3: add reg, pc, reg; \ + ldr reg, [reg, rtmp]; +#else +# define GET_DATA_POINTER(reg, name, rtmp) ldr reg, =name +#endif + + +@// --- offsets in state +.equ Aba, 0*8 +.equ Aga, 1*8 +.equ Aka, 2*8 +.equ Ama, 3*8 +.equ Asa, 4*8 + +@// --- macros + +.macro KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5 + + @Prepare Theta + @Ca = Aba^Aga^Aka^Ama^Asa@ + @Ce = Abe^Age^Ake^Ame^Ase@ + @Ci = Abi^Agi^Aki^Ami^Asi@ + @Co = Abo^Ago^Ako^Amo^Aso@ + @Cu = Abu^Agu^Aku^Amu^Asu@ + @De = Ca^ROL64(Ci, 1)@ + @Di = Ce^ROL64(Co, 1)@ + @Do = Ci^ROL64(Cu, 1)@ + @Du = Co^ROL64(Ca, 1)@ + @Da = Cu^ROL64(Ce, 1)@ + + veor.64 q4, q6, q7 + veor.64 q5, q9, q10 + veor.64 d8, d8, d9 + veor.64 d10, d10, d11 + veor.64 d1, d8, d16 + veor.64 d2, d10, d17 + + veor.64 q4, q11, q12 + veor.64 q5, q14, q15 + veor.64 d8, d8, d9 + veor.64 d10, d10, d11 + veor.64 d3, d8, d26 + + vadd.u64 q4, q1, q1 + veor.64 d4, d10, d27 + vmov.64 d0, d5 + vsri.64 q4, q1, #63 + + vadd.u64 q5, q2, q2 + veor.64 q4, q4, q0 + vsri.64 q5, q2, #63 + vadd.u64 d7, d1, d1 + veor.64 \argA2, \argA2, d8 + veor.64 q5, q5, q1 + + vsri.64 d7, d1, #63 + vshl.u64 d1, \argA2, #44 + veor.64 \argA3, \argA3, d9 + veor.64 d7, d7, d4 + + @Ba = argA1^Da@ + @Be = ROL64((argA2^De), 44)@ + @Bi = ROL64((argA3^Di), 43)@ + @Bo = ROL64((argA4^Do), 21)@ + @Bu = ROL64((argA5^Du), 14)@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + @argA1 = Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@ + vsri.64 d1, \argA2, #64-44 + vshl.u64 d2, \argA3, #43 + vldr.64 d0, [sp, #\argA1] + veor.64 \argA4, \argA4, d10 + vsri.64 d2, \argA3, #64-43 + vshl.u64 d3, \argA4, #21 + veor.64 \argA5, \argA5, d11 + veor.64 d0, d0, d7 + vsri.64 d3, \argA4, #64-21 + vbic.64 d5, d2, d1 + vshl.u64 d4, \argA5, #14 + vbic.64 \argA2, d3, d2 + vld1.64 d6, [ip]! 
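+    @ d6 = next 64-bit round constant; XORed into the new Aba lane (Iota)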
+ veor.64 d5, d0 + vsri.64 d4, \argA5, #64-14 + veor.64 d5, d6 + vbic.64 \argA5, d1, d0 + vbic.64 \argA3, d4, d3 + vbic.64 \argA4, d0, d4 + veor.64 \argA2, d1 + vstr.64 d5, [sp, #\argA1] + veor.64 \argA3, d2 + veor.64 \argA4, d3 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5 + + @d2 = ROL64((argA1^Da), 3)@ + @d3 = ROL64((argA2^De), 45)@ + @d4 = ROL64((argA3^Di), 61)@ + @d0 = ROL64((argA4^Do), 28)@ + @d1 = ROL64((argA5^Du), 20)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d3, \argA2, #45 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d4, \argA3, #61 + veor.64 \argA4, \argA4, d10 + vsri.64 d3, \argA2, #64-45 + veor.64 \argA5, \argA5, d11 + vsri.64 d4, \argA3, #64-61 + vshl.u64 d0, \argA4, #28 + veor.64 d6, d6, d7 + vshl.u64 d1, \argA5, #20 + vbic.64 \argA3, d4, d3 + vsri.64 d0, \argA4, #64-28 + vbic.64 \argA4, d0, d4 + vshl.u64 d2, d6, #3 + vsri.64 d1, \argA5, #64-20 + veor.64 \argA4, d3 + vsri.64 d2, d6, #64-3 + vbic.64 \argA5, d1, d0 + vbic.64 d6, d2, d1 + vbic.64 \argA2, d3, d2 + veor.64 d6, d0 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 \argA3, d2 + veor.64 d5, d6 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5 + + @d4 = ROL64((argA1^Da), 18)@ + @d0 = ROL64((argA2^De), 1)@ + @d1 = ROL64((argA3^Di), 6)@ + @d2 = ROL64((argA4^Do), 25)@ + @d3 = ROL64((argA5^Du), 8)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA3, \argA3, d9 + veor.64 \argA4, \argA4, d10 + vshl.u64 d1, \argA3, #6 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d2, \argA4, #25 + veor.64 \argA5, \argA5, d11 + vsri.64 d1, \argA3, #64-6 + veor.64 \argA2, \argA2, d8 + vsri.64 d2, \argA4, #64-25 + vext.8 d3, \argA5, \argA5, #7 + veor.64 d6, d6, d7 + vbic.64 \argA3, d2, d1 + vadd.u64 d0, \argA2, \argA2 + vbic.64 \argA4, d3, d2 + vsri.64 d0, \argA2, #64-1 + vshl.u64 d4, d6, #18 + veor.64 \argA2, d1, \argA4 + veor.64 \argA3, d0 + vsri.64 d4, d6, #64-18 + vstr.64 \argA3, [sp, #\argA1] + veor.64 d5, \argA3 + vbic.64 \argA5, d1, d0 + vbic.64 \argA3, d4, d3 + vbic.64 \argA4, d0, d4 + veor.64 \argA3, d2 + veor.64 \argA4, d3 + veor.64 \argA5, d4 + + .endm + +.macro KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5 + + @d1 = ROL64((argA1^Da), 36)@ + @d2 = ROL64((argA2^De), 10)@ + @d3 = ROL64((argA3^Di), 15)@ + @d4 = ROL64((argA4^Do), 56)@ + @d0 = ROL64((argA5^Du), 27)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d2, \argA2, #10 + vldr.64 d6, [sp, #\argA1] + vshl.u64 d3, \argA3, #15 + veor.64 \argA4, \argA4, d10 + vsri.64 d2, \argA2, #64-10 + vsri.64 d3, \argA3, #64-15 + veor.64 \argA5, \argA5, d11 + vext.8 d4, \argA4, \argA4, #1 + vbic.64 \argA2, d3, d2 + vshl.u64 d0, \argA5, #27 + veor.64 d6, d6, d7 + vbic.64 \argA3, d4, d3 + vsri.64 d0, \argA5, #64-27 + vshl.u64 d1, d6, #36 + veor.64 \argA3, d2 + vbic.64 \argA4, d0, d4 + vsri.64 d1, d6, #64-36 + + veor.64 \argA4, d3 + vbic.64 d6, d2, d1 + vbic.64 \argA5, d1, d0 + veor.64 d6, d0 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 d5, d6 + veor.64 \argA5, d4 + + .endm + +.macro 
KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5 + + @d3 = ROL64((argA1^Da), 41)@ + @d4 = ROL64((argA2^De), 2)@ + @d0 = ROL64((argA3^Di), 62)@ + @d1 = ROL64((argA4^Do), 55)@ + @d2 = ROL64((argA5^Du), 39)@ + @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ + @argA2 = Be ^((~Bi)& Bo )@ + @argA3 = Bi ^((~Bo)& Bu )@ + @argA4 = Bo ^((~Bu)& Ba )@ + @argA5 = Bu ^((~Ba)& Be )@ + + veor.64 \argA2, \argA2, d8 + veor.64 \argA3, \argA3, d9 + vshl.u64 d4, \argA2, #2 + veor.64 \argA5, \argA5, d11 + vshl.u64 d0, \argA3, #62 + vldr.64 d6, [sp, #\argA1] + vsri.64 d4, \argA2, #64-2 + veor.64 \argA4, \argA4, d10 + vsri.64 d0, \argA3, #64-62 + + vshl.u64 d1, \argA4, #55 + veor.64 d6, d6, d7 + vshl.u64 d2, \argA5, #39 + vsri.64 d1, \argA4, #64-55 + vbic.64 \argA4, d0, d4 + vsri.64 d2, \argA5, #64-39 + vbic.64 \argA2, d1, d0 + vshl.u64 d3, d6, #41 + veor.64 \argA5, d4, \argA2 + vbic.64 \argA2, d2, d1 + vsri.64 d3, d6, #64-41 + veor.64 d6, d0, \argA2 + + vbic.64 \argA2, d3, d2 + vbic.64 \argA3, d4, d3 + veor.64 \argA2, d1 + vstr.64 d6, [sp, #\argA1] + veor.64 d5, d6 + veor.64 \argA3, d2 + veor.64 \argA4, d3 + + .endm + + +@// --- code + +@not callable from C! +.p2align 3 +.type KeccakF_armv7a_neon_asm,%function; +KeccakF_armv7a_neon_asm: @ + +.LroundLoop: + + KeccakThetaRhoPiChiIota Aba, d13, d19, d25, d31 + KeccakThetaRhoPiChi1 Aka, d15, d21, d22, d28 + KeccakThetaRhoPiChi2 Asa, d12, d18, d24, d30 + KeccakThetaRhoPiChi3 Aga, d14, d20, d26, d27 + KeccakThetaRhoPiChi4 Ama, d16, d17, d23, d29 + + KeccakThetaRhoPiChiIota Aba, d15, d18, d26, d29 + KeccakThetaRhoPiChi1 Asa, d14, d17, d25, d28 + KeccakThetaRhoPiChi2 Ama, d13, d21, d24, d27 + KeccakThetaRhoPiChi3 Aka, d12, d20, d23, d31 + KeccakThetaRhoPiChi4 Aga, d16, d19, d22, d30 + + KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30 + KeccakThetaRhoPiChi1 Ama, d12, d19, d26, d28 + KeccakThetaRhoPiChi2 Aga, d15, d17, d24, d31 + KeccakThetaRhoPiChi3 Asa, d13, d20, d22, d29 + KeccakThetaRhoPiChi4 Aka, d16, d18, d25, d27 + + KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27 + KeccakThetaRhoPiChi1 Aga, d13, d18, d23, d28 + KeccakThetaRhoPiChi2 Aka, d14, d19, d24, d29 + ldr r0, [ip] + KeccakThetaRhoPiChi3 Ama, d15, d20, d25, d30 + cmp r0, #0xFFFFFFFF + KeccakThetaRhoPiChi4 Asa, d16, d21, d26, d31 + + bne .LroundLoop + sub ip, #(8*24) + bx lr +.p2align 2 +.ltorg +.size KeccakF_armv7a_neon_asm,.-KeccakF_armv7a_neon_asm; + + +@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state) callable from C +.p2align 3 +.global _gcry_keccak_permute_armv7_neon +.type _gcry_keccak_permute_armv7_neon,%function; +_gcry_keccak_permute_armv7_neon: + + push {ip, lr} + vpush {q4-q7} + sub sp,sp, #5*8 + + vldr.64 d0, [r0, #0*8] + vldr.64 d12, [r0, #1*8] + vldr.64 d17, [r0, #2*8] + vldr.64 d22, [r0, #3*8] + vldr.64 d27, [r0, #4*8] + + GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); + + vldr.64 d1, [r0, #5*8] + vldr.64 d13, [r0, #6*8] + vldr.64 d18, [r0, #7*8] + vldr.64 d23, [r0, #8*8] + vldr.64 d28, [r0, #9*8] + + vldr.64 d2, [r0, #10*8] + vldr.64 d14, [r0, #11*8] + vldr.64 d19, [r0, #12*8] + vldr.64 d24, [r0, #13*8] + vldr.64 d29, [r0, #14*8] + + vldr.64 d3, [r0, #15*8] + vldr.64 d15, [r0, #16*8] + vldr.64 d20, [r0, #17*8] + vldr.64 d25, [r0, #18*8] + vldr.64 d30, [r0, #19*8] + + vldr.64 d4, [r0, #20*8] + vldr.64 d16, [r0, #21*8] + vldr.64 d21, [r0, #22*8] + vldr.64 d26, [r0, #23*8] + vldr.64 d31, [r0, #24*8] + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + mov r1, r0 + vstr.64 d4, [sp, #Asa] + 
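+    @ complete d5 = Aba^Aga^Aka^Ama^Asa, the column parity fed to Theta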
veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + vpop.64 { d0- d4 } + + vstr.64 d0, [r1, #0*8] + vstr.64 d12, [r1, #1*8] + vstr.64 d17, [r1, #2*8] + vstr.64 d22, [r1, #3*8] + vstr.64 d27, [r1, #4*8] + + vstr.64 d1, [r1, #5*8] + vstr.64 d13, [r1, #6*8] + vstr.64 d18, [r1, #7*8] + vstr.64 d23, [r1, #8*8] + vstr.64 d28, [r1, #9*8] + + vstr.64 d2, [r1, #10*8] + vstr.64 d14, [r1, #11*8] + vstr.64 d19, [r1, #12*8] + vstr.64 d24, [r1, #13*8] + vstr.64 d29, [r1, #14*8] + + vstr.64 d3, [r1, #15*8] + vstr.64 d15, [r1, #16*8] + vstr.64 d20, [r1, #17*8] + vstr.64 d25, [r1, #18*8] + vstr.64 d30, [r1, #19*8] + + vstr.64 d4, [r1, #20*8] + vstr.64 d16, [r1, #21*8] + vstr.64 d21, [r1, #22*8] + vstr.64 d26, [r1, #23*8] + vstr.64 d31, [r1, #24*8] + + mov r0, #112 + vpop {q4-q7} + pop {ip, pc} +.p2align 2 +.ltorg +.size _gcry_keccak_permute_armv7_neon,.-_gcry_keccak_permute_armv7_neon; + +@//unsigned _gcry_keccak_permute_armv7_neon(u64 *state, @r4 +@ int pos, @r1 +@ const byte *lanes, @r2 +@ unsigned int nlanes, @r3 +@ int blocklanes) @ r5 callable from C +.p2align 3 +.global _gcry_keccak_absorb_lanes64_armv7_neon +.type _gcry_keccak_absorb_lanes64_armv7_neon,%function; +_gcry_keccak_absorb_lanes64_armv7_neon: + + cmp r3, #0 @ nlanes == 0 + itt eq + moveq r0, #0 + bxeq lr + + push {r4-r5, ip, lr} + beq .Lout + mov r4, r0 + ldr r5, [sp, #(4*4)] + vpush {q4-q7} + + @ load state + vldr.64 d0, [r4, #0*8] + vldr.64 d12, [r4, #1*8] + vldr.64 d17, [r4, #2*8] + vldr.64 d22, [r4, #3*8] + vldr.64 d27, [r4, #4*8] + + GET_DATA_POINTER(ip, _gcry_keccak_round_consts_64bit, lr); + + vldr.64 d1, [r4, #5*8] + vldr.64 d13, [r4, #6*8] + vldr.64 d18, [r4, #7*8] + vldr.64 d23, [r4, #8*8] + vldr.64 d28, [r4, #9*8] + + vldr.64 d2, [r4, #10*8] + vldr.64 d14, [r4, #11*8] + vldr.64 d19, [r4, #12*8] + vldr.64 d24, [r4, #13*8] + vldr.64 d29, [r4, #14*8] + + vldr.64 d3, [r4, #15*8] + vldr.64 d15, [r4, #16*8] + vldr.64 d20, [r4, #17*8] + vldr.64 d25, [r4, #18*8] + vldr.64 d30, [r4, #19*8] + + vldr.64 d4, [r4, #20*8] + vldr.64 d16, [r4, #21*8] + vldr.64 d21, [r4, #22*8] + vldr.64 d26, [r4, #23*8] + vldr.64 d31, [r4, #24*8] + +.Lmain_loop: + + @ detect absorb mode (full blocks vs lanes) + + cmp r1, #0 @ pos != 0 + bne .Llanes_loop + +.Lmain_loop_pos0: + + @ full blocks mode + + @ switch (blocksize) + cmp r5, #21 + beq .Lfull_block_21 + cmp r5, #18 + beq .Lfull_block_18 + cmp r5, #17 + beq .Lfull_block_17 + cmp r5, #13 + beq .Lfull_block_13 + cmp r5, #9 + beq .Lfull_block_9 + + @ unknown blocksize + b .Llanes_loop + +.Lfull_block_21: + + @ SHAKE128 + + cmp r3, #21 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d8}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + vld1.64 {d9-d11}, [r2]! + veor d29, d5 + + veor d3, d6 + veor d15, d7 + veor d20, d8 + veor d25, d9 + veor d30, d10 + + veor d4, d11 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #21 @ nlanes -= 21 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_21 + +.Lfull_block_18: + + @ SHA3-224 + + cmp r3, #18 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! 
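+	@ XOR all 18 message lanes of the 1152-bit SHA3-224 rate into
+	@ the state, three or four lanes per vld1.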
+ veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d8}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + veor d29, d5 + + veor d3, d6 + veor d15, d7 + veor d20, d8 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #18 @ nlanes -= 18 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_18 + +.Lfull_block_17: + + @ SHA3-256 & SHAKE256 + + cmp r3, #17 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d11}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + vld1.64 {d5-d7}, [r2]! + veor d14, d9 + veor d19, d10 + veor d24, d11 + veor d29, d5 + + veor d3, d6 + veor d15, d7 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #17 @ nlanes -= 17 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_17 + +.Lfull_block_13: + + @ SHA3-384 + + cmp r3, #13 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d8}, [r2]! + veor d27, d9 + + veor d1, d10 + veor d13, d11 + vld1.64 {d9-d10}, [r2]! + veor d18, d5 + veor d23, d6 + veor d28, d7 + + veor d2, d8 + veor d14, d9 + veor d19, d10 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #13 @ nlanes -= 13 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_13 + +.Lfull_block_9: + + @ SHA3-512 + + cmp r3, #9 @ nlanes < blocklanes + blo .Llanes_loop + + sub sp,sp, #5*8 + + vld1.64 {d5-d8}, [r2]! + veor d0, d5 + vld1.64 {d9-d11}, [r2]! + veor d12, d6 + veor d17, d7 + veor d22, d8 + vld1.64 {d5-d6}, [r2]! 
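+	@ The three loads above cover 4+3+2 = 9 lanes, matching the
+	@ 576-bit SHA3-512 rate.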
+ veor d27, d9 + + veor d1, d10 + veor d13, d11 + veor d18, d5 + veor d23, d6 + + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + subs r3, #9 @ nlanes -= 9 + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lfull_block_9 + +.Llanes_loop: + + @ per-lane mode + + @ switch (pos) + ldrb r0, [pc, r1] + add pc, pc, r0, lsl #2 +.Lswitch_table: + .byte (.Llane0-.Lswitch_table-4)/4 + .byte (.Llane1-.Lswitch_table-4)/4 + .byte (.Llane2-.Lswitch_table-4)/4 + .byte (.Llane3-.Lswitch_table-4)/4 + .byte (.Llane4-.Lswitch_table-4)/4 + .byte (.Llane5-.Lswitch_table-4)/4 + .byte (.Llane6-.Lswitch_table-4)/4 + .byte (.Llane7-.Lswitch_table-4)/4 + .byte (.Llane8-.Lswitch_table-4)/4 + .byte (.Llane9-.Lswitch_table-4)/4 + .byte (.Llane10-.Lswitch_table-4)/4 + .byte (.Llane11-.Lswitch_table-4)/4 + .byte (.Llane12-.Lswitch_table-4)/4 + .byte (.Llane13-.Lswitch_table-4)/4 + .byte (.Llane14-.Lswitch_table-4)/4 + .byte (.Llane15-.Lswitch_table-4)/4 + .byte (.Llane16-.Lswitch_table-4)/4 + .byte (.Llane17-.Lswitch_table-4)/4 + .byte (.Llane18-.Lswitch_table-4)/4 + .byte (.Llane19-.Lswitch_table-4)/4 + .byte (.Llane20-.Lswitch_table-4)/4 + .byte (.Llane21-.Lswitch_table-4)/4 + .byte (.Llane22-.Lswitch_table-4)/4 + .byte (.Llane23-.Lswitch_table-4)/4 + .byte (.Llane24-.Lswitch_table-4)/4 +.p2align 2 + +#define ABSORB_LANE(label, vreg) \ + label: \ + add r1, #1; \ + vld1.64 d5, [r2]!; \ + cmp r1, r5; /* pos == blocklanes */ \ + veor vreg, vreg, d5; \ + beq .Llanes_permute; \ + subs r3, #1; \ + beq .Ldone; + + ABSORB_LANE(.Llane0, d0) + ABSORB_LANE(.Llane1, d12) + ABSORB_LANE(.Llane2, d17) + ABSORB_LANE(.Llane3, d22) + ABSORB_LANE(.Llane4, d27) + + ABSORB_LANE(.Llane5, d1) + ABSORB_LANE(.Llane6, d13) + ABSORB_LANE(.Llane7, d18) + ABSORB_LANE(.Llane8, d23) + ABSORB_LANE(.Llane9, d28) + + ABSORB_LANE(.Llane10, d2) + ABSORB_LANE(.Llane11, d14) + ABSORB_LANE(.Llane12, d19) + ABSORB_LANE(.Llane13, d24) + ABSORB_LANE(.Llane14, d29) + + ABSORB_LANE(.Llane15, d3) + ABSORB_LANE(.Llane16, d15) + ABSORB_LANE(.Llane17, d20) + ABSORB_LANE(.Llane18, d25) + ABSORB_LANE(.Llane19, d30) + + ABSORB_LANE(.Llane20, d4) + ABSORB_LANE(.Llane21, d16) + ABSORB_LANE(.Llane22, d21) + ABSORB_LANE(.Llane23, d26) + ABSORB_LANE(.Llane24, d31) + + b .Llanes_loop + +.Llanes_permute: + + sub sp,sp, #5*8 + vstr.64 d0, [sp, #Aba] + vstr.64 d1, [sp, #Aga] + veor.64 q0, q0, q1 + vstr.64 d2, [sp, #Aka] + veor.64 d5, d0, d1 + vstr.64 d3, [sp, #Ama] + vstr.64 d4, [sp, #Asa] + veor.64 d5, d5, d4 + + bl KeccakF_armv7a_neon_asm + + mov r1, #0 @ pos <= 0 + subs r3, #1 + + vpop.64 { d0-d4 } + + beq .Ldone + + b .Lmain_loop_pos0 + +.Ldone: + + @ save state + vstr.64 d0, [r4, #0*8] + vstr.64 d12, [r4, #1*8] + vstr.64 d17, [r4, #2*8] + vstr.64 d22, [r4, #3*8] + vstr.64 d27, [r4, #4*8] + + vstr.64 d1, [r4, #5*8] + vstr.64 d13, [r4, #6*8] + vstr.64 d18, [r4, #7*8] + vstr.64 d23, [r4, #8*8] + vstr.64 d28, [r4, #9*8] + + vstr.64 d2, [r4, #10*8] + vstr.64 d14, [r4, #11*8] + vstr.64 d19, [r4, #12*8] + vstr.64 d24, [r4, #13*8] + vstr.64 d29, [r4, #14*8] + + vstr.64 d3, [r4, #15*8] + vstr.64 d15, [r4, #16*8] + vstr.64 d20, [r4, #17*8] + vstr.64 d25, [r4, #18*8] + vstr.64 d30, [r4, #19*8] + + vstr.64 d4, [r4, #20*8] + vstr.64 d16, [r4, #21*8] + vstr.64 d21, [r4, #22*8] + vstr.64 d26, [r4, #23*8] + vstr.64 d31, [r4, #24*8] + + mov r0, #120 + vpop {q4-q7} +.Lout: + pop {r4-r5, ip, pc} +.p2align 2 +.ltorg +.size 
_gcry_keccak_absorb_lanes64_armv7_neon,.-_gcry_keccak_absorb_lanes64_armv7_neon;
+
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/keccak.c b/comm/third_party/libgcrypt/cipher/keccak.c
new file mode 100644
index 0000000000..795a02e5b9
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/keccak.c
@@ -0,0 +1,1577 @@
+/* keccak.c - SHA3 hash functions
+ * Copyright (C) 2015 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <config.h>
+#include <string.h>
+#include "g10lib.h"
+#include "bithelp.h"
+#include "bufhelp.h"
+#include "cipher.h"
+#include "hash-common.h"
+
+
+
+/* USE_64BIT indicates whether to use 64-bit generic implementation.
+ * USE_32BIT indicates whether to use 32-bit generic implementation. */
+#undef USE_64BIT
+#if defined(__x86_64__) || SIZEOF_UNSIGNED_LONG == 8
+# define USE_64BIT 1
+#else
+# define USE_32BIT 1
+#endif
+
+
+/* USE_64BIT_BMI2 indicates whether to compile with 64-bit Intel BMI2 code. */
+#undef USE_64BIT_BMI2
+#if defined(USE_64BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_CPU_ARCH_X86)
+# define USE_64BIT_BMI2 1
+#endif
+
+
+/* USE_64BIT_SHLD indicates whether to compile with 64-bit Intel SHLD code. */
+#undef USE_64BIT_SHLD
+#if defined(USE_64BIT) && defined (__GNUC__) && defined(__x86_64__) && \
+    defined(HAVE_CPU_ARCH_X86)
+# define USE_64BIT_SHLD 1
+#endif
+
+
+/* USE_32BIT_BMI2 indicates whether to compile with 32-bit Intel BMI2 code. */
+#undef USE_32BIT_BMI2
+#if defined(USE_32BIT) && defined(HAVE_GCC_INLINE_ASM_BMI2) && \
+    defined(HAVE_CPU_ARCH_X86)
+# define USE_32BIT_BMI2 1
+#endif
+
+
+/* USE_64BIT_ARM_NEON indicates whether to enable 64-bit ARM/NEON assembly
+ * code. */
+#undef USE_64BIT_ARM_NEON
+#ifdef ENABLE_NEON_SUPPORT
+# if defined(HAVE_ARM_ARCH_V6) && defined(__ARMEL__) \
+     && defined(HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS) \
+     && defined(HAVE_GCC_INLINE_ASM_NEON)
+# define USE_64BIT_ARM_NEON 1
+# endif
+#endif /*ENABLE_NEON_SUPPORT*/
+
+
+/* USE_S390X_CRYPTO indicates whether to enable zSeries code.
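+ * That code drives the CPACF kimd/klmd instructions via
+ * keccak_bwrite_s390x and friends below.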
*/ +#undef USE_S390X_CRYPTO +#if defined(HAVE_GCC_INLINE_ASM_S390X) +# define USE_S390X_CRYPTO 1 +#endif /* USE_S390X_CRYPTO */ + + +#if defined(USE_64BIT) || defined(USE_64BIT_ARM_NEON) +# define NEED_COMMON64 1 +#endif + +#ifdef USE_32BIT +# define NEED_COMMON32BI 1 +#endif + + +#define SHA3_DELIMITED_SUFFIX 0x06 +#define SHAKE_DELIMITED_SUFFIX 0x1F + + +typedef struct +{ + union { +#ifdef NEED_COMMON64 + u64 state64[25]; +#endif +#ifdef NEED_COMMON32BI + u32 state32bi[50]; +#endif + } u; +} KECCAK_STATE; + + +typedef struct +{ + unsigned int (*permute)(KECCAK_STATE *hd); + unsigned int (*absorb)(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes); + unsigned int (*extract) (KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen); +} keccak_ops_t; + + +typedef struct KECCAK_CONTEXT_S +{ + KECCAK_STATE state; + unsigned int outlen; + unsigned int blocksize; + unsigned int count; + unsigned int suffix; + const keccak_ops_t *ops; +#ifdef USE_S390X_CRYPTO + unsigned int kimd_func; + unsigned int buf_pos; + byte buf[1344 / 8]; /* SHAKE128 requires biggest buffer, 1344 bits. */ +#endif +} KECCAK_CONTEXT; + + + +#ifdef NEED_COMMON64 + +const u64 _gcry_keccak_round_consts_64bit[24 + 1] = +{ + U64_C(0x0000000000000001), U64_C(0x0000000000008082), + U64_C(0x800000000000808A), U64_C(0x8000000080008000), + U64_C(0x000000000000808B), U64_C(0x0000000080000001), + U64_C(0x8000000080008081), U64_C(0x8000000000008009), + U64_C(0x000000000000008A), U64_C(0x0000000000000088), + U64_C(0x0000000080008009), U64_C(0x000000008000000A), + U64_C(0x000000008000808B), U64_C(0x800000000000008B), + U64_C(0x8000000000008089), U64_C(0x8000000000008003), + U64_C(0x8000000000008002), U64_C(0x8000000000000080), + U64_C(0x000000000000800A), U64_C(0x800000008000000A), + U64_C(0x8000000080008081), U64_C(0x8000000000008080), + U64_C(0x0000000080000001), U64_C(0x8000000080008008), + U64_C(0xFFFFFFFFFFFFFFFF) +}; + +static unsigned int +keccak_extract64(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) +{ + unsigned int i; + + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). */ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) + { + u64 tmp = hd->u.state64[i]; + buf_put_le64(outbuf, tmp); + outbuf += 8; + } + + return 0; +} + +#endif /* NEED_COMMON64 */ + + +#ifdef NEED_COMMON32BI + +static const u32 round_consts_32bit[2 * 24] = +{ + 0x00000001UL, 0x00000000UL, 0x00000000UL, 0x00000089UL, + 0x00000000UL, 0x8000008bUL, 0x00000000UL, 0x80008080UL, + 0x00000001UL, 0x0000008bUL, 0x00000001UL, 0x00008000UL, + 0x00000001UL, 0x80008088UL, 0x00000001UL, 0x80000082UL, + 0x00000000UL, 0x0000000bUL, 0x00000000UL, 0x0000000aUL, + 0x00000001UL, 0x00008082UL, 0x00000000UL, 0x00008003UL, + 0x00000001UL, 0x0000808bUL, 0x00000001UL, 0x8000000bUL, + 0x00000001UL, 0x8000008aUL, 0x00000001UL, 0x80000081UL, + 0x00000000UL, 0x80000081UL, 0x00000000UL, 0x80000008UL, + 0x00000000UL, 0x00000083UL, 0x00000000UL, 0x80008003UL, + 0x00000001UL, 0x80008088UL, 0x00000000UL, 0x80000088UL, + 0x00000001UL, 0x00008000UL, 0x00000000UL, 0x80008082UL +}; + +static unsigned int +keccak_extract32bi(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) +{ + unsigned int i; + u32 x0; + u32 x1; + u32 t; + + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). 
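+ * The shift-and-mask steps below de-interleave each lane, inverting
+ * the even/odd bit split applied by keccak_absorb_lane32bi.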
*/ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) + { + x0 = hd->u.state32bi[i * 2 + 0]; + x1 = hd->u.state32bi[i * 2 + 1]; + + t = (x0 & 0x0000FFFFUL) + (x1 << 16); + x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL); + x0 = t; + t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); + t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); + t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); + t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); + t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); + t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); + t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); + t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); + + buf_put_le32(&outbuf[0], x0); + buf_put_le32(&outbuf[4], x1); + outbuf += 8; + } + + return 0; +} + +static inline void +keccak_absorb_lane32bi(u32 *lane, u32 x0, u32 x1) +{ + u32 t; + + t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); + t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); + t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); + t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); + t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); + t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); + t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); + t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); + lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16); + lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL); +} + +#endif /* NEED_COMMON32BI */ + + +/* Construct generic 64-bit implementation. */ +#ifdef USE_64BIT + +#if __GNUC__ >= 4 && defined(__x86_64__) + +static inline void absorb_lanes64_8(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "movdqu 1*16(%[dst]), %%xmm1\n\t" + "movdqu 1*16(%[in]), %%xmm5\n\t" + "movdqu 2*16(%[dst]), %%xmm2\n\t" + "movdqu 3*16(%[dst]), %%xmm3\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu 2*16(%[in]), %%xmm4\n\t" + "movdqu 3*16(%[in]), %%xmm5\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + "pxor %%xmm4, %%xmm2\n\t" + "movdqu %%xmm1, 1*16(%[dst])\n\t" + "pxor %%xmm5, %%xmm3\n\t" + "movdqu %%xmm2, 2*16(%[dst])\n\t" + "movdqu %%xmm3, 3*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "memory"); +} + +static inline void absorb_lanes64_4(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "movdqu 1*16(%[dst]), %%xmm1\n\t" + "movdqu 1*16(%[in]), %%xmm5\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "pxor %%xmm5, %%xmm1\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + "movdqu %%xmm1, 1*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm1", "xmm4", "xmm5", "memory"); +} + +static inline void absorb_lanes64_2(u64 *dst, const byte *in) +{ + asm ("movdqu 0*16(%[dst]), %%xmm0\n\t" + "movdqu 0*16(%[in]), %%xmm4\n\t" + "pxor %%xmm4, %%xmm0\n\t" + "movdqu %%xmm0, 0*16(%[dst])\n\t" + : + : [dst] "r" (dst), [in] "r" (in) + : "xmm0", "xmm4", "memory"); +} + +#else /* __x86_64__ */ + +static inline void absorb_lanes64_8(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); + dst[2] ^= buf_get_le64(in + 8 * 2); + dst[3] ^= buf_get_le64(in + 8 * 3); + dst[4] ^= buf_get_le64(in + 8 * 4); + dst[5] ^= buf_get_le64(in + 8 * 5); + dst[6] ^= buf_get_le64(in + 8 * 6); + dst[7] ^= buf_get_le64(in + 8 * 7); +} + +static inline void absorb_lanes64_4(u64 *dst, 
const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); + dst[2] ^= buf_get_le64(in + 8 * 2); + dst[3] ^= buf_get_le64(in + 8 * 3); +} + +static inline void absorb_lanes64_2(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); + dst[1] ^= buf_get_le64(in + 8 * 1); +} + +#endif /* !__x86_64__ */ + +static inline void absorb_lanes64_1(u64 *dst, const byte *in) +{ + dst[0] ^= buf_get_le64(in + 8 * 0); +} + + +# define ANDN64(x, y) (~(x) & (y)) +# define ROL64(x, n) (((x) << ((unsigned int)n & 63)) | \ + ((x) >> ((64 - (unsigned int)(n)) & 63))) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64 +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64 +# include "keccak_permute_64.h" + +# undef ANDN64 +# undef ROL64 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME +# undef KECCAK_F1600_ABSORB_FUNC_NAME + +static const keccak_ops_t keccak_generic64_ops = +{ + .permute = keccak_f1600_state_permute64, + .absorb = keccak_absorb_lanes64, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT */ + + +/* Construct 64-bit Intel SHLD implementation. */ +#ifdef USE_64BIT_SHLD + +# define ANDN64(x, y) (~(x) & (y)) +# define ROL64(x, n) ({ \ + u64 tmp = (x); \ + asm ("shldq %1, %0, %0" \ + : "+r" (tmp) \ + : "J" ((n) & 63) \ + : "cc"); \ + tmp; }) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_shld +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_shld +# include "keccak_permute_64.h" + +# undef ANDN64 +# undef ROL64 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME +# undef KECCAK_F1600_ABSORB_FUNC_NAME + +static const keccak_ops_t keccak_shld_64_ops = +{ + .permute = keccak_f1600_state_permute64_shld, + .absorb = keccak_absorb_lanes64_shld, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_SHLD */ + + +/* Construct 64-bit Intel BMI2 implementation. */ +#ifdef USE_64BIT_BMI2 + +# define ANDN64(x, y) ({ \ + u64 tmp; \ + asm ("andnq %2, %1, %0" \ + : "=r" (tmp) \ + : "r0" (x), "rm" (y)); \ + tmp; }) + +# define ROL64(x, n) ({ \ + u64 tmp; \ + asm ("rorxq %2, %1, %0" \ + : "=r" (tmp) \ + : "rm0" (x), "J" (64 - ((n) & 63))); \ + tmp; }) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute64_bmi2 +# define KECCAK_F1600_ABSORB_FUNC_NAME keccak_absorb_lanes64_bmi2 +# include "keccak_permute_64.h" + +# undef ANDN64 +# undef ROL64 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME +# undef KECCAK_F1600_ABSORB_FUNC_NAME + +static const keccak_ops_t keccak_bmi2_64_ops = +{ + .permute = keccak_f1600_state_permute64_bmi2, + .absorb = keccak_absorb_lanes64_bmi2, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_BMI2 */ + + +/* 64-bit ARMv7/NEON implementation. */ +#ifdef USE_64BIT_ARM_NEON + +unsigned int _gcry_keccak_permute_armv7_neon(u64 *state); +unsigned int _gcry_keccak_absorb_lanes64_armv7_neon(u64 *state, int pos, + const byte *lanes, + unsigned int nlanes, + int blocklanes); + +static unsigned int keccak_permute64_armv7_neon(KECCAK_STATE *hd) +{ + return _gcry_keccak_permute_armv7_neon(hd->u.state64); +} + +static unsigned int +keccak_absorb_lanes64_armv7_neon(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + if (blocklanes < 0) + { + /* blocklanes == -1, permutationless absorb from keccak_final. 
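+ * The lanes are only XORed into the state here; keccak_final runs
+ * the permutation itself when required.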
*/ + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + } + + return 0; + } + else + { + return _gcry_keccak_absorb_lanes64_armv7_neon(hd->u.state64, pos, lanes, + nlanes, blocklanes); + } +} + +static const keccak_ops_t keccak_armv7_neon_64_ops = +{ + .permute = keccak_permute64_armv7_neon, + .absorb = keccak_absorb_lanes64_armv7_neon, + .extract = keccak_extract64, +}; + +#endif /* USE_64BIT_ARM_NEON */ + + +/* Construct generic 32-bit implementation. */ +#ifdef USE_32BIT + +# define ANDN32(x, y) (~(x) & (y)) +# define ROL32(x, n) (((x) << ((unsigned int)n & 31)) | \ + ((x) >> ((32 - (unsigned int)(n)) & 31))) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi +# include "keccak_permute_32.h" + +# undef ANDN32 +# undef ROL32 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME + +static unsigned int +keccak_absorb_lanes32bi(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + unsigned int burn = 0; + + while (nlanes) + { + keccak_absorb_lane32bi(&hd->u.state32bi[pos * 2], + buf_get_le32(lanes + 0), + buf_get_le32(lanes + 4)); + lanes += 8; + nlanes--; + + if (++pos == blocklanes) + { + burn = keccak_f1600_state_permute32bi(hd); + pos = 0; + } + } + + return burn; +} + +static const keccak_ops_t keccak_generic32bi_ops = +{ + .permute = keccak_f1600_state_permute32bi, + .absorb = keccak_absorb_lanes32bi, + .extract = keccak_extract32bi, +}; + +#endif /* USE_32BIT */ + + +/* Construct 32-bit Intel BMI2 implementation. */ +#ifdef USE_32BIT_BMI2 + +# define ANDN32(x, y) ({ \ + u32 tmp; \ + asm ("andnl %2, %1, %0" \ + : "=r" (tmp) \ + : "r0" (x), "rm" (y)); \ + tmp; }) + +# define ROL32(x, n) ({ \ + u32 tmp; \ + asm ("rorxl %2, %1, %0" \ + : "=r" (tmp) \ + : "rm0" (x), "J" (32 - ((n) & 31))); \ + tmp; }) + +# define KECCAK_F1600_PERMUTE_FUNC_NAME keccak_f1600_state_permute32bi_bmi2 +# include "keccak_permute_32.h" + +# undef ANDN32 +# undef ROL32 +# undef KECCAK_F1600_PERMUTE_FUNC_NAME + +static inline u32 pext(u32 x, u32 mask) +{ + u32 tmp; + asm ("pextl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask)); + return tmp; +} + +static inline u32 pdep(u32 x, u32 mask) +{ + u32 tmp; + asm ("pdepl %2, %1, %0" : "=r" (tmp) : "r0" (x), "rm" (mask)); + return tmp; +} + +static inline void +keccak_absorb_lane32bi_bmi2(u32 *lane, u32 x0, u32 x1) +{ + x0 = pdep(pext(x0, 0x55555555), 0x0000ffff) | (pext(x0, 0xaaaaaaaa) << 16); + x1 = pdep(pext(x1, 0x55555555), 0x0000ffff) | (pext(x1, 0xaaaaaaaa) << 16); + + lane[0] ^= (x0 & 0x0000FFFFUL) + (x1 << 16); + lane[1] ^= (x0 >> 16) + (x1 & 0xFFFF0000UL); +} + +static unsigned int +keccak_absorb_lanes32bi_bmi2(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + unsigned int burn = 0; + + while (nlanes) + { + keccak_absorb_lane32bi_bmi2(&hd->u.state32bi[pos * 2], + buf_get_le32(lanes + 0), + buf_get_le32(lanes + 4)); + lanes += 8; + nlanes--; + + if (++pos == blocklanes) + { + burn = keccak_f1600_state_permute32bi_bmi2(hd); + pos = 0; + } + } + + return burn; +} + +static unsigned int +keccak_extract32bi_bmi2(KECCAK_STATE *hd, unsigned int pos, byte *outbuf, + unsigned int outlen) +{ + unsigned int i; + u32 x0; + u32 x1; + u32 t; + + /* NOTE: when pos == 0, hd and outbuf may point to same memory (SHA-3). 
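+ * The pext/pdep pair below performs the same de-interleave with BMI2
+ * bit gather/scatter instead of the shift-and-mask cascade used by
+ * keccak_extract32bi.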
*/ + + for (i = pos; i < pos + outlen / 8 + !!(outlen % 8); i++) + { + x0 = hd->u.state32bi[i * 2 + 0]; + x1 = hd->u.state32bi[i * 2 + 1]; + + t = (x0 & 0x0000FFFFUL) + (x1 << 16); + x1 = (x0 >> 16) + (x1 & 0xFFFF0000UL); + x0 = t; + + x0 = pdep(pext(x0, 0xffff0001), 0xaaaaaaab) | pdep(x0 >> 1, 0x55555554); + x1 = pdep(pext(x1, 0xffff0001), 0xaaaaaaab) | pdep(x1 >> 1, 0x55555554); + + buf_put_le32(&outbuf[0], x0); + buf_put_le32(&outbuf[4], x1); + outbuf += 8; + } + + return 0; +} + +static const keccak_ops_t keccak_bmi2_32bi_ops = +{ + .permute = keccak_f1600_state_permute32bi_bmi2, + .absorb = keccak_absorb_lanes32bi_bmi2, + .extract = keccak_extract32bi_bmi2, +}; + +#endif /* USE_32BIT_BMI2 */ + + +#ifdef USE_S390X_CRYPTO +#include "asm-inline-s390x.h" + +static inline void +keccak_bwrite_s390x (void *context, const byte *in, size_t inlen) +{ + KECCAK_CONTEXT *ctx = context; + + /* Write full-blocks. */ + kimd_execute (ctx->kimd_func, &ctx->state, in, inlen); + return; +} + +static inline void +keccak_final_s390x (void *context) +{ + KECCAK_CONTEXT *ctx = context; + + if (ctx->suffix == SHA3_DELIMITED_SUFFIX) + { + klmd_execute (ctx->kimd_func, &ctx->state, ctx->buf, ctx->count); + } + else + { + klmd_shake_execute (ctx->kimd_func, &ctx->state, NULL, 0, ctx->buf, + ctx->count); + ctx->count = 0; + ctx->buf_pos = 0; + } + + return; +} + +static inline void +keccak_bextract_s390x (void *context, byte *out, size_t outlen) +{ + KECCAK_CONTEXT *ctx = context; + + /* Extract full-blocks. */ + klmd_shake_execute (ctx->kimd_func | KLMD_PADDING_STATE, &ctx->state, + out, outlen, NULL, 0); + return; +} + +static void +keccak_write_s390x (void *context, const byte *inbuf, size_t inlen) +{ + KECCAK_CONTEXT *hd = context; + const size_t blocksize = hd->blocksize; + size_t inblocks; + size_t copylen; + + while (hd->count) + { + if (hd->count == blocksize) /* Flush the buffer. 
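+	   A full rate-sized block is buffered at this point.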
*/ + { + keccak_bwrite_s390x (hd, hd->buf, blocksize); + hd->count = 0; + } + else + { + copylen = inlen; + if (copylen > blocksize - hd->count) + copylen = blocksize - hd->count; + + if (copylen == 0) + break; + + buf_cpy (&hd->buf[hd->count], inbuf, copylen); + hd->count += copylen; + inbuf += copylen; + inlen -= copylen; + } + } + + if (inlen == 0) + return; + + if (inlen >= blocksize) + { + inblocks = inlen / blocksize; + keccak_bwrite_s390x (hd, inbuf, inblocks * blocksize); + hd->count = 0; + inlen -= inblocks * blocksize; + inbuf += inblocks * blocksize; + } + + if (inlen) + { + buf_cpy (hd->buf, inbuf, inlen); + hd->count = inlen; + } +} + +static void +keccak_extract_s390x (void *context, void *outbuf_arg, size_t outlen) +{ + KECCAK_CONTEXT *hd = context; + const size_t blocksize = hd->blocksize; + byte *outbuf = outbuf_arg; + + while (outlen) + { + gcry_assert(hd->count == 0 || hd->buf_pos < hd->count); + + if (hd->buf_pos < hd->count && outlen) + { + size_t copylen = hd->count - hd->buf_pos; + + if (copylen > outlen) + copylen = outlen; + + buf_cpy (outbuf, &hd->buf[hd->buf_pos], copylen); + + outbuf += copylen; + outlen -= copylen; + hd->buf_pos += copylen; + } + + if (hd->buf_pos == hd->count) + { + hd->buf_pos = 0; + hd->count = 0; + } + + if (outlen == 0) + return; + + if (outlen >= blocksize) + { + size_t outblocks = outlen / blocksize; + + keccak_bextract_s390x (context, outbuf, outblocks * blocksize); + + outlen -= outblocks * blocksize; + outbuf += outblocks * blocksize; + + if (outlen == 0) + return; + } + + keccak_bextract_s390x (context, hd->buf, blocksize); + hd->count = blocksize; + } +} +#endif /* USE_S390X_CRYPTO */ + + +static void +keccak_write (void *context, const void *inbuf_arg, size_t inlen) +{ + KECCAK_CONTEXT *ctx = context; + const size_t bsize = ctx->blocksize; + const size_t blocklanes = bsize / 8; + const byte *inbuf = inbuf_arg; + unsigned int nburn, burn = 0; + unsigned int count, i; + unsigned int pos, nlanes; + +#ifdef USE_S390X_CRYPTO + if (ctx->kimd_func) + { + keccak_write_s390x (context, inbuf, inlen); + return; + } +#endif + + count = ctx->count; + + if (inlen && (count % 8)) + { + byte lane[8] = { 0, }; + + /* Complete absorbing partial input lane. */ + + pos = count / 8; + + for (i = count % 8; inlen && i < 8; i++) + { + lane[i] = *inbuf++; + inlen--; + count++; + } + + if (count == bsize) + count = 0; + + nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, + (count % 8) ? -1 : blocklanes); + burn = nburn > burn ? nburn : burn; + } + + /* Absorb full input lanes. */ + + pos = count / 8; + nlanes = inlen / 8; + if (nlanes > 0) + { + nburn = ctx->ops->absorb(&ctx->state, pos, inbuf, nlanes, blocklanes); + burn = nburn > burn ? nburn : burn; + inlen -= nlanes * 8; + inbuf += nlanes * 8; + count += nlanes * 8; + count = count % bsize; + } + + if (inlen) + { + byte lane[8] = { 0, }; + + /* Absorb remaining partial input lane. */ + + pos = count / 8; + + for (i = count % 8; inlen && i < 8; i++) + { + lane[i] = *inbuf++; + inlen--; + count++; + } + + nburn = ctx->ops->absorb(&ctx->state, pos, lane, 1, -1); + burn = nburn > burn ? 
nburn : burn; + + gcry_assert(count < bsize); + } + + ctx->count = count; + + if (burn) + _gcry_burn_stack (burn); +} + + +static void +keccak_init (int algo, void *context, unsigned int flags) +{ + KECCAK_CONTEXT *ctx = context; + KECCAK_STATE *hd = &ctx->state; + unsigned int features = _gcry_get_hw_features (); + + (void)flags; + (void)features; + + memset (hd, 0, sizeof *hd); + + ctx->count = 0; + + /* Select generic implementation. */ +#ifdef USE_64BIT + ctx->ops = &keccak_generic64_ops; +#elif defined USE_32BIT + ctx->ops = &keccak_generic32bi_ops; +#endif + + /* Select optimized implementation based in hw features. */ + if (0) {} +#ifdef USE_64BIT_ARM_NEON + else if (features & HWF_ARM_NEON) + ctx->ops = &keccak_armv7_neon_64_ops; +#endif +#ifdef USE_64BIT_BMI2 + else if (features & HWF_INTEL_BMI2) + ctx->ops = &keccak_bmi2_64_ops; +#endif +#ifdef USE_32BIT_BMI2 + else if (features & HWF_INTEL_BMI2) + ctx->ops = &keccak_bmi2_32bi_ops; +#endif +#ifdef USE_64BIT_SHLD + else if (features & HWF_INTEL_FAST_SHLD) + ctx->ops = &keccak_shld_64_ops; +#endif + + /* Set input block size, in Keccak terms this is called 'rate'. */ + + switch (algo) + { + case GCRY_MD_SHA3_224: + ctx->suffix = SHA3_DELIMITED_SUFFIX; + ctx->blocksize = 1152 / 8; + ctx->outlen = 224 / 8; + break; + case GCRY_MD_SHA3_256: + ctx->suffix = SHA3_DELIMITED_SUFFIX; + ctx->blocksize = 1088 / 8; + ctx->outlen = 256 / 8; + break; + case GCRY_MD_SHA3_384: + ctx->suffix = SHA3_DELIMITED_SUFFIX; + ctx->blocksize = 832 / 8; + ctx->outlen = 384 / 8; + break; + case GCRY_MD_SHA3_512: + ctx->suffix = SHA3_DELIMITED_SUFFIX; + ctx->blocksize = 576 / 8; + ctx->outlen = 512 / 8; + break; + case GCRY_MD_SHAKE128: + ctx->suffix = SHAKE_DELIMITED_SUFFIX; + ctx->blocksize = 1344 / 8; + ctx->outlen = 0; + break; + case GCRY_MD_SHAKE256: + ctx->suffix = SHAKE_DELIMITED_SUFFIX; + ctx->blocksize = 1088 / 8; + ctx->outlen = 0; + break; + default: + BUG(); + } + +#ifdef USE_S390X_CRYPTO + ctx->kimd_func = 0; + if ((features & HWF_S390X_MSA) != 0) + { + unsigned int kimd_func = 0; + + switch (algo) + { + case GCRY_MD_SHA3_224: + kimd_func = KMID_FUNCTION_SHA3_224; + break; + case GCRY_MD_SHA3_256: + kimd_func = KMID_FUNCTION_SHA3_256; + break; + case GCRY_MD_SHA3_384: + kimd_func = KMID_FUNCTION_SHA3_384; + break; + case GCRY_MD_SHA3_512: + kimd_func = KMID_FUNCTION_SHA3_512; + break; + case GCRY_MD_SHAKE128: + kimd_func = KMID_FUNCTION_SHAKE128; + break; + case GCRY_MD_SHAKE256: + kimd_func = KMID_FUNCTION_SHAKE256; + break; + } + + if ((kimd_query () & km_function_to_mask (kimd_func)) && + (klmd_query () & km_function_to_mask (kimd_func))) + { + ctx->kimd_func = kimd_func; + } + } +#endif +} + +static void +sha3_224_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHA3_224, context, flags); +} + +static void +sha3_256_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHA3_256, context, flags); +} + +static void +sha3_384_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHA3_384, context, flags); +} + +static void +sha3_512_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHA3_512, context, flags); +} + +static void +shake128_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHAKE128, context, flags); +} + +static void +shake256_init (void *context, unsigned int flags) +{ + keccak_init (GCRY_MD_SHAKE256, context, flags); +} + +/* The routine final terminates the computation and + * returns the digest. 
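+ * For the SHA3 algorithms the digest is written over the state and
+ * fetched with keccak_read; for the SHAKE algorithms the output is
+ * squeezed out afterwards with keccak_extract.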
+ * The handle is prepared for a new cycle, but adding bytes to the
+ * handle will destroy the returned buffer.
+ */
+static void
+keccak_final (void *context)
+{
+  KECCAK_CONTEXT *ctx = context;
+  KECCAK_STATE *hd = &ctx->state;
+  const size_t bsize = ctx->blocksize;
+  const byte suffix = ctx->suffix;
+  unsigned int nburn, burn = 0;
+  unsigned int lastbytes;
+  byte lane[8];
+
+#ifdef USE_S390X_CRYPTO
+  if (ctx->kimd_func)
+    {
+      keccak_final_s390x (context);
+      return;
+    }
+#endif
+
+  lastbytes = ctx->count;
+
+  /* Do the padding and switch to the squeezing phase */
+
+  /* Absorb the last few bits and add the first bit of padding (which
+     coincides with the delimiter in delimited suffix) */
+  buf_put_le64(lane, (u64)suffix << ((lastbytes % 8) * 8));
+  nburn = ctx->ops->absorb(&ctx->state, lastbytes / 8, lane, 1, -1);
+  burn = nburn > burn ? nburn : burn;
+
+  /* Add the second bit of padding. */
+  buf_put_le64(lane, (u64)0x80 << (((bsize - 1) % 8) * 8));
+  nburn = ctx->ops->absorb(&ctx->state, (bsize - 1) / 8, lane, 1, -1);
+  burn = nburn > burn ? nburn : burn;
+
+  if (suffix == SHA3_DELIMITED_SUFFIX)
+    {
+      /* Switch to the squeezing phase. */
+      nburn = ctx->ops->permute(hd);
+      burn = nburn > burn ? nburn : burn;
+
+      /* Squeeze out the SHA3 digest. */
+      nburn = ctx->ops->extract(hd, 0, (void *)hd, ctx->outlen);
+      burn = nburn > burn ? nburn : burn;
+    }
+  else
+    {
+      /* Output for SHAKE can now be read with md_extract(). */
+
+      ctx->count = 0;
+    }
+
+  wipememory(lane, sizeof(lane));
+  if (burn)
+    _gcry_burn_stack (burn);
+}
+
+
+static byte *
+keccak_read (void *context)
+{
+  KECCAK_CONTEXT *ctx = (KECCAK_CONTEXT *) context;
+  KECCAK_STATE *hd = &ctx->state;
+  return (byte *)&hd->u;
+}
+
+
+static void
+keccak_extract (void *context, void *out, size_t outlen)
+{
+  KECCAK_CONTEXT *ctx = context;
+  KECCAK_STATE *hd = &ctx->state;
+  const size_t bsize = ctx->blocksize;
+  unsigned int nburn, burn = 0;
+  byte *outbuf = out;
+  unsigned int nlanes;
+  unsigned int nleft;
+  unsigned int count;
+  unsigned int i;
+  byte lane[8];
+
+#ifdef USE_S390X_CRYPTO
+  if (ctx->kimd_func)
+    {
+      keccak_extract_s390x (context, out, outlen);
+      return;
+    }
+#endif
+
+  count = ctx->count;
+
+  while (count && outlen && (outlen < 8 || count % 8))
+    {
+      /* Extract partial lane. */
+      nburn = ctx->ops->extract(hd, count / 8, lane, 8);
+      burn = nburn > burn ? nburn : burn;
+
+      for (i = count % 8; outlen && i < 8; i++)
+	{
+	  *outbuf++ = lane[i];
+	  outlen--;
+	  count++;
+	}
+
+      gcry_assert(count <= bsize);
+
+      if (count == bsize)
+	count = 0;
+    }
+
+  if (outlen >= 8 && count)
+    {
+      /* Extract tail of partial block. */
+      nlanes = outlen / 8;
+      nleft = (bsize - count) / 8;
+      nlanes = nlanes < nleft ? nlanes : nleft;
+
+      nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8);
+      burn = nburn > burn ? nburn : burn;
+      outlen -= nlanes * 8;
+      outbuf += nlanes * 8;
+      count += nlanes * 8;
+
+      gcry_assert(count <= bsize);
+
+      if (count == bsize)
+	count = 0;
+    }
+
+  while (outlen >= bsize)
+    {
+      gcry_assert(count == 0);
+
+      /* Squeeze more. */
+      nburn = ctx->ops->permute(hd);
+      burn = nburn > burn ? nburn : burn;
+
+      /* Extract full block. */
+      nburn = ctx->ops->extract(hd, 0, outbuf, bsize);
+      burn = nburn > burn ? nburn : burn;
+
+      outlen -= bsize;
+      outbuf += bsize;
+    }
+
+  if (outlen)
+    {
+      gcry_assert(outlen < bsize);
+
+      if (count == 0)
+	{
+	  /* Squeeze more.
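+	     Running the permutation refreshes the rate part of the
+	     state with unread output.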
*/ + nburn = ctx->ops->permute(hd); + burn = nburn > burn ? nburn : burn; + } + + if (outlen >= 8) + { + /* Extract head of partial block. */ + nlanes = outlen / 8; + nburn = ctx->ops->extract(hd, count / 8, outbuf, nlanes * 8); + burn = nburn > burn ? nburn : burn; + outlen -= nlanes * 8; + outbuf += nlanes * 8; + count += nlanes * 8; + + gcry_assert(count < bsize); + } + + if (outlen) + { + /* Extract head of partial lane. */ + nburn = ctx->ops->extract(hd, count / 8, lane, 8); + burn = nburn > burn ? nburn : burn; + + for (i = count % 8; outlen && i < 8; i++) + { + *outbuf++ = lane[i]; + outlen--; + count++; + } + + gcry_assert(count < bsize); + } + } + + ctx->count = count; + + if (burn) + _gcry_burn_stack (burn); +} + + +/* Shortcut functions which puts the hash value of the supplied buffer + * into outbuf which must have a size of 'spec->mdlen' bytes. */ +static void +_gcry_sha3_hash_buffer (void *outbuf, const void *buffer, size_t length, + const gcry_md_spec_t *spec) +{ + KECCAK_CONTEXT hd; + + spec->init (&hd, 0); + keccak_write (&hd, buffer, length); + keccak_final (&hd); + memcpy (outbuf, keccak_read (&hd), spec->mdlen); +} + + +/* Variant of the above shortcut function using multiple buffers. */ +static void +_gcry_sha3_hash_buffers (void *outbuf, const gcry_buffer_t *iov, int iovcnt, + const gcry_md_spec_t *spec) +{ + KECCAK_CONTEXT hd; + + spec->init (&hd, 0); + for (;iovcnt > 0; iov++, iovcnt--) + keccak_write (&hd, (const char*)iov[0].data + iov[0].off, iov[0].len); + keccak_final (&hd); + memcpy (outbuf, keccak_read (&hd), spec->mdlen); +} + + +static void +_gcry_sha3_224_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_224); +} + +static void +_gcry_sha3_256_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_256); +} + +static void +_gcry_sha3_384_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_384); +} + +static void +_gcry_sha3_512_hash_buffer (void *outbuf, const void *buffer, size_t length) +{ + _gcry_sha3_hash_buffer (outbuf, buffer, length, &_gcry_digest_spec_sha3_512); +} + +static void +_gcry_sha3_224_hash_buffers (void *outbuf, const gcry_buffer_t *iov, + int iovcnt) +{ + _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_224); +} + +static void +_gcry_sha3_256_hash_buffers (void *outbuf, const gcry_buffer_t *iov, + int iovcnt) +{ + _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_256); +} + +static void +_gcry_sha3_384_hash_buffers (void *outbuf, const gcry_buffer_t *iov, + int iovcnt) +{ + _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_384); +} + +static void +_gcry_sha3_512_hash_buffers (void *outbuf, const gcry_buffer_t *iov, + int iovcnt) +{ + _gcry_sha3_hash_buffers (outbuf, iov, iovcnt, &_gcry_digest_spec_sha3_512); +} + + +/* + Self-test section. 
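+   The known-answer tests below cover "abc", a 112-byte message and
+   one million 'a' bytes.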
+ */ + + +static gpg_err_code_t +selftests_keccak (int algo, int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + const char *short_hash; + const char *long_hash; + const char *one_million_a_hash; + int hash_len; + + switch (algo) + { + default: + BUG(); + + case GCRY_MD_SHA3_224: + short_hash = + "\xe6\x42\x82\x4c\x3f\x8c\xf2\x4a\xd0\x92\x34\xee\x7d\x3c\x76\x6f" + "\xc9\xa3\xa5\x16\x8d\x0c\x94\xad\x73\xb4\x6f\xdf"; + long_hash = + "\x54\x3e\x68\x68\xe1\x66\x6c\x1a\x64\x36\x30\xdf\x77\x36\x7a\xe5" + "\xa6\x2a\x85\x07\x0a\x51\xc1\x4c\xbf\x66\x5c\xbc"; + one_million_a_hash = + "\xd6\x93\x35\xb9\x33\x25\x19\x2e\x51\x6a\x91\x2e\x6d\x19\xa1\x5c" + "\xb5\x1c\x6e\xd5\xc1\x52\x43\xe7\xa7\xfd\x65\x3c"; + hash_len = 28; + break; + + case GCRY_MD_SHA3_256: + short_hash = + "\x3a\x98\x5d\xa7\x4f\xe2\x25\xb2\x04\x5c\x17\x2d\x6b\xd3\x90\xbd" + "\x85\x5f\x08\x6e\x3e\x9d\x52\x5b\x46\xbf\xe2\x45\x11\x43\x15\x32"; + long_hash = + "\x91\x6f\x60\x61\xfe\x87\x97\x41\xca\x64\x69\xb4\x39\x71\xdf\xdb" + "\x28\xb1\xa3\x2d\xc3\x6c\xb3\x25\x4e\x81\x2b\xe2\x7a\xad\x1d\x18"; + one_million_a_hash = + "\x5c\x88\x75\xae\x47\x4a\x36\x34\xba\x4f\xd5\x5e\xc8\x5b\xff\xd6" + "\x61\xf3\x2a\xca\x75\xc6\xd6\x99\xd0\xcd\xcb\x6c\x11\x58\x91\xc1"; + hash_len = 32; + break; + + case GCRY_MD_SHA3_384: + short_hash = + "\xec\x01\x49\x82\x88\x51\x6f\xc9\x26\x45\x9f\x58\xe2\xc6\xad\x8d" + "\xf9\xb4\x73\xcb\x0f\xc0\x8c\x25\x96\xda\x7c\xf0\xe4\x9b\xe4\xb2" + "\x98\xd8\x8c\xea\x92\x7a\xc7\xf5\x39\xf1\xed\xf2\x28\x37\x6d\x25"; + long_hash = + "\x79\x40\x7d\x3b\x59\x16\xb5\x9c\x3e\x30\xb0\x98\x22\x97\x47\x91" + "\xc3\x13\xfb\x9e\xcc\x84\x9e\x40\x6f\x23\x59\x2d\x04\xf6\x25\xdc" + "\x8c\x70\x9b\x98\xb4\x3b\x38\x52\xb3\x37\x21\x61\x79\xaa\x7f\xc7"; + one_million_a_hash = + "\xee\xe9\xe2\x4d\x78\xc1\x85\x53\x37\x98\x34\x51\xdf\x97\xc8\xad" + "\x9e\xed\xf2\x56\xc6\x33\x4f\x8e\x94\x8d\x25\x2d\x5e\x0e\x76\x84" + "\x7a\xa0\x77\x4d\xdb\x90\xa8\x42\x19\x0d\x2c\x55\x8b\x4b\x83\x40"; + hash_len = 48; + break; + + case GCRY_MD_SHA3_512: + short_hash = + "\xb7\x51\x85\x0b\x1a\x57\x16\x8a\x56\x93\xcd\x92\x4b\x6b\x09\x6e" + "\x08\xf6\x21\x82\x74\x44\xf7\x0d\x88\x4f\x5d\x02\x40\xd2\x71\x2e" + "\x10\xe1\x16\xe9\x19\x2a\xf3\xc9\x1a\x7e\xc5\x76\x47\xe3\x93\x40" + "\x57\x34\x0b\x4c\xf4\x08\xd5\xa5\x65\x92\xf8\x27\x4e\xec\x53\xf0"; + long_hash = + "\xaf\xeb\xb2\xef\x54\x2e\x65\x79\xc5\x0c\xad\x06\xd2\xe5\x78\xf9" + "\xf8\xdd\x68\x81\xd7\xdc\x82\x4d\x26\x36\x0f\xee\xbf\x18\xa4\xfa" + "\x73\xe3\x26\x11\x22\x94\x8e\xfc\xfd\x49\x2e\x74\xe8\x2e\x21\x89" + "\xed\x0f\xb4\x40\xd1\x87\xf3\x82\x27\x0c\xb4\x55\xf2\x1d\xd1\x85"; + one_million_a_hash = + "\x3c\x3a\x87\x6d\xa1\x40\x34\xab\x60\x62\x7c\x07\x7b\xb9\x8f\x7e" + "\x12\x0a\x2a\x53\x70\x21\x2d\xff\xb3\x38\x5a\x18\xd4\xf3\x88\x59" + "\xed\x31\x1d\x0a\x9d\x51\x41\xce\x9c\xc5\xc6\x6e\xe6\x89\xb2\x66" + "\xa8\xaa\x18\xac\xe8\x28\x2a\x0e\x0d\xb5\x96\xc9\x0b\x0a\x7b\x87"; + hash_len = 64; + break; + + case GCRY_MD_SHAKE128: + short_hash = + "\x58\x81\x09\x2d\xd8\x18\xbf\x5c\xf8\xa3\xdd\xb7\x93\xfb\xcb\xa7" + "\x40\x97\xd5\xc5\x26\xa6\xd3\x5f\x97\xb8\x33\x51\x94\x0f\x2c\xc8"; + long_hash = + "\x7b\x6d\xf6\xff\x18\x11\x73\xb6\xd7\x89\x8d\x7f\xf6\x3f\xb0\x7b" + "\x7c\x23\x7d\xaf\x47\x1a\x5a\xe5\x60\x2a\xdb\xcc\xef\x9c\xcf\x4b"; + one_million_a_hash = + "\x9d\x22\x2c\x79\xc4\xff\x9d\x09\x2c\xf6\xca\x86\x14\x3a\xa4\x11" + "\xe3\x69\x97\x38\x08\xef\x97\x09\x32\x55\x82\x6c\x55\x72\xef\x58"; + hash_len = 32; + break; + + case GCRY_MD_SHAKE256: + short_hash = + 
"\x48\x33\x66\x60\x13\x60\xa8\x77\x1c\x68\x63\x08\x0c\xc4\x11\x4d" + "\x8d\xb4\x45\x30\xf8\xf1\xe1\xee\x4f\x94\xea\x37\xe7\x8b\x57\x39"; + long_hash = + "\x98\xbe\x04\x51\x6c\x04\xcc\x73\x59\x3f\xef\x3e\xd0\x35\x2e\xa9" + "\xf6\x44\x39\x42\xd6\x95\x0e\x29\xa3\x72\xa6\x81\xc3\xde\xaf\x45"; + one_million_a_hash = + "\x35\x78\xa7\xa4\xca\x91\x37\x56\x9c\xdf\x76\xed\x61\x7d\x31\xbb" + "\x99\x4f\xca\x9c\x1b\xbf\x8b\x18\x40\x13\xde\x82\x34\xdf\xd1\x3a"; + hash_len = 32; + break; + } + + what = "short string"; + errtxt = _gcry_hash_selftest_check_one (algo, 0, "abc", 3, short_hash, + hash_len); + if (errtxt) + goto failed; + + if (extended) + { + what = "long string"; + errtxt = _gcry_hash_selftest_check_one + (algo, 0, + "abcdefghbcdefghicdefghijdefghijkefghijklfghijklmghijklmn" + "hijklmnoijklmnopjklmnopqklmnopqrlmnopqrsmnopqrstnopqrstu", 112, + long_hash, hash_len); + if (errtxt) + goto failed; + + what = "one million \"a\""; + errtxt = _gcry_hash_selftest_check_one (algo, 1, NULL, 0, + one_million_a_hash, hash_len); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. */ + +failed: + if (report) + report ("digest", algo, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +/* Run a full self-test for ALGO and return 0 on success. */ +static gpg_err_code_t +run_selftests (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_MD_SHA3_224: + case GCRY_MD_SHA3_256: + case GCRY_MD_SHA3_384: + case GCRY_MD_SHA3_512: + case GCRY_MD_SHAKE128: + case GCRY_MD_SHAKE256: + ec = selftests_keccak (algo, extended, report); + break; + default: + ec = GPG_ERR_DIGEST_ALGO; + break; + } + + return ec; +} + + + + +static byte sha3_224_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_sha3_224[] = + { + { "2.16.840.1.101.3.4.2.7" }, + /* PKCS#1 sha3_224WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte sha3_256_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_sha3_256[] = + { + { "2.16.840.1.101.3.4.2.8" }, + /* PKCS#1 sha3_256WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte sha3_384_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_sha3_384[] = + { + { "2.16.840.1.101.3.4.2.9" }, + /* PKCS#1 sha3_384WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte sha3_512_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_sha3_512[] = + { + { "2.16.840.1.101.3.4.2.10" }, + /* PKCS#1 sha3_512WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte shake128_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_shake128[] = + { + { "2.16.840.1.101.3.4.2.11" }, + /* PKCS#1 shake128WithRSAEncryption */ + { "?" }, + { NULL } + }; +static byte shake256_asn[] = { 0x30 }; +static gcry_md_oid_spec_t oid_spec_shake256[] = + { + { "2.16.840.1.101.3.4.2.12" }, + /* PKCS#1 shake256WithRSAEncryption */ + { "?" 
},
+    { NULL }
+  };
+
+gcry_md_spec_t _gcry_digest_spec_sha3_224 =
+  {
+    GCRY_MD_SHA3_224, {0, 1},
+    "SHA3-224", sha3_224_asn, DIM (sha3_224_asn), oid_spec_sha3_224, 28,
+    sha3_224_init, keccak_write, keccak_final, keccak_read, NULL,
+    _gcry_sha3_224_hash_buffer, _gcry_sha3_224_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+gcry_md_spec_t _gcry_digest_spec_sha3_256 =
+  {
+    GCRY_MD_SHA3_256, {0, 1},
+    "SHA3-256", sha3_256_asn, DIM (sha3_256_asn), oid_spec_sha3_256, 32,
+    sha3_256_init, keccak_write, keccak_final, keccak_read, NULL,
+    _gcry_sha3_256_hash_buffer, _gcry_sha3_256_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+gcry_md_spec_t _gcry_digest_spec_sha3_384 =
+  {
+    GCRY_MD_SHA3_384, {0, 1},
+    "SHA3-384", sha3_384_asn, DIM (sha3_384_asn), oid_spec_sha3_384, 48,
+    sha3_384_init, keccak_write, keccak_final, keccak_read, NULL,
+    _gcry_sha3_384_hash_buffer, _gcry_sha3_384_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+gcry_md_spec_t _gcry_digest_spec_sha3_512 =
+  {
+    GCRY_MD_SHA3_512, {0, 1},
+    "SHA3-512", sha3_512_asn, DIM (sha3_512_asn), oid_spec_sha3_512, 64,
+    sha3_512_init, keccak_write, keccak_final, keccak_read, NULL,
+    _gcry_sha3_512_hash_buffer, _gcry_sha3_512_hash_buffers,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+gcry_md_spec_t _gcry_digest_spec_shake128 =
+  {
+    GCRY_MD_SHAKE128, {0, 1},
+    "SHAKE128", shake128_asn, DIM (shake128_asn), oid_spec_shake128, 0,
+    shake128_init, keccak_write, keccak_final, NULL, keccak_extract,
+    NULL, NULL,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
+gcry_md_spec_t _gcry_digest_spec_shake256 =
+  {
+    GCRY_MD_SHAKE256, {0, 1},
+    "SHAKE256", shake256_asn, DIM (shake256_asn), oid_spec_shake256, 0,
+    shake256_init, keccak_write, keccak_final, NULL, keccak_extract,
+    NULL, NULL,
+    sizeof (KECCAK_CONTEXT),
+    run_selftests
+  };
diff --git a/comm/third_party/libgcrypt/cipher/keccak_permute_32.h b/comm/third_party/libgcrypt/cipher/keccak_permute_32.h
new file mode 100644
index 0000000000..1ce42a42fc
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/keccak_permute_32.h
@@ -0,0 +1,536 @@
+/* keccak_permute_32.h - Keccak permute function (simple 32bit bit-interleaved)
+ * Copyright (C) 2015 Jussi Kivilinna
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* The code is based on public-domain/CC0 "keccakc1024/simple32bi/
+ * Keccak-simple32BI.c" implementation by Ronny Van Keer from SUPERCOP toolkit
+ * package.
+ */
+
+/* Function that computes the Keccak-f[1600] permutation on the given state.
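+ * The state is held bit-interleaved: the even-numbered bits of each
+ * 64-bit lane sit in one u32 and the odd-numbered bits in the next,
+ * so every 64-bit rotation becomes two 32-bit rotations.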
*/ +static unsigned int +KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) +{ + const u32 *round_consts = round_consts_32bit; + const u32 *round_consts_end = round_consts_32bit + 2 * 24; + u32 Aba0, Abe0, Abi0, Abo0, Abu0; + u32 Aba1, Abe1, Abi1, Abo1, Abu1; + u32 Aga0, Age0, Agi0, Ago0, Agu0; + u32 Aga1, Age1, Agi1, Ago1, Agu1; + u32 Aka0, Ake0, Aki0, Ako0, Aku0; + u32 Aka1, Ake1, Aki1, Ako1, Aku1; + u32 Ama0, Ame0, Ami0, Amo0, Amu0; + u32 Ama1, Ame1, Ami1, Amo1, Amu1; + u32 Asa0, Ase0, Asi0, Aso0, Asu0; + u32 Asa1, Ase1, Asi1, Aso1, Asu1; + u32 BCa0, BCe0, BCi0, BCo0, BCu0; + u32 BCa1, BCe1, BCi1, BCo1, BCu1; + u32 Da0, De0, Di0, Do0, Du0; + u32 Da1, De1, Di1, Do1, Du1; + u32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; + u32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; + u32 Ega0, Ege0, Egi0, Ego0, Egu0; + u32 Ega1, Ege1, Egi1, Ego1, Egu1; + u32 Eka0, Eke0, Eki0, Eko0, Eku0; + u32 Eka1, Eke1, Eki1, Eko1, Eku1; + u32 Ema0, Eme0, Emi0, Emo0, Emu0; + u32 Ema1, Eme1, Emi1, Emo1, Emu1; + u32 Esa0, Ese0, Esi0, Eso0, Esu0; + u32 Esa1, Ese1, Esi1, Eso1, Esu1; + u32 *state = hd->u.state32bi; + + Aba0 = state[0]; + Aba1 = state[1]; + Abe0 = state[2]; + Abe1 = state[3]; + Abi0 = state[4]; + Abi1 = state[5]; + Abo0 = state[6]; + Abo1 = state[7]; + Abu0 = state[8]; + Abu1 = state[9]; + Aga0 = state[10]; + Aga1 = state[11]; + Age0 = state[12]; + Age1 = state[13]; + Agi0 = state[14]; + Agi1 = state[15]; + Ago0 = state[16]; + Ago1 = state[17]; + Agu0 = state[18]; + Agu1 = state[19]; + Aka0 = state[20]; + Aka1 = state[21]; + Ake0 = state[22]; + Ake1 = state[23]; + Aki0 = state[24]; + Aki1 = state[25]; + Ako0 = state[26]; + Ako1 = state[27]; + Aku0 = state[28]; + Aku1 = state[29]; + Ama0 = state[30]; + Ama1 = state[31]; + Ame0 = state[32]; + Ame1 = state[33]; + Ami0 = state[34]; + Ami1 = state[35]; + Amo0 = state[36]; + Amo1 = state[37]; + Amu0 = state[38]; + Amu1 = state[39]; + Asa0 = state[40]; + Asa1 = state[41]; + Ase0 = state[42]; + Ase1 = state[43]; + Asi0 = state[44]; + Asi1 = state[45]; + Aso0 = state[46]; + Aso1 = state[47]; + Asu0 = state[48]; + Asu1 = state[49]; + + do + { + /* prepareTheta */ + BCa0 = Aba0 ^ Aga0 ^ Aka0 ^ Ama0 ^ Asa0; + BCa1 = Aba1 ^ Aga1 ^ Aka1 ^ Ama1 ^ Asa1; + BCe0 = Abe0 ^ Age0 ^ Ake0 ^ Ame0 ^ Ase0; + BCe1 = Abe1 ^ Age1 ^ Ake1 ^ Ame1 ^ Ase1; + BCi0 = Abi0 ^ Agi0 ^ Aki0 ^ Ami0 ^ Asi0; + BCi1 = Abi1 ^ Agi1 ^ Aki1 ^ Ami1 ^ Asi1; + BCo0 = Abo0 ^ Ago0 ^ Ako0 ^ Amo0 ^ Aso0; + BCo1 = Abo1 ^ Ago1 ^ Ako1 ^ Amo1 ^ Aso1; + BCu0 = Abu0 ^ Agu0 ^ Aku0 ^ Amu0 ^ Asu0; + BCu1 = Abu1 ^ Agu1 ^ Aku1 ^ Amu1 ^ Asu1; + + /* thetaRhoPiChiIota(round , A, E) */ + Da0 = BCu0 ^ ROL32(BCe1, 1); + Da1 = BCu1 ^ BCe0; + De0 = BCa0 ^ ROL32(BCi1, 1); + De1 = BCa1 ^ BCi0; + Di0 = BCe0 ^ ROL32(BCo1, 1); + Di1 = BCe1 ^ BCo0; + Do0 = BCi0 ^ ROL32(BCu1, 1); + Do1 = BCi1 ^ BCu0; + Du0 = BCo0 ^ ROL32(BCa1, 1); + Du1 = BCo1 ^ BCa0; + + Aba0 ^= Da0; + BCa0 = Aba0; + Age0 ^= De0; + BCe0 = ROL32(Age0, 22); + Aki1 ^= Di1; + BCi0 = ROL32(Aki1, 22); + Amo1 ^= Do1; + BCo0 = ROL32(Amo1, 11); + Asu0 ^= Du0; + BCu0 = ROL32(Asu0, 7); + Eba0 = BCa0 ^ ANDN32(BCe0, BCi0); + Eba0 ^= *(round_consts++); + Ebe0 = BCe0 ^ ANDN32(BCi0, BCo0); + Ebi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Ebo0 = BCo0 ^ ANDN32(BCu0, BCa0); + Ebu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Aba1 ^= Da1; + BCa1 = Aba1; + Age1 ^= De1; + BCe1 = ROL32(Age1, 22); + Aki0 ^= Di0; + BCi1 = ROL32(Aki0, 21); + Amo0 ^= Do0; + BCo1 = ROL32(Amo0, 10); + Asu1 ^= Du1; + BCu1 = ROL32(Asu1, 7); + Eba1 = BCa1 ^ ANDN32(BCe1, BCi1); + Eba1 ^= *(round_consts++); + Ebe1 = BCe1 ^ ANDN32(BCi1, BCo1); + Ebi1 = BCi1 ^ 
ANDN32(BCo1, BCu1); + Ebo1 = BCo1 ^ ANDN32(BCu1, BCa1); + Ebu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Abo0 ^= Do0; + BCa0 = ROL32(Abo0, 14); + Agu0 ^= Du0; + BCe0 = ROL32(Agu0, 10); + Aka1 ^= Da1; + BCi0 = ROL32(Aka1, 2); + Ame1 ^= De1; + BCo0 = ROL32(Ame1, 23); + Asi1 ^= Di1; + BCu0 = ROL32(Asi1, 31); + Ega0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ege0 = BCe0 ^ ANDN32(BCi0, BCo0); + Egi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Ego0 = BCo0 ^ ANDN32(BCu0, BCa0); + Egu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Abo1 ^= Do1; + BCa1 = ROL32(Abo1, 14); + Agu1 ^= Du1; + BCe1 = ROL32(Agu1, 10); + Aka0 ^= Da0; + BCi1 = ROL32(Aka0, 1); + Ame0 ^= De0; + BCo1 = ROL32(Ame0, 22); + Asi0 ^= Di0; + BCu1 = ROL32(Asi0, 30); + Ega1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ege1 = BCe1 ^ ANDN32(BCi1, BCo1); + Egi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Ego1 = BCo1 ^ ANDN32(BCu1, BCa1); + Egu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Abe1 ^= De1; + BCa0 = ROL32(Abe1, 1); + Agi0 ^= Di0; + BCe0 = ROL32(Agi0, 3); + Ako1 ^= Do1; + BCi0 = ROL32(Ako1, 13); + Amu0 ^= Du0; + BCo0 = ROL32(Amu0, 4); + Asa0 ^= Da0; + BCu0 = ROL32(Asa0, 9); + Eka0 = BCa0 ^ ANDN32(BCe0, BCi0); + Eke0 = BCe0 ^ ANDN32(BCi0, BCo0); + Eki0 = BCi0 ^ ANDN32(BCo0, BCu0); + Eko0 = BCo0 ^ ANDN32(BCu0, BCa0); + Eku0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Abe0 ^= De0; + BCa1 = Abe0; + Agi1 ^= Di1; + BCe1 = ROL32(Agi1, 3); + Ako0 ^= Do0; + BCi1 = ROL32(Ako0, 12); + Amu1 ^= Du1; + BCo1 = ROL32(Amu1, 4); + Asa1 ^= Da1; + BCu1 = ROL32(Asa1, 9); + Eka1 = BCa1 ^ ANDN32(BCe1, BCi1); + Eke1 = BCe1 ^ ANDN32(BCi1, BCo1); + Eki1 = BCi1 ^ ANDN32(BCo1, BCu1); + Eko1 = BCo1 ^ ANDN32(BCu1, BCa1); + Eku1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Abu1 ^= Du1; + BCa0 = ROL32(Abu1, 14); + Aga0 ^= Da0; + BCe0 = ROL32(Aga0, 18); + Ake0 ^= De0; + BCi0 = ROL32(Ake0, 5); + Ami1 ^= Di1; + BCo0 = ROL32(Ami1, 8); + Aso0 ^= Do0; + BCu0 = ROL32(Aso0, 28); + Ema0 = BCa0 ^ ANDN32(BCe0, BCi0); + Eme0 = BCe0 ^ ANDN32(BCi0, BCo0); + Emi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Emo0 = BCo0 ^ ANDN32(BCu0, BCa0); + Emu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Abu0 ^= Du0; + BCa1 = ROL32(Abu0, 13); + Aga1 ^= Da1; + BCe1 = ROL32(Aga1, 18); + Ake1 ^= De1; + BCi1 = ROL32(Ake1, 5); + Ami0 ^= Di0; + BCo1 = ROL32(Ami0, 7); + Aso1 ^= Do1; + BCu1 = ROL32(Aso1, 28); + Ema1 = BCa1 ^ ANDN32(BCe1, BCi1); + Eme1 = BCe1 ^ ANDN32(BCi1, BCo1); + Emi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Emo1 = BCo1 ^ ANDN32(BCu1, BCa1); + Emu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Abi0 ^= Di0; + BCa0 = ROL32(Abi0, 31); + Ago1 ^= Do1; + BCe0 = ROL32(Ago1, 28); + Aku1 ^= Du1; + BCi0 = ROL32(Aku1, 20); + Ama1 ^= Da1; + BCo0 = ROL32(Ama1, 21); + Ase0 ^= De0; + BCu0 = ROL32(Ase0, 1); + Esa0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ese0 = BCe0 ^ ANDN32(BCi0, BCo0); + Esi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Eso0 = BCo0 ^ ANDN32(BCu0, BCa0); + Esu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Abi1 ^= Di1; + BCa1 = ROL32(Abi1, 31); + Ago0 ^= Do0; + BCe1 = ROL32(Ago0, 27); + Aku0 ^= Du0; + BCi1 = ROL32(Aku0, 19); + Ama0 ^= Da0; + BCo1 = ROL32(Ama0, 20); + Ase1 ^= De1; + BCu1 = ROL32(Ase1, 1); + Esa1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ese1 = BCe1 ^ ANDN32(BCi1, BCo1); + Esi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Eso1 = BCo1 ^ ANDN32(BCu1, BCa1); + Esu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + /* prepareTheta */ + BCa0 = Eba0 ^ Ega0 ^ Eka0 ^ Ema0 ^ Esa0; + BCa1 = Eba1 ^ Ega1 ^ Eka1 ^ Ema1 ^ Esa1; + BCe0 = Ebe0 ^ Ege0 ^ Eke0 ^ Eme0 ^ Ese0; + BCe1 = Ebe1 ^ Ege1 ^ Eke1 ^ Eme1 ^ Ese1; + BCi0 = Ebi0 ^ Egi0 ^ Eki0 ^ Emi0 ^ Esi0; + BCi1 = Ebi1 ^ Egi1 ^ Eki1 ^ Emi1 ^ Esi1; + BCo0 = Ebo0 ^ Ego0 ^ Eko0 ^ Emo0 ^ Eso0; + BCo1 = Ebo1 ^ Ego1 ^ Eko1 ^ Emo1 ^ Eso1; + BCu0 = Ebu0 ^ Egu0 ^ 
Eku0 ^ Emu0 ^ Esu0; + BCu1 = Ebu1 ^ Egu1 ^ Eku1 ^ Emu1 ^ Esu1; + + /* thetaRhoPiChiIota(round+1, E, A) */ + Da0 = BCu0 ^ ROL32(BCe1, 1); + Da1 = BCu1 ^ BCe0; + De0 = BCa0 ^ ROL32(BCi1, 1); + De1 = BCa1 ^ BCi0; + Di0 = BCe0 ^ ROL32(BCo1, 1); + Di1 = BCe1 ^ BCo0; + Do0 = BCi0 ^ ROL32(BCu1, 1); + Do1 = BCi1 ^ BCu0; + Du0 = BCo0 ^ ROL32(BCa1, 1); + Du1 = BCo1 ^ BCa0; + + Eba0 ^= Da0; + BCa0 = Eba0; + Ege0 ^= De0; + BCe0 = ROL32(Ege0, 22); + Eki1 ^= Di1; + BCi0 = ROL32(Eki1, 22); + Emo1 ^= Do1; + BCo0 = ROL32(Emo1, 11); + Esu0 ^= Du0; + BCu0 = ROL32(Esu0, 7); + Aba0 = BCa0 ^ ANDN32(BCe0, BCi0); + Aba0 ^= *(round_consts++); + Abe0 = BCe0 ^ ANDN32(BCi0, BCo0); + Abi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Abo0 = BCo0 ^ ANDN32(BCu0, BCa0); + Abu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Eba1 ^= Da1; + BCa1 = Eba1; + Ege1 ^= De1; + BCe1 = ROL32(Ege1, 22); + Eki0 ^= Di0; + BCi1 = ROL32(Eki0, 21); + Emo0 ^= Do0; + BCo1 = ROL32(Emo0, 10); + Esu1 ^= Du1; + BCu1 = ROL32(Esu1, 7); + Aba1 = BCa1 ^ ANDN32(BCe1, BCi1); + Aba1 ^= *(round_consts++); + Abe1 = BCe1 ^ ANDN32(BCi1, BCo1); + Abi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Abo1 = BCo1 ^ ANDN32(BCu1, BCa1); + Abu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Ebo0 ^= Do0; + BCa0 = ROL32(Ebo0, 14); + Egu0 ^= Du0; + BCe0 = ROL32(Egu0, 10); + Eka1 ^= Da1; + BCi0 = ROL32(Eka1, 2); + Eme1 ^= De1; + BCo0 = ROL32(Eme1, 23); + Esi1 ^= Di1; + BCu0 = ROL32(Esi1, 31); + Aga0 = BCa0 ^ ANDN32(BCe0, BCi0); + Age0 = BCe0 ^ ANDN32(BCi0, BCo0); + Agi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Ago0 = BCo0 ^ ANDN32(BCu0, BCa0); + Agu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Ebo1 ^= Do1; + BCa1 = ROL32(Ebo1, 14); + Egu1 ^= Du1; + BCe1 = ROL32(Egu1, 10); + Eka0 ^= Da0; + BCi1 = ROL32(Eka0, 1); + Eme0 ^= De0; + BCo1 = ROL32(Eme0, 22); + Esi0 ^= Di0; + BCu1 = ROL32(Esi0, 30); + Aga1 = BCa1 ^ ANDN32(BCe1, BCi1); + Age1 = BCe1 ^ ANDN32(BCi1, BCo1); + Agi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Ago1 = BCo1 ^ ANDN32(BCu1, BCa1); + Agu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Ebe1 ^= De1; + BCa0 = ROL32(Ebe1, 1); + Egi0 ^= Di0; + BCe0 = ROL32(Egi0, 3); + Eko1 ^= Do1; + BCi0 = ROL32(Eko1, 13); + Emu0 ^= Du0; + BCo0 = ROL32(Emu0, 4); + Esa0 ^= Da0; + BCu0 = ROL32(Esa0, 9); + Aka0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ake0 = BCe0 ^ ANDN32(BCi0, BCo0); + Aki0 = BCi0 ^ ANDN32(BCo0, BCu0); + Ako0 = BCo0 ^ ANDN32(BCu0, BCa0); + Aku0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Ebe0 ^= De0; + BCa1 = Ebe0; + Egi1 ^= Di1; + BCe1 = ROL32(Egi1, 3); + Eko0 ^= Do0; + BCi1 = ROL32(Eko0, 12); + Emu1 ^= Du1; + BCo1 = ROL32(Emu1, 4); + Esa1 ^= Da1; + BCu1 = ROL32(Esa1, 9); + Aka1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ake1 = BCe1 ^ ANDN32(BCi1, BCo1); + Aki1 = BCi1 ^ ANDN32(BCo1, BCu1); + Ako1 = BCo1 ^ ANDN32(BCu1, BCa1); + Aku1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Ebu1 ^= Du1; + BCa0 = ROL32(Ebu1, 14); + Ega0 ^= Da0; + BCe0 = ROL32(Ega0, 18); + Eke0 ^= De0; + BCi0 = ROL32(Eke0, 5); + Emi1 ^= Di1; + BCo0 = ROL32(Emi1, 8); + Eso0 ^= Do0; + BCu0 = ROL32(Eso0, 28); + Ama0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ame0 = BCe0 ^ ANDN32(BCi0, BCo0); + Ami0 = BCi0 ^ ANDN32(BCo0, BCu0); + Amo0 = BCo0 ^ ANDN32(BCu0, BCa0); + Amu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Ebu0 ^= Du0; + BCa1 = ROL32(Ebu0, 13); + Ega1 ^= Da1; + BCe1 = ROL32(Ega1, 18); + Eke1 ^= De1; + BCi1 = ROL32(Eke1, 5); + Emi0 ^= Di0; + BCo1 = ROL32(Emi0, 7); + Eso1 ^= Do1; + BCu1 = ROL32(Eso1, 28); + Ama1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ame1 = BCe1 ^ ANDN32(BCi1, BCo1); + Ami1 = BCi1 ^ ANDN32(BCo1, BCu1); + Amo1 = BCo1 ^ ANDN32(BCu1, BCa1); + Amu1 = BCu1 ^ ANDN32(BCa1, BCe1); + + Ebi0 ^= Di0; + BCa0 = ROL32(Ebi0, 31); + Ego1 ^= Do1; + BCe0 = ROL32(Ego1, 
28); + Eku1 ^= Du1; + BCi0 = ROL32(Eku1, 20); + Ema1 ^= Da1; + BCo0 = ROL32(Ema1, 21); + Ese0 ^= De0; + BCu0 = ROL32(Ese0, 1); + Asa0 = BCa0 ^ ANDN32(BCe0, BCi0); + Ase0 = BCe0 ^ ANDN32(BCi0, BCo0); + Asi0 = BCi0 ^ ANDN32(BCo0, BCu0); + Aso0 = BCo0 ^ ANDN32(BCu0, BCa0); + Asu0 = BCu0 ^ ANDN32(BCa0, BCe0); + + Ebi1 ^= Di1; + BCa1 = ROL32(Ebi1, 31); + Ego0 ^= Do0; + BCe1 = ROL32(Ego0, 27); + Eku0 ^= Du0; + BCi1 = ROL32(Eku0, 19); + Ema0 ^= Da0; + BCo1 = ROL32(Ema0, 20); + Ese1 ^= De1; + BCu1 = ROL32(Ese1, 1); + Asa1 = BCa1 ^ ANDN32(BCe1, BCi1); + Ase1 = BCe1 ^ ANDN32(BCi1, BCo1); + Asi1 = BCi1 ^ ANDN32(BCo1, BCu1); + Aso1 = BCo1 ^ ANDN32(BCu1, BCa1); + Asu1 = BCu1 ^ ANDN32(BCa1, BCe1); + } + while (round_consts < round_consts_end); + + state[0] = Aba0; + state[1] = Aba1; + state[2] = Abe0; + state[3] = Abe1; + state[4] = Abi0; + state[5] = Abi1; + state[6] = Abo0; + state[7] = Abo1; + state[8] = Abu0; + state[9] = Abu1; + state[10] = Aga0; + state[11] = Aga1; + state[12] = Age0; + state[13] = Age1; + state[14] = Agi0; + state[15] = Agi1; + state[16] = Ago0; + state[17] = Ago1; + state[18] = Agu0; + state[19] = Agu1; + state[20] = Aka0; + state[21] = Aka1; + state[22] = Ake0; + state[23] = Ake1; + state[24] = Aki0; + state[25] = Aki1; + state[26] = Ako0; + state[27] = Ako1; + state[28] = Aku0; + state[29] = Aku1; + state[30] = Ama0; + state[31] = Ama1; + state[32] = Ame0; + state[33] = Ame1; + state[34] = Ami0; + state[35] = Ami1; + state[36] = Amo0; + state[37] = Amo1; + state[38] = Amu0; + state[39] = Amu1; + state[40] = Asa0; + state[41] = Asa1; + state[42] = Ase0; + state[43] = Ase1; + state[44] = Asi0; + state[45] = Asi1; + state[46] = Aso0; + state[47] = Aso1; + state[48] = Asu0; + state[49] = Asu1; + + return sizeof(void *) * 4 + sizeof(u32) * 12 * 5 * 2; +} diff --git a/comm/third_party/libgcrypt/cipher/keccak_permute_64.h b/comm/third_party/libgcrypt/cipher/keccak_permute_64.h new file mode 100644 index 0000000000..b28c871ec1 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/keccak_permute_64.h @@ -0,0 +1,385 @@ +/* keccak_permute_64.h - Keccak permute function (simple 64bit) + * Copyright (C) 2015 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +/* The code is based on public-domain/CC0 "keccakc1024/simple/Keccak-simple.c" + * implementation by Ronny Van Keer from SUPERCOP toolkit package. + */ + +/* Function that computes the Keccak-f[1600] permutation on the given state. 
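+ *
+ * For orientation: each of the 24 rounds applies the theta, rho, pi,
+ * chi and iota steps to the state viewed as a 5x5 array of 64-bit
+ * lanes.  A minimal single-round reference sketch (illustrative only,
+ * not part of the upstream file), assuming a plain u64 A[25] state
+ * indexed as A[x + 5*y], the ROL64 macro used below, a rho rotation
+ * offset table r[5][5] and a round constant table RC[24]:
+ *
+ *   u64 B[25], C[5], D[5];
+ *   int i, x, y;
+ *   for (x = 0; x < 5; x++)      // theta: column parities...
+ *     C[x] = A[x] ^ A[x + 5] ^ A[x + 10] ^ A[x + 15] ^ A[x + 20];
+ *   for (x = 0; x < 5; x++)      // ...mixed back into every lane
+ *     D[x] = C[(x + 4) % 5] ^ ROL64(C[(x + 1) % 5], 1);
+ *   for (i = 0; i < 25; i++)
+ *     A[i] ^= D[i % 5];
+ *   for (x = 0; x < 5; x++)      // rho: rotate; pi: permute lanes
+ *     for (y = 0; y < 5; y++)
+ *       B[y + 5 * ((2 * x + 3 * y) % 5)] = ROL64(A[x + 5 * y], r[x][y]);
+ *   for (x = 0; x < 5; x++)      // chi: nonlinear mix within rows
+ *     for (y = 0; y < 5; y++)
+ *       A[x + 5 * y] = B[x + 5 * y]
+ *                      ^ (~B[(x + 1) % 5 + 5 * y] & B[(x + 2) % 5 + 5 * y]);
+ *   A[0] ^= RC[round];           // iota: inject round constant
+ *
+ * The function below fuses these steps, keeps all 25 lanes in named
+ * locals (Aba..Asu) and unrolls two rounds per loop iteration,
+ * consuming the round constants pairwise; ANDN64(a, b) computes
+ * (~a) & b for the chi step.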
*/ +static unsigned int +KECCAK_F1600_PERMUTE_FUNC_NAME(KECCAK_STATE *hd) +{ + const u64 *round_consts = _gcry_keccak_round_consts_64bit; + const u64 *round_consts_end = _gcry_keccak_round_consts_64bit + 24; + u64 Aba, Abe, Abi, Abo, Abu; + u64 Aga, Age, Agi, Ago, Agu; + u64 Aka, Ake, Aki, Ako, Aku; + u64 Ama, Ame, Ami, Amo, Amu; + u64 Asa, Ase, Asi, Aso, Asu; + u64 BCa, BCe, BCi, BCo, BCu; + u64 Da, De, Di, Do, Du; + u64 Eba, Ebe, Ebi, Ebo, Ebu; + u64 Ega, Ege, Egi, Ego, Egu; + u64 Eka, Eke, Eki, Eko, Eku; + u64 Ema, Eme, Emi, Emo, Emu; + u64 Esa, Ese, Esi, Eso, Esu; + u64 *state = hd->u.state64; + + Aba = state[0]; + Abe = state[1]; + Abi = state[2]; + Abo = state[3]; + Abu = state[4]; + Aga = state[5]; + Age = state[6]; + Agi = state[7]; + Ago = state[8]; + Agu = state[9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + do + { + /* prepareTheta */ + BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; + BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; + BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; + BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; + BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; + + /* thetaRhoPiChiIotaPrepareTheta(round , A, E) */ + Da = BCu ^ ROL64(BCe, 1); + De = BCa ^ ROL64(BCi, 1); + Di = BCe ^ ROL64(BCo, 1); + Do = BCi ^ ROL64(BCu, 1); + Du = BCo ^ ROL64(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = ROL64(Age, 44); + Aki ^= Di; + BCi = ROL64(Aki, 43); + Amo ^= Do; + BCo = ROL64(Amo, 21); + Asu ^= Du; + BCu = ROL64(Asu, 14); + Eba = BCa ^ ANDN64(BCe, BCi); + Eba ^= *(round_consts++); + Ebe = BCe ^ ANDN64(BCi, BCo); + Ebi = BCi ^ ANDN64(BCo, BCu); + Ebo = BCo ^ ANDN64(BCu, BCa); + Ebu = BCu ^ ANDN64(BCa, BCe); + + Abo ^= Do; + BCa = ROL64(Abo, 28); + Agu ^= Du; + BCe = ROL64(Agu, 20); + Aka ^= Da; + BCi = ROL64(Aka, 3); + Ame ^= De; + BCo = ROL64(Ame, 45); + Asi ^= Di; + BCu = ROL64(Asi, 61); + Ega = BCa ^ ANDN64(BCe, BCi); + Ege = BCe ^ ANDN64(BCi, BCo); + Egi = BCi ^ ANDN64(BCo, BCu); + Ego = BCo ^ ANDN64(BCu, BCa); + Egu = BCu ^ ANDN64(BCa, BCe); + + Abe ^= De; + BCa = ROL64(Abe, 1); + Agi ^= Di; + BCe = ROL64(Agi, 6); + Ako ^= Do; + BCi = ROL64(Ako, 25); + Amu ^= Du; + BCo = ROL64(Amu, 8); + Asa ^= Da; + BCu = ROL64(Asa, 18); + Eka = BCa ^ ANDN64(BCe, BCi); + Eke = BCe ^ ANDN64(BCi, BCo); + Eki = BCi ^ ANDN64(BCo, BCu); + Eko = BCo ^ ANDN64(BCu, BCa); + Eku = BCu ^ ANDN64(BCa, BCe); + + Abu ^= Du; + BCa = ROL64(Abu, 27); + Aga ^= Da; + BCe = ROL64(Aga, 36); + Ake ^= De; + BCi = ROL64(Ake, 10); + Ami ^= Di; + BCo = ROL64(Ami, 15); + Aso ^= Do; + BCu = ROL64(Aso, 56); + Ema = BCa ^ ANDN64(BCe, BCi); + Eme = BCe ^ ANDN64(BCi, BCo); + Emi = BCi ^ ANDN64(BCo, BCu); + Emo = BCo ^ ANDN64(BCu, BCa); + Emu = BCu ^ ANDN64(BCa, BCe); + + Abi ^= Di; + BCa = ROL64(Abi, 62); + Ago ^= Do; + BCe = ROL64(Ago, 55); + Aku ^= Du; + BCi = ROL64(Aku, 39); + Ama ^= Da; + BCo = ROL64(Ama, 41); + Ase ^= De; + BCu = ROL64(Ase, 2); + Esa = BCa ^ ANDN64(BCe, BCi); + Ese = BCe ^ ANDN64(BCi, BCo); + Esi = BCi ^ ANDN64(BCo, BCu); + Eso = BCo ^ ANDN64(BCu, BCa); + Esu = BCu ^ ANDN64(BCa, BCe); + + /* prepareTheta */ + BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; + BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; + BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; + BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; + BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; + + /* thetaRhoPiChiIotaPrepareTheta(round+1, E, A) */ + Da = BCu ^ ROL64(BCe, 1); + De = BCa ^ ROL64(BCi, 1); + Di 
= BCe ^ ROL64(BCo, 1); + Do = BCi ^ ROL64(BCu, 1); + Du = BCo ^ ROL64(BCa, 1); + + Eba ^= Da; + BCa = Eba; + Ege ^= De; + BCe = ROL64(Ege, 44); + Eki ^= Di; + BCi = ROL64(Eki, 43); + Emo ^= Do; + BCo = ROL64(Emo, 21); + Esu ^= Du; + BCu = ROL64(Esu, 14); + Aba = BCa ^ ANDN64(BCe, BCi); + Aba ^= *(round_consts++); + Abe = BCe ^ ANDN64(BCi, BCo); + Abi = BCi ^ ANDN64(BCo, BCu); + Abo = BCo ^ ANDN64(BCu, BCa); + Abu = BCu ^ ANDN64(BCa, BCe); + + Ebo ^= Do; + BCa = ROL64(Ebo, 28); + Egu ^= Du; + BCe = ROL64(Egu, 20); + Eka ^= Da; + BCi = ROL64(Eka, 3); + Eme ^= De; + BCo = ROL64(Eme, 45); + Esi ^= Di; + BCu = ROL64(Esi, 61); + Aga = BCa ^ ANDN64(BCe, BCi); + Age = BCe ^ ANDN64(BCi, BCo); + Agi = BCi ^ ANDN64(BCo, BCu); + Ago = BCo ^ ANDN64(BCu, BCa); + Agu = BCu ^ ANDN64(BCa, BCe); + + Ebe ^= De; + BCa = ROL64(Ebe, 1); + Egi ^= Di; + BCe = ROL64(Egi, 6); + Eko ^= Do; + BCi = ROL64(Eko, 25); + Emu ^= Du; + BCo = ROL64(Emu, 8); + Esa ^= Da; + BCu = ROL64(Esa, 18); + Aka = BCa ^ ANDN64(BCe, BCi); + Ake = BCe ^ ANDN64(BCi, BCo); + Aki = BCi ^ ANDN64(BCo, BCu); + Ako = BCo ^ ANDN64(BCu, BCa); + Aku = BCu ^ ANDN64(BCa, BCe); + + Ebu ^= Du; + BCa = ROL64(Ebu, 27); + Ega ^= Da; + BCe = ROL64(Ega, 36); + Eke ^= De; + BCi = ROL64(Eke, 10); + Emi ^= Di; + BCo = ROL64(Emi, 15); + Eso ^= Do; + BCu = ROL64(Eso, 56); + Ama = BCa ^ ANDN64(BCe, BCi); + Ame = BCe ^ ANDN64(BCi, BCo); + Ami = BCi ^ ANDN64(BCo, BCu); + Amo = BCo ^ ANDN64(BCu, BCa); + Amu = BCu ^ ANDN64(BCa, BCe); + + Ebi ^= Di; + BCa = ROL64(Ebi, 62); + Ego ^= Do; + BCe = ROL64(Ego, 55); + Eku ^= Du; + BCi = ROL64(Eku, 39); + Ema ^= Da; + BCo = ROL64(Ema, 41); + Ese ^= De; + BCu = ROL64(Ese, 2); + Asa = BCa ^ ANDN64(BCe, BCi); + Ase = BCe ^ ANDN64(BCi, BCo); + Asi = BCi ^ ANDN64(BCo, BCu); + Aso = BCo ^ ANDN64(BCu, BCa); + Asu = BCu ^ ANDN64(BCa, BCe); + } + while (round_consts < round_consts_end); + + state[0] = Aba; + state[1] = Abe; + state[2] = Abi; + state[3] = Abo; + state[4] = Abu; + state[5] = Aga; + state[6] = Age; + state[7] = Agi; + state[8] = Ago; + state[9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; + + return sizeof(void *) * 4 + sizeof(u64) * 12 * 5; +} + +static unsigned int +KECCAK_F1600_ABSORB_FUNC_NAME(KECCAK_STATE *hd, int pos, const byte *lanes, + unsigned int nlanes, int blocklanes) +{ + unsigned int burn = 0; + + while (nlanes) + { + switch (blocklanes) + { + case 21: + /* SHAKE128 */ + while (pos == 0 && nlanes >= 21) + { + nlanes -= 21; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_4(&hd->u.state64[16], lanes); lanes += 8 * 4; + absorb_lanes64_1(&hd->u.state64[20], lanes); lanes += 8 * 1; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 18: + /* SHA3-224 */ + while (pos == 0 && nlanes >= 18) + { + nlanes -= 18; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 8; + absorb_lanes64_2(&hd->u.state64[16], lanes); lanes += 8 * 2; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 17: + /* SHA3-256 & SHAKE256 */ + while (pos == 0 && nlanes >= 17) + { + nlanes -= 17; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_8(&hd->u.state64[8], lanes); lanes += 8 * 
8; + absorb_lanes64_1(&hd->u.state64[16], lanes); lanes += 8 * 1; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 13: + /* SHA3-384 */ + while (pos == 0 && nlanes >= 13) + { + nlanes -= 13; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_4(&hd->u.state64[8], lanes); lanes += 8 * 4; + absorb_lanes64_1(&hd->u.state64[12], lanes); lanes += 8 * 1; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + + case 9: + /* SHA3-512 */ + while (pos == 0 && nlanes >= 9) + { + nlanes -= 9; + absorb_lanes64_8(&hd->u.state64[0], lanes); lanes += 8 * 8; + absorb_lanes64_1(&hd->u.state64[8], lanes); lanes += 8 * 1; + + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + } + break; + } + + while (nlanes) + { + hd->u.state64[pos] ^= buf_get_le64(lanes); + lanes += 8; + nlanes--; + + if (++pos == blocklanes) + { + burn = KECCAK_F1600_PERMUTE_FUNC_NAME(hd); + pos = 0; + break; + } + } + } + + return burn; +} diff --git a/comm/third_party/libgcrypt/cipher/mac-cmac.c b/comm/third_party/libgcrypt/cipher/mac-cmac.c new file mode 100644 index 0000000000..8d5d5ca304 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/mac-cmac.c @@ -0,0 +1,524 @@ +/* mac-cmac.c - CMAC glue for MAC API + * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi> + * Copyright (C) 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see <http://www.gnu.org/licenses/>. + */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "cipher.h" +#include "./mac-internal.h" + + +static int +map_mac_algo_to_cipher (int mac_algo) +{ + switch (mac_algo) + { + default: + return GCRY_CIPHER_NONE; + case GCRY_MAC_CMAC_AES: + return GCRY_CIPHER_AES; + case GCRY_MAC_CMAC_3DES: + return GCRY_CIPHER_3DES; + case GCRY_MAC_CMAC_CAMELLIA: + return GCRY_CIPHER_CAMELLIA128; + case GCRY_MAC_CMAC_IDEA: + return GCRY_CIPHER_IDEA; + case GCRY_MAC_CMAC_CAST5: + return GCRY_CIPHER_CAST5; + case GCRY_MAC_CMAC_BLOWFISH: + return GCRY_CIPHER_BLOWFISH; + case GCRY_MAC_CMAC_TWOFISH: + return GCRY_CIPHER_TWOFISH; + case GCRY_MAC_CMAC_SERPENT: + return GCRY_CIPHER_SERPENT128; + case GCRY_MAC_CMAC_SEED: + return GCRY_CIPHER_SEED; + case GCRY_MAC_CMAC_RFC2268: + return GCRY_CIPHER_RFC2268_128; + case GCRY_MAC_CMAC_GOST28147: + return GCRY_CIPHER_GOST28147; + case GCRY_MAC_CMAC_SM4: + return GCRY_CIPHER_SM4; + } +} + + +static gcry_err_code_t +cmac_open (gcry_mac_hd_t h) +{ + gcry_err_code_t err; + gcry_cipher_hd_t hd; + int secure = (h->magic == CTX_MAC_MAGIC_SECURE); + int cipher_algo; + unsigned int flags; + + cipher_algo = map_mac_algo_to_cipher (h->spec->algo); + flags = (secure ?
GCRY_CIPHER_SECURE : 0); + + err = _gcry_cipher_open_internal (&hd, cipher_algo, GCRY_CIPHER_MODE_CMAC, + flags); + if (err) + return err; + + h->u.cmac.cipher_algo = cipher_algo; + h->u.cmac.ctx = hd; + h->u.cmac.blklen = _gcry_cipher_get_algo_blklen (cipher_algo); + return 0; +} + + +static void +cmac_close (gcry_mac_hd_t h) +{ + _gcry_cipher_close (h->u.cmac.ctx); + h->u.cmac.ctx = NULL; +} + + +static gcry_err_code_t +cmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + return _gcry_cipher_setkey (h->u.cmac.ctx, key, keylen); +} + + +static gcry_err_code_t +cmac_reset (gcry_mac_hd_t h) +{ + return _gcry_cipher_reset (h->u.cmac.ctx); +} + + +static gcry_err_code_t +cmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + return _gcry_cipher_cmac_authenticate (h->u.cmac.ctx, buf, buflen); +} + + +static gcry_err_code_t +cmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen) +{ + if (*outlen > h->u.cmac.blklen) + *outlen = h->u.cmac.blklen; + return _gcry_cipher_cmac_get_tag (h->u.cmac.ctx, outbuf, *outlen); +} + + +static gcry_err_code_t +cmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + return _gcry_cipher_cmac_check_tag (h->u.cmac.ctx, buf, buflen); +} + + +static unsigned int +cmac_get_maclen (int algo) +{ + return _gcry_cipher_get_algo_blklen (map_mac_algo_to_cipher (algo)); +} + + +static unsigned int +cmac_get_keylen (int algo) +{ + return _gcry_cipher_get_algo_keylen (map_mac_algo_to_cipher (algo)); +} + + +/* Check one CMAC with MAC ALGO using the regular MAC + * API. (DATA,DATALEN) is the data to be MACed, (KEY,KEYLEN) the key + * and (EXPECT,EXPECTLEN) the expected result. Returns NULL on + * success or a string describing the failure. */ +static const char * +check_one (int algo, const char *data, size_t datalen, + const char *key, size_t keylen, + const char *expect, size_t expectlen) +{ + gcry_mac_hd_t hd; + unsigned char mac[512]; /* hardcoded to avoid allocation */ + unsigned int maclen; + size_t macoutlen; + int i; + gcry_error_t err = 0; + + err = _gcry_mac_open (&hd, algo, 0, NULL); + if (err) + return "gcry_mac_open failed"; + + i = _gcry_mac_get_algo (hd); + if (i != algo) + return "gcry_mac_get_algo failed"; + + maclen = _gcry_mac_get_algo_maclen (algo); + if (maclen < 1 || maclen > 500) + return "gcry_mac_get_algo_maclen failed"; + + if (maclen != expectlen) + return "invalid tests data"; + + err = _gcry_mac_setkey (hd, key, keylen); + if (err) + { + _gcry_mac_close (hd); + return "gcry_mac_setkey failed"; + } + + err = _gcry_mac_write (hd, data, datalen); + if (err) + { + _gcry_mac_close (hd); + return "gcry_mac_write failed"; + } + + err = _gcry_mac_verify (hd, expect, maclen); + if (err) + { + _gcry_mac_close (hd); + return "gcry_mac_verify failed"; + } + + macoutlen = maclen; + err = _gcry_mac_read (hd, mac, &macoutlen); + _gcry_mac_close (hd); + if (err) + return "gcry_mac_read failed"; + + if (memcmp (mac, expect, maclen)) + return "does not match"; + + return NULL; +} + + +/* + * CMAC AES and DES test vectors are from + * http://web.archive.org/web/20130930212819/http://csrc.nist.gov/publica \ + * tions/nistpubs/800-38B/Updated_CMAC_Examples.pdf + */ + +static gpg_err_code_t +selftests_cmac_3des (int extended, selftest_report_func_t report) +{ + static const struct + { + const char *desc; + const char *data; + const char *key; + const char *expect; + } tv[] = + { + { "Basic 3DES", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57", + 
"\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58" + "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5", + "\x74\x3d\xdb\xe0\xce\x2d\xc2\xed" }, + { "Extended 3DES #1", + "", + "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58" + "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5", + "\xb7\xa6\x88\xe1\x22\xff\xaf\x95" }, + { "Extended 3DES #2", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96", + "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58" + "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5", + "\x8e\x8f\x29\x31\x36\x28\x37\x97" }, + { "Extended 3DES #3", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51", + "\x8a\xa8\x3b\xf8\xcb\xda\x10\x62\x0b\xc1\xbf\x19\xfb\xb6\xcd\x58" + "\xbc\x31\x3d\x4a\x37\x1c\xa8\xb5", + "\x33\xe6\xb1\x09\x24\x00\xea\xe5" }, + { "Extended 3DES #4", + "", + "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38" + "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5", + "\xbd\x2e\xbf\x9a\x3b\xa0\x03\x61" }, + { "Extended 3DES #5", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96", + "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38" + "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5", + "\x4f\xf2\xab\x81\x3c\x53\xce\x83" }, + { "Extended 3DES #6", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57", + "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38" + "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5", + "\x62\xdd\x1b\x47\x19\x02\xbd\x4e" }, + { "Extended 3DES #7", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51", + "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5\x8a\x3d\x10\xba\x80\x57\x0d\x38" + "\x4c\xf1\x51\x34\xa2\x85\x0d\xd5", + "\x31\xb1\xe4\x31\xda\xbc\x4e\xb8" }, + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MAC_CMAC_3DES, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, 8); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. 
*/ + + failed: + if (report) + report ("cmac", GCRY_MAC_CMAC_3DES, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +static gpg_err_code_t +selftests_cmac_aes (int extended, selftest_report_func_t report) +{ + static const struct + { + const char *desc; + const char *data; + const char *key; + const char *expect; + } tv[] = + { + { "Basic AES128", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11", + "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + "\xdf\xa6\x67\x47\xde\x9a\xe6\x30\x30\xca\x32\x61\x14\x97\xc8\x27" }, + { "Basic AES192", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11", + "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + "\x8a\x1d\xe5\xbe\x2e\xb3\x1a\xad\x08\x9a\x82\xe6\xee\x90\x8b\x0e" }, + { "Basic AES256", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11", + "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + "\xaa\xf3\xd8\xf1\xde\x56\x40\xc2\x32\xf5\xb1\x69\xb9\xc9\x11\xe6" }, + { "Extended AES #1", + "", + "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + "\xbb\x1d\x69\x29\xe9\x59\x37\x28\x7f\xa3\x7d\x12\x9b\x75\x67\x46" }, + { "Extended AES #2", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a", + "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + "\x9e\x99\xa7\xbf\x31\xe7\x10\x90\x06\x62\xf6\x5e\x61\x7c\x51\x84" }, + { "Extended AES #3", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + "\xe1\x99\x21\x90\x54\x9f\x6e\xd5\x69\x6a\x2c\x05\x6c\x31\x54\x10" }, + { "Extended AES #4", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a", + "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + "\x07\x0a\x16\xb4\x6b\x4d\x41\x44\xf7\x9b\xdd\x9d\xd0\x4a\x28\x7c" }, + { "Extended AES #5", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + "\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + "\x2b\x7e\x15\x16\x28\xae\xd2\xa6\xab\xf7\x15\x88\x09\xcf\x4f\x3c", + "\x51\xf0\xbe\xbf\x7e\x3b\x9d\x92\xfc\x49\x74\x17\x79\x36\x3c\xfe" }, + { "Extended AES #6", + "", + "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + "\xd1\x7d\xdf\x46\xad\xaa\xcd\xe5\x31\xca\xc4\x83\xde\x7a\x93\x67" }, + { "Extended AES #7", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a" + "\xae\x2d\x8a\x57\x1e\x03\xac\x9c\x9e\xb7\x6f\xac\x45\xaf\x8e\x51" + "\x30\xc8\x1c\x46\xa3\x5c\xe4\x11\xe5\xfb\xc1\x19\x1a\x0a\x52\xef" + 
"\xf6\x9f\x24\x45\xdf\x4f\x9b\x17\xad\x2b\x41\x7b\xe6\x6c\x37\x10", + "\x8e\x73\xb0\xf7\xda\x0e\x64\x52\xc8\x10\xf3\x2b\x80\x90\x79\xe5" + "\x62\xf8\xea\xd2\x52\x2c\x6b\x7b", + "\xa1\xd5\xdf\x0e\xed\x79\x0f\x79\x4d\x77\x58\x96\x59\xf3\x9a\x11" }, + { "Extended AES #8", + "", + "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + "\x02\x89\x62\xf6\x1b\x7b\xf8\x9e\xfc\x6b\x55\x1f\x46\x67\xd9\x83" }, + { "Extended AES #9", + "\x6b\xc1\xbe\xe2\x2e\x40\x9f\x96\xe9\x3d\x7e\x11\x73\x93\x17\x2a", + "\x60\x3d\xeb\x10\x15\xca\x71\xbe\x2b\x73\xae\xf0\x85\x7d\x77\x81" + "\x1f\x35\x2c\x07\x3b\x61\x08\xd7\x2d\x98\x10\xa3\x09\x14\xdf\xf4", + "\x28\xa7\x02\x3f\x45\x2e\x8f\x82\xbd\x4b\xf2\x8d\x8c\x37\xc3\x5c" }, + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MAC_CMAC_AES, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, strlen (tv[tvidx].expect)); + if (errtxt) + goto failed; + if (tvidx >= 2 && !extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("cmac", GCRY_MAC_CMAC_AES, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + +static gpg_err_code_t +cmac_selftest (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_MAC_CMAC_3DES: + ec = selftests_cmac_3des (extended, report); + break; + case GCRY_MAC_CMAC_AES: + ec = selftests_cmac_aes (extended, report); + break; + + default: + ec = GPG_ERR_MAC_ALGO; + break; + } + + return ec; +} + + +static gcry_mac_spec_ops_t cmac_ops = { + cmac_open, + cmac_close, + cmac_setkey, + NULL, + cmac_reset, + cmac_write, + cmac_read, + cmac_verify, + cmac_get_maclen, + cmac_get_keylen, + NULL, + cmac_selftest +}; + + +#if USE_BLOWFISH +gcry_mac_spec_t _gcry_mac_type_spec_cmac_blowfish = { + GCRY_MAC_CMAC_BLOWFISH, {0, 0}, "CMAC_BLOWFISH", + &cmac_ops +}; +#endif +#if USE_DES +gcry_mac_spec_t _gcry_mac_type_spec_cmac_tripledes = { + GCRY_MAC_CMAC_3DES, {0, 1}, "CMAC_3DES", + &cmac_ops +}; +#endif +#if USE_CAST5 +gcry_mac_spec_t _gcry_mac_type_spec_cmac_cast5 = { + GCRY_MAC_CMAC_CAST5, {0, 0}, "CMAC_CAST5", + &cmac_ops +}; +#endif +#if USE_AES +gcry_mac_spec_t _gcry_mac_type_spec_cmac_aes = { + GCRY_MAC_CMAC_AES, {0, 1}, "CMAC_AES", + &cmac_ops +}; +#endif +#if USE_TWOFISH +gcry_mac_spec_t _gcry_mac_type_spec_cmac_twofish = { + GCRY_MAC_CMAC_TWOFISH, {0, 0}, "CMAC_TWOFISH", + &cmac_ops +}; +#endif +#if USE_SERPENT +gcry_mac_spec_t _gcry_mac_type_spec_cmac_serpent = { + GCRY_MAC_CMAC_SERPENT, {0, 0}, "CMAC_SERPENT", + &cmac_ops +}; +#endif +#if USE_RFC2268 +gcry_mac_spec_t _gcry_mac_type_spec_cmac_rfc2268 = { + GCRY_MAC_CMAC_RFC2268, {0, 0}, "CMAC_RFC2268", + &cmac_ops +}; +#endif +#if USE_SEED +gcry_mac_spec_t _gcry_mac_type_spec_cmac_seed = { + GCRY_MAC_CMAC_SEED, {0, 0}, "CMAC_SEED", + &cmac_ops +}; +#endif +#if USE_CAMELLIA +gcry_mac_spec_t _gcry_mac_type_spec_cmac_camellia = { + GCRY_MAC_CMAC_CAMELLIA, {0, 0}, "CMAC_CAMELLIA", + &cmac_ops +}; +#endif +#ifdef USE_IDEA +gcry_mac_spec_t _gcry_mac_type_spec_cmac_idea = { + GCRY_MAC_CMAC_IDEA, {0, 0}, "CMAC_IDEA", + &cmac_ops +}; +#endif +#if USE_GOST28147 +gcry_mac_spec_t _gcry_mac_type_spec_cmac_gost28147 = { + GCRY_MAC_CMAC_GOST28147, {0, 0}, "CMAC_GOST28147", + &cmac_ops +}; +#endif +#if USE_SM4 +gcry_mac_spec_t _gcry_mac_type_spec_cmac_sm4 = { + GCRY_MAC_CMAC_SM4, 
{0, 0}, "CMAC_SM4", + &cmac_ops +}; +#endif diff --git a/comm/third_party/libgcrypt/cipher/mac-gmac.c b/comm/third_party/libgcrypt/cipher/mac-gmac.c new file mode 100644 index 0000000000..e04c6d1ef0 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/mac-gmac.c @@ -0,0 +1,187 @@ +/* mac-gmac.c - GMAC glue for MAC API + * Copyright (C) 2013 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" +#include "./mac-internal.h" + + +static int +map_mac_algo_to_cipher (int mac_algo) +{ + switch (mac_algo) + { + default: + return GCRY_CIPHER_NONE; + case GCRY_MAC_GMAC_AES: + return GCRY_CIPHER_AES; + case GCRY_MAC_GMAC_CAMELLIA: + return GCRY_CIPHER_CAMELLIA128; + case GCRY_MAC_GMAC_TWOFISH: + return GCRY_CIPHER_TWOFISH; + case GCRY_MAC_GMAC_SERPENT: + return GCRY_CIPHER_SERPENT128; + case GCRY_MAC_GMAC_SEED: + return GCRY_CIPHER_SEED; + } +} + + +static gcry_err_code_t +gmac_open (gcry_mac_hd_t h) +{ + gcry_err_code_t err; + gcry_cipher_hd_t hd; + int secure = (h->magic == CTX_MAC_MAGIC_SECURE); + int cipher_algo; + unsigned int flags; + + cipher_algo = map_mac_algo_to_cipher (h->spec->algo); + flags = (secure ? 
GCRY_CIPHER_SECURE : 0); + + err = _gcry_cipher_open_internal (&hd, cipher_algo, GCRY_CIPHER_MODE_GCM, + flags); + if (err) + return err; + + h->u.gmac.cipher_algo = cipher_algo; + h->u.gmac.ctx = hd; + return 0; +} + + +static void +gmac_close (gcry_mac_hd_t h) +{ + _gcry_cipher_close (h->u.gmac.ctx); + h->u.gmac.ctx = NULL; +} + + +static gcry_err_code_t +gmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + return _gcry_cipher_setkey (h->u.gmac.ctx, key, keylen); +} + + +static gcry_err_code_t +gmac_setiv (gcry_mac_hd_t h, const unsigned char *iv, size_t ivlen) +{ + return _gcry_cipher_setiv (h->u.gmac.ctx, iv, ivlen); +} + + +static gcry_err_code_t +gmac_reset (gcry_mac_hd_t h) +{ + return _gcry_cipher_reset (h->u.gmac.ctx); +} + + +static gcry_err_code_t +gmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + return _gcry_cipher_authenticate (h->u.gmac.ctx, buf, buflen); +} + + +static gcry_err_code_t +gmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen) +{ + if (*outlen > GCRY_GCM_BLOCK_LEN) + *outlen = GCRY_GCM_BLOCK_LEN; + return _gcry_cipher_gettag (h->u.gmac.ctx, outbuf, *outlen); +} + + +static gcry_err_code_t +gmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + return _gcry_cipher_checktag (h->u.gmac.ctx, buf, buflen); +} + + +static unsigned int +gmac_get_maclen (int algo) +{ + (void)algo; + return GCRY_GCM_BLOCK_LEN; +} + + +static unsigned int +gmac_get_keylen (int algo) +{ + return _gcry_cipher_get_algo_keylen (map_mac_algo_to_cipher (algo)); +} + + +static gcry_mac_spec_ops_t gmac_ops = { + gmac_open, + gmac_close, + gmac_setkey, + gmac_setiv, + gmac_reset, + gmac_write, + gmac_read, + gmac_verify, + gmac_get_maclen, + gmac_get_keylen, + NULL, + NULL +}; + + +#if USE_AES +gcry_mac_spec_t _gcry_mac_type_spec_gmac_aes = { + GCRY_MAC_GMAC_AES, {0, 1}, "GMAC_AES", + &gmac_ops +}; +#endif +#if USE_TWOFISH +gcry_mac_spec_t _gcry_mac_type_spec_gmac_twofish = { + GCRY_MAC_GMAC_TWOFISH, {0, 0}, "GMAC_TWOFISH", + &gmac_ops +}; +#endif +#if USE_SERPENT +gcry_mac_spec_t _gcry_mac_type_spec_gmac_serpent = { + GCRY_MAC_GMAC_SERPENT, {0, 0}, "GMAC_SERPENT", + &gmac_ops +}; +#endif +#if USE_SEED +gcry_mac_spec_t _gcry_mac_type_spec_gmac_seed = { + GCRY_MAC_GMAC_SEED, {0, 0}, "GMAC_SEED", + &gmac_ops +}; +#endif +#if USE_CAMELLIA +gcry_mac_spec_t _gcry_mac_type_spec_gmac_camellia = { + GCRY_MAC_GMAC_CAMELLIA, {0, 0}, "GMAC_CAMELLIA", + &gmac_ops +}; +#endif diff --git a/comm/third_party/libgcrypt/cipher/mac-hmac.c b/comm/third_party/libgcrypt/cipher/mac-hmac.c new file mode 100644 index 0000000000..4e10dd2c9e --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/mac-hmac.c @@ -0,0 +1,1495 @@ +/* mac-hmac.c - HMAC glue for MAC API + * Copyright (C) 2013 Jussi Kivilinna + * Copyright (C) 2008 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */ + +#include <config.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include "g10lib.h" +#include "./mac-internal.h" +#include "bufhelp.h" +#include "cipher.h" +#include "hmac256.h" + + +static int +map_mac_algo_to_md (int mac_algo) +{ + switch (mac_algo) + { + default: + return GCRY_MD_NONE; + case GCRY_MAC_HMAC_MD2: + return GCRY_MD_MD2; + case GCRY_MAC_HMAC_MD4: + return GCRY_MD_MD4; + case GCRY_MAC_HMAC_MD5: + return GCRY_MD_MD5; + case GCRY_MAC_HMAC_SHA1: + return GCRY_MD_SHA1; + case GCRY_MAC_HMAC_SHA224: + return GCRY_MD_SHA224; + case GCRY_MAC_HMAC_SHA256: + return GCRY_MD_SHA256; + case GCRY_MAC_HMAC_SHA384: + return GCRY_MD_SHA384; + case GCRY_MAC_HMAC_SHA512: + return GCRY_MD_SHA512; + case GCRY_MAC_HMAC_SHA512_256: + return GCRY_MD_SHA512_256; + case GCRY_MAC_HMAC_SHA512_224: + return GCRY_MD_SHA512_224; + case GCRY_MAC_HMAC_SHA3_224: + return GCRY_MD_SHA3_224; + case GCRY_MAC_HMAC_SHA3_256: + return GCRY_MD_SHA3_256; + case GCRY_MAC_HMAC_SHA3_384: + return GCRY_MD_SHA3_384; + case GCRY_MAC_HMAC_SHA3_512: + return GCRY_MD_SHA3_512; + case GCRY_MAC_HMAC_RMD160: + return GCRY_MD_RMD160; + case GCRY_MAC_HMAC_TIGER1: + return GCRY_MD_TIGER1; + case GCRY_MAC_HMAC_WHIRLPOOL: + return GCRY_MD_WHIRLPOOL; + case GCRY_MAC_HMAC_GOSTR3411_94: + return GCRY_MD_GOSTR3411_94; + case GCRY_MAC_HMAC_GOSTR3411_CP: + return GCRY_MD_GOSTR3411_CP; + case GCRY_MAC_HMAC_STRIBOG256: + return GCRY_MD_STRIBOG256; + case GCRY_MAC_HMAC_STRIBOG512: + return GCRY_MD_STRIBOG512; + case GCRY_MAC_HMAC_BLAKE2B_512: + return GCRY_MD_BLAKE2B_512; + case GCRY_MAC_HMAC_BLAKE2B_384: + return GCRY_MD_BLAKE2B_384; + case GCRY_MAC_HMAC_BLAKE2B_256: + return GCRY_MD_BLAKE2B_256; + case GCRY_MAC_HMAC_BLAKE2B_160: + return GCRY_MD_BLAKE2B_160; + case GCRY_MAC_HMAC_BLAKE2S_256: + return GCRY_MD_BLAKE2S_256; + case GCRY_MAC_HMAC_BLAKE2S_224: + return GCRY_MD_BLAKE2S_224; + case GCRY_MAC_HMAC_BLAKE2S_160: + return GCRY_MD_BLAKE2S_160; + case GCRY_MAC_HMAC_BLAKE2S_128: + return GCRY_MD_BLAKE2S_128; + case GCRY_MAC_HMAC_SM3: + return GCRY_MD_SM3; + } +} + + +static gcry_err_code_t +hmac_open (gcry_mac_hd_t h) +{ + gcry_err_code_t err; + gcry_md_hd_t hd; + int secure = (h->magic == CTX_MAC_MAGIC_SECURE); + unsigned int flags; + int md_algo; + + md_algo = map_mac_algo_to_md (h->spec->algo); + + flags = GCRY_MD_FLAG_HMAC; + flags |= (secure ?
GCRY_MD_FLAG_SECURE : 0); + + err = _gcry_md_open (&hd, md_algo, flags); + if (err) + return err; + + h->u.hmac.md_algo = md_algo; + h->u.hmac.md_ctx = hd; + return 0; +} + + +static void +hmac_close (gcry_mac_hd_t h) +{ + _gcry_md_close (h->u.hmac.md_ctx); + h->u.hmac.md_ctx = NULL; +} + + +static gcry_err_code_t +hmac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + return _gcry_md_setkey (h->u.hmac.md_ctx, key, keylen); +} + + +static gcry_err_code_t +hmac_reset (gcry_mac_hd_t h) +{ + _gcry_md_reset (h->u.hmac.md_ctx); + return 0; +} + + +static gcry_err_code_t +hmac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + _gcry_md_write (h->u.hmac.md_ctx, buf, buflen); + return 0; +} + + +static gcry_err_code_t +hmac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t * outlen) +{ + unsigned int dlen; + const unsigned char *digest; + + dlen = _gcry_md_get_algo_dlen (h->u.hmac.md_algo); + digest = _gcry_md_read (h->u.hmac.md_ctx, h->u.hmac.md_algo); + + if (*outlen <= dlen) + buf_cpy (outbuf, digest, *outlen); + else + { + buf_cpy (outbuf, digest, dlen); + *outlen = dlen; + } + + return 0; +} + + +static gcry_err_code_t +hmac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + unsigned int dlen; + const unsigned char *digest; + + dlen = _gcry_md_get_algo_dlen (h->u.hmac.md_algo); + digest = _gcry_md_read (h->u.hmac.md_ctx, h->u.hmac.md_algo); + + if (buflen > dlen) + return GPG_ERR_INV_LENGTH; + + return buf_eq_const (buf, digest, buflen) ? 0 : GPG_ERR_CHECKSUM; +} + + +static unsigned int +hmac_get_maclen (int algo) +{ + return _gcry_md_get_algo_dlen (map_mac_algo_to_md (algo)); +} + + +static unsigned int +hmac_get_keylen (int algo) +{ + /* Return blocksize for default key length. */ + switch (algo) + { + case GCRY_MD_SHA3_224: + return 1152 / 8; + case GCRY_MD_SHA3_256: + return 1088 / 8; + case GCRY_MD_SHA3_384: + return 832 / 8; + case GCRY_MD_SHA3_512: + return 576 / 8; + case GCRY_MAC_HMAC_SHA384: + case GCRY_MAC_HMAC_SHA512: + return 128; + case GCRY_MAC_HMAC_GOSTR3411_94: + return 32; + default: + return 64; + } +} + + +/* Check one HMAC with digest ALGO using the regular HMAC + * API. (DATA,DATALEN) is the data to be MACed, (KEY,KEYLEN) the key + * and (EXPECT,EXPECTLEN) the expected result. If TRUNC is set, the + * EXPECTLEN may be less than the digest length. Returns NULL on + * success or a string describing the failure.
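+ *
+ * Recall the HMAC construction (RFC 2104): with K' the key zero
+ * padded -- or first hashed, if longer -- to the digest's block
+ * size,
+ *
+ *   HMAC(K, m) = H((K' ^ opad) || H((K' ^ ipad) || m))
+ *
+ * where ipad is the byte 0x36 and opad the byte 0x5c repeated over
+ * the whole block.  This helper drives the internal _gcry_md_*
+ * entry points; the same check through the public API would look
+ * roughly like this (sketch only, error handling omitted):
+ *
+ *   gcry_md_hd_t hd;
+ *   gcry_md_open (&hd, algo, GCRY_MD_FLAG_HMAC);
+ *   gcry_md_setkey (hd, key, keylen);
+ *   gcry_md_write (hd, data, datalen);
+ *   ok = !memcmp (gcry_md_read (hd, algo), expect, expectlen);
+ *   gcry_md_close (hd);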
*/ +static const char * +check_one (int algo, + const void *data, size_t datalen, + const void *key, size_t keylen, + const void *expect, size_t expectlen, int trunc) +{ + gcry_md_hd_t hd; + const unsigned char *digest; + +/* printf ("HMAC algo %d\n", algo); */ + if (trunc) + { + if (_gcry_md_get_algo_dlen (algo) < expectlen) + return "invalid tests data"; + } + else + { + if (_gcry_md_get_algo_dlen (algo) != expectlen) + return "invalid tests data"; + } + if (_gcry_md_open (&hd, algo, GCRY_MD_FLAG_HMAC)) + return "gcry_md_open failed"; + if (_gcry_md_setkey (hd, key, keylen)) + { + _gcry_md_close (hd); + return "gcry_md_setkey failed"; + } + _gcry_md_write (hd, data, datalen); + digest = _gcry_md_read (hd, algo); + if (!digest) + { + _gcry_md_close (hd); + return "gcry_md_read failed"; + } + if (memcmp (digest, expect, expectlen)) + { +/* int i; */ + +/* fputs (" {", stdout); */ +/* for (i=0; i < expectlen-1; i++) */ +/* { */ +/* if (i && !(i % 8)) */ +/* fputs ("\n ", stdout); */ +/* printf (" 0x%02x,", digest[i]); */ +/* } */ +/* printf (" 0x%02x } },\n", digest[i]); */ + + _gcry_md_close (hd); + return "does not match"; + } + _gcry_md_close (hd); + return NULL; +} + + +static gpg_err_code_t +selftests_sha1 (int extended, selftest_report_func_t report) +{ + const char *what; + const char *errtxt; + unsigned char key[128]; + int i, j; + + what = "FIPS-198a, A.1"; + for (i=0; i < 64; i++) + key[i] = i; + errtxt = check_one (GCRY_MD_SHA1, + "Sample #1", 9, + key, 64, + "\x4f\x4c\xa3\xd5\xd6\x8b\xa7\xcc\x0a\x12" + "\x08\xc9\xc6\x1e\x9c\x5d\xa0\x40\x3c\x0a", 20, 0); + if (errtxt) + goto failed; + + if (extended) + { + what = "FIPS-198a, A.2"; + for (i=0, j=0x30; i < 20; i++) + key[i] = j++; + errtxt = check_one (GCRY_MD_SHA1, + "Sample #2", 9, + key, 20, + "\x09\x22\xd3\x40\x5f\xaa\x3d\x19\x4f\x82" + "\xa4\x58\x30\x73\x7d\x5c\xc6\xc7\x5d\x24", 20, 0); + if (errtxt) + goto failed; + + what = "FIPS-198a, A.3"; + for (i=0, j=0x50; i < 100; i++) + key[i] = j++; + errtxt = check_one (GCRY_MD_SHA1, + "Sample #3", 9, + key, 100, + "\xbc\xf4\x1e\xab\x8b\xb2\xd8\x02\xf3\xd0" + "\x5c\xaf\x7c\xb0\x92\xec\xf8\xd1\xa3\xaa", 20, 0); + if (errtxt) + goto failed; + + what = "FIPS-198a, A.4"; + for (i=0, j=0x70; i < 49; i++) + key[i] = j++; + errtxt = check_one (GCRY_MD_SHA1, + "Sample #4", 9, + key, 49, + "\x9e\xa8\x86\xef\xe2\x68\xdb\xec\xce\x42" + "\x0c\x75\x24\xdf\x32\xe0\x75\x1a\x2a\x26", 20, 0); + if (errtxt) + goto failed; + } + + return 0; /* Succeeded. 
*/ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA1, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +static gpg_err_code_t +selftests_sha224 (int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect[28]; + } tv[] = + { + { "data-28 key-4", + "what do ya want for nothing?", + "Jefe", + { 0xa3, 0x0e, 0x01, 0x09, 0x8b, 0xc6, 0xdb, 0xbf, + 0x45, 0x69, 0x0f, 0x3a, 0x7e, 0x9e, 0x6d, 0x0f, + 0x8b, 0xbe, 0xa2, 0xa3, 0x9e, 0x61, 0x48, 0x00, + 0x8f, 0xd0, 0x5e, 0x44 } }, + + { "data-9 key-20", + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + { 0x89, 0x6f, 0xb1, 0x12, 0x8a, 0xbb, 0xdf, 0x19, + 0x68, 0x32, 0x10, 0x7c, 0xd4, 0x9d, 0xf3, 0x3f, + 0x47, 0xb4, 0xb1, 0x16, 0x99, 0x12, 0xba, 0x4f, + 0x53, 0x68, 0x4b, 0x22 } }, + + { "data-50 key-20", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + { 0x7f, 0xb3, 0xcb, 0x35, 0x88, 0xc6, 0xc1, 0xf6, + 0xff, 0xa9, 0x69, 0x4d, 0x7d, 0x6a, 0xd2, 0x64, + 0x93, 0x65, 0xb0, 0xc1, 0xf6, 0x5d, 0x69, 0xd1, + 0xec, 0x83, 0x33, 0xea } }, + + { "data-50 key-26", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + { 0x6c, 0x11, 0x50, 0x68, 0x74, 0x01, 0x3c, 0xac, + 0x6a, 0x2a, 0xbc, 0x1b, 0xb3, 0x82, 0x62, 0x7c, + 0xec, 0x6a, 0x90, 0xd8, 0x6e, 0xfc, 0x01, 0x2d, + 0xe7, 0xaf, 0xec, 0x5a } }, + + { "data-54 key-131", + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x95, 0xe9, 0xa0, 0xdb, 0x96, 0x20, 0x95, 0xad, + 0xae, 0xbe, 0x9b, 0x2d, 0x6f, 0x0d, 0xbc, 0xe2, + 0xd4, 0x99, 0xf1, 0x12, 0xf2, 0xd2, 0xb7, 0x27, + 0x3f, 0xa6, 0x87, 0x0e } }, + + { "data-152 key-131", + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x3a, 0x85, 0x41, 0x66, 0xac, 0x5d, 0x9f, 0x02, + 0x3f, 0x54, 0xd5, 0x17, 0xd0, 0xb3, 0x9d, 0xbd, + 0x94, 0x67, 0x70, 0xdb, 0x9c, 0x2b, 0x95, 0xc9, + 0xf6, 0xf5, 0x65, 0xd1 } }, + + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MD_SHA224, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, DIM (tv[tvidx].expect), 0); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA224, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +static gpg_err_code_t +selftests_sha256 (int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect[32]; + } tv[] = + { + { "data-28 key-4", + "what do ya want for nothing?", + "Jefe", + { 0x5b, 0xdc, 0xc1, 0x46, 0xbf, 0x60, 0x75, 0x4e, + 0x6a, 0x04, 0x24, 0x26, 0x08, 0x95, 0x75, 0xc7, + 0x5a, 0x00, 0x3f, 0x08, 0x9d, 0x27, 0x39, 0x83, + 0x9d, 0xec, 0x58, 0xb9, 0x64, 0xec, 0x38, 0x43 } }, + + { "data-9 key-20", + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + { 0xb0, 0x34, 0x4c, 0x61, 0xd8, 0xdb, 0x38, 0x53, + 0x5c, 0xa8, 0xaf, 0xce, 0xaf, 0x0b, 0xf1, 0x2b, + 0x88, 0x1d, 0xc2, 0x00, 0xc9, 0x83, 0x3d, 0xa7, + 0x26, 0xe9, 0x37, 0x6c, 0x2e, 0x32, 0xcf, 0xf7 } }, + + { "data-50 key-20", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + { 0x77, 0x3e, 0xa9, 0x1e, 0x36, 0x80, 0x0e, 0x46, + 0x85, 0x4d, 0xb8, 0xeb, 0xd0, 0x91, 0x81, 0xa7, + 0x29, 0x59, 0x09, 0x8b, 0x3e, 0xf8, 0xc1, 0x22, + 0xd9, 0x63, 0x55, 0x14, 0xce, 0xd5, 0x65, 0xfe } }, + + { "data-50 key-26", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + { 0x82, 0x55, 0x8a, 0x38, 0x9a, 0x44, 0x3c, 0x0e, + 0xa4, 0xcc, 0x81, 0x98, 0x99, 0xf2, 0x08, 0x3a, + 0x85, 0xf0, 0xfa, 0xa3, 0xe5, 0x78, 0xf8, 0x07, + 0x7a, 0x2e, 0x3f, 0xf4, 0x67, 0x29, 0x66, 0x5b } }, + + { "data-54 key-131", + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + 
"\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x60, 0xe4, 0x31, 0x59, 0x1e, 0xe0, 0xb6, 0x7f, + 0x0d, 0x8a, 0x26, 0xaa, 0xcb, 0xf5, 0xb7, 0x7f, + 0x8e, 0x0b, 0xc6, 0x21, 0x37, 0x28, 0xc5, 0x14, + 0x05, 0x46, 0x04, 0x0f, 0x0e, 0xe3, 0x7f, 0x54 } }, + + { "data-152 key-131", + "This is a test using a larger than block-size key and a larger " + "than block-size data. The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x9b, 0x09, 0xff, 0xa7, 0x1b, 0x94, 0x2f, 0xcb, + 0x27, 0x63, 0x5f, 0xbc, 0xd5, 0xb0, 0xe9, 0x44, + 0xbf, 0xdc, 0x63, 0x64, 0x4f, 0x07, 0x13, 0x93, + 0x8a, 0x7f, 0x51, 0x53, 0x5c, 0x3a, 0x35, 0xe2 } }, + + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + hmac256_context_t hmachd; + const unsigned char *digest; + size_t dlen; + + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MD_SHA256, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, DIM (tv[tvidx].expect), 0); + if (errtxt) + goto failed; + + hmachd = _gcry_hmac256_new (tv[tvidx].key, strlen (tv[tvidx].key)); + if (!hmachd) + { + errtxt = "_gcry_hmac256_new failed"; + goto failed; + } + _gcry_hmac256_update (hmachd, tv[tvidx].data, strlen (tv[tvidx].data)); + digest = _gcry_hmac256_finalize (hmachd, &dlen); + if (!digest) + { + errtxt = "_gcry_hmac256_finalize failed"; + _gcry_hmac256_release (hmachd); + goto failed; + } + if (dlen != DIM (tv[tvidx].expect) + || memcmp (digest, tv[tvidx].expect, DIM (tv[tvidx].expect))) + { + errtxt = "does not match in second implementation"; + _gcry_hmac256_release (hmachd); + goto failed; + } + _gcry_hmac256_release (hmachd); + + if (!extended) + break; + } + + return 0; /* Succeeded. 
*/ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA256, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +static gpg_err_code_t +selftests_sha384 (int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect[48]; + } tv[] = + { + { "data-28 key-4", + "what do ya want for nothing?", + "Jefe", + { 0xaf, 0x45, 0xd2, 0xe3, 0x76, 0x48, 0x40, 0x31, + 0x61, 0x7f, 0x78, 0xd2, 0xb5, 0x8a, 0x6b, 0x1b, + 0x9c, 0x7e, 0xf4, 0x64, 0xf5, 0xa0, 0x1b, 0x47, + 0xe4, 0x2e, 0xc3, 0x73, 0x63, 0x22, 0x44, 0x5e, + 0x8e, 0x22, 0x40, 0xca, 0x5e, 0x69, 0xe2, 0xc7, + 0x8b, 0x32, 0x39, 0xec, 0xfa, 0xb2, 0x16, 0x49 } }, + + { "data-9 key-20", + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + { 0xaf, 0xd0, 0x39, 0x44, 0xd8, 0x48, 0x95, 0x62, + 0x6b, 0x08, 0x25, 0xf4, 0xab, 0x46, 0x90, 0x7f, + 0x15, 0xf9, 0xda, 0xdb, 0xe4, 0x10, 0x1e, 0xc6, + 0x82, 0xaa, 0x03, 0x4c, 0x7c, 0xeb, 0xc5, 0x9c, + 0xfa, 0xea, 0x9e, 0xa9, 0x07, 0x6e, 0xde, 0x7f, + 0x4a, 0xf1, 0x52, 0xe8, 0xb2, 0xfa, 0x9c, 0xb6 } }, + + { "data-50 key-20", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + { 0x88, 0x06, 0x26, 0x08, 0xd3, 0xe6, 0xad, 0x8a, + 0x0a, 0xa2, 0xac, 0xe0, 0x14, 0xc8, 0xa8, 0x6f, + 0x0a, 0xa6, 0x35, 0xd9, 0x47, 0xac, 0x9f, 0xeb, + 0xe8, 0x3e, 0xf4, 0xe5, 0x59, 0x66, 0x14, 0x4b, + 0x2a, 0x5a, 0xb3, 0x9d, 0xc1, 0x38, 0x14, 0xb9, + 0x4e, 0x3a, 0xb6, 0xe1, 0x01, 0xa3, 0x4f, 0x27 } }, + + { "data-50 key-26", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + { 0x3e, 0x8a, 0x69, 0xb7, 0x78, 0x3c, 0x25, 0x85, + 0x19, 0x33, 0xab, 0x62, 0x90, 0xaf, 0x6c, 0xa7, + 0x7a, 0x99, 0x81, 0x48, 0x08, 0x50, 0x00, 0x9c, + 0xc5, 0x57, 0x7c, 0x6e, 0x1f, 0x57, 0x3b, 0x4e, + 0x68, 0x01, 0xdd, 0x23, 0xc4, 0xa7, 0xd6, 0x79, + 0xcc, 0xf8, 0xa3, 0x86, 0xc6, 0x74, 0xcf, 0xfb } }, + + { "data-54 key-131", + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x4e, 0xce, 0x08, 0x44, 0x85, 0x81, 0x3e, 0x90, + 0x88, 0xd2, 0xc6, 0x3a, 0x04, 0x1b, 0xc5, 0xb4, + 0x4f, 0x9e, 0xf1, 0x01, 0x2a, 0x2b, 0x58, 0x8f, + 0x3c, 0xd1, 0x1f, 0x05, 0x03, 0x3a, 0xc4, 0xc6, + 0x0c, 0x2e, 0xf6, 0xab, 0x40, 0x30, 0xfe, 0x82, + 0x96, 0x24, 0x8d, 0xf1, 0x63, 0xf4, 0x49, 0x52 } }, + + { "data-152 key-131", + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x66, 0x17, 0x17, 0x8e, 0x94, 0x1f, 0x02, 0x0d, + 0x35, 0x1e, 0x2f, 0x25, 0x4e, 0x8f, 0xd3, 0x2c, + 0x60, 0x24, 0x20, 0xfe, 0xb0, 0xb8, 0xfb, 0x9a, + 0xdc, 0xce, 0xbb, 0x82, 0x46, 0x1e, 0x99, 0xc5, + 0xa6, 0x78, 0xcc, 0x31, 0xe7, 0x99, 0x17, 0x6d, + 0x38, 0x60, 0xe6, 0x11, 0x0c, 0x46, 0x52, 0x3e } }, + + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MD_SHA384, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, DIM (tv[tvidx].expect), 0); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA384, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +static gpg_err_code_t +selftests_sha512 (int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect[64]; + } tv[] = + { + { "data-28 key-4", + "what do ya want for nothing?", + "Jefe", + { 0x16, 0x4b, 0x7a, 0x7b, 0xfc, 0xf8, 0x19, 0xe2, + 0xe3, 0x95, 0xfb, 0xe7, 0x3b, 0x56, 0xe0, 0xa3, + 0x87, 0xbd, 0x64, 0x22, 0x2e, 0x83, 0x1f, 0xd6, + 0x10, 0x27, 0x0c, 0xd7, 0xea, 0x25, 0x05, 0x54, + 0x97, 0x58, 0xbf, 0x75, 0xc0, 0x5a, 0x99, 0x4a, + 0x6d, 0x03, 0x4f, 0x65, 0xf8, 0xf0, 0xe6, 0xfd, + 0xca, 0xea, 0xb1, 0xa3, 0x4d, 0x4a, 0x6b, 0x4b, + 0x63, 0x6e, 0x07, 0x0a, 0x38, 0xbc, 0xe7, 0x37 } }, + + { "data-9 key-20", + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + { 0x87, 0xaa, 0x7c, 0xde, 0xa5, 0xef, 0x61, 0x9d, + 0x4f, 0xf0, 0xb4, 0x24, 0x1a, 0x1d, 0x6c, 0xb0, + 0x23, 0x79, 0xf4, 0xe2, 0xce, 0x4e, 0xc2, 0x78, + 0x7a, 0xd0, 0xb3, 0x05, 0x45, 0xe1, 0x7c, 0xde, + 0xda, 0xa8, 0x33, 0xb7, 0xd6, 0xb8, 0xa7, 0x02, + 0x03, 0x8b, 0x27, 0x4e, 0xae, 0xa3, 0xf4, 0xe4, + 0xbe, 0x9d, 0x91, 0x4e, 0xeb, 0x61, 0xf1, 0x70, + 0x2e, 0x69, 0x6c, 0x20, 0x3a, 0x12, 0x68, 0x54 } }, + + { "data-50 key-20", + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + { 0xfa, 0x73, 0xb0, 0x08, 0x9d, 0x56, 0xa2, 0x84, + 0xef, 0xb0, 0xf0, 0x75, 0x6c, 0x89, 0x0b, 0xe9, + 0xb1, 0xb5, 0xdb, 0xdd, 0x8e, 0xe8, 0x1a, 0x36, + 0x55, 0xf8, 0x3e, 0x33, 0xb2, 0x27, 0x9d, 0x39, + 0xbf, 0x3e, 0x84, 0x82, 0x79, 0xa7, 0x22, 0xc8, + 0x06, 0xb4, 0x85, 0xa4, 0x7e, 0x67, 0xc8, 0x07, + 0xb9, 0x46, 0xa3, 0x37, 0xbe, 0xe8, 0x94, 0x26, + 0x74, 0x27, 0x88, 0x59, 0xe1, 0x32, 0x92, 0xfb } }, + + { "data-50 key-26", + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + 
"\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + { 0xb0, 0xba, 0x46, 0x56, 0x37, 0x45, 0x8c, 0x69, + 0x90, 0xe5, 0xa8, 0xc5, 0xf6, 0x1d, 0x4a, 0xf7, + 0xe5, 0x76, 0xd9, 0x7f, 0xf9, 0x4b, 0x87, 0x2d, + 0xe7, 0x6f, 0x80, 0x50, 0x36, 0x1e, 0xe3, 0xdb, + 0xa9, 0x1c, 0xa5, 0xc1, 0x1a, 0xa2, 0x5e, 0xb4, + 0xd6, 0x79, 0x27, 0x5c, 0xc5, 0x78, 0x80, 0x63, + 0xa5, 0xf1, 0x97, 0x41, 0x12, 0x0c, 0x4f, 0x2d, + 0xe2, 0xad, 0xeb, 0xeb, 0x10, 0xa2, 0x98, 0xdd } }, + + { "data-54 key-131", + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0x80, 0xb2, 0x42, 0x63, 0xc7, 0xc1, 0xa3, 0xeb, + 0xb7, 0x14, 0x93, 0xc1, 0xdd, 0x7b, 0xe8, 0xb4, + 0x9b, 0x46, 0xd1, 0xf4, 0x1b, 0x4a, 0xee, 0xc1, + 0x12, 0x1b, 0x01, 0x37, 0x83, 0xf8, 0xf3, 0x52, + 0x6b, 0x56, 0xd0, 0x37, 0xe0, 0x5f, 0x25, 0x98, + 0xbd, 0x0f, 0xd2, 0x21, 0x5d, 0x6a, 0x1e, 0x52, + 0x95, 0xe6, 0x4f, 0x73, 0xf6, 0x3f, 0x0a, 0xec, + 0x8b, 0x91, 0x5a, 0x98, 0x5d, 0x78, 0x65, 0x98 } }, + + { "data-152 key-131", + "This is a test using a larger than block-size key and a larger " + "than block-size data. The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + { 0xe3, 0x7b, 0x6a, 0x77, 0x5d, 0xc8, 0x7d, 0xba, + 0xa4, 0xdf, 0xa9, 0xf9, 0x6e, 0x5e, 0x3f, 0xfd, + 0xde, 0xbd, 0x71, 0xf8, 0x86, 0x72, 0x89, 0x86, + 0x5d, 0xf5, 0xa3, 0x2d, 0x20, 0xcd, 0xc9, 0x44, + 0xb6, 0x02, 0x2c, 0xac, 0x3c, 0x49, 0x82, 0xb1, + 0x0d, 0x5e, 0xeb, 0x55, 0xc3, 0xe4, 0xde, 0x15, + 0x13, 0x46, 0x76, 0xfb, 0x6d, 0xe0, 0x44, 0x60, + 0x65, 0xc9, 0x74, 0x40, 0xfa, 0x8c, 0x6a, 0x58 } }, + + { NULL } + }; + const char *what; + const char *errtxt; + int tvidx; + + for (tvidx=0; tv[tvidx].desc; tvidx++) + { + what = tv[tvidx].desc; + errtxt = check_one (GCRY_MD_SHA512, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + tv[tvidx].expect, DIM (tv[tvidx].expect), 0); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. */ + + failed: + if (report) + report ("hmac", GCRY_MD_SHA512, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + + +/* Test for the SHA3 algorithms. 
Vectors taken on 2017-07-18 from + * http://www.wolfgang-ehrhardt.de/hmac-sha3-testvectors.html */ +static gpg_err_code_t +selftests_sha3 (int hashalgo, int extended, selftest_report_func_t report) +{ + static struct + { + const char * const desc; + const char * const data; + const char * const key; + const char expect_224[28]; + const char expect_256[32]; + const char expect_384[48]; + const char expect_512[64]; + unsigned char trunc; + } tv[] = + { + { "data-9 key-20", /* Test 1 */ + "Hi There", + "\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b\x0b" + "\x0b\x0b\x0b\x0b", + + { 0x3b, 0x16, 0x54, 0x6b, 0xbc, 0x7b, 0xe2, 0x70, + 0x6a, 0x03, 0x1d, 0xca, 0xfd, 0x56, 0x37, 0x3d, + 0x98, 0x84, 0x36, 0x76, 0x41, 0xd8, 0xc5, 0x9a, + 0xf3, 0xc8, 0x60, 0xf7 }, + { 0xba, 0x85, 0x19, 0x23, 0x10, 0xdf, 0xfa, 0x96, + 0xe2, 0xa3, 0xa4, 0x0e, 0x69, 0x77, 0x43, 0x51, + 0x14, 0x0b, 0xb7, 0x18, 0x5e, 0x12, 0x02, 0xcd, + 0xcc, 0x91, 0x75, 0x89, 0xf9, 0x5e, 0x16, 0xbb }, + { 0x68, 0xd2, 0xdc, 0xf7, 0xfd, 0x4d, 0xdd, 0x0a, + 0x22, 0x40, 0xc8, 0xa4, 0x37, 0x30, 0x5f, 0x61, + 0xfb, 0x73, 0x34, 0xcf, 0xb5, 0xd0, 0x22, 0x6e, + 0x1b, 0xc2, 0x7d, 0xc1, 0x0a, 0x2e, 0x72, 0x3a, + 0x20, 0xd3, 0x70, 0xb4, 0x77, 0x43, 0x13, 0x0e, + 0x26, 0xac, 0x7e, 0x3d, 0x53, 0x28, 0x86, 0xbd }, + { 0xeb, 0x3f, 0xbd, 0x4b, 0x2e, 0xaa, 0xb8, 0xf5, + 0xc5, 0x04, 0xbd, 0x3a, 0x41, 0x46, 0x5a, 0xac, + 0xec, 0x15, 0x77, 0x0a, 0x7c, 0xab, 0xac, 0x53, + 0x1e, 0x48, 0x2f, 0x86, 0x0b, 0x5e, 0xc7, 0xba, + 0x47, 0xcc, 0xb2, 0xc6, 0xf2, 0xaf, 0xce, 0x8f, + 0x88, 0xd2, 0x2b, 0x6d, 0xc6, 0x13, 0x80, 0xf2, + 0x3a, 0x66, 0x8f, 0xd3, 0x88, 0x8b, 0xb8, 0x05, + 0x37, 0xc0, 0xa0, 0xb8, 0x64, 0x07, 0x68, 0x9e } + }, + + { "data-28 key-4", /* Test 2 */ + /* Test with a key shorter than the length of the HMAC output. */ + "what do ya want for nothing?", + "Jefe", + + { 0x7f, 0xdb, 0x8d, 0xd8, 0x8b, 0xd2, 0xf6, 0x0d, + 0x1b, 0x79, 0x86, 0x34, 0xad, 0x38, 0x68, 0x11, + 0xc2, 0xcf, 0xc8, 0x5b, 0xfa, 0xf5, 0xd5, 0x2b, + 0xba, 0xce, 0x5e, 0x66 }, + { 0xc7, 0xd4, 0x07, 0x2e, 0x78, 0x88, 0x77, 0xae, + 0x35, 0x96, 0xbb, 0xb0, 0xda, 0x73, 0xb8, 0x87, + 0xc9, 0x17, 0x1f, 0x93, 0x09, 0x5b, 0x29, 0x4a, + 0xe8, 0x57, 0xfb, 0xe2, 0x64, 0x5e, 0x1b, 0xa5 }, + { 0xf1, 0x10, 0x1f, 0x8c, 0xbf, 0x97, 0x66, 0xfd, + 0x67, 0x64, 0xd2, 0xed, 0x61, 0x90, 0x3f, 0x21, + 0xca, 0x9b, 0x18, 0xf5, 0x7c, 0xf3, 0xe1, 0xa2, + 0x3c, 0xa1, 0x35, 0x08, 0xa9, 0x32, 0x43, 0xce, + 0x48, 0xc0, 0x45, 0xdc, 0x00, 0x7f, 0x26, 0xa2, + 0x1b, 0x3f, 0x5e, 0x0e, 0x9d, 0xf4, 0xc2, 0x0a }, + { 0x5a, 0x4b, 0xfe, 0xab, 0x61, 0x66, 0x42, 0x7c, + 0x7a, 0x36, 0x47, 0xb7, 0x47, 0x29, 0x2b, 0x83, + 0x84, 0x53, 0x7c, 0xdb, 0x89, 0xaf, 0xb3, 0xbf, + 0x56, 0x65, 0xe4, 0xc5, 0xe7, 0x09, 0x35, 0x0b, + 0x28, 0x7b, 0xae, 0xc9, 0x21, 0xfd, 0x7c, 0xa0, + 0xee, 0x7a, 0x0c, 0x31, 0xd0, 0x22, 0xa9, 0x5e, + 0x1f, 0xc9, 0x2b, 0xa9, 0xd7, 0x7d, 0xf8, 0x83, + 0x96, 0x02, 0x75, 0xbe, 0xb4, 0xe6, 0x20, 0x24 } + }, + + { "data-50 key-20", /* Test 3 */ + /* Test with a combined length of key and data that is larger + * than 64 bytes (= block-size of SHA-224 and SHA-256). 
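For reference, the construction these vectors exercise is plain HMAC (RFC 2104) with SHA-3 as the hash H; the key handling that the larger-than-block-size comments describe is part of the definition, not an implementation shortcut:

  HMAC(K, m) = H((K' xor opad) || H((K' xor ipad) || m))

where K' = H(K) when len(K) exceeds the block size B of H, and K zero-padded to B bytes otherwise; ipad is the byte 0x36 and opad the byte 0x5c, each repeated B times. Hashing an oversized key first is therefore mandated by the construction itself.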
*/ + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd\xdd" + "\xdd\xdd", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa", + + { 0x67, 0x6c, 0xfc, 0x7d, 0x16, 0x15, 0x36, 0x38, + 0x78, 0x03, 0x90, 0x69, 0x2b, 0xe1, 0x42, 0xd2, + 0xdf, 0x7c, 0xe9, 0x24, 0xb9, 0x09, 0xc0, 0xc0, + 0x8d, 0xbf, 0xdc, 0x1a }, + { 0x84, 0xec, 0x79, 0x12, 0x4a, 0x27, 0x10, 0x78, + 0x65, 0xce, 0xdd, 0x8b, 0xd8, 0x2d, 0xa9, 0x96, + 0x5e, 0x5e, 0xd8, 0xc3, 0x7b, 0x0a, 0xc9, 0x80, + 0x05, 0xa7, 0xf3, 0x9e, 0xd5, 0x8a, 0x42, 0x07 }, + { 0x27, 0x5c, 0xd0, 0xe6, 0x61, 0xbb, 0x8b, 0x15, + 0x1c, 0x64, 0xd2, 0x88, 0xf1, 0xf7, 0x82, 0xfb, + 0x91, 0xa8, 0xab, 0xd5, 0x68, 0x58, 0xd7, 0x2b, + 0xab, 0xb2, 0xd4, 0x76, 0xf0, 0x45, 0x83, 0x73, + 0xb4, 0x1b, 0x6a, 0xb5, 0xbf, 0x17, 0x4b, 0xec, + 0x42, 0x2e, 0x53, 0xfc, 0x31, 0x35, 0xac, 0x6e }, + { 0x30, 0x9e, 0x99, 0xf9, 0xec, 0x07, 0x5e, 0xc6, + 0xc6, 0xd4, 0x75, 0xed, 0xa1, 0x18, 0x06, 0x87, + 0xfc, 0xf1, 0x53, 0x11, 0x95, 0x80, 0x2a, 0x99, + 0xb5, 0x67, 0x74, 0x49, 0xa8, 0x62, 0x51, 0x82, + 0x85, 0x1c, 0xb3, 0x32, 0xaf, 0xb6, 0xa8, 0x9c, + 0x41, 0x13, 0x25, 0xfb, 0xcb, 0xcd, 0x42, 0xaf, + 0xcb, 0x7b, 0x6e, 0x5a, 0xab, 0x7e, 0xa4, 0x2c, + 0x66, 0x0f, 0x97, 0xfd, 0x85, 0x84, 0xbf, 0x03 } + }, + + { "data-50 key-25", /* Test 4 */ + /* Test with a combined length of key and data that is larger + * than 64 bytes (= block-size of SHA-224 and SHA-256). */ + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd\xcd" + "\xcd\xcd", + "\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10" + "\x11\x12\x13\x14\x15\x16\x17\x18\x19", + + { 0xa9, 0xd7, 0x68, 0x5a, 0x19, 0xc4, 0xe0, 0xdb, + 0xd9, 0xdf, 0x25, 0x56, 0xcc, 0x8a, 0x7d, 0x2a, + 0x77, 0x33, 0xb6, 0x76, 0x25, 0xce, 0x59, 0x4c, + 0x78, 0x27, 0x0e, 0xeb }, + { 0x57, 0x36, 0x6a, 0x45, 0xe2, 0x30, 0x53, 0x21, + 0xa4, 0xbc, 0x5a, 0xa5, 0xfe, 0x2e, 0xf8, 0xa9, + 0x21, 0xf6, 0xaf, 0x82, 0x73, 0xd7, 0xfe, 0x7b, + 0xe6, 0xcf, 0xed, 0xb3, 0xf0, 0xae, 0xa6, 0xd7 }, + { 0x3a, 0x5d, 0x7a, 0x87, 0x97, 0x02, 0xc0, 0x86, + 0xbc, 0x96, 0xd1, 0xdd, 0x8a, 0xa1, 0x5d, 0x9c, + 0x46, 0x44, 0x6b, 0x95, 0x52, 0x13, 0x11, 0xc6, + 0x06, 0xfd, 0xc4, 0xe3, 0x08, 0xf4, 0xb9, 0x84, + 0xda, 0x2d, 0x0f, 0x94, 0x49, 0xb3, 0xba, 0x84, + 0x25, 0xec, 0x7f, 0xb8, 0xc3, 0x1b, 0xc1, 0x36 }, + { 0xb2, 0x7e, 0xab, 0x1d, 0x6e, 0x8d, 0x87, 0x46, + 0x1c, 0x29, 0xf7, 0xf5, 0x73, 0x9d, 0xd5, 0x8e, + 0x98, 0xaa, 0x35, 0xf8, 0xe8, 0x23, 0xad, 0x38, + 0xc5, 0x49, 0x2a, 0x20, 0x88, 0xfa, 0x02, 0x81, + 0x99, 0x3b, 0xbf, 0xff, 0x9a, 0x0e, 0x9c, 0x6b, + 0xf1, 0x21, 0xae, 0x9e, 0xc9, 0xbb, 0x09, 0xd8, + 0x4a, 0x5e, 0xba, 0xc8, 0x17, 0x18, 0x2e, 0xa9, + 0x74, 0x67, 0x3f, 0xb1, 0x33, 0xca, 0x0d, 0x1d } + }, + + { "data-20 key-20 trunc", /* Test 5 */ + /* Test with a truncation of output to 128 bits. 
*/ + "Test With Truncation", + "\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c\x0c" + "\x0c\x0c\x0c\x0c", + + { 0x49, 0xfd, 0xd3, 0xab, 0xd0, 0x05, 0xeb, 0xb8, + 0xae, 0x63, 0xfe, 0xa9, 0x46, 0xd1, 0x88, 0x3c }, + { 0x6e, 0x02, 0xc6, 0x45, 0x37, 0xfb, 0x11, 0x80, + 0x57, 0xab, 0xb7, 0xfb, 0x66, 0xa2, 0x3b, 0x3c }, + { 0x47, 0xc5, 0x1a, 0xce, 0x1f, 0xfa, 0xcf, 0xfd, + 0x74, 0x94, 0x72, 0x46, 0x82, 0x61, 0x57, 0x83 }, + { 0x0f, 0xa7, 0x47, 0x59, 0x48, 0xf4, 0x3f, 0x48, + 0xca, 0x05, 0x16, 0x67, 0x1e, 0x18, 0x97, 0x8c }, + 16 + }, + + { "data-54 key-131", /* Test 6 */ + /* Test with a key larger than 128 bytes (= block-size of + * SHA-384 and SHA-512). */ + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + + { 0xb4, 0xa1, 0xf0, 0x4c, 0x00, 0x28, 0x7a, 0x9b, + 0x7f, 0x60, 0x75, 0xb3, 0x13, 0xd2, 0x79, 0xb8, + 0x33, 0xbc, 0x8f, 0x75, 0x12, 0x43, 0x52, 0xd0, + 0x5f, 0xb9, 0x99, 0x5f }, + { 0xed, 0x73, 0xa3, 0x74, 0xb9, 0x6c, 0x00, 0x52, + 0x35, 0xf9, 0x48, 0x03, 0x2f, 0x09, 0x67, 0x4a, + 0x58, 0xc0, 0xce, 0x55, 0x5c, 0xfc, 0x1f, 0x22, + 0x3b, 0x02, 0x35, 0x65, 0x60, 0x31, 0x2c, 0x3b }, + { 0x0f, 0xc1, 0x95, 0x13, 0xbf, 0x6b, 0xd8, 0x78, + 0x03, 0x70, 0x16, 0x70, 0x6a, 0x0e, 0x57, 0xbc, + 0x52, 0x81, 0x39, 0x83, 0x6b, 0x9a, 0x42, 0xc3, + 0xd4, 0x19, 0xe4, 0x98, 0xe0, 0xe1, 0xfb, 0x96, + 0x16, 0xfd, 0x66, 0x91, 0x38, 0xd3, 0x3a, 0x11, + 0x05, 0xe0, 0x7c, 0x72, 0xb6, 0x95, 0x3b, 0xcc }, + { 0x00, 0xf7, 0x51, 0xa9, 0xe5, 0x06, 0x95, 0xb0, + 0x90, 0xed, 0x69, 0x11, 0xa4, 0xb6, 0x55, 0x24, + 0x95, 0x1c, 0xdc, 0x15, 0xa7, 0x3a, 0x5d, 0x58, + 0xbb, 0x55, 0x21, 0x5e, 0xa2, 0xcd, 0x83, 0x9a, + 0xc7, 0x9d, 0x2b, 0x44, 0xa3, 0x9b, 0xaf, 0xab, + 0x27, 0xe8, 0x3f, 0xde, 0x9e, 0x11, 0xf6, 0x34, + 0x0b, 0x11, 0xd9, 0x91, 0xb1, 0xb9, 0x1b, 0xf2, + 0xee, 0xe7, 0xfc, 0x87, 0x24, 0x26, 0xc3, 0xa4 } + }, + + { "data-54 key-147", /* Test 6a */ + /* Test with a key larger than 144 bytes (= block-size of + * SHA3-224). 
*/ + "Test Using Larger Than Block-Size Key - Hash Key First", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + + { 0xb9, 0x6d, 0x73, 0x0c, 0x14, 0x8c, 0x2d, 0xaa, + 0xd8, 0x64, 0x9d, 0x83, 0xde, 0xfa, 0xa3, 0x71, + 0x97, 0x38, 0xd3, 0x47, 0x75, 0x39, 0x7b, 0x75, + 0x71, 0xc3, 0x85, 0x15 }, + { 0xa6, 0x07, 0x2f, 0x86, 0xde, 0x52, 0xb3, 0x8b, + 0xb3, 0x49, 0xfe, 0x84, 0xcd, 0x6d, 0x97, 0xfb, + 0x6a, 0x37, 0xc4, 0xc0, 0xf6, 0x2a, 0xae, 0x93, + 0x98, 0x11, 0x93, 0xa7, 0x22, 0x9d, 0x34, 0x67 }, + { 0x71, 0x3d, 0xff, 0x03, 0x02, 0xc8, 0x50, 0x86, + 0xec, 0x5a, 0xd0, 0x76, 0x8d, 0xd6, 0x5a, 0x13, + 0xdd, 0xd7, 0x90, 0x68, 0xd8, 0xd4, 0xc6, 0x21, + 0x2b, 0x71, 0x2e, 0x41, 0x64, 0x94, 0x49, 0x11, + 0x14, 0x80, 0x23, 0x00, 0x44, 0x18, 0x5a, 0x99, + 0x10, 0x3e, 0xd8, 0x20, 0x04, 0xdd, 0xbf, 0xcc }, + { 0xb1, 0x48, 0x35, 0xc8, 0x19, 0xa2, 0x90, 0xef, + 0xb0, 0x10, 0xac, 0xe6, 0xd8, 0x56, 0x8d, 0xc6, + 0xb8, 0x4d, 0xe6, 0x0b, 0xc4, 0x9b, 0x00, 0x4c, + 0x3b, 0x13, 0xed, 0xa7, 0x63, 0x58, 0x94, 0x51, + 0xe5, 0xdd, 0x74, 0x29, 0x28, 0x84, 0xd1, 0xbd, + 0xce, 0x64, 0xe6, 0xb9, 0x19, 0xdd, 0x61, 0xdc, + 0x9c, 0x56, 0xa2, 0x82, 0xa8, 0x1c, 0x0b, 0xd1, + 0x4f, 0x1f, 0x36, 0x5b, 0x49, 0xb8, 0x3a, 0x5b } + }, + + { "data-152 key-131", /* Test 7 */ + /* Test with a key and data that is larger than 128 bytes (= + * block-size of SHA-384 and SHA-512). */ + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + + { 0x05, 0xd8, 0xcd, 0x6d, 0x00, 0xfa, 0xea, 0x8d, + 0x1e, 0xb6, 0x8a, 0xde, 0x28, 0x73, 0x0b, 0xbd, + 0x3c, 0xba, 0xb6, 0x92, 0x9f, 0x0a, 0x08, 0x6b, + 0x29, 0xcd, 0x62, 0xa0 }, + { 0x65, 0xc5, 0xb0, 0x6d, 0x4c, 0x3d, 0xe3, 0x2a, + 0x7a, 0xef, 0x87, 0x63, 0x26, 0x1e, 0x49, 0xad, + 0xb6, 0xe2, 0x29, 0x3e, 0xc8, 0xe7, 0xc6, 0x1e, + 0x8d, 0xe6, 0x17, 0x01, 0xfc, 0x63, 0xe1, 0x23 }, + { 0x02, 0x6f, 0xdf, 0x6b, 0x50, 0x74, 0x1e, 0x37, + 0x38, 0x99, 0xc9, 0xf7, 0xd5, 0x40, 0x6d, 0x4e, + 0xb0, 0x9f, 0xc6, 0x66, 0x56, 0x36, 0xfc, 0x1a, + 0x53, 0x00, 0x29, 0xdd, 0xf5, 0xcf, 0x3c, 0xa5, + 0xa9, 0x00, 0xed, 0xce, 0x01, 0xf5, 0xf6, 0x1e, + 0x2f, 0x40, 0x8c, 0xdf, 0x2f, 0xd3, 0xe7, 0xe8 }, + { 0x38, 0xa4, 0x56, 0xa0, 0x04, 0xbd, 0x10, 0xd3, + 0x2c, 0x9a, 0xb8, 0x33, 0x66, 0x84, 0x11, 0x28, + 0x62, 0xc3, 0xdb, 0x61, 0xad, 0xcc, 0xa3, 0x18, + 0x29, 0x35, 0x5e, 0xaf, 0x46, 0xfd, 0x5c, 0x73, + 0xd0, 0x6a, 0x1f, 0x0d, 0x13, 0xfe, 0xc9, 0xa6, + 0x52, 0xfb, 0x38, 0x11, 0xb5, 0x77, 0xb1, 0xb1, + 0xd1, 0xb9, 0x78, 0x9f, 0x97, 0xae, 0x5b, 0x83, + 0xc6, 0xf4, 0x4d, 0xfc, 0xf1, 0xd6, 0x7e, 0xba } + }, + + { "data-152 key-147", /* Test 7a */ + /* Test with a key larger than 144 bytes (= block-size of + * SHA3-224). */ + "This is a test using a larger than block-size key and a larger " + "than block-size data. 
The key needs to be hashed before being " + "used by the HMAC algorithm.", + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa" + "\xaa\xaa\xaa", + + { 0xc7, 0x9c, 0x9b, 0x09, 0x34, 0x24, 0xe5, 0x88, + 0xa9, 0x87, 0x8b, 0xbc, 0xb0, 0x89, 0xe0, 0x18, + 0x27, 0x00, 0x96, 0xe9, 0xb4, 0xb1, 0xa9, 0xe8, + 0x22, 0x0c, 0x86, 0x6a }, + { 0xe6, 0xa3, 0x6d, 0x9b, 0x91, 0x5f, 0x86, 0xa0, + 0x93, 0xca, 0xc7, 0xd1, 0x10, 0xe9, 0xe0, 0x4c, + 0xf1, 0xd6, 0x10, 0x0d, 0x30, 0x47, 0x55, 0x09, + 0xc2, 0x47, 0x5f, 0x57, 0x1b, 0x75, 0x8b, 0x5a }, + { 0xca, 0xd1, 0x8a, 0x8f, 0xf6, 0xc4, 0xcc, 0x3a, + 0xd4, 0x87, 0xb9, 0x5f, 0x97, 0x69, 0xe9, 0xb6, + 0x1c, 0x06, 0x2a, 0xef, 0xd6, 0x95, 0x25, 0x69, + 0xe6, 0xe6, 0x42, 0x18, 0x97, 0x05, 0x4c, 0xfc, + 0x70, 0xb5, 0xfd, 0xc6, 0x60, 0x5c, 0x18, 0x45, + 0x71, 0x12, 0xfc, 0x6a, 0xaa, 0xd4, 0x55, 0x85 }, + { 0xdc, 0x03, 0x0e, 0xe7, 0x88, 0x70, 0x34, 0xf3, + 0x2c, 0xf4, 0x02, 0xdf, 0x34, 0x62, 0x2f, 0x31, + 0x1f, 0x3e, 0x6c, 0xf0, 0x48, 0x60, 0xc6, 0xbb, + 0xd7, 0xfa, 0x48, 0x86, 0x74, 0x78, 0x2b, 0x46, + 0x59, 0xfd, 0xbd, 0xf3, 0xfd, 0x87, 0x78, 0x52, + 0x88, 0x5c, 0xfe, 0x6e, 0x22, 0x18, 0x5f, 0xe7, + 0xb2, 0xee, 0x95, 0x20, 0x43, 0x62, 0x9b, 0xc9, + 0xd5, 0xf3, 0x29, 0x8a, 0x41, 0xd0, 0x2c, 0x66 } + }/*,*/ + + /* Our API does not allow to specify a bit count and thus we + * can't use the following test. */ + /* { "data-5bit key-4", /\* Test 8 *\/ */ + /* /\* Test with data bit size no multiple of 8, the data bits are */ + /* * '11001' from the NIST example using SHA-3 order (= 5 bits */ + /* * from LSB hex byte 13 or 5 bits from MSB hex byte c8). 
*\/ */ + /* "\xc8", */ + /* "Jefe", */ + + /* { 0x5f, 0x8c, 0x0e, 0xa7, 0xfa, 0xfe, 0xcd, 0x0c, */ + /* 0x34, 0x63, 0xaa, 0xd0, 0x97, 0x42, 0xce, 0xce, */ + /* 0xb1, 0x42, 0xfe, 0x0a, 0xb6, 0xf4, 0x53, 0x94, */ + /* 0x38, 0xc5, 0x9d, 0xe8 }, */ + /* { 0xec, 0x82, 0x22, 0x77, 0x3f, 0xac, 0x68, 0xb3, */ + /* 0xd3, 0xdc, 0xb1, 0x82, 0xae, 0xc8, 0xb0, 0x50, */ + /* 0x7a, 0xce, 0x44, 0x48, 0xd2, 0x0a, 0x11, 0x47, */ + /* 0xe6, 0x82, 0x11, 0x8d, 0xa4, 0xe3, 0xf4, 0x4c }, */ + /* { 0x21, 0xfb, 0xd3, 0xbf, 0x3e, 0xbb, 0xa3, 0xcf, */ + /* 0xc9, 0xef, 0x64, 0xc0, 0x59, 0x1c, 0x92, 0xc5, */ + /* 0xac, 0xb2, 0x65, 0xe9, 0x2d, 0x87, 0x61, 0xd1, */ + /* 0xf9, 0x1a, 0x52, 0xa1, 0x03, 0xa6, 0xc7, 0x96, */ + /* 0x94, 0xcf, 0xd6, 0x7a, 0x9a, 0x2a, 0xc1, 0x32, */ + /* 0x4f, 0x02, 0xfe, 0xa6, 0x3b, 0x81, 0xef, 0xfc }, */ + /* { 0x27, 0xf9, 0x38, 0x8c, 0x15, 0x67, 0xef, 0x4e, */ + /* 0xf2, 0x00, 0x60, 0x2a, 0x6c, 0xf8, 0x71, 0xd6, */ + /* 0x8a, 0x6f, 0xb0, 0x48, 0xd4, 0x73, 0x7a, 0xc4, */ + /* 0x41, 0x8a, 0x2f, 0x02, 0x12, 0x89, 0xd1, 0x3d, */ + /* 0x1f, 0xd1, 0x12, 0x0f, 0xec, 0xb9, 0xcf, 0x96, */ + /* 0x4c, 0x5b, 0x11, 0x7a, 0xb5, 0xb1, 0x1c, 0x61, */ + /* 0x4b, 0x2d, 0xa3, 0x9d, 0xad, 0xd5, 0x1f, 0x2f, */ + /* 0x5e, 0x22, 0xaa, 0xcc, 0xec, 0x7d, 0x57, 0x6e } */ + /* } */ + + }; + const char *what; + const char *errtxt; + int tvidx; + const char *expect; + int nexpect; + + for (tvidx=0; tvidx < DIM(tv); tvidx++) + { + what = tv[tvidx].desc; + if (hashalgo == GCRY_MD_SHA3_224) + { + expect = tv[tvidx].expect_224; + nexpect = DIM (tv[tvidx].expect_224); + } + else if (hashalgo == GCRY_MD_SHA3_256) + { + expect = tv[tvidx].expect_256; + nexpect = DIM (tv[tvidx].expect_256); + } + else if (hashalgo == GCRY_MD_SHA3_384) + { + expect = tv[tvidx].expect_384; + nexpect = DIM (tv[tvidx].expect_384); + } + else if (hashalgo == GCRY_MD_SHA3_512) + { + expect = tv[tvidx].expect_512; + nexpect = DIM (tv[tvidx].expect_512); + } + else + BUG(); + + if (tv[tvidx].trunc && tv[tvidx].trunc < nexpect) + nexpect = tv[tvidx].trunc; + + errtxt = check_one (hashalgo, + tv[tvidx].data, strlen (tv[tvidx].data), + tv[tvidx].key, strlen (tv[tvidx].key), + expect, nexpect, !!tv[tvidx].trunc); + if (errtxt) + goto failed; + if (!extended) + break; + } + + return 0; /* Succeeded. 
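These vectors can also be exercised from outside the module; a hedged sketch against the public API (gcrypt.h assumed, library initialization omitted), checking Test 1's HMAC-SHA3-256 tag from above:

#include <string.h>
#include <gcrypt.h>

static int
verify_test1_hmac_sha3_256 (void)
{
  static const unsigned char tag[32] =
    { 0xba, 0x85, 0x19, 0x23, 0x10, 0xdf, 0xfa, 0x96,
      0xe2, 0xa3, 0xa4, 0x0e, 0x69, 0x77, 0x43, 0x51,
      0x14, 0x0b, 0xb7, 0x18, 0x5e, 0x12, 0x02, 0xcd,
      0xcc, 0x91, 0x75, 0x89, 0xf9, 0x5e, 0x16, 0xbb };
  unsigned char key[20];
  gcry_mac_hd_t hd;
  gcry_error_t err;

  memset (key, 0x0b, sizeof key);                 /* Test 1 key: 20 x 0x0b */
  err = gcry_mac_open (&hd, GCRY_MAC_HMAC_SHA3_256, 0, NULL);
  if (!err)
    err = gcry_mac_setkey (hd, key, sizeof key);
  if (!err)
    err = gcry_mac_write (hd, "Hi There", 8);
  if (!err)
    err = gcry_mac_verify (hd, tag, sizeof tag);  /* GPG_ERR_CHECKSUM on mismatch */
  gcry_mac_close (hd);
  return err == 0;
}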
*/ + + failed: + if (report) + report ("hmac", hashalgo, what, errtxt); + return GPG_ERR_SELFTEST_FAILED; +} + + +static gpg_err_code_t +hmac_selftest (int algo, int extended, selftest_report_func_t report) +{ + gpg_err_code_t ec; + + switch (algo) + { + case GCRY_MAC_HMAC_SHA1: + ec = selftests_sha1 (extended, report); + break; + case GCRY_MAC_HMAC_SHA224: + ec = selftests_sha224 (extended, report); + break; + case GCRY_MAC_HMAC_SHA256: + ec = selftests_sha256 (extended, report); + break; + case GCRY_MAC_HMAC_SHA384: + ec = selftests_sha384 (extended, report); + break; + case GCRY_MAC_HMAC_SHA512: + ec = selftests_sha512 (extended, report); + break; + + case GCRY_MAC_HMAC_SHA3_224: + case GCRY_MAC_HMAC_SHA3_256: + case GCRY_MAC_HMAC_SHA3_384: + case GCRY_MAC_HMAC_SHA3_512: + { + int md_algo = map_mac_algo_to_md (algo); + ec = selftests_sha3 (md_algo, extended, report); + } + break; + + default: + ec = GPG_ERR_MAC_ALGO; + break; + } + + return ec; +} + + +static const gcry_mac_spec_ops_t hmac_ops = { + hmac_open, + hmac_close, + hmac_setkey, + NULL, + hmac_reset, + hmac_write, + hmac_read, + hmac_verify, + hmac_get_maclen, + hmac_get_keylen, + NULL, + hmac_selftest +}; + + +#if USE_SHA1 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha1 = { + GCRY_MAC_HMAC_SHA1, {0, 1}, "HMAC_SHA1", + &hmac_ops +}; +#endif +#if USE_SHA256 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha256 = { + GCRY_MAC_HMAC_SHA256, {0, 1}, "HMAC_SHA256", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha224 = { + GCRY_MAC_HMAC_SHA224, {0, 1}, "HMAC_SHA224", + &hmac_ops +}; +#endif +#if USE_SHA512 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512 = { + GCRY_MAC_HMAC_SHA512, {0, 1}, "HMAC_SHA512", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha384 = { + GCRY_MAC_HMAC_SHA384, {0, 1}, "HMAC_SHA384", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_256 = { + GCRY_MAC_HMAC_SHA512_256, {0, 1}, "HMAC_SHA512_256", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_224 = { + GCRY_MAC_HMAC_SHA512_224, {0, 1}, "HMAC_SHA512_224", + &hmac_ops +}; + +#endif +#if USE_SHA3 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_224 = { + GCRY_MAC_HMAC_SHA3_224, {0, 1}, "HMAC_SHA3_224", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_256 = { + GCRY_MAC_HMAC_SHA3_256, {0, 1}, "HMAC_SHA3_256", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_384 = { + GCRY_MAC_HMAC_SHA3_384, {0, 1}, "HMAC_SHA3_384", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_512 = { + GCRY_MAC_HMAC_SHA3_512, {0, 1}, "HMAC_SHA3_512", + &hmac_ops +}; +#endif +#ifdef USE_GOST_R_3411_94 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_94 = { + GCRY_MAC_HMAC_GOSTR3411_94, {0, 0}, "HMAC_GOSTR3411_94", + &hmac_ops +}; +gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_cp = { + GCRY_MAC_HMAC_GOSTR3411_CP, {0, 0}, "HMAC_GOSTR3411_CP", + &hmac_ops +}; +#endif +#ifdef USE_GOST_R_3411_12 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog256 = { + GCRY_MAC_HMAC_STRIBOG256, {0, 0}, "HMAC_STRIBOG256", + &hmac_ops +}; + +gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog512 = { + GCRY_MAC_HMAC_STRIBOG512, {0, 0}, "HMAC_STRIBOG512", + &hmac_ops +}; +#endif +#if USE_WHIRLPOOL +gcry_mac_spec_t _gcry_mac_type_spec_hmac_whirlpool = { + GCRY_MAC_HMAC_WHIRLPOOL, {0, 0}, "HMAC_WHIRLPOOL", + &hmac_ops +}; +#endif +#if USE_RMD160 +gcry_mac_spec_t _gcry_mac_type_spec_hmac_rmd160 = { + GCRY_MAC_HMAC_RMD160, {0, 0}, "HMAC_RIPEMD160", + &hmac_ops +}; +#endif +#if USE_TIGER +gcry_mac_spec_t 
_gcry_mac_type_spec_hmac_tiger1 = {
+  GCRY_MAC_HMAC_TIGER1, {0, 0}, "HMAC_TIGER",
+  &hmac_ops
+};
+#endif
+#if USE_MD5
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_md5 = {
+  GCRY_MAC_HMAC_MD5, {0, 0}, "HMAC_MD5",
+  &hmac_ops
+};
+#endif
+#if USE_MD4
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_md4 = {
+  GCRY_MAC_HMAC_MD4, {0, 0}, "HMAC_MD4",
+  &hmac_ops
+};
+#endif
+#if USE_MD2
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_md2 = {
+  GCRY_MAC_HMAC_MD2, {0, 0}, "HMAC_MD2",
+  &hmac_ops
+};
+#endif
+#if USE_BLAKE2
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_512 = {
+  GCRY_MAC_HMAC_BLAKE2B_512, {0, 0}, "HMAC_BLAKE2B_512",
+  &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_384 = {
+  GCRY_MAC_HMAC_BLAKE2B_384, {0, 0}, "HMAC_BLAKE2B_384",
+  &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_256 = {
+  GCRY_MAC_HMAC_BLAKE2B_256, {0, 0}, "HMAC_BLAKE2B_256",
+  &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_160 = {
+  GCRY_MAC_HMAC_BLAKE2B_160, {0, 0}, "HMAC_BLAKE2B_160",
+  &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_256 = {
+  GCRY_MAC_HMAC_BLAKE2S_256, {0, 0}, "HMAC_BLAKE2S_256",
+  &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_224 = {
+  GCRY_MAC_HMAC_BLAKE2S_224, {0, 0}, "HMAC_BLAKE2S_224",
+  &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_160 = {
+  GCRY_MAC_HMAC_BLAKE2S_160, {0, 0}, "HMAC_BLAKE2S_160",
+  &hmac_ops
+};
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_128 = {
+  GCRY_MAC_HMAC_BLAKE2S_128, {0, 0}, "HMAC_BLAKE2S_128",
+  &hmac_ops
+};
+#endif
+#if USE_SM3
+gcry_mac_spec_t _gcry_mac_type_spec_hmac_sm3 = {
+  GCRY_MAC_HMAC_SM3, {0, 0}, "HMAC_SM3",
+  &hmac_ops
+};
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/mac-internal.h b/comm/third_party/libgcrypt/cipher/mac-internal.h
new file mode 100644
index 0000000000..e49885beec
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac-internal.h
@@ -0,0 +1,275 @@
+/* mac-internal.h - Internal defs for mac.c
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <gpg-error.h>
+
+#include "g10lib.h"
+#include "cipher-proto.h"
+#include "gost.h"
+
+
+/* The data object used to hold a handle to a MAC object. */
+struct gcry_mac_handle;
+
+/* The data object used to hold poly1305-mac context. */
+struct poly1305mac_context_s;
+
+
+/*
+ *
+ * Message authentication code related definitions.
+ *
+ */
+
+
+/* Magic values for the context structure. */
+#define CTX_MAC_MAGIC_NORMAL 0x59d9b8af
+#define CTX_MAC_MAGIC_SECURE 0x12c27cd0
+
+
+/* MAC module functions.
*/ +typedef gcry_err_code_t (*gcry_mac_open_func_t)(gcry_mac_hd_t h); +typedef void (*gcry_mac_close_func_t)(gcry_mac_hd_t h); +typedef gcry_err_code_t (*gcry_mac_setkey_func_t)(gcry_mac_hd_t h, + const unsigned char *key, + size_t keylen); +typedef gcry_err_code_t (*gcry_mac_setiv_func_t)(gcry_mac_hd_t h, + const unsigned char *iv, + size_t ivlen); +typedef gcry_err_code_t (*gcry_mac_reset_func_t)(gcry_mac_hd_t h); +typedef gcry_err_code_t (*gcry_mac_write_func_t)(gcry_mac_hd_t h, + const unsigned char *inbuf, + size_t inlen); +typedef gcry_err_code_t (*gcry_mac_read_func_t)(gcry_mac_hd_t h, + unsigned char *outbuf, + size_t *outlen); +typedef gcry_err_code_t (*gcry_mac_verify_func_t)(gcry_mac_hd_t h, + const unsigned char *inbuf, + size_t inlen); +typedef unsigned int (*gcry_mac_get_maclen_func_t)(int algo); +typedef unsigned int (*gcry_mac_get_keylen_func_t)(int algo); + +/* The type used to convey additional information to a MAC. */ +typedef gpg_err_code_t (*gcry_mac_set_extra_info_t) + (gcry_mac_hd_t h, int what, const void *buffer, size_t buflen); + +typedef struct gcry_mac_spec_ops +{ + gcry_mac_open_func_t open; + gcry_mac_close_func_t close; + gcry_mac_setkey_func_t setkey; + gcry_mac_setiv_func_t setiv; + gcry_mac_reset_func_t reset; + gcry_mac_write_func_t write; + gcry_mac_read_func_t read; + gcry_mac_verify_func_t verify; + gcry_mac_get_maclen_func_t get_maclen; + gcry_mac_get_keylen_func_t get_keylen; + gcry_mac_set_extra_info_t set_extra_info; + selftest_func_t selftest; +} gcry_mac_spec_ops_t; + + +/* Module specification structure for message authentication codes. */ +typedef struct gcry_mac_spec +{ + int algo; + struct { + unsigned int disabled:1; + unsigned int fips:1; + } flags; + const char *name; + const gcry_mac_spec_ops_t *ops; +} gcry_mac_spec_t; + +/* The handle structure. */ +struct gcry_mac_handle +{ + int magic; + int algo; + const gcry_mac_spec_t *spec; + gcry_ctx_t gcry_ctx; + union { + struct { + gcry_md_hd_t md_ctx; + int md_algo; + } hmac; + struct { + gcry_cipher_hd_t ctx; + int cipher_algo; + unsigned int blklen; + } cmac; + struct { + gcry_cipher_hd_t ctx; + int cipher_algo; + } gmac; + struct { + struct poly1305mac_context_s *ctx; + } poly1305mac; + struct { + GOST28147_context ctx; + u32 n1, n2; + unsigned int unused; + unsigned int count; + unsigned char lastiv[8]; /* IMIT blocksize */ + } imit; + } u; +}; + + +/* + * The HMAC algorithm specifications (mac-hmac.c). 
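Taken together, the ops table and spec structure above are the complete contract a backend has to satisfy; mac.c calls nothing else. A hedged sketch of how a hypothetical backend would plug in (all mymac_* names and the GCRY_MAC_MYMAC id are invented for illustration):

static const gcry_mac_spec_ops_t mymac_ops = {
  mymac_open,
  mymac_close,
  mymac_setkey,
  NULL,                  /* no setiv: keyed but nonce-less */
  mymac_reset,
  mymac_write,
  mymac_read,
  mymac_verify,
  mymac_get_maclen,
  mymac_get_keylen,
  NULL,                  /* no set_extra_info */
  NULL                   /* no selftest */
};

gcry_mac_spec_t _gcry_mac_type_spec_mymac = {
  GCRY_MAC_MYMAC, {0, 0}, "MYMAC",   /* flags: not disabled, not FIPS-approved */
  &mymac_ops
};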
+ */ +#if USE_SHA1 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha1; +#endif +#if USE_SHA256 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha256; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha224; +#endif +#if USE_SHA512 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha384; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_224; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha512_256; +#endif +#if USE_SHA3 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_224; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_256; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_384; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sha3_512; +#endif +#ifdef USE_GOST_R_3411_94 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_94; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_gost3411_cp; +#endif +#ifdef USE_GOST_R_3411_12 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog256; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_stribog512; +#endif +#if USE_WHIRLPOOL +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_whirlpool; +#endif +#if USE_RMD160 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_rmd160; +#endif +#if USE_TIGER +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_tiger1; +#endif +#if USE_MD5 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_md5; +#endif +#if USE_MD4 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_md4; +#endif +#if USE_BLAKE2 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_512; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_384; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_256; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2b_160; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_256; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_224; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_160; +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_blake2s_128; +#endif +#if USE_SM3 +extern gcry_mac_spec_t _gcry_mac_type_spec_hmac_sm3; +#endif + +/* + * The CMAC algorithm specifications (mac-cmac.c). + */ +#if USE_BLOWFISH +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_blowfish; +#endif +#if USE_DES +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_tripledes; +#endif +#if USE_CAST5 +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_cast5; +#endif +#if USE_AES +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_aes; +#endif +#if USE_TWOFISH +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_twofish; +#endif +#if USE_SERPENT +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_serpent; +#endif +#if USE_RFC2268 +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_rfc2268; +#endif +#if USE_SEED +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_seed; +#endif +#if USE_CAMELLIA +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_camellia; +#endif +#ifdef USE_IDEA +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_idea; +#endif +#if USE_GOST28147 +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_gost28147; +#endif +#if USE_GOST28147 +extern gcry_mac_spec_t _gcry_mac_type_spec_gost28147_imit; +#endif +#if USE_SM4 +extern gcry_mac_spec_t _gcry_mac_type_spec_cmac_sm4; +#endif + +/* + * The GMAC algorithm specifications (mac-gmac.c). 
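Of the spec families in this header, GMAC is the one that requires gcry_mac_setiv (it is GCM used for authentication only). A minimal caller sketch against the public API (gcrypt.h assumed; a 96-bit nonce as in GCM proper):

#include <gcrypt.h>

static gcry_error_t
gmac_aes_tag (const unsigned char key[16], const unsigned char nonce[12],
              const void *aad, size_t aadlen, unsigned char tag[16])
{
  gcry_mac_hd_t hd;
  size_t taglen = 16;
  gcry_error_t err = gcry_mac_open (&hd, GCRY_MAC_GMAC_AES, 0, NULL);

  if (err)
    return err;
  if (!(err = gcry_mac_setkey (hd, key, 16))      /* AES-128 */
      && !(err = gcry_mac_setiv (hd, nonce, 12))
      && !(err = gcry_mac_write (hd, aad, aadlen)))
    err = gcry_mac_read (hd, tag, &taglen);
  gcry_mac_close (hd);
  return err;
}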
+ */
+#if USE_AES
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_aes;
+#endif
+#if USE_TWOFISH
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_twofish;
+#endif
+#if USE_SERPENT
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_serpent;
+#endif
+#if USE_SEED
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_seed;
+#endif
+#if USE_CAMELLIA
+extern gcry_mac_spec_t _gcry_mac_type_spec_gmac_camellia;
+#endif
+
+/*
+ * The Poly1305 MAC algorithm specifications (mac-poly1305.c).
+ */
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac;
+#if USE_AES
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_aes;
+#endif
+#if USE_CAMELLIA
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_camellia;
+#endif
+#if USE_TWOFISH
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_twofish;
+#endif
+#if USE_SERPENT
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_serpent;
+#endif
+#if USE_SEED
+extern gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_seed;
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/mac-poly1305.c b/comm/third_party/libgcrypt/cipher/mac-poly1305.c
new file mode 100644
index 0000000000..46ea735f89
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac-poly1305.c
@@ -0,0 +1,364 @@
+/* mac-poly1305.c - Poly1305 based MACs
+ * Copyright (C) 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mac-internal.h"
+#include "poly1305-internal.h"
+
+
+struct poly1305mac_context_s {
+  poly1305_context_t ctx;
+  gcry_cipher_hd_t hd;
+  struct {
+    unsigned int key_set:1;
+    unsigned int nonce_set:1;
+    unsigned int tag:1;
+  } marks;
+  byte tag[POLY1305_TAGLEN];
+  byte key[POLY1305_KEYLEN];
+};
+
+
+static gcry_err_code_t
+poly1305mac_open (gcry_mac_hd_t h)
+{
+  struct poly1305mac_context_s *mac_ctx;
+  int secure = (h->magic == CTX_MAC_MAGIC_SECURE);
+  unsigned int flags = (secure ? GCRY_CIPHER_SECURE : 0);
+  gcry_err_code_t err;
+  int cipher_algo;
+
+  if (secure)
+    mac_ctx = xtrycalloc_secure (1, sizeof(*mac_ctx));
+  else
+    mac_ctx = xtrycalloc (1, sizeof(*mac_ctx));
+
+  if (!mac_ctx)
+    return gpg_err_code_from_syserror ();
+
+  h->u.poly1305mac.ctx = mac_ctx;
+
+  switch (h->spec->algo)
+    {
+    default:
+      /* already checked. */
+    case GCRY_MAC_POLY1305:
+      /* plain Poly1305.
*/ + cipher_algo = -1; + return 0; + case GCRY_MAC_POLY1305_AES: + cipher_algo = GCRY_CIPHER_AES; + break; + case GCRY_MAC_POLY1305_CAMELLIA: + cipher_algo = GCRY_CIPHER_CAMELLIA128; + break; + case GCRY_MAC_POLY1305_TWOFISH: + cipher_algo = GCRY_CIPHER_TWOFISH; + break; + case GCRY_MAC_POLY1305_SERPENT: + cipher_algo = GCRY_CIPHER_SERPENT128; + break; + case GCRY_MAC_POLY1305_SEED: + cipher_algo = GCRY_CIPHER_SEED; + break; + } + + err = _gcry_cipher_open_internal (&mac_ctx->hd, cipher_algo, + GCRY_CIPHER_MODE_ECB, flags); + if (err) + goto err_free; + + return 0; + +err_free: + xfree(h->u.poly1305mac.ctx); + return err; +} + + +static void +poly1305mac_close (gcry_mac_hd_t h) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + + if (h->spec->algo != GCRY_MAC_POLY1305) + _gcry_cipher_close (mac_ctx->hd); + + xfree(mac_ctx); +} + + +static gcry_err_code_t +poly1305mac_prepare_key (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + size_t block_keylen = keylen - 16; + + /* Need at least 16 + 1 byte key. */ + if (keylen <= 16) + return GPG_ERR_INV_KEYLEN; + + /* For Poly1305-AES, first part of key is passed to Poly1305 as is. */ + memcpy (mac_ctx->key, key + block_keylen, 16); + + /* Remaining part is used as key for the block cipher. */ + return _gcry_cipher_setkey (mac_ctx->hd, key, block_keylen); +} + + +static gcry_err_code_t +poly1305mac_setkey (gcry_mac_hd_t h, const unsigned char *key, size_t keylen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + gcry_err_code_t err; + + memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx)); + memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag)); + memset(&mac_ctx->key, 0, sizeof(mac_ctx->key)); + + mac_ctx->marks.key_set = 0; + mac_ctx->marks.nonce_set = 0; + mac_ctx->marks.tag = 0; + + if (h->spec->algo != GCRY_MAC_POLY1305) + { + err = poly1305mac_prepare_key (h, key, keylen); + if (err) + return err; + + /* Poly1305-AES/etc also need nonce. */ + mac_ctx->marks.key_set = 1; + mac_ctx->marks.nonce_set = 0; + } + else + { + /* For plain Poly1305, key is the nonce and setup is complete now. */ + + if (keylen != POLY1305_KEYLEN) + return GPG_ERR_INV_KEYLEN; + + memcpy (mac_ctx->key, key, keylen); + + err = _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN); + if (err) + { + memset(&mac_ctx->key, 0, sizeof(mac_ctx->key)); + return err; + } + + mac_ctx->marks.key_set = 1; + mac_ctx->marks.nonce_set = 1; + } + + return 0; +} + + +static gcry_err_code_t +poly1305mac_setiv (gcry_mac_hd_t h, const unsigned char *iv, size_t ivlen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + gcry_err_code_t err; + + if (h->spec->algo == GCRY_MAC_POLY1305) + return GPG_ERR_INV_ARG; + + if (ivlen != 16) + return GPG_ERR_INV_ARG; + + if (!mac_ctx->marks.key_set) + return 0; + + memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx)); + memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag)); + mac_ctx->marks.nonce_set = 0; + mac_ctx->marks.tag = 0; + + /* Prepare second part of the poly1305 key. 
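For orientation, what the setiv path computes below is the per-nonce pad of Poly1305-AES. In Bernstein's scheme the tag is

  Tag(m) = (Poly1305_r(m) + AES_k(n)) mod 2^128

so encrypting the 16-byte nonce n into key+16 supplies the s half of the 32-byte r||s block that _gcry_poly1305_init consumes; the r half was stored at key+0 by poly1305mac_prepare_key.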
*/ + + err = _gcry_cipher_encrypt (mac_ctx->hd, mac_ctx->key + 16, 16, iv, 16); + if (err) + return err; + + err = _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN); + if (err) + return err; + + mac_ctx->marks.nonce_set = 1; + return 0; +} + + +static gcry_err_code_t +poly1305mac_reset (gcry_mac_hd_t h) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + + if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set) + return GPG_ERR_INV_STATE; + + memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx)); + memset(&mac_ctx->tag, 0, sizeof(mac_ctx->tag)); + + mac_ctx->marks.key_set = 1; + mac_ctx->marks.nonce_set = 1; + mac_ctx->marks.tag = 0; + + return _gcry_poly1305_init (&mac_ctx->ctx, mac_ctx->key, POLY1305_KEYLEN); +} + + +static gcry_err_code_t +poly1305mac_write (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + + if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set || + mac_ctx->marks.tag) + return GPG_ERR_INV_STATE; + + _gcry_poly1305_update (&mac_ctx->ctx, buf, buflen); + return 0; +} + + +static gcry_err_code_t +poly1305mac_read (gcry_mac_hd_t h, unsigned char *outbuf, size_t *outlen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + + if (!mac_ctx->marks.key_set || !mac_ctx->marks.nonce_set) + return GPG_ERR_INV_STATE; + + if (!mac_ctx->marks.tag) + { + _gcry_poly1305_finish(&mac_ctx->ctx, mac_ctx->tag); + + memset(&mac_ctx->ctx, 0, sizeof(mac_ctx->ctx)); + mac_ctx->marks.tag = 1; + } + + if (*outlen == 0) + return 0; + + if (*outlen <= POLY1305_TAGLEN) + buf_cpy (outbuf, mac_ctx->tag, *outlen); + else + { + buf_cpy (outbuf, mac_ctx->tag, POLY1305_TAGLEN); + *outlen = POLY1305_TAGLEN; + } + + return 0; +} + + +static gcry_err_code_t +poly1305mac_verify (gcry_mac_hd_t h, const unsigned char *buf, size_t buflen) +{ + struct poly1305mac_context_s *mac_ctx = h->u.poly1305mac.ctx; + gcry_err_code_t err; + size_t outlen = 0; + + /* Check and finalize tag. */ + err = poly1305mac_read(h, NULL, &outlen); + if (err) + return err; + + if (buflen > POLY1305_TAGLEN) + return GPG_ERR_INV_LENGTH; + + return buf_eq_const (buf, mac_ctx->tag, buflen) ? 
0 : GPG_ERR_CHECKSUM;
+}
+
+
+static unsigned int
+poly1305mac_get_maclen (int algo)
+{
+  (void)algo;
+
+  return POLY1305_TAGLEN;
+}
+
+
+static unsigned int
+poly1305mac_get_keylen (int algo)
+{
+  (void)algo;
+
+  return POLY1305_KEYLEN;
+}
+
+
+static gcry_mac_spec_ops_t poly1305mac_ops = {
+  poly1305mac_open,
+  poly1305mac_close,
+  poly1305mac_setkey,
+  poly1305mac_setiv,
+  poly1305mac_reset,
+  poly1305mac_write,
+  poly1305mac_read,
+  poly1305mac_verify,
+  poly1305mac_get_maclen,
+  poly1305mac_get_keylen,
+  NULL,
+  NULL,
+};
+
+
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac = {
+  GCRY_MAC_POLY1305, {0, 0}, "POLY1305",
+  &poly1305mac_ops
+};
+#if USE_AES
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_aes = {
+  GCRY_MAC_POLY1305_AES, {0, 0}, "POLY1305_AES",
+  &poly1305mac_ops
+};
+#endif
+#if USE_CAMELLIA
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_camellia = {
+  GCRY_MAC_POLY1305_CAMELLIA, {0, 0}, "POLY1305_CAMELLIA",
+  &poly1305mac_ops
+};
+#endif
+#if USE_TWOFISH
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_twofish = {
+  GCRY_MAC_POLY1305_TWOFISH, {0, 0}, "POLY1305_TWOFISH",
+  &poly1305mac_ops
+};
+#endif
+#if USE_SERPENT
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_serpent = {
+  GCRY_MAC_POLY1305_SERPENT, {0, 0}, "POLY1305_SERPENT",
+  &poly1305mac_ops
+};
+#endif
+#if USE_SEED
+gcry_mac_spec_t _gcry_mac_type_spec_poly1305mac_seed = {
+  GCRY_MAC_POLY1305_SEED, {0, 0}, "POLY1305_SEED",
+  &poly1305mac_ops
+};
+#endif
diff --git a/comm/third_party/libgcrypt/cipher/mac.c b/comm/third_party/libgcrypt/cipher/mac.c
new file mode 100644
index 0000000000..babe99e3a8
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/mac.c
@@ -0,0 +1,808 @@
+/* mac.c - message authentication code dispatcher
+ * Copyright (C) 2013 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mac-internal.h"
+
+
+/* This is the list of the MAC implementations included in
+   libgcrypt.
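Pulling the pieces of mac-poly1305.c together, a hedged usage sketch through the public API (gcrypt.h assumed): the 32-byte Poly1305-AES key is the 16-byte AES key followed by the 16-byte r part, matching the split in poly1305mac_prepare_key above.

#include <gcrypt.h>

static gcry_error_t
poly1305_aes_tag (const unsigned char key[32], const unsigned char nonce[16],
                  const void *msg, size_t msglen, unsigned char tag[16])
{
  gcry_mac_hd_t hd;
  size_t taglen = 16;
  gcry_error_t err = gcry_mac_open (&hd, GCRY_MAC_POLY1305_AES, 0, NULL);

  if (err)
    return err;
  if (!(err = gcry_mac_setkey (hd, key, 32))      /* AES key || r part */
      && !(err = gcry_mac_setiv (hd, nonce, 16))  /* 16-byte nonce, see setiv */
      && !(err = gcry_mac_write (hd, msg, msglen)))
    err = gcry_mac_read (hd, tag, &taglen);
  gcry_mac_close (hd);
  return err;
}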
*/ +static gcry_mac_spec_t * const mac_list[] = { +#if USE_SHA1 + &_gcry_mac_type_spec_hmac_sha1, +#endif +#if USE_SHA256 + &_gcry_mac_type_spec_hmac_sha256, + &_gcry_mac_type_spec_hmac_sha224, +#endif +#if USE_SHA512 + &_gcry_mac_type_spec_hmac_sha512, + &_gcry_mac_type_spec_hmac_sha384, + &_gcry_mac_type_spec_hmac_sha512_256, + &_gcry_mac_type_spec_hmac_sha512_224, +#endif +#if USE_SHA3 + &_gcry_mac_type_spec_hmac_sha3_224, + &_gcry_mac_type_spec_hmac_sha3_256, + &_gcry_mac_type_spec_hmac_sha3_384, + &_gcry_mac_type_spec_hmac_sha3_512, +#endif +#ifdef USE_GOST_R_3411_94 + &_gcry_mac_type_spec_hmac_gost3411_94, + &_gcry_mac_type_spec_hmac_gost3411_cp, +#endif +#ifdef USE_GOST_R_3411_12 + &_gcry_mac_type_spec_hmac_stribog256, + &_gcry_mac_type_spec_hmac_stribog512, +#endif +#if USE_WHIRLPOOL + &_gcry_mac_type_spec_hmac_whirlpool, +#endif +#if USE_RMD160 + &_gcry_mac_type_spec_hmac_rmd160, +#endif +#if USE_TIGER + &_gcry_mac_type_spec_hmac_tiger1, +#endif +#if USE_MD5 + &_gcry_mac_type_spec_hmac_md5, +#endif +#if USE_MD4 + &_gcry_mac_type_spec_hmac_md4, +#endif +#if USE_BLAKE2 + &_gcry_mac_type_spec_hmac_blake2b_512, + &_gcry_mac_type_spec_hmac_blake2b_384, + &_gcry_mac_type_spec_hmac_blake2b_256, + &_gcry_mac_type_spec_hmac_blake2b_160, + &_gcry_mac_type_spec_hmac_blake2s_256, + &_gcry_mac_type_spec_hmac_blake2s_224, + &_gcry_mac_type_spec_hmac_blake2s_160, + &_gcry_mac_type_spec_hmac_blake2s_128, +#endif +#if USE_SM3 + &_gcry_mac_type_spec_hmac_sm3, +#endif +#if USE_BLOWFISH + &_gcry_mac_type_spec_cmac_blowfish, +#endif +#if USE_DES + &_gcry_mac_type_spec_cmac_tripledes, +#endif +#if USE_CAST5 + &_gcry_mac_type_spec_cmac_cast5, +#endif +#if USE_AES + &_gcry_mac_type_spec_cmac_aes, + &_gcry_mac_type_spec_gmac_aes, + &_gcry_mac_type_spec_poly1305mac_aes, +#endif +#if USE_TWOFISH + &_gcry_mac_type_spec_cmac_twofish, + &_gcry_mac_type_spec_gmac_twofish, + &_gcry_mac_type_spec_poly1305mac_twofish, +#endif +#if USE_SERPENT + &_gcry_mac_type_spec_cmac_serpent, + &_gcry_mac_type_spec_gmac_serpent, + &_gcry_mac_type_spec_poly1305mac_serpent, +#endif +#if USE_RFC2268 + &_gcry_mac_type_spec_cmac_rfc2268, +#endif +#if USE_SEED + &_gcry_mac_type_spec_cmac_seed, + &_gcry_mac_type_spec_gmac_seed, + &_gcry_mac_type_spec_poly1305mac_seed, +#endif +#if USE_CAMELLIA + &_gcry_mac_type_spec_cmac_camellia, + &_gcry_mac_type_spec_gmac_camellia, + &_gcry_mac_type_spec_poly1305mac_camellia, +#endif +#ifdef USE_IDEA + &_gcry_mac_type_spec_cmac_idea, +#endif +#if USE_GOST28147 + &_gcry_mac_type_spec_cmac_gost28147, + &_gcry_mac_type_spec_gost28147_imit, +#endif + &_gcry_mac_type_spec_poly1305mac, +#if USE_SM4 + &_gcry_mac_type_spec_cmac_sm4, +#endif + NULL, +}; + +/* HMAC implementations start with index 101 (enum gcry_mac_algos) */ +static gcry_mac_spec_t * const mac_list_algo101[] = + { +#if USE_SHA256 + &_gcry_mac_type_spec_hmac_sha256, + &_gcry_mac_type_spec_hmac_sha224, +#else + NULL, + NULL, +#endif +#if USE_SHA512 + &_gcry_mac_type_spec_hmac_sha512, + &_gcry_mac_type_spec_hmac_sha384, +#else + NULL, + NULL, +#endif +#if USE_SHA1 + &_gcry_mac_type_spec_hmac_sha1, +#else + NULL, +#endif +#if USE_MD5 + &_gcry_mac_type_spec_hmac_md5, +#else + NULL, +#endif +#if USE_MD4 + &_gcry_mac_type_spec_hmac_md4, +#else + NULL, +#endif +#if USE_RMD160 + &_gcry_mac_type_spec_hmac_rmd160, +#else + NULL, +#endif +#if USE_TIGER + &_gcry_mac_type_spec_hmac_tiger1, +#else + NULL, +#endif +#if USE_WHIRLPOOL + &_gcry_mac_type_spec_hmac_whirlpool, +#else + NULL, +#endif +#ifdef USE_GOST_R_3411_94 + 
&_gcry_mac_type_spec_hmac_gost3411_94, +#else + NULL, +#endif +#ifdef USE_GOST_R_3411_12 + &_gcry_mac_type_spec_hmac_stribog256, + &_gcry_mac_type_spec_hmac_stribog512, +#else + NULL, + NULL, +#endif +#if USE_MD2 + &_gcry_mac_type_spec_hmac_md2, +#else + NULL, +#endif +#if USE_SHA3 + &_gcry_mac_type_spec_hmac_sha3_224, + &_gcry_mac_type_spec_hmac_sha3_256, + &_gcry_mac_type_spec_hmac_sha3_384, + &_gcry_mac_type_spec_hmac_sha3_512, +#else + NULL, + NULL, + NULL, + NULL, +#endif +#ifdef USE_GOST_R_3411_94 + &_gcry_mac_type_spec_hmac_gost3411_cp, +#else + NULL, +#endif +#if USE_BLAKE2 + &_gcry_mac_type_spec_hmac_blake2b_512, + &_gcry_mac_type_spec_hmac_blake2b_384, + &_gcry_mac_type_spec_hmac_blake2b_256, + &_gcry_mac_type_spec_hmac_blake2b_160, + &_gcry_mac_type_spec_hmac_blake2s_256, + &_gcry_mac_type_spec_hmac_blake2s_224, + &_gcry_mac_type_spec_hmac_blake2s_160, + &_gcry_mac_type_spec_hmac_blake2s_128, +#else + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +#endif +#if USE_SM3 + &_gcry_mac_type_spec_hmac_sm3, +#else + NULL, +#endif +#if USE_SHA512 + &_gcry_mac_type_spec_hmac_sha512_256, + &_gcry_mac_type_spec_hmac_sha512_224, +#else + NULL, + NULL, +#endif + }; + +/* CMAC implementations start with index 201 (enum gcry_mac_algos) */ +static gcry_mac_spec_t * const mac_list_algo201[] = + { +#if USE_AES + &_gcry_mac_type_spec_cmac_aes, +#else + NULL, +#endif +#if USE_DES + &_gcry_mac_type_spec_cmac_tripledes, +#else + NULL, +#endif +#if USE_CAMELLIA + &_gcry_mac_type_spec_cmac_camellia, +#else + NULL, +#endif +#if USE_CAST5 + &_gcry_mac_type_spec_cmac_cast5, +#else + NULL, +#endif +#if USE_BLOWFISH + &_gcry_mac_type_spec_cmac_blowfish, +#else + NULL, +#endif +#if USE_TWOFISH + &_gcry_mac_type_spec_cmac_twofish, +#else + NULL, +#endif +#if USE_SERPENT + &_gcry_mac_type_spec_cmac_serpent, +#else + NULL, +#endif +#if USE_SEED + &_gcry_mac_type_spec_cmac_seed, +#else + NULL, +#endif +#if USE_RFC2268 + &_gcry_mac_type_spec_cmac_rfc2268, +#else + NULL, +#endif +#ifdef USE_IDEA + &_gcry_mac_type_spec_cmac_idea, +#else + NULL, +#endif +#if USE_GOST28147 + &_gcry_mac_type_spec_cmac_gost28147, +#else + NULL, +#endif +#if USE_SM4 + &_gcry_mac_type_spec_cmac_sm4 +#else + NULL +#endif + }; + +/* GMAC implementations start with index 401 (enum gcry_mac_algos) */ +static gcry_mac_spec_t * const mac_list_algo401[] = + { +#if USE_AES + &_gcry_mac_type_spec_gmac_aes, +#else + NULL, +#endif +#if USE_CAMELLIA + &_gcry_mac_type_spec_gmac_camellia, +#else + NULL, +#endif +#if USE_TWOFISH + &_gcry_mac_type_spec_gmac_twofish, +#else + NULL, +#endif +#if USE_SERPENT + &_gcry_mac_type_spec_gmac_serpent, +#else + NULL, +#endif +#if USE_SEED + &_gcry_mac_type_spec_gmac_seed +#else + NULL +#endif + }; + +/* Poly1305-MAC implementations start with index 501 (enum gcry_mac_algos) */ +static gcry_mac_spec_t * const mac_list_algo501[] = + { + &_gcry_mac_type_spec_poly1305mac, +#if USE_AES + &_gcry_mac_type_spec_poly1305mac_aes, +#else + NULL, +#endif +#if USE_CAMELLIA + &_gcry_mac_type_spec_poly1305mac_camellia, +#else + NULL, +#endif +#if USE_TWOFISH + &_gcry_mac_type_spec_poly1305mac_twofish, +#else + NULL, +#endif +#if USE_SERPENT + &_gcry_mac_type_spec_poly1305mac_serpent, +#else + NULL, +#endif +#if USE_SEED + &_gcry_mac_type_spec_poly1305mac_seed +#else + NULL +#endif + }; + + + + +/* Explicitly initialize this module. 
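The four tables above are dense, NULL-padded arrays indexed by algorithm id minus the range base (101 for HMAC, 201 for CMAC, 401 for GMAC, 501 for Poly1305), which is exactly the arithmetic spec_from_algo performs below. As a worked example, GCRY_MAC_HMAC_SHA1 has id 105 in enum gcry_mac_algos, so it resolves to mac_list_algo101[105 - 101], the fifth slot; a NULL entry simply means that algorithm was compiled out.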
*/ +gcry_err_code_t +_gcry_mac_init (void) +{ + if (fips_mode()) + { + /* disable algorithms that are disallowed in fips */ + int idx; + gcry_mac_spec_t *spec; + + for (idx = 0; (spec = mac_list[idx]); idx++) + if (!spec->flags.fips) + spec->flags.disabled = 1; + } + + return 0; +} + + +/* Return the spec structure for the MAC algorithm ALGO. For an + unknown algorithm NULL is returned. */ +static gcry_mac_spec_t * +spec_from_algo (int algo) +{ + gcry_mac_spec_t *spec = NULL; + + if (algo >= 101 && algo < 101 + DIM(mac_list_algo101)) + spec = mac_list_algo101[algo - 101]; + else if (algo >= 201 && algo < 201 + DIM(mac_list_algo201)) + spec = mac_list_algo201[algo - 201]; + else if (algo >= 401 && algo < 401 + DIM(mac_list_algo401)) + spec = mac_list_algo401[algo - 401]; + else if (algo >= 501 && algo < 501 + DIM(mac_list_algo501)) + spec = mac_list_algo501[algo - 501]; +#ifdef USE_GOST28147 + else if (algo == GCRY_MAC_GOST28147_IMIT) + spec = &_gcry_mac_type_spec_gost28147_imit; +#endif + + if (spec) + gcry_assert (spec->algo == algo); + + return spec; +} + + +/* Lookup a mac's spec by its name. */ +static gcry_mac_spec_t * +spec_from_name (const char *name) +{ + gcry_mac_spec_t *spec; + int idx; + + for (idx = 0; (spec = mac_list[idx]); idx++) + if (!stricmp (name, spec->name)) + return spec; + + return NULL; +} + + +/**************** + * Map a string to the mac algo + */ +int +_gcry_mac_map_name (const char *string) +{ + gcry_mac_spec_t *spec; + + if (!string) + return 0; + + /* Not found, search a matching mac name. */ + spec = spec_from_name (string); + if (spec) + return spec->algo; + + return 0; +} + + +/**************** + * This function simply returns the name of the algorithm or some constant + * string when there is no algo. It will never return NULL. + * Use the macro gcry_mac_test_algo() to check whether the algorithm + * is valid. + */ +const char * +_gcry_mac_algo_name (int algorithm) +{ + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algorithm); + return spec ? spec->name : "?"; +} + + +static gcry_err_code_t +check_mac_algo (int algorithm) +{ + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algorithm); + if (spec && !spec->flags.disabled) + return 0; + + return GPG_ERR_MAC_ALGO; +} + + +/**************** + * Open a message digest handle for use with algorithm ALGO. + */ +static gcry_err_code_t +mac_open (gcry_mac_hd_t * hd, int algo, int secure, gcry_ctx_t ctx) +{ + gcry_mac_spec_t *spec; + gcry_err_code_t err; + gcry_mac_hd_t h; + + spec = spec_from_algo (algo); + if (!spec) + return GPG_ERR_MAC_ALGO; + else if (spec->flags.disabled) + return GPG_ERR_MAC_ALGO; + else if (!spec->ops) + return GPG_ERR_MAC_ALGO; + else if (!spec->ops->open || !spec->ops->write || !spec->ops->setkey || + !spec->ops->read || !spec->ops->verify || !spec->ops->reset) + return GPG_ERR_MAC_ALGO; + + if (secure) + h = xtrycalloc_secure (1, sizeof (*h)); + else + h = xtrycalloc (1, sizeof (*h)); + + if (!h) + return gpg_err_code_from_syserror (); + + h->magic = secure ? 
CTX_MAC_MAGIC_SECURE : CTX_MAC_MAGIC_NORMAL; + h->spec = spec; + h->algo = algo; + h->gcry_ctx = ctx; + + err = h->spec->ops->open (h); + if (err) + xfree (h); + else + *hd = h; + + return err; +} + + +static gcry_err_code_t +mac_reset (gcry_mac_hd_t hd) +{ + if (hd->spec->ops->reset) + return hd->spec->ops->reset (hd); + + return 0; +} + + +static void +mac_close (gcry_mac_hd_t hd) +{ + if (hd->spec->ops->close) + hd->spec->ops->close (hd); + + wipememory (hd, sizeof (*hd)); + + xfree (hd); +} + + +static gcry_err_code_t +mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen) +{ + if (!hd->spec->ops->setkey) + return GPG_ERR_INV_ARG; + if (keylen > 0 && !key) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->setkey (hd, key, keylen); +} + + +static gcry_err_code_t +mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen) +{ + if (!hd->spec->ops->setiv) + return GPG_ERR_INV_ARG; + if (ivlen > 0 && !iv) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->setiv (hd, iv, ivlen); +} + + +static gcry_err_code_t +mac_write (gcry_mac_hd_t hd, const void *inbuf, size_t inlen) +{ + if (!hd->spec->ops->write) + return GPG_ERR_INV_ARG; + if (inlen > 0 && !inbuf) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->write (hd, inbuf, inlen); +} + + +static gcry_err_code_t +mac_read (gcry_mac_hd_t hd, void *outbuf, size_t * outlen) +{ + if (!outbuf || !outlen || *outlen == 0 || !hd->spec->ops->read) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->read (hd, outbuf, outlen); +} + + +static gcry_err_code_t +mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen) +{ + if (!buf || buflen == 0 || !hd->spec->ops->verify) + return GPG_ERR_INV_ARG; + + return hd->spec->ops->verify (hd, buf, buflen); +} + + +/* Create a MAC object for algorithm ALGO. FLAGS may be + given as an bitwise OR of the gcry_mac_flags values. + H is guaranteed to be a valid handle or NULL on error. */ +gpg_err_code_t +_gcry_mac_open (gcry_mac_hd_t * h, int algo, unsigned int flags, + gcry_ctx_t ctx) +{ + gcry_err_code_t rc; + gcry_mac_hd_t hd = NULL; + + if ((flags & ~GCRY_MAC_FLAG_SECURE)) + rc = GPG_ERR_INV_ARG; + else + rc = mac_open (&hd, algo, !!(flags & GCRY_MAC_FLAG_SECURE), ctx); + + *h = rc ? 
NULL : hd; + return rc; +} + + +void +_gcry_mac_close (gcry_mac_hd_t hd) +{ + if (hd) + mac_close (hd); +} + + +gcry_err_code_t +_gcry_mac_setkey (gcry_mac_hd_t hd, const void *key, size_t keylen) +{ + return mac_setkey (hd, key, keylen); +} + + +gcry_err_code_t +_gcry_mac_setiv (gcry_mac_hd_t hd, const void *iv, size_t ivlen) +{ + return mac_setiv (hd, iv, ivlen); +} + + +gcry_err_code_t +_gcry_mac_write (gcry_mac_hd_t hd, const void *inbuf, size_t inlen) +{ + return mac_write (hd, inbuf, inlen); +} + + +gcry_err_code_t +_gcry_mac_read (gcry_mac_hd_t hd, void *outbuf, size_t * outlen) +{ + return mac_read (hd, outbuf, outlen); +} + + +gcry_err_code_t +_gcry_mac_verify (gcry_mac_hd_t hd, const void *buf, size_t buflen) +{ + return mac_verify (hd, buf, buflen); +} + + +int +_gcry_mac_get_algo (gcry_mac_hd_t hd) +{ + return hd->algo; +} + + +unsigned int +_gcry_mac_get_algo_maclen (int algo) +{ + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algo); + if (!spec || !spec->ops || !spec->ops->get_maclen) + return 0; + + return spec->ops->get_maclen (algo); +} + + +unsigned int +_gcry_mac_get_algo_keylen (int algo) +{ + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algo); + if (!spec || !spec->ops || !spec->ops->get_keylen) + return 0; + + return spec->ops->get_keylen (algo); +} + + +gcry_err_code_t +_gcry_mac_ctl (gcry_mac_hd_t hd, int cmd, void *buffer, size_t buflen) +{ + gcry_err_code_t rc; + + /* Currently not used. */ + (void) hd; + (void) buffer; + (void) buflen; + + switch (cmd) + { + case GCRYCTL_RESET: + rc = mac_reset (hd); + break; + case GCRYCTL_SET_SBOX: + if (hd->spec->ops->set_extra_info) + rc = hd->spec->ops->set_extra_info + (hd, GCRYCTL_SET_SBOX, buffer, buflen); + else + rc = GPG_ERR_NOT_SUPPORTED; + break; + default: + rc = GPG_ERR_INV_OP; + } + return rc; +} + + +/* Return information about the given MAC algorithm ALGO. + + GCRYCTL_TEST_ALGO: + Returns 0 if the specified algorithm ALGO is available for use. + BUFFER and NBYTES must be zero. + + Note: Because this function is in most cases used to return an + integer value, we can make it easier for the caller to just look at + the return value. The caller will in all cases consult the value + and thereby detecting whether a error occurred or not (i.e. while + checking the block size) + */ +gcry_err_code_t +_gcry_mac_algo_info (int algo, int what, void *buffer, size_t * nbytes) +{ + gcry_err_code_t rc = 0; + unsigned int ui; + + switch (what) + { + case GCRYCTL_GET_KEYLEN: + if (buffer || (!nbytes)) + rc = GPG_ERR_INV_ARG; + else + { + ui = _gcry_mac_get_algo_keylen (algo); + if (ui > 0) + *nbytes = (size_t) ui; + else + /* The only reason for an error is an invalid algo. */ + rc = GPG_ERR_MAC_ALGO; + } + break; + case GCRYCTL_TEST_ALGO: + if (buffer || nbytes) + rc = GPG_ERR_INV_ARG; + else + rc = check_mac_algo (algo); + break; + + default: + rc = GPG_ERR_INV_OP; + } + + return rc; +} + + +/* Run the self-tests for the MAC. */ +gpg_error_t +_gcry_mac_selftest (int algo, int extended, selftest_report_func_t report) +{ + gcry_err_code_t ec; + gcry_mac_spec_t *spec; + + spec = spec_from_algo (algo); + if (spec && !spec->flags.disabled && spec->ops && spec->ops->selftest) + ec = spec->ops->selftest (algo, extended, report); + else + { + ec = GPG_ERR_MAC_ALGO; + if (report) + report ("mac", algo, "module", + spec && !spec->flags.disabled? + "no selftest available" : + spec? 
"algorithm disabled" : + "algorithm not found"); + } + + return gpg_error (ec); +} diff --git a/comm/third_party/libgcrypt/cipher/md.c b/comm/third_party/libgcrypt/cipher/md.c new file mode 100644 index 0000000000..efb7376a1a --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/md.c @@ -0,0 +1,1639 @@ +/* md.c - message digest dispatcher + * Copyright (C) 1998, 1999, 2002, 2003, 2006, + * 2008 Free Software Foundation, Inc. + * Copyright (C) 2013, 2014 g10 Code GmbH + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . + */ + +#include +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" + + +/* This is the list of the digest implementations included in + libgcrypt. */ +static gcry_md_spec_t * const digest_list[] = + { +#if USE_CRC + &_gcry_digest_spec_crc32, + &_gcry_digest_spec_crc32_rfc1510, + &_gcry_digest_spec_crc24_rfc2440, +#endif +#if USE_SHA1 + &_gcry_digest_spec_sha1, +#endif +#if USE_SHA256 + &_gcry_digest_spec_sha256, + &_gcry_digest_spec_sha224, +#endif +#if USE_SHA512 + &_gcry_digest_spec_sha512, + &_gcry_digest_spec_sha384, + &_gcry_digest_spec_sha512_256, + &_gcry_digest_spec_sha512_224, +#endif +#if USE_SHA3 + &_gcry_digest_spec_sha3_224, + &_gcry_digest_spec_sha3_256, + &_gcry_digest_spec_sha3_384, + &_gcry_digest_spec_sha3_512, + &_gcry_digest_spec_shake128, + &_gcry_digest_spec_shake256, +#endif +#if USE_GOST_R_3411_94 + &_gcry_digest_spec_gost3411_94, + &_gcry_digest_spec_gost3411_cp, +#endif +#if USE_GOST_R_3411_12 + &_gcry_digest_spec_stribog_256, + &_gcry_digest_spec_stribog_512, +#endif +#if USE_WHIRLPOOL + &_gcry_digest_spec_whirlpool, +#endif +#if USE_RMD160 + &_gcry_digest_spec_rmd160, +#endif +#if USE_TIGER + &_gcry_digest_spec_tiger, + &_gcry_digest_spec_tiger1, + &_gcry_digest_spec_tiger2, +#endif +#if USE_MD5 + &_gcry_digest_spec_md5, +#endif +#if USE_MD4 + &_gcry_digest_spec_md4, +#endif +#if USE_MD2 + &_gcry_digest_spec_md2, +#endif +#if USE_BLAKE2 + &_gcry_digest_spec_blake2b_512, + &_gcry_digest_spec_blake2b_384, + &_gcry_digest_spec_blake2b_256, + &_gcry_digest_spec_blake2b_160, + &_gcry_digest_spec_blake2s_256, + &_gcry_digest_spec_blake2s_224, + &_gcry_digest_spec_blake2s_160, + &_gcry_digest_spec_blake2s_128, +#endif +#if USE_SM3 + &_gcry_digest_spec_sm3, +#endif + NULL + }; + +/* Digest implementations starting with index 0 (enum gcry_md_algos) */ +static gcry_md_spec_t * const digest_list_algo0[] = + { + NULL, /* GCRY_MD_NONE */ +#if USE_MD5 + &_gcry_digest_spec_md5, +#else + NULL, +#endif +#if USE_SHA1 + &_gcry_digest_spec_sha1, +#else + NULL, +#endif +#if USE_RMD160 + &_gcry_digest_spec_rmd160, +#else + NULL, +#endif + NULL, /* Unused index 4 */ +#if USE_MD2 + &_gcry_digest_spec_md2, +#else + NULL, +#endif +#if USE_TIGER + &_gcry_digest_spec_tiger, +#else + NULL, +#endif + NULL, /* GCRY_MD_HAVAL */ +#if USE_SHA256 + &_gcry_digest_spec_sha256, +#else + NULL, +#endif +#if USE_SHA512 + 
&_gcry_digest_spec_sha384, + &_gcry_digest_spec_sha512, +#else + NULL, + NULL, +#endif +#if USE_SHA256 + &_gcry_digest_spec_sha224 +#else + NULL +#endif + }; + +/* Digest implementations starting with index 301 (enum gcry_md_algos) */ +static gcry_md_spec_t * const digest_list_algo301[] = + { +#if USE_MD4 + &_gcry_digest_spec_md4, +#else + NULL, +#endif +#if USE_CRC + &_gcry_digest_spec_crc32, + &_gcry_digest_spec_crc32_rfc1510, + &_gcry_digest_spec_crc24_rfc2440, +#else + NULL, + NULL, + NULL, +#endif +#if USE_WHIRLPOOL + &_gcry_digest_spec_whirlpool, +#else + NULL, +#endif +#if USE_TIGER + &_gcry_digest_spec_tiger1, + &_gcry_digest_spec_tiger2, +#else + NULL, + NULL, +#endif +#if USE_GOST_R_3411_94 + &_gcry_digest_spec_gost3411_94, +#else + NULL, +#endif +#if USE_GOST_R_3411_12 + &_gcry_digest_spec_stribog_256, + &_gcry_digest_spec_stribog_512, +#else + NULL, + NULL, +#endif +#if USE_GOST_R_3411_94 + &_gcry_digest_spec_gost3411_cp, +#else + NULL, +#endif +#if USE_SHA3 + &_gcry_digest_spec_sha3_224, + &_gcry_digest_spec_sha3_256, + &_gcry_digest_spec_sha3_384, + &_gcry_digest_spec_sha3_512, + &_gcry_digest_spec_shake128, + &_gcry_digest_spec_shake256, +#else + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +#endif +#if USE_BLAKE2 + &_gcry_digest_spec_blake2b_512, + &_gcry_digest_spec_blake2b_384, + &_gcry_digest_spec_blake2b_256, + &_gcry_digest_spec_blake2b_160, + &_gcry_digest_spec_blake2s_256, + &_gcry_digest_spec_blake2s_224, + &_gcry_digest_spec_blake2s_160, + &_gcry_digest_spec_blake2s_128, +#else + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, + NULL, +#endif +#if USE_SM3 + &_gcry_digest_spec_sm3, +#else + NULL, +#endif +#if USE_SHA512 + &_gcry_digest_spec_sha512_256, + &_gcry_digest_spec_sha512_224, +#else + NULL, + NULL, +#endif + }; + + +typedef struct gcry_md_list +{ + gcry_md_spec_t *spec; + struct gcry_md_list *next; + size_t actual_struct_size; /* Allocated size of this structure. */ + PROPERLY_ALIGNED_TYPE context[1]; +} GcryDigestEntry; + +/* This structure is put right after the gcry_md_hd_t buffer, so that + * only one memory block is needed. */ +struct gcry_md_context +{ + int magic; + size_t actual_handle_size; /* Allocated size of this handle. */ + FILE *debug; + struct { + unsigned int secure:1; + unsigned int finalized:1; + unsigned int bugemu1:1; + unsigned int hmac:1; + } flags; + GcryDigestEntry *list; +}; + + +#define CTX_MAGIC_NORMAL 0x11071961 +#define CTX_MAGIC_SECURE 0x16917011 + +static gcry_err_code_t md_enable (gcry_md_hd_t hd, int algo); +static void md_close (gcry_md_hd_t a); +static void md_write (gcry_md_hd_t a, const void *inbuf, size_t inlen); +static byte *md_read( gcry_md_hd_t a, int algo ); +static int md_get_algo( gcry_md_hd_t a ); +static int md_digest_length( int algo ); +static void md_start_debug ( gcry_md_hd_t a, const char *suffix ); +static void md_stop_debug ( gcry_md_hd_t a ); + + + +static int +map_algo (int algo) +{ + return algo; +} + + +/* Return the spec structure for the hash algorithm ALGO. For an + unknown algorithm NULL is returned. */ +static gcry_md_spec_t * +spec_from_algo (int algo) +{ + gcry_md_spec_t *spec = NULL; + + algo = map_algo (algo); + + if (algo >= 0 && algo < DIM(digest_list_algo0)) + spec = digest_list_algo0[algo]; + else if (algo >= 301 && algo < 301 + DIM(digest_list_algo301)) + spec = digest_list_algo301[algo - 301]; + + if (spec) + gcry_assert (spec->algo == algo); + + return spec; +} + + +/* Lookup a hash's spec by its name. 
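+   (The scan is linear and case-insensitive, so "SHA256" and "sha256"
+   name the same spec.  An illustrative use, through the public name
+   mapper defined further below:
+
+       int algo = _gcry_md_map_name ("sha256");
+
+   which yields GCRY_MD_SHA256, or 0 for an unknown name.)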
*/ +static gcry_md_spec_t * +spec_from_name (const char *name) +{ + gcry_md_spec_t *spec; + int idx; + + for (idx=0; (spec = digest_list[idx]); idx++) + { + if (!stricmp (name, spec->name)) + return spec; + } + + return NULL; +} + + +/* Lookup a hash's spec by its OID. */ +static gcry_md_spec_t * +spec_from_oid (const char *oid) +{ + gcry_md_spec_t *spec; + gcry_md_oid_spec_t *oid_specs; + int idx, j; + + for (idx=0; (spec = digest_list[idx]); idx++) + { + oid_specs = spec->oids; + if (oid_specs) + { + for (j = 0; oid_specs[j].oidstring; j++) + if (!stricmp (oid, oid_specs[j].oidstring)) + return spec; + } + } + + return NULL; +} + + +static gcry_md_spec_t * +search_oid (const char *oid, gcry_md_oid_spec_t *oid_spec) +{ + gcry_md_spec_t *spec; + int i; + + if (!oid) + return NULL; + + if (!strncmp (oid, "oid.", 4) || !strncmp (oid, "OID.", 4)) + oid += 4; + + spec = spec_from_oid (oid); + if (spec && spec->oids) + { + for (i = 0; spec->oids[i].oidstring; i++) + if (!stricmp (oid, spec->oids[i].oidstring)) + { + if (oid_spec) + *oid_spec = spec->oids[i]; + return spec; + } + } + + return NULL; +} + + +/**************** + * Map a string to the digest algo + */ +int +_gcry_md_map_name (const char *string) +{ + gcry_md_spec_t *spec; + + if (!string) + return 0; + + /* If the string starts with a digit (optionally prefixed with + either "OID." or "oid."), we first look into our table of ASN.1 + object identifiers to figure out the algorithm */ + spec = search_oid (string, NULL); + if (spec) + return spec->algo; + + /* Not found, search a matching digest name. */ + spec = spec_from_name (string); + if (spec) + return spec->algo; + + return 0; +} + + +/**************** + * This function simply returns the name of the algorithm or some constant + * string when there is no algo. It will never return NULL. + * Use the macro gcry_md_test_algo() to check whether the algorithm + * is valid. + */ +const char * +_gcry_md_algo_name (int algorithm) +{ + gcry_md_spec_t *spec; + + spec = spec_from_algo (algorithm); + return spec ? spec->name : "?"; +} + + +static gcry_err_code_t +check_digest_algo (int algorithm) +{ + gcry_md_spec_t *spec; + + spec = spec_from_algo (algorithm); + if (spec && !spec->flags.disabled) + return 0; + + return GPG_ERR_DIGEST_ALGO; + +} + + +/**************** + * Open a message digest handle for use with algorithm ALGO. + * More algorithms may be added by md_enable(). The initial algorithm + * may be 0. + */ +static gcry_err_code_t +md_open (gcry_md_hd_t *h, int algo, unsigned int flags) +{ + gcry_err_code_t err = 0; + int secure = !!(flags & GCRY_MD_FLAG_SECURE); + int hmac = !!(flags & GCRY_MD_FLAG_HMAC); + int bufsize = secure ? 512 : 1024; + struct gcry_md_context *ctx; + gcry_md_hd_t hd; + size_t n; + + /* Allocate a memory area to hold the caller visible buffer with it's + * control information and the data required by this module. Set the + * context pointer at the beginning to this area. + * We have to use this strange scheme because we want to hide the + * internal data but have a variable sized buffer. + * + * +---+------+---........------+-------------+ + * !ctx! bctl ! buffer ! private ! + * +---+------+---........------+-------------+ + * ! ^ + * !---------------------------! + * + * We have to make sure that private is well aligned. 
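+ * As a worked example (all sizes are platform dependent; the numbers
+ * here are only an assumption for illustration): with the non-secure
+ * bufsize of 1024 and a 24-byte struct gcry_md_handle, n = 1048 is
+ * already a multiple of an 8-byte PROPERLY_ALIGNED_TYPE and stays
+ * 1048; a 20-byte handle would give 1044, which the rounding below
+ * bumps to 1048, so the gcry_md_context placed at (char *)hd + n is
+ * correctly aligned.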
+ */ + n = sizeof (struct gcry_md_handle) + bufsize; + n = ((n + sizeof (PROPERLY_ALIGNED_TYPE) - 1) + / sizeof (PROPERLY_ALIGNED_TYPE)) * sizeof (PROPERLY_ALIGNED_TYPE); + + /* Allocate and set the Context pointer to the private data */ + if (secure) + hd = xtrymalloc_secure (n + sizeof (struct gcry_md_context)); + else + hd = xtrymalloc (n + sizeof (struct gcry_md_context)); + + if (! hd) + err = gpg_err_code_from_errno (errno); + + if (! err) + { + hd->ctx = ctx = (void *) ((char *) hd + n); + /* Setup the globally visible data (bctl in the diagram).*/ + hd->bufsize = n - sizeof (struct gcry_md_handle) + 1; + hd->bufpos = 0; + + /* Initialize the private data. */ + memset (hd->ctx, 0, sizeof *hd->ctx); + ctx->magic = secure ? CTX_MAGIC_SECURE : CTX_MAGIC_NORMAL; + ctx->actual_handle_size = n + sizeof (struct gcry_md_context); + ctx->flags.secure = secure; + ctx->flags.hmac = hmac; + ctx->flags.bugemu1 = !!(flags & GCRY_MD_FLAG_BUGEMU1); + } + + if (! err) + { + /* Hmmm, should we really do that? - yes [-wk] */ + _gcry_fast_random_poll (); + + if (algo) + { + err = md_enable (hd, algo); + if (err) + md_close (hd); + } + } + + if (! err) + *h = hd; + + return err; +} + +/* Create a message digest object for algorithm ALGO. FLAGS may be + given as an bitwise OR of the gcry_md_flags values. ALGO may be + given as 0 if the algorithms to be used are later set using + gcry_md_enable. H is guaranteed to be a valid handle or NULL on + error. */ +gcry_err_code_t +_gcry_md_open (gcry_md_hd_t *h, int algo, unsigned int flags) +{ + gcry_err_code_t rc; + gcry_md_hd_t hd; + + if ((flags & ~(GCRY_MD_FLAG_SECURE + | GCRY_MD_FLAG_HMAC + | GCRY_MD_FLAG_BUGEMU1))) + rc = GPG_ERR_INV_ARG; + else + rc = md_open (&hd, algo, flags); + + *h = rc? NULL : hd; + return rc; +} + + + +static gcry_err_code_t +md_enable (gcry_md_hd_t hd, int algorithm) +{ + struct gcry_md_context *h = hd->ctx; + gcry_md_spec_t *spec; + GcryDigestEntry *entry; + gcry_err_code_t err = 0; + + for (entry = h->list; entry; entry = entry->next) + if (entry->spec->algo == algorithm) + return 0; /* Already enabled */ + + spec = spec_from_algo (algorithm); + if (!spec) + { + log_debug ("md_enable: algorithm %d not available\n", algorithm); + err = GPG_ERR_DIGEST_ALGO; + } + + + if (!err && algorithm == GCRY_MD_MD5 && fips_mode ()) + { + _gcry_inactivate_fips_mode ("MD5 used"); + if (_gcry_enforced_fips_mode () ) + { + /* We should never get to here because we do not register + MD5 in enforced fips mode. But better throw an error. */ + err = GPG_ERR_DIGEST_ALGO; + } + } + + if (!err && h->flags.hmac && spec->read == NULL) + { + /* Expandable output function cannot act as part of HMAC. */ + err = GPG_ERR_DIGEST_ALGO; + } + + if (!err) + { + size_t size = (sizeof (*entry) + + spec->contextsize * (h->flags.hmac? 3 : 1) + - sizeof (entry->context)); + + /* And allocate a new list entry. */ + if (h->flags.secure) + entry = xtrymalloc_secure (size); + else + entry = xtrymalloc (size); + + if (! entry) + err = gpg_err_code_from_errno (errno); + else + { + entry->spec = spec; + entry->next = h->list; + entry->actual_struct_size = size; + h->list = entry; + + /* And init this instance. */ + entry->spec->init (entry->context, + h->flags.bugemu1? 
GCRY_MD_FLAG_BUGEMU1:0); + } + } + + return err; +} + + +gcry_err_code_t +_gcry_md_enable (gcry_md_hd_t hd, int algorithm) +{ + return md_enable (hd, algorithm); +} + + +static gcry_err_code_t +md_copy (gcry_md_hd_t ahd, gcry_md_hd_t *b_hd) +{ + gcry_err_code_t err = 0; + struct gcry_md_context *a = ahd->ctx; + struct gcry_md_context *b; + GcryDigestEntry *ar, *br; + gcry_md_hd_t bhd; + size_t n; + + if (ahd->bufpos) + md_write (ahd, NULL, 0); + + n = (char *) ahd->ctx - (char *) ahd; + if (a->flags.secure) + bhd = xtrymalloc_secure (n + sizeof (struct gcry_md_context)); + else + bhd = xtrymalloc (n + sizeof (struct gcry_md_context)); + + if (!bhd) + { + err = gpg_err_code_from_syserror (); + goto leave; + } + + bhd->ctx = b = (void *) ((char *) bhd + n); + /* No need to copy the buffer due to the write above. */ + gcry_assert (ahd->bufsize == (n - sizeof (struct gcry_md_handle) + 1)); + bhd->bufsize = ahd->bufsize; + bhd->bufpos = 0; + gcry_assert (! ahd->bufpos); + memcpy (b, a, sizeof *a); + b->list = NULL; + b->debug = NULL; + + /* Copy the complete list of algorithms. The copied list is + reversed, but that doesn't matter. */ + for (ar = a->list; ar; ar = ar->next) + { + if (a->flags.secure) + br = xtrymalloc_secure (ar->actual_struct_size); + else + br = xtrymalloc (ar->actual_struct_size); + if (!br) + { + err = gpg_err_code_from_syserror (); + md_close (bhd); + goto leave; + } + + memcpy (br, ar, ar->actual_struct_size); + br->next = b->list; + b->list = br; + } + + if (a->debug) + md_start_debug (bhd, "unknown"); + + *b_hd = bhd; + + leave: + return err; +} + + +gcry_err_code_t +_gcry_md_copy (gcry_md_hd_t *handle, gcry_md_hd_t hd) +{ + gcry_err_code_t rc; + + rc = md_copy (hd, handle); + if (rc) + *handle = NULL; + return rc; +} + + +/* + * Reset all contexts and discard any buffered stuff. This may be used + * instead of a md_close(); md_open(). + */ +void +_gcry_md_reset (gcry_md_hd_t a) +{ + GcryDigestEntry *r; + + /* Note: We allow this even in fips non operational mode. */ + + a->bufpos = a->ctx->flags.finalized = 0; + + if (a->ctx->flags.hmac) + for (r = a->ctx->list; r; r = r->next) + { + memcpy (r->context, (char *)r->context + r->spec->contextsize, + r->spec->contextsize); + } + else + for (r = a->ctx->list; r; r = r->next) + { + memset (r->context, 0, r->spec->contextsize); + (*r->spec->init) (r->context, + a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0); + } +} + + +static void +md_close (gcry_md_hd_t a) +{ + GcryDigestEntry *r, *r2; + + if (! a) + return; + if (a->ctx->debug) + md_stop_debug (a); + for (r = a->ctx->list; r; r = r2) + { + r2 = r->next; + wipememory (r, r->actual_struct_size); + xfree (r); + } + + wipememory (a, a->ctx->actual_handle_size); + xfree(a); +} + + +void +_gcry_md_close (gcry_md_hd_t hd) +{ + /* Note: We allow this even in fips non operational mode. */ + md_close (hd); +} + + +static void +md_write (gcry_md_hd_t a, const void *inbuf, size_t inlen) +{ + GcryDigestEntry *r; + + if (a->ctx->debug) + { + if (a->bufpos && fwrite (a->buf, a->bufpos, 1, a->ctx->debug) != 1) + BUG(); + if (inlen && fwrite (inbuf, inlen, 1, a->ctx->debug) != 1) + BUG(); + } + + for (r = a->ctx->list; r; r = r->next) + { + if (a->bufpos) + (*r->spec->write) (r->context, a->buf, a->bufpos); + (*r->spec->write) (r->context, inbuf, inlen); + } + a->bufpos = 0; +} + + +/* Note that this function may be used after finalize and read to keep + on writing to the transform function so to mitigate timing + attacks. 
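+   A hedged usage sketch (not a prescribed API pattern): a caller
+   could keep the transform busy after reading the digest, e.g.
+
+     _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0);
+     ... fetch the digest with gcry_md_read ...
+     _gcry_md_write (hd, dummy, sizeof dummy);
+
+   where dummy is any caller-provided filler buffer; the extra blocks
+   only feed the compression function and do not alter the digest
+   the caller already copied out.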
*/ +void +_gcry_md_write (gcry_md_hd_t hd, const void *inbuf, size_t inlen) +{ + md_write (hd, inbuf, inlen); +} + + +static void +md_final (gcry_md_hd_t a) +{ + GcryDigestEntry *r; + + if (a->ctx->flags.finalized) + return; + + if (a->bufpos) + md_write (a, NULL, 0); + + for (r = a->ctx->list; r; r = r->next) + (*r->spec->final) (r->context); + + a->ctx->flags.finalized = 1; + + if (!a->ctx->flags.hmac) + return; + + for (r = a->ctx->list; r; r = r->next) + { + byte *p; + size_t dlen = r->spec->mdlen; + byte *hash; + gcry_err_code_t err; + + if (r->spec->read == NULL) + continue; + + p = r->spec->read (r->context); + + if (a->ctx->flags.secure) + hash = xtrymalloc_secure (dlen); + else + hash = xtrymalloc (dlen); + if (!hash) + { + err = gpg_err_code_from_errno (errno); + _gcry_fatal_error (err, NULL); + } + + memcpy (hash, p, dlen); + memcpy (r->context, (char *)r->context + r->spec->contextsize * 2, + r->spec->contextsize); + (*r->spec->write) (r->context, hash, dlen); + (*r->spec->final) (r->context); + xfree (hash); + } +} + + +static gcry_err_code_t +md_setkey (gcry_md_hd_t h, const unsigned char *key, size_t keylen) +{ + gcry_err_code_t rc = 0; + GcryDigestEntry *r; + int algo_had_setkey = 0; + + if (!h->ctx->list) + return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled. */ + + if (h->ctx->flags.hmac) + return GPG_ERR_DIGEST_ALGO; /* Tried md_setkey for HMAC md. */ + + for (r = h->ctx->list; r; r = r->next) + { + switch (r->spec->algo) + { +#if USE_BLAKE2 + /* TODO? add spec->init_with_key? */ + case GCRY_MD_BLAKE2B_512: + case GCRY_MD_BLAKE2B_384: + case GCRY_MD_BLAKE2B_256: + case GCRY_MD_BLAKE2B_160: + case GCRY_MD_BLAKE2S_256: + case GCRY_MD_BLAKE2S_224: + case GCRY_MD_BLAKE2S_160: + case GCRY_MD_BLAKE2S_128: + algo_had_setkey = 1; + memset (r->context, 0, r->spec->contextsize); + rc = _gcry_blake2_init_with_key (r->context, + h->ctx->flags.bugemu1 + ? GCRY_MD_FLAG_BUGEMU1:0, + key, keylen, r->spec->algo); + break; +#endif + default: + rc = GPG_ERR_DIGEST_ALGO; + break; + } + + if (rc) + break; + } + + if (rc && !algo_had_setkey) + { + /* None of algorithms had setkey implementation, so contexts were not + * modified. Just return error. */ + return rc; + } + else if (rc && algo_had_setkey) + { + /* Some of the contexts have been modified, but got error. Reset + * all contexts. */ + _gcry_md_reset (h); + return rc; + } + + /* Successful md_setkey implies reset. */ + h->bufpos = h->ctx->flags.finalized = 0; + + return 0; +} + + +static gcry_err_code_t +prepare_macpads (gcry_md_hd_t a, const unsigned char *key, size_t keylen) +{ + GcryDigestEntry *r; + + if (!a->ctx->list) + return GPG_ERR_DIGEST_ALGO; /* Might happen if no algo is enabled. */ + + if (!a->ctx->flags.hmac) + return GPG_ERR_DIGEST_ALGO; /* Tried prepare_macpads for non-HMAC md. 
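+     The loop below implements the standard HMAC key schedule: a key
+     longer than the block size is first hashed down to mdlen bytes,
+     then K XOR 0x36 (the ipad) is absorbed and the resulting state
+     saved at context + contextsize, and K XOR 0x5c (the opad)
+     likewise at context + 2*contextsize; md_final later replays
+     these saved states.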
*/ + + for (r = a->ctx->list; r; r = r->next) + { + const unsigned char *k; + size_t k_len; + unsigned char *key_allocated = NULL; + int macpad_Bsize; + int i; + + switch (r->spec->algo) + { + /* TODO: add spec->blocksize */ + case GCRY_MD_SHA3_224: + macpad_Bsize = 1152 / 8; + break; + case GCRY_MD_SHA3_256: + macpad_Bsize = 1088 / 8; + break; + case GCRY_MD_SHA3_384: + macpad_Bsize = 832 / 8; + break; + case GCRY_MD_SHA3_512: + macpad_Bsize = 576 / 8; + break; + case GCRY_MD_SHA384: + case GCRY_MD_SHA512: + case GCRY_MD_SHA512_256: + case GCRY_MD_SHA512_224: + case GCRY_MD_BLAKE2B_512: + case GCRY_MD_BLAKE2B_384: + case GCRY_MD_BLAKE2B_256: + case GCRY_MD_BLAKE2B_160: + macpad_Bsize = 128; + break; + case GCRY_MD_GOSTR3411_94: + case GCRY_MD_GOSTR3411_CP: + macpad_Bsize = 32; + break; + default: + macpad_Bsize = 64; + break; + } + + if ( keylen > macpad_Bsize ) + { + k = key_allocated = xtrymalloc_secure (r->spec->mdlen); + if (!k) + return gpg_err_code_from_errno (errno); + _gcry_md_hash_buffer (r->spec->algo, key_allocated, key, keylen); + k_len = r->spec->mdlen; + gcry_assert ( k_len <= macpad_Bsize ); + } + else + { + k = key; + k_len = keylen; + } + + (*r->spec->init) (r->context, + a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0); + a->bufpos = 0; + for (i=0; i < k_len; i++ ) + _gcry_md_putc (a, k[i] ^ 0x36); + for (; i < macpad_Bsize; i++ ) + _gcry_md_putc (a, 0x36); + (*r->spec->write) (r->context, a->buf, a->bufpos); + memcpy ((char *)r->context + r->spec->contextsize, r->context, + r->spec->contextsize); + + (*r->spec->init) (r->context, + a->ctx->flags.bugemu1? GCRY_MD_FLAG_BUGEMU1:0); + a->bufpos = 0; + for (i=0; i < k_len; i++ ) + _gcry_md_putc (a, k[i] ^ 0x5c); + for (; i < macpad_Bsize; i++ ) + _gcry_md_putc (a, 0x5c); + (*r->spec->write) (r->context, a->buf, a->bufpos); + memcpy ((char *)r->context + r->spec->contextsize*2, r->context, + r->spec->contextsize); + + xfree (key_allocated); + } + + a->bufpos = 0; + return 0; +} + + +gcry_err_code_t +_gcry_md_ctl (gcry_md_hd_t hd, int cmd, void *buffer, size_t buflen) +{ + gcry_err_code_t rc = 0; + + (void)buflen; /* Currently not used. */ + + switch (cmd) + { + case GCRYCTL_FINALIZE: + md_final (hd); + break; + case GCRYCTL_START_DUMP: + md_start_debug (hd, buffer); + break; + case GCRYCTL_STOP_DUMP: + md_stop_debug ( hd ); + break; + default: + rc = GPG_ERR_INV_OP; + } + return rc; +} + + +gcry_err_code_t +_gcry_md_setkey (gcry_md_hd_t hd, const void *key, size_t keylen) +{ + gcry_err_code_t rc; + + if (hd->ctx->flags.hmac) + { + rc = prepare_macpads (hd, key, keylen); + if (!rc) + _gcry_md_reset (hd); + } + else + { + rc = md_setkey (hd, key, keylen); + } + + return rc; +} + + +/* The new debug interface. If SUFFIX is a string it creates an debug + file for the context HD. IF suffix is NULL, the file is closed and + debugging is stopped. */ +void +_gcry_md_debug (gcry_md_hd_t hd, const char *suffix) +{ + if (suffix) + md_start_debug (hd, suffix); + else + md_stop_debug (hd); +} + + +/**************** + * If ALGO is null get the digest for the used algo (which should be + * only one) + */ +static byte * +md_read( gcry_md_hd_t a, int algo ) +{ + GcryDigestEntry *r = a->ctx->list; + + if (! 
algo) + { + /* Return the first algorithm */ + if (r) + { + if (r->next) + log_debug ("more than one algorithm in md_read(0)\n"); + if (r->spec->read) + return r->spec->read (r->context); + } + } + else + { + for (r = a->ctx->list; r; r = r->next) + if (r->spec->algo == algo) + { + if (r->spec->read) + return r->spec->read (r->context); + break; + } + } + + if (r && !r->spec->read) + _gcry_fatal_error (GPG_ERR_DIGEST_ALGO, + "requested algo has no fixed digest length"); + else + _gcry_fatal_error (GPG_ERR_DIGEST_ALGO, "requested algo not in md context"); + return NULL; +} + + +/* + * Read out the complete digest, this function implictly finalizes + * the hash. + */ +byte * +_gcry_md_read (gcry_md_hd_t hd, int algo) +{ + /* This function is expected to always return a digest, thus we + can't return an error which we actually should do in + non-operational state. */ + _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0); + return md_read (hd, algo); +} + + +/**************** + * If ALGO is null get the digest for the used algo (which should be + * only one) + */ +static gcry_err_code_t +md_extract(gcry_md_hd_t a, int algo, void *out, size_t outlen) +{ + GcryDigestEntry *r = a->ctx->list; + + if (!algo) + { + /* Return the first algorithm */ + if (r && r->spec->extract) + { + if (r->next) + log_debug ("more than one algorithm in md_extract(0)\n"); + r->spec->extract (r->context, out, outlen); + return 0; + } + } + else + { + for (r = a->ctx->list; r; r = r->next) + if (r->spec->algo == algo && r->spec->extract) + { + r->spec->extract (r->context, out, outlen); + return 0; + } + } + + return GPG_ERR_DIGEST_ALGO; +} + + +/* + * Expand the output from XOF class digest, this function implictly finalizes + * the hash. + */ +gcry_err_code_t +_gcry_md_extract (gcry_md_hd_t hd, int algo, void *out, size_t outlen) +{ + _gcry_md_ctl (hd, GCRYCTL_FINALIZE, NULL, 0); + return md_extract (hd, algo, out, outlen); +} + + +/* + * Read out an intermediate digest. Not yet functional. + */ +gcry_err_code_t +_gcry_md_get (gcry_md_hd_t hd, int algo, byte *buffer, int buflen) +{ + (void)hd; + (void)algo; + (void)buffer; + (void)buflen; + + /*md_digest ... */ + fips_signal_error ("unimplemented function called"); + return GPG_ERR_INTERNAL; +} + + +/* + * Shortcut function to hash a buffer with a given algo. The only + * guaranteed supported algorithms are RIPE-MD160 and SHA-1. The + * supplied digest buffer must be large enough to store the resulting + * hash. No error is returned, the function will abort on an invalid + * algo. DISABLED_ALGOS are ignored here. */ +void +_gcry_md_hash_buffer (int algo, void *digest, + const void *buffer, size_t length) +{ + gcry_md_spec_t *spec; + + spec = spec_from_algo (algo); + if (!spec) + { + log_debug ("md_hash_buffer: algorithm %d not available\n", algo); + return; + } + + if (algo == GCRY_MD_MD5 && fips_mode ()) + { + _gcry_inactivate_fips_mode ("MD5 used"); + if (_gcry_enforced_fips_mode () ) + { + /* We should never get to here because we do not register + MD5 in enforced fips mode. */ + _gcry_fips_noreturn (); + } + } + + if (spec->hash_buffer != NULL) + { + spec->hash_buffer (digest, buffer, length); + } + else if (spec->hash_buffers != NULL) + { + gcry_buffer_t iov; + + iov.size = 0; + iov.data = (void *)buffer; + iov.off = 0; + iov.len = length; + + spec->hash_buffers (digest, &iov, 1); + } + else + { + /* For the others we do not have a fast function, so we use the + normal functions. 
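+         That is: open a throw-away handle, write the single buffer,
+         finalize, and copy md_digest_length (algo) bytes out, exactly
+         as coded just below.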
*/ + gcry_md_hd_t h; + gpg_err_code_t err; + + err = md_open (&h, algo, 0); + if (err) + log_bug ("gcry_md_open failed for algo %d: %s", + algo, gpg_strerror (gcry_error(err))); + md_write (h, (byte *) buffer, length); + md_final (h); + memcpy (digest, md_read (h, algo), md_digest_length (algo)); + md_close (h); + } +} + + +/* Shortcut function to hash multiple buffers with a given algo. In + contrast to gcry_md_hash_buffer, this function returns an error on + invalid arguments or on other problems; disabled algorithms are + _not_ ignored but flagged as an error. + + The data to sign is taken from the array IOV which has IOVCNT items. + + The only supported flag in FLAGS is GCRY_MD_FLAG_HMAC which turns + this function into a HMAC function; the first item in IOV is then + used as the key. + + On success 0 is returned and resulting hash or HMAC is stored at + DIGEST which must have been provided by the caller with an + appropriate length. */ +gpg_err_code_t +_gcry_md_hash_buffers (int algo, unsigned int flags, void *digest, + const gcry_buffer_t *iov, int iovcnt) +{ + gcry_md_spec_t *spec; + int hmac; + + if (!iov || iovcnt < 0) + return GPG_ERR_INV_ARG; + if (flags & ~(GCRY_MD_FLAG_HMAC)) + return GPG_ERR_INV_ARG; + + hmac = !!(flags & GCRY_MD_FLAG_HMAC); + if (hmac && iovcnt < 1) + return GPG_ERR_INV_ARG; + + spec = spec_from_algo (algo); + if (!spec) + { + log_debug ("md_hash_buffers: algorithm %d not available\n", algo); + return GPG_ERR_DIGEST_ALGO; + } + + if (algo == GCRY_MD_MD5 && fips_mode ()) + { + _gcry_inactivate_fips_mode ("MD5 used"); + if (_gcry_enforced_fips_mode () ) + { + /* We should never get to here because we do not register + MD5 in enforced fips mode. */ + _gcry_fips_noreturn (); + } + } + + if (!hmac && spec->hash_buffers) + { + spec->hash_buffers (digest, iov, iovcnt); + } + else + { + /* For the others we do not have a fast function, so we use the + normal functions. */ + gcry_md_hd_t h; + gpg_err_code_t rc; + int dlen; + + /* Detect SHAKE128 like algorithms which we can't use because + * our API does not allow for a variable length digest. */ + dlen = md_digest_length (algo); + if (!dlen) + return GPG_ERR_DIGEST_ALGO; + + rc = md_open (&h, algo, (hmac? GCRY_MD_FLAG_HMAC:0)); + if (rc) + return rc; + + if (hmac) + { + rc = _gcry_md_setkey (h, + (const char*)iov[0].data + iov[0].off, + iov[0].len); + if (rc) + { + md_close (h); + return rc; + } + iov++; iovcnt--; + } + for (;iovcnt; iov++, iovcnt--) + md_write (h, (const char*)iov[0].data + iov[0].off, iov[0].len); + md_final (h); + memcpy (digest, md_read (h, algo), dlen); + md_close (h); + } + + return 0; +} + + +static int +md_get_algo (gcry_md_hd_t a) +{ + GcryDigestEntry *r = a->ctx->list; + + if (r && r->next) + { + fips_signal_error ("possible usage error"); + log_error ("WARNING: more than one algorithm in md_get_algo()\n"); + } + return r ? r->spec->algo : 0; +} + + +int +_gcry_md_get_algo (gcry_md_hd_t hd) +{ + return md_get_algo (hd); +} + + +/**************** + * Return the length of the digest + */ +static int +md_digest_length (int algorithm) +{ + gcry_md_spec_t *spec; + + spec = spec_from_algo (algorithm); + return spec? spec->mdlen : 0; +} + + +/**************** + * Return the length of the digest in bytes. + * This function will return 0 in case of errors. 
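+ * Note that 0 is also returned for the XOF digests (e.g. SHAKE128),
+ * whose output length is caller-chosen and whose spec->mdlen is
+ * therefore 0; _gcry_md_hash_buffers above uses exactly this
+ * property to reject such algorithms.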
+ */ +unsigned int +_gcry_md_get_algo_dlen (int algorithm) +{ + return md_digest_length (algorithm); +} + + +/* Hmmm: add a mode to enumerate the OIDs + * to make g10/sig-check.c more portable */ +static const byte * +md_asn_oid (int algorithm, size_t *asnlen, size_t *mdlen) +{ + gcry_md_spec_t *spec; + const byte *asnoid = NULL; + + spec = spec_from_algo (algorithm); + if (spec) + { + if (asnlen) + *asnlen = spec->asnlen; + if (mdlen) + *mdlen = spec->mdlen; + asnoid = spec->asnoid; + } + else + log_bug ("no ASN.1 OID for md algo %d\n", algorithm); + + return asnoid; +} + + +/**************** + * Return information about the given cipher algorithm + * WHAT select the kind of information returned: + * GCRYCTL_TEST_ALGO: + * Returns 0 when the specified algorithm is available for use. + * buffer and nbytes must be zero. + * GCRYCTL_GET_ASNOID: + * Return the ASNOID of the algorithm in buffer. if buffer is NULL, only + * the required length is returned. + * GCRYCTL_SELFTEST + * Helper for the regression tests - shall not be used by applications. + * + * Note: Because this function is in most cases used to return an + * integer value, we can make it easier for the caller to just look at + * the return value. The caller will in all cases consult the value + * and thereby detecting whether a error occurred or not (i.e. while checking + * the block size) + */ +gcry_err_code_t +_gcry_md_algo_info (int algo, int what, void *buffer, size_t *nbytes) +{ + gcry_err_code_t rc; + + switch (what) + { + case GCRYCTL_TEST_ALGO: + if (buffer || nbytes) + rc = GPG_ERR_INV_ARG; + else + rc = check_digest_algo (algo); + break; + + case GCRYCTL_GET_ASNOID: + /* We need to check that the algo is available because + md_asn_oid would otherwise raise an assertion. */ + rc = check_digest_algo (algo); + if (!rc) + { + const char unsigned *asn; + size_t asnlen; + + asn = md_asn_oid (algo, &asnlen, NULL); + if (buffer && (*nbytes >= asnlen)) + { + memcpy (buffer, asn, asnlen); + *nbytes = asnlen; + } + else if (!buffer && nbytes) + *nbytes = asnlen; + else + { + if (buffer) + rc = GPG_ERR_TOO_SHORT; + else + rc = GPG_ERR_INV_ARG; + } + } + break; + + case GCRYCTL_SELFTEST: + /* Helper function for the regression tests. */ + rc = gpg_err_code (_gcry_md_selftest (algo, nbytes? (int)*nbytes : 0, + NULL)); + break; + + default: + rc = GPG_ERR_INV_OP; + break; + } + + return rc; +} + + +static void +md_start_debug ( gcry_md_hd_t md, const char *suffix ) +{ + static int idx=0; + char buf[50]; + + if (fips_mode ()) + return; + + if ( md->ctx->debug ) + { + log_debug("Oops: md debug already started\n"); + return; + } + idx++; + snprintf (buf, DIM(buf)-1, "dbgmd-%05d.%.10s", idx, suffix ); + md->ctx->debug = fopen(buf, "w"); + if ( !md->ctx->debug ) + log_debug("md debug: can't open %s\n", buf ); +} + + +static void +md_stop_debug( gcry_md_hd_t md ) +{ + if ( md->ctx->debug ) + { + if ( md->bufpos ) + md_write ( md, NULL, 0 ); + fclose (md->ctx->debug); + md->ctx->debug = NULL; + } + + { /* a kludge to pull in the __muldi3 for Solaris */ + volatile u32 a = (u32)(uintptr_t)md; + volatile u64 b = 42; + volatile u64 c; + c = a * b; + (void)c; + } +} + + + +/* + * Return information about the digest handle. + * GCRYCTL_IS_SECURE: + * Returns 1 when the handle works on secured memory + * otherwise 0 is returned. There is no error return. + * GCRYCTL_IS_ALGO_ENABLED: + * Returns 1 if the algo is enabled for that handle. + * The algo must be passed as the address of an int. 
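+ * An illustrative call (mirroring _gcry_md_is_enabled below):
+ *
+ *     int algo = GCRY_MD_SHA256;
+ *     size_t n = sizeof algo;
+ *     rc = _gcry_md_info (hd, GCRYCTL_IS_ALGO_ENABLED, &algo, &n);
+ *     ... !rc && n == 1 means SHA-256 is enabled on hd ...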
+ */ +gcry_err_code_t +_gcry_md_info (gcry_md_hd_t h, int cmd, void *buffer, size_t *nbytes) +{ + gcry_err_code_t rc = 0; + + switch (cmd) + { + case GCRYCTL_IS_SECURE: + *nbytes = h->ctx->flags.secure; + break; + + case GCRYCTL_IS_ALGO_ENABLED: + { + GcryDigestEntry *r; + int algo; + + if ( !buffer || !nbytes || *nbytes != sizeof (int)) + rc = GPG_ERR_INV_ARG; + else + { + algo = *(int*)buffer; + + *nbytes = 0; + for(r=h->ctx->list; r; r = r->next ) { + if (r->spec->algo == algo) + { + *nbytes = 1; + break; + } + } + } + break; + } + + default: + rc = GPG_ERR_INV_OP; + } + + return rc; +} + + +/* Explicitly initialize this module. */ +gcry_err_code_t +_gcry_md_init (void) +{ + if (fips_mode()) + { + /* disable algorithms that are disallowed in fips */ + int idx; + gcry_md_spec_t *spec; + + for (idx = 0; (spec = digest_list[idx]); idx++) + if (!spec->flags.fips) + spec->flags.disabled = 1; + } + + return 0; +} + + +int +_gcry_md_is_secure (gcry_md_hd_t a) +{ + size_t value; + + if (_gcry_md_info (a, GCRYCTL_IS_SECURE, NULL, &value)) + value = 1; /* It seems to be better to assume secure memory on + error. */ + return value; +} + + +int +_gcry_md_is_enabled (gcry_md_hd_t a, int algo) +{ + size_t value; + + value = sizeof algo; + if (_gcry_md_info (a, GCRYCTL_IS_ALGO_ENABLED, &algo, &value)) + value = 0; + return value; +} + + +/* Run the selftests for digest algorithm ALGO with optional reporting + function REPORT. */ +gpg_error_t +_gcry_md_selftest (int algo, int extended, selftest_report_func_t report) +{ + gcry_err_code_t ec = 0; + gcry_md_spec_t *spec; + + spec = spec_from_algo (algo); + if (spec && !spec->flags.disabled && spec->selftest) + ec = spec->selftest (algo, extended, report); + else + { + ec = (spec && spec->selftest) ? GPG_ERR_DIGEST_ALGO + /* */ : GPG_ERR_NOT_IMPLEMENTED; + if (report) + report ("digest", algo, "module", + (spec && !spec->flags.disabled)? + "no selftest available" : + spec? "algorithm disabled" : "algorithm not found"); + } + + return gpg_error (ec); +} diff --git a/comm/third_party/libgcrypt/cipher/md4.c b/comm/third_party/libgcrypt/cipher/md4.c new file mode 100644 index 0000000000..b55443a8aa --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/md4.c @@ -0,0 +1,296 @@ +/* md4.c - MD4 Message-Digest Algorithm + * Copyright (C) 2002, 2003 Free Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * Based on md5.c in libgcrypt, but rewritten to compute md4 checksums + * using a public domain md4 implementation with the following comments: + * + * Modified by Wei Dai from Andrew M. Kuchling's md4.c + * The original code and all modifications are in the public domain. + * + * This is the original introductory comment: + * + * md4.c : MD4 hash algorithm. 
+ * + * Part of the Python Cryptography Toolkit, version 1.1 + * + * Distribute and use freely; there are no restrictions on further + * dissemination and usage except those imposed by the laws of your + * country of residence. + * + */ + +/* MD4 test suite: + * MD4 ("") = 31d6cfe0d16ae931b73c59d7e0c089c0 + * MD4 ("a") = bde52cb31de33e46245e05fbdbd6fb24 + * MD4 ("abc") = a448017aaf21d8525fc10ae87aa6729d + * MD4 ("message digest") = d9130a8164549fe818874806e1c7014b + * MD4 ("abcdefghijklmnopqrstuvwxyz") = d79e1c308aa5bbcdeea8ed63df412da9 + * MD4 ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789") = + * 043f8582f241db351ce627e153e7f0e4 + * MD4 ("123456789012345678901234567890123456789012345678901234567890123456 + * 78901234567890") = e33b4ddc9c38f2199c3e7b164fcc0536 + */ + +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" + +#include "bithelp.h" +#include "bufhelp.h" +#include "hash-common.h" + + +typedef struct { + gcry_md_block_ctx_t bctx; + u32 A,B,C,D; /* chaining variables */ +} MD4_CONTEXT; + +static unsigned int +transform ( void *c, const unsigned char *data, size_t nblks ); + +static void +md4_init (void *context, unsigned int flags) +{ + MD4_CONTEXT *ctx = context; + + (void)flags; + + ctx->A = 0x67452301; + ctx->B = 0xefcdab89; + ctx->C = 0x98badcfe; + ctx->D = 0x10325476; + + ctx->bctx.nblocks = 0; + ctx->bctx.nblocks_high = 0; + ctx->bctx.count = 0; + ctx->bctx.blocksize_shift = _gcry_ctz(64); + ctx->bctx.bwrite = transform; +} + +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) (((x) & (y)) | ((x) & (z)) | ((y) & (z))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) + + +/**************** + * transform 64 bytes + */ +static unsigned int +transform_blk ( void *c, const unsigned char *data ) +{ + MD4_CONTEXT *ctx = c; + u32 in[16]; + register u32 A = ctx->A; + register u32 B = ctx->B; + register u32 C = ctx->C; + register u32 D = ctx->D; + int i; + + for ( i = 0; i < 16; i++ ) + in[i] = buf_get_le32(data + i * 4); + + /* Round 1. */ +#define function(a,b,c,d,k,s) a=rol(a+F(b,c,d)+in[k],s); + function(A,B,C,D, 0, 3); + function(D,A,B,C, 1, 7); + function(C,D,A,B, 2,11); + function(B,C,D,A, 3,19); + function(A,B,C,D, 4, 3); + function(D,A,B,C, 5, 7); + function(C,D,A,B, 6,11); + function(B,C,D,A, 7,19); + function(A,B,C,D, 8, 3); + function(D,A,B,C, 9, 7); + function(C,D,A,B,10,11); + function(B,C,D,A,11,19); + function(A,B,C,D,12, 3); + function(D,A,B,C,13, 7); + function(C,D,A,B,14,11); + function(B,C,D,A,15,19); + +#undef function + + /* Round 2. */ +#define function(a,b,c,d,k,s) a=rol(a+G(b,c,d)+in[k]+0x5a827999,s); + + function(A,B,C,D, 0, 3); + function(D,A,B,C, 4, 5); + function(C,D,A,B, 8, 9); + function(B,C,D,A,12,13); + function(A,B,C,D, 1, 3); + function(D,A,B,C, 5, 5); + function(C,D,A,B, 9, 9); + function(B,C,D,A,13,13); + function(A,B,C,D, 2, 3); + function(D,A,B,C, 6, 5); + function(C,D,A,B,10, 9); + function(B,C,D,A,14,13); + function(A,B,C,D, 3, 3); + function(D,A,B,C, 7, 5); + function(C,D,A,B,11, 9); + function(B,C,D,A,15,13); + +#undef function + + /* Round 3. 
*/ +#define function(a,b,c,d,k,s) a=rol(a+H(b,c,d)+in[k]+0x6ed9eba1,s); + + function(A,B,C,D, 0, 3); + function(D,A,B,C, 8, 9); + function(C,D,A,B, 4,11); + function(B,C,D,A,12,15); + function(A,B,C,D, 2, 3); + function(D,A,B,C,10, 9); + function(C,D,A,B, 6,11); + function(B,C,D,A,14,15); + function(A,B,C,D, 1, 3); + function(D,A,B,C, 9, 9); + function(C,D,A,B, 5,11); + function(B,C,D,A,13,15); + function(A,B,C,D, 3, 3); + function(D,A,B,C,11, 9); + function(C,D,A,B, 7,11); + function(B,C,D,A,15,15); + + + /* Put checksum in context given as argument. */ + ctx->A += A; + ctx->B += B; + ctx->C += C; + ctx->D += D; + + return /*burn_stack*/ 80+6*sizeof(void*); +} + + +static unsigned int +transform ( void *c, const unsigned char *data, size_t nblks ) +{ + unsigned int burn; + + do + { + burn = transform_blk (c, data); + data += 64; + } + while (--nblks); + + return burn; +} + + +/* The routine final terminates the message-digest computation and + * ends with the desired message digest in mdContext->digest[0...15]. + * The handle is prepared for a new MD4 cycle. + * Returns 16 bytes representing the digest. + */ + +static void +md4_final( void *context ) +{ + MD4_CONTEXT *hd = context; + u32 t, th, msb, lsb; + byte *p; + unsigned int burn; + + t = hd->bctx.nblocks; + if (sizeof t == sizeof hd->bctx.nblocks) + th = hd->bctx.nblocks_high; + else + th = hd->bctx.nblocks >> 32; + + /* multiply by 64 to make a byte count */ + lsb = t << 6; + msb = (th << 6) | (t >> 26); + /* add the count */ + t = lsb; + if( (lsb += hd->bctx.count) < t ) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 29; + + if (hd->bctx.count < 56) /* enough room */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ + if (hd->bctx.count < 56) + memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count); + + /* append the 64 bit count */ + buf_put_le32(hd->bctx.buf + 56, lsb); + buf_put_le32(hd->bctx.buf + 60, msb); + burn = transform (hd, hd->bctx.buf, 1); + } + else /* need one extra block */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ + /* fill pad and next block with zeroes */ + memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56); + + /* append the 64 bit count */ + buf_put_le32(hd->bctx.buf + 64 + 56, lsb); + buf_put_le32(hd->bctx.buf + 64 + 60, msb); + burn = transform (hd, hd->bctx.buf, 2); + } + + p = hd->bctx.buf; +#define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0) + X(A); + X(B); + X(C); + X(D); +#undef X + + hd->bctx.count = 0; + + _gcry_burn_stack (burn); +} + +static byte * +md4_read (void *context) +{ + MD4_CONTEXT *hd = context; + return hd->bctx.buf; +} + +static byte asn[18] = /* Object ID is 1.2.840.113549.2.4 */ + { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48, + 0x86, 0xf7, 0x0d, 0x02, 0x04, 0x05, 0x00, 0x04, 0x10 }; + +static gcry_md_oid_spec_t oid_spec_md4[] = + { + /* iso.member-body.us.rsadsi.digestAlgorithm.md4 */ + { "1.2.840.113549.2.4" }, + { NULL }, + }; + +gcry_md_spec_t _gcry_digest_spec_md4 = + { + GCRY_MD_MD4, {0, 0}, + "MD4", asn, DIM (asn), oid_spec_md4,16, + md4_init, _gcry_md_block_write, md4_final, md4_read, NULL, + NULL, NULL, + sizeof (MD4_CONTEXT) + }; diff --git a/comm/third_party/libgcrypt/cipher/md5.c b/comm/third_party/libgcrypt/cipher/md5.c new file mode 100644 index 0000000000..32cb535aaa --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/md5.c @@ -0,0 +1,322 @@ +/* md5.c - MD5 Message-Digest Algorithm + * Copyright (C) 1995,1996,1998,1999,2001,2002, + * 2003 Free 
Software Foundation, Inc. + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + * + * According to the definition of MD5 in RFC 1321 from April 1992. + * NOTE: This is *not* the same file as the one from glibc. + * Written by Ulrich Drepper , 1995. + * heavily modified for GnuPG by Werner Koch + */ + +/* Test values: + * "" D4 1D 8C D9 8F 00 B2 04 E9 80 09 98 EC F8 42 7E + * "a" 0C C1 75 B9 C0 F1 B6 A8 31 C3 99 E2 69 77 26 61 + * "abc 90 01 50 98 3C D2 4F B0 D6 96 3F 7D 28 E1 7F 72 + * "message digest" F9 6B 69 7D 7C B7 93 8D 52 5A 2F 31 AA F1 61 D0 + */ + +#include +#include +#include +#include + +#include "g10lib.h" +#include "cipher.h" + +#include "bithelp.h" +#include "bufhelp.h" +#include "hash-common.h" + + +typedef struct { + gcry_md_block_ctx_t bctx; + u32 A,B,C,D; /* chaining variables */ +} MD5_CONTEXT; + +static unsigned int +transform ( void *ctx, const unsigned char *data, size_t datalen ); + +static void +md5_init( void *context, unsigned int flags) +{ + MD5_CONTEXT *ctx = context; + + (void)flags; + + ctx->A = 0x67452301; + ctx->B = 0xefcdab89; + ctx->C = 0x98badcfe; + ctx->D = 0x10325476; + + ctx->bctx.nblocks = 0; + ctx->bctx.nblocks_high = 0; + ctx->bctx.count = 0; + ctx->bctx.blocksize_shift = _gcry_ctz(64); + ctx->bctx.bwrite = transform; +} + + +/* These are the four functions used in the four steps of the MD5 algorithm + and defined in the RFC 1321. The first function is a little bit optimized + (as found in Colin Plumbs public domain implementation). */ +/* #define FF(b, c, d) ((b & c) | (~b & d)) */ +#define FF(b, c, d) (d ^ (b & (c ^ d))) +#define FG(b, c, d) FF (d, b, c) +#define FH(b, c, d) (b ^ c ^ d) +#define FI(b, c, d) (c ^ (b | ~d)) + + +/**************** + * transform 64 bytes + */ +static unsigned int +transform_blk ( void *c, const unsigned char *data ) +{ + MD5_CONTEXT *ctx = c; + u32 correct_words[16]; + register u32 A = ctx->A; + register u32 B = ctx->B; + register u32 C = ctx->C; + register u32 D = ctx->D; + u32 *cwp = correct_words; + int i; + + for ( i = 0; i < 16; i++ ) + correct_words[i] = buf_get_le32(data + i * 4); + +#define OP(a, b, c, d, s, T) \ + do \ + { \ + a += FF (b, c, d) + (*cwp++) + T; \ + a = rol(a, s); \ + a += b; \ + } \ + while (0) + + /* Before we start, one word about the strange constants. + They are defined in RFC 1321 as + + T[i] = (int) (4294967296.0 * fabs (sin (i))), i=1..64 + */ + + /* Round 1. 
*/ + OP (A, B, C, D, 7, 0xd76aa478); + OP (D, A, B, C, 12, 0xe8c7b756); + OP (C, D, A, B, 17, 0x242070db); + OP (B, C, D, A, 22, 0xc1bdceee); + OP (A, B, C, D, 7, 0xf57c0faf); + OP (D, A, B, C, 12, 0x4787c62a); + OP (C, D, A, B, 17, 0xa8304613); + OP (B, C, D, A, 22, 0xfd469501); + OP (A, B, C, D, 7, 0x698098d8); + OP (D, A, B, C, 12, 0x8b44f7af); + OP (C, D, A, B, 17, 0xffff5bb1); + OP (B, C, D, A, 22, 0x895cd7be); + OP (A, B, C, D, 7, 0x6b901122); + OP (D, A, B, C, 12, 0xfd987193); + OP (C, D, A, B, 17, 0xa679438e); + OP (B, C, D, A, 22, 0x49b40821); + +#undef OP +#define OP(f, a, b, c, d, k, s, T) \ + do \ + { \ + a += f (b, c, d) + correct_words[k] + T; \ + a = rol(a, s); \ + a += b; \ + } \ + while (0) + + /* Round 2. */ + OP (FG, A, B, C, D, 1, 5, 0xf61e2562); + OP (FG, D, A, B, C, 6, 9, 0xc040b340); + OP (FG, C, D, A, B, 11, 14, 0x265e5a51); + OP (FG, B, C, D, A, 0, 20, 0xe9b6c7aa); + OP (FG, A, B, C, D, 5, 5, 0xd62f105d); + OP (FG, D, A, B, C, 10, 9, 0x02441453); + OP (FG, C, D, A, B, 15, 14, 0xd8a1e681); + OP (FG, B, C, D, A, 4, 20, 0xe7d3fbc8); + OP (FG, A, B, C, D, 9, 5, 0x21e1cde6); + OP (FG, D, A, B, C, 14, 9, 0xc33707d6); + OP (FG, C, D, A, B, 3, 14, 0xf4d50d87); + OP (FG, B, C, D, A, 8, 20, 0x455a14ed); + OP (FG, A, B, C, D, 13, 5, 0xa9e3e905); + OP (FG, D, A, B, C, 2, 9, 0xfcefa3f8); + OP (FG, C, D, A, B, 7, 14, 0x676f02d9); + OP (FG, B, C, D, A, 12, 20, 0x8d2a4c8a); + + /* Round 3. */ + OP (FH, A, B, C, D, 5, 4, 0xfffa3942); + OP (FH, D, A, B, C, 8, 11, 0x8771f681); + OP (FH, C, D, A, B, 11, 16, 0x6d9d6122); + OP (FH, B, C, D, A, 14, 23, 0xfde5380c); + OP (FH, A, B, C, D, 1, 4, 0xa4beea44); + OP (FH, D, A, B, C, 4, 11, 0x4bdecfa9); + OP (FH, C, D, A, B, 7, 16, 0xf6bb4b60); + OP (FH, B, C, D, A, 10, 23, 0xbebfbc70); + OP (FH, A, B, C, D, 13, 4, 0x289b7ec6); + OP (FH, D, A, B, C, 0, 11, 0xeaa127fa); + OP (FH, C, D, A, B, 3, 16, 0xd4ef3085); + OP (FH, B, C, D, A, 6, 23, 0x04881d05); + OP (FH, A, B, C, D, 9, 4, 0xd9d4d039); + OP (FH, D, A, B, C, 12, 11, 0xe6db99e5); + OP (FH, C, D, A, B, 15, 16, 0x1fa27cf8); + OP (FH, B, C, D, A, 2, 23, 0xc4ac5665); + + /* Round 4. */ + OP (FI, A, B, C, D, 0, 6, 0xf4292244); + OP (FI, D, A, B, C, 7, 10, 0x432aff97); + OP (FI, C, D, A, B, 14, 15, 0xab9423a7); + OP (FI, B, C, D, A, 5, 21, 0xfc93a039); + OP (FI, A, B, C, D, 12, 6, 0x655b59c3); + OP (FI, D, A, B, C, 3, 10, 0x8f0ccc92); + OP (FI, C, D, A, B, 10, 15, 0xffeff47d); + OP (FI, B, C, D, A, 1, 21, 0x85845dd1); + OP (FI, A, B, C, D, 8, 6, 0x6fa87e4f); + OP (FI, D, A, B, C, 15, 10, 0xfe2ce6e0); + OP (FI, C, D, A, B, 6, 15, 0xa3014314); + OP (FI, B, C, D, A, 13, 21, 0x4e0811a1); + OP (FI, A, B, C, D, 4, 6, 0xf7537e82); + OP (FI, D, A, B, C, 11, 10, 0xbd3af235); + OP (FI, C, D, A, B, 2, 15, 0x2ad7d2bb); + OP (FI, B, C, D, A, 9, 21, 0xeb86d391); + + /* Put checksum in context given as argument. */ + ctx->A += A; + ctx->B += B; + ctx->C += C; + ctx->D += D; + + return /*burn_stack*/ 80+6*sizeof(void*); +} + + +static unsigned int +transform ( void *c, const unsigned char *data, size_t nblks ) +{ + unsigned int burn; + + do + { + burn = transform_blk (c, data); + data += 64; + } + while (--nblks); + + return burn; +} + + +/* The routine final terminates the message-digest computation and + * ends with the desired message digest in mdContext->digest[0...15]. + * The handle is prepared for a new MD5 cycle. + * Returns 16 bytes representing the digest. 
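+ * The length encoding below follows RFC 1321: the block count is
+ * first scaled to a byte count (the <<6 is a multiply by 64), the
+ * bytes still pending in the buffer are added, and the total is then
+ * scaled to a 64-bit bit count (the <<3 is a multiply by 8) that is
+ * stored little-endian in the last 8 bytes of the final block.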
+ */ + +static void +md5_final( void *context) +{ + MD5_CONTEXT *hd = context; + u32 t, th, msb, lsb; + byte *p; + unsigned int burn; + + t = hd->bctx.nblocks; + if (sizeof t == sizeof hd->bctx.nblocks) + th = hd->bctx.nblocks_high; + else + th = hd->bctx.nblocks >> 32; + + /* multiply by 64 to make a byte count */ + lsb = t << 6; + msb = (th << 6) | (t >> 26); + /* add the count */ + t = lsb; + if( (lsb += hd->bctx.count) < t ) + msb++; + /* multiply by 8 to make a bit count */ + t = lsb; + lsb <<= 3; + msb <<= 3; + msb |= t >> 29; + + if (hd->bctx.count < 56) /* enough room */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad */ + if (hd->bctx.count < 56) + memset (&hd->bctx.buf[hd->bctx.count], 0, 56 - hd->bctx.count); + + /* append the 64 bit count */ + buf_put_le32(hd->bctx.buf + 56, lsb); + buf_put_le32(hd->bctx.buf + 60, msb); + burn = transform (hd, hd->bctx.buf, 1); + } + else /* need one extra block */ + { + hd->bctx.buf[hd->bctx.count++] = 0x80; /* pad character */ + /* fill pad and next block with zeroes */ + memset (&hd->bctx.buf[hd->bctx.count], 0, 64 - hd->bctx.count + 56); + + /* append the 64 bit count */ + buf_put_le32(hd->bctx.buf + 64 + 56, lsb); + buf_put_le32(hd->bctx.buf + 64 + 60, msb); + burn = transform (hd, hd->bctx.buf, 2); + } + + p = hd->bctx.buf; +#define X(a) do { buf_put_le32(p, hd->a); p += 4; } while(0) + X(A); + X(B); + X(C); + X(D); +#undef X + + hd->bctx.count = 0; + + _gcry_burn_stack (burn); +} + +static byte * +md5_read( void *context ) +{ + MD5_CONTEXT *hd = (MD5_CONTEXT *) context; + return hd->bctx.buf; +} + +static byte asn[18] = /* Object ID is 1.2.840.113549.2.5 */ + { 0x30, 0x20, 0x30, 0x0c, 0x06, 0x08, 0x2a, 0x86,0x48, + 0x86, 0xf7, 0x0d, 0x02, 0x05, 0x05, 0x00, 0x04, 0x10 }; + +static gcry_md_oid_spec_t oid_spec_md5[] = + { + /* iso.member-body.us.rsadsi.pkcs.pkcs-1.4 (md5WithRSAEncryption) */ + { "1.2.840.113549.1.1.4" }, + /* RSADSI digestAlgorithm MD5 */ + { "1.2.840.113549.2.5" }, + { NULL }, + }; + +gcry_md_spec_t _gcry_digest_spec_md5 = + { + GCRY_MD_MD5, {0, 0}, + "MD5", asn, DIM (asn), oid_spec_md5, 16, + md5_init, _gcry_md_block_write, md5_final, md5_read, NULL, + NULL, NULL, + sizeof (MD5_CONTEXT) + }; diff --git a/comm/third_party/libgcrypt/cipher/poly1305-internal.h b/comm/third_party/libgcrypt/cipher/poly1305-internal.h new file mode 100644 index 0000000000..19cee5f6f3 --- /dev/null +++ b/comm/third_party/libgcrypt/cipher/poly1305-internal.h @@ -0,0 +1,64 @@ +/* poly1305-internal.h - Poly1305 internals + * Copyright (C) 2014 Jussi Kivilinna + * + * This file is part of Libgcrypt. + * + * Libgcrypt is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser general Public License as + * published by the Free Software Foundation; either version 2.1 of + * the License, or (at your option) any later version. + * + * Libgcrypt is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this program; if not, see . 
+ */
+
+#ifndef G10_POLY1305_INTERNAL_H
+#define G10_POLY1305_INTERNAL_H
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+
+#define POLY1305_TAGLEN 16
+#define POLY1305_KEYLEN 32
+#define POLY1305_BLOCKSIZE 16
+
+
+typedef struct
+{
+  u32 k[4];
+  u32 r[4];
+  u32 h[5];
+} POLY1305_STATE;
+
+typedef struct poly1305_context_s
+{
+  POLY1305_STATE state;
+  byte buffer[POLY1305_BLOCKSIZE];
+  unsigned int leftover;
+} poly1305_context_t;
+
+
+gcry_err_code_t _gcry_poly1305_init (poly1305_context_t *ctx, const byte *key,
+                                     size_t keylen);
+
+void _gcry_poly1305_finish (poly1305_context_t *ctx,
+                            byte mac[POLY1305_TAGLEN]);
+
+void _gcry_poly1305_update (poly1305_context_t *ctx, const byte *buf,
+                            size_t buflen);
+
+unsigned int _gcry_poly1305_update_burn (poly1305_context_t *ctx,
+                                         const byte *m, size_t bytes);
+
+#endif /* G10_POLY1305_INTERNAL_H */
diff --git a/comm/third_party/libgcrypt/cipher/poly1305-s390x.S b/comm/third_party/libgcrypt/cipher/poly1305-s390x.S
new file mode 100644
index 0000000000..844245f6ad
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/poly1305-s390x.S
@@ -0,0 +1,87 @@
+/* poly1305-s390x.S - zSeries implementation of Poly1305
+ *
+ * Copyright (C) 2020 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9
+#include <config.h>
+#if defined(HAVE_GCC_INLINE_ASM_S390X)
+
+#include "asm-poly1305-s390x.h"
+
+.text
+
+.balign 8
+.globl _gcry_poly1305_s390x_blocks1
+ELF(.type _gcry_poly1305_s390x_blocks1,@function;)
+
+_gcry_poly1305_s390x_blocks1:
+	/* input:
+	 *	%r2: poly1305-state
+	 *	%r3: src
+	 *	%r4: len
+	 *	%r5: high_pad
+	 */
+	CFI_STARTPROC();
+
+	stmg %r6, %r14, 6 * 8(%r15);
+
+	lgr POLY_RSTATE, %r2;
+	lgr POLY_RSRC, %r3;
+	srlg %r0, %r4, 4;
+
+	cgije %r5, 0, .Lpoly_high0;
+
+	POLY1305_LOAD_STATE();
+
+.balign 4
+.Lpoly_loop_high1:
+	POLY1305_BLOCK_PART1(0 * 16);
+	INC_POLY1305_SRC(1 * 16);
+.Lpoly_block_part2:
+	POLY1305_BLOCK_PART2();
+	POLY1305_BLOCK_PART3();
+	POLY1305_BLOCK_PART4();
+	POLY1305_BLOCK_PART5();
+	POLY1305_BLOCK_PART6();
+	POLY1305_BLOCK_PART7();
+	POLY1305_BLOCK_PART8();
+
+	brctg %r0, .Lpoly_loop_high1;
+
+.balign 4
+.Lpoly_done:
+	POLY1305_STORE_STATE();
+
+	lmg %r6, %r14, 6 * 8(%r15);
+	xgr %r2, %r2;
+	br %r14;
+
+.balign 4
+.Lpoly_high0:
+	lghi %r0, 1;
+	POLY1305_LOAD_STATE();
+	POLY1305_BLOCK_PART1_HB(0 * 16, 0);
+	j .Lpoly_block_part2;
+
+	CFI_ENDPROC();
+ELF(.size _gcry_poly1305_s390x_blocks1,
+    .-_gcry_poly1305_s390x_blocks1;)
+
+#endif /*HAVE_GCC_INLINE_ASM_S390X*/
+#endif /*__s390x__*/
diff --git a/comm/third_party/libgcrypt/cipher/poly1305.c b/comm/third_party/libgcrypt/cipher/poly1305.c
new file mode 100644
index 0000000000..6cb4d2b72d
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/poly1305.c
@@ -0,0 +1,740 @@
+/* poly1305.c - Poly1305 internals and generic implementation
+ * Copyright (C) 2014,2017,2018 Jussi Kivilinna <jussi.kivilinna@iki.fi>
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "types.h"
+#include "g10lib.h"
+#include "cipher.h"
+#include "bufhelp.h"
+#include "poly1305-internal.h"
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+
+static const char *selftest (void);
+
+
+#undef HAVE_ASM_POLY1305_BLOCKS
+
+
+#undef USE_MPI_64BIT
+#undef USE_MPI_32BIT
+#if BYTES_PER_MPI_LIMB == 8 && defined(HAVE_TYPE_U64)
+# define USE_MPI_64BIT 1
+#elif BYTES_PER_MPI_LIMB == 4
+# define USE_MPI_32BIT 1
+#else
+# error please implement for this limb size.
+#endif
+
+
+/* USE_S390X_ASM indicates whether to enable zSeries code.
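+   It is only defined for 8-byte MPI limbs on s390x when the GCC
+   inline-assembly feature test passed; in that case poly1305_blocks
+   below simply forwards to the assembly routine
+   _gcry_poly1305_s390x_blocks1.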
*/ +#undef USE_S390X_ASM +#if BYTES_PER_MPI_LIMB == 8 +# if defined (__s390x__) && __GNUC__ >= 4 && __ARCH__ >= 9 +# if defined(HAVE_GCC_INLINE_ASM_S390X) +# define USE_S390X_ASM 1 +# endif /* USE_S390X_ASM */ +# endif +#endif + + +#ifdef USE_S390X_ASM + +#define HAVE_ASM_POLY1305_BLOCKS 1 + +extern unsigned int _gcry_poly1305_s390x_blocks1(void *state, + const byte *buf, size_t len, + byte high_pad); + +static unsigned int +poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, + byte high_pad) +{ + return _gcry_poly1305_s390x_blocks1(&ctx->state, buf, len, high_pad); +} + +#endif /* USE_S390X_ASM */ + + +static void poly1305_init (poly1305_context_t *ctx, + const byte key[POLY1305_KEYLEN]) +{ + POLY1305_STATE *st = &ctx->state; + + ctx->leftover = 0; + + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + + st->r[0] = buf_get_le32(key + 0) & 0x0fffffff; + st->r[1] = buf_get_le32(key + 4) & 0x0ffffffc; + st->r[2] = buf_get_le32(key + 8) & 0x0ffffffc; + st->r[3] = buf_get_le32(key + 12) & 0x0ffffffc; + + st->k[0] = buf_get_le32(key + 16); + st->k[1] = buf_get_le32(key + 20); + st->k[2] = buf_get_le32(key + 24); + st->k[3] = buf_get_le32(key + 28); +} + + +#ifdef USE_MPI_64BIT + +#if defined (__aarch64__) && __GNUC__ >= 4 + +/* A += B (armv8/aarch64) */ +#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ + __asm__ ("adds %0, %3, %0\n" \ + "adcs %1, %4, %1\n" \ + "adc %2, %5, %2\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2) \ + : "r" (B0), "r" (B1), "r" (B2) \ + : "cc" ) + +#endif /* __aarch64__ */ + +#if defined (__x86_64__) && __GNUC__ >= 4 + +/* A += B (x86-64) */ +#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ + __asm__ ("addq %3, %0\n" \ + "adcq %4, %1\n" \ + "adcq %5, %2\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2) \ + : "g" (B0), "g" (B1), "g" (B2) \ + : "cc" ) + +#endif /* __x86_64__ */ + +#if defined (__powerpc__) && __GNUC__ >= 4 + +/* A += B (ppc64) */ +#define ADD_1305_64(A2, A1, A0, B2, B1, B0) \ + __asm__ ("addc %0, %3, %0\n" \ + "adde %1, %4, %1\n" \ + "adde %2, %5, %2\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2) \ + : "r" (B0), "r" (B1), "r" (B2) \ + : "cc" ) + +#endif /* __powerpc__ */ + +#ifndef ADD_1305_64 +/* A += B (generic, mpi) */ +# define ADD_1305_64(A2, A1, A0, B2, B1, B0) do { \ + u64 carry; \ + add_ssaaaa(carry, A0, 0, A0, 0, B0); \ + add_ssaaaa(A2, A1, A2, A1, B2, B1); \ + add_ssaaaa(A2, A1, A2, A1, 0, carry); \ + } while (0) +#endif + +/* H = H * R mod 2¹³⁰-5 */ +#define MUL_MOD_1305_64(H2, H1, H0, R1, R0, R1_MULT5) do { \ + u64 x0_lo, x0_hi, x1_lo, x1_hi; \ + u64 t0_lo, t0_hi, t1_lo, t1_hi; \ + \ + /* x = a * r (partial mod 2^130-5) */ \ + umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ + umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ + \ + umul_ppmm(t0_hi, t0_lo, H1, R1_MULT5); /* h1 * r1 mod 2^130-5 */ \ + add_ssaaaa(x0_hi, x0_lo, x0_hi, x0_lo, t0_hi, t0_lo); \ + umul_ppmm(t1_hi, t1_lo, H1, R0); /* h1 * r0 */ \ + add_ssaaaa(x1_hi, x1_lo, x1_hi, x1_lo, t1_hi, t1_lo); \ + \ + t1_lo = H2 * R1_MULT5; /* h2 * r1 mod 2^130-5 */ \ + t1_hi = H2 * R0; /* h2 * r0 */ \ + add_ssaaaa(H0, H1, x1_hi, x1_lo, t1_hi, t1_lo); \ + \ + /* carry propagation */ \ + H2 = H0 & 3; \ + H0 = (H0 >> 2) * 5; /* msb mod 2^130-5 */ \ + ADD_1305_64(H2, H1, H0, (u64)0, x0_hi, x0_lo); \ + } while (0) + +#ifndef HAVE_ASM_POLY1305_BLOCKS + +static unsigned int +poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, + byte high_pad) +{ + POLY1305_STATE *st = &ctx->state; + u64 r0, r1, r1_mult5; + u64 h0, h1, h2; + u64 m0, m1, m2; + + m2 = high_pad; + + 
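+  /* Repack the five 32-bit state words into the 2^64 radix used by
+   * MUL_MOD_1305_64: H becomes h2:h1:h0 and R becomes r1:r0.  Each
+   * 16-byte block M is then absorbed as
+   *   H = (H + M + HIGH_PAD * 2^128) * R   (partially mod 2^130-5).
+   */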
h0 = st->h[0] + ((u64)st->h[1] << 32); + h1 = st->h[2] + ((u64)st->h[3] << 32); + h2 = st->h[4]; + + r0 = st->r[0] + ((u64)st->r[1] << 32); + r1 = st->r[2] + ((u64)st->r[3] << 32); + + r1_mult5 = (r1 >> 2) + r1; + + m0 = buf_get_le64(buf + 0); + m1 = buf_get_le64(buf + 8); + buf += POLY1305_BLOCKSIZE; + len -= POLY1305_BLOCKSIZE; + + while (len >= POLY1305_BLOCKSIZE) + { + /* a = h + m */ + ADD_1305_64(h2, h1, h0, m2, m1, m0); + + m0 = buf_get_le64(buf + 0); + m1 = buf_get_le64(buf + 8); + + /* h = a * r (partial mod 2^130-5) */ + MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); + + buf += POLY1305_BLOCKSIZE; + len -= POLY1305_BLOCKSIZE; + } + + /* a = h + m */ + ADD_1305_64(h2, h1, h0, m2, m1, m0); + + /* h = a * r (partial mod 2^130-5) */ + MUL_MOD_1305_64(h2, h1, h0, r1, r0, r1_mult5); + + st->h[0] = h0; + st->h[1] = h0 >> 32; + st->h[2] = h1; + st->h[3] = h1 >> 32; + st->h[4] = h2; + + return 6 * sizeof (void *) + 18 * sizeof (u64); +} + +#endif /* !HAVE_ASM_POLY1305_BLOCKS */ + +static unsigned int poly1305_final (poly1305_context_t *ctx, + byte mac[POLY1305_TAGLEN]) +{ + POLY1305_STATE *st = &ctx->state; + unsigned int burn = 0; + u64 u, carry; + u64 k0, k1; + u64 h0, h1; + u64 h2; + + /* process the remaining block */ + if (ctx->leftover) + { + ctx->buffer[ctx->leftover++] = 1; + if (ctx->leftover < POLY1305_BLOCKSIZE) + { + memset (&ctx->buffer[ctx->leftover], 0, + POLY1305_BLOCKSIZE - ctx->leftover); + ctx->leftover = POLY1305_BLOCKSIZE; + } + burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); + } + + h0 = st->h[0] + ((u64)st->h[1] << 32); + h1 = st->h[2] + ((u64)st->h[3] << 32); + h2 = st->h[4]; + + k0 = st->k[0] + ((u64)st->k[1] << 32); + k1 = st->k[2] + ((u64)st->k[3] << 32); + + /* check if h is more than 2^130-5, by adding 5. */ + add_ssaaaa(carry, u, 0, h0, 0, 5); + add_ssaaaa(carry, u, 0, carry, 0, h1); + u = (carry + h2) >> 2; /* u == 0 or 1 */ + + /* minus 2^130-5 ... 
(+5) */ + u = (-u) & 5; + add_ssaaaa(h1, h0, h1, h0, 0, u); + + /* add high part of key + h */ + add_ssaaaa(h1, h0, h1, h0, k1, k0); + buf_put_le64(mac + 0, h0); + buf_put_le64(mac + 8, h1); + + /* burn_stack */ + return 4 * sizeof (void *) + 7 * sizeof (u64) + burn; +} + +#endif /* USE_MPI_64BIT */ + +#ifdef USE_MPI_32BIT + +#ifdef HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS + +/* HI:LO += A * B (arm) */ +#define UMUL_ADD_32(HI, LO, A, B) \ + __asm__ ("umlal %1, %0, %4, %5" \ + : "=r" (HI), "=r" (LO) \ + : "0" (HI), "1" (LO), "r" (A), "r" (B) ) + +/* A += B (arm) */ +#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \ + __asm__ ("adds %0, %0, %5\n" \ + "adcs %1, %1, %6\n" \ + "adcs %2, %2, %7\n" \ + "adcs %3, %3, %8\n" \ + "adc %4, %4, %9\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \ + : "r" (B0), "r" (B1), "r" (B2), "r" (B3), "r" (B4) \ + : "cc" ) + +#endif /* HAVE_COMPATIBLE_GCC_ARM_PLATFORM_AS */ + +#if defined (__i386__) && __GNUC__ >= 4 + +/* A += B (i386) */ +#define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) \ + __asm__ ("addl %5, %0\n" \ + "adcl %6, %1\n" \ + "adcl %7, %2\n" \ + "adcl %8, %3\n" \ + "adcl %9, %4\n" \ + : "+r" (A0), "+r" (A1), "+r" (A2), "+r" (A3), "+r" (A4) \ + : "g" (B0), "g" (B1), "g" (B2), "g" (B3), "g" (B4) \ + : "cc" ) + +#endif /* __i386__ */ + +#ifndef UMUL_ADD_32 +/* HI:LO += A * B (generic, mpi) */ +# define UMUL_ADD_32(HI, LO, A, B) do { \ + u32 t_lo, t_hi; \ + umul_ppmm(t_hi, t_lo, A, B); \ + add_ssaaaa(HI, LO, HI, LO, t_hi, t_lo); \ + } while (0) +#endif + +#ifndef ADD_1305_32 +/* A += B (generic, mpi) */ +# define ADD_1305_32(A4, A3, A2, A1, A0, B4, B3, B2, B1, B0) do { \ + u32 carry0, carry1, carry2; \ + add_ssaaaa(carry0, A0, 0, A0, 0, B0); \ + add_ssaaaa(carry1, A1, 0, A1, 0, B1); \ + add_ssaaaa(carry1, A1, carry1, A1, 0, carry0); \ + add_ssaaaa(carry2, A2, 0, A2, 0, B2); \ + add_ssaaaa(carry2, A2, carry2, A2, 0, carry1); \ + add_ssaaaa(A4, A3, A4, A3, B4, B3); \ + add_ssaaaa(A4, A3, A4, A3, 0, carry2); \ + } while (0) +#endif + +/* H = H * R mod 2¹³⁰-5 */ +#define MUL_MOD_1305_32(H4, H3, H2, H1, H0, R3, R2, R1, R0, \ + R3_MULT5, R2_MULT5, R1_MULT5) do { \ + u32 x0_lo, x0_hi, x1_lo, x1_hi, x2_lo, x2_hi, x3_lo, x3_hi; \ + u32 t0_lo, t0_hi; \ + \ + /* x = a * r (partial mod 2^130-5) */ \ + umul_ppmm(x0_hi, x0_lo, H0, R0); /* h0 * r0 */ \ + umul_ppmm(x1_hi, x1_lo, H0, R1); /* h0 * r1 */ \ + umul_ppmm(x2_hi, x2_lo, H0, R2); /* h0 * r2 */ \ + umul_ppmm(x3_hi, x3_lo, H0, R3); /* h0 * r3 */ \ + \ + UMUL_ADD_32(x0_hi, x0_lo, H1, R3_MULT5); /* h1 * r3 mod 2^130-5 */ \ + UMUL_ADD_32(x1_hi, x1_lo, H1, R0); /* h1 * r0 */ \ + UMUL_ADD_32(x2_hi, x2_lo, H1, R1); /* h1 * r1 */ \ + UMUL_ADD_32(x3_hi, x3_lo, H1, R2); /* h1 * r2 */ \ + \ + UMUL_ADD_32(x0_hi, x0_lo, H2, R2_MULT5); /* h2 * r2 mod 2^130-5 */ \ + UMUL_ADD_32(x1_hi, x1_lo, H2, R3_MULT5); /* h2 * r3 mod 2^130-5 */ \ + UMUL_ADD_32(x2_hi, x2_lo, H2, R0); /* h2 * r0 */ \ + UMUL_ADD_32(x3_hi, x3_lo, H2, R1); /* h2 * r1 */ \ + \ + UMUL_ADD_32(x0_hi, x0_lo, H3, R1_MULT5); /* h3 * r1 mod 2^130-5 */ \ + H1 = x0_hi; \ + UMUL_ADD_32(x1_hi, x1_lo, H3, R2_MULT5); /* h3 * r2 mod 2^130-5 */ \ + UMUL_ADD_32(x2_hi, x2_lo, H3, R3_MULT5); /* h3 * r3 mod 2^130-5 */ \ + UMUL_ADD_32(x3_hi, x3_lo, H3, R0); /* h3 * r0 */ \ + \ + t0_lo = H4 * R1_MULT5; /* h4 * r1 mod 2^130-5 */ \ + t0_hi = H4 * R2_MULT5; /* h4 * r2 mod 2^130-5 */ \ + add_ssaaaa(H2, x1_lo, x1_hi, x1_lo, 0, t0_lo); \ + add_ssaaaa(H3, x2_lo, x2_hi, x2_lo, 0, t0_hi); \ + t0_lo = H4 * R3_MULT5; /* h4 * r3 mod 2^130-5 */ \ + t0_hi = H4 * 
R0; /* h4 * r0 */ \ + add_ssaaaa(H4, x3_lo, x3_hi, x3_lo, t0_hi, t0_lo); \ + \ + /* carry propagation */ \ + H0 = (H4 >> 2) * 5; /* msb mod 2^130-5 */ \ + H4 = H4 & 3; \ + ADD_1305_32(H4, H3, H2, H1, H0, 0, x3_lo, x2_lo, x1_lo, x0_lo); \ + } while (0) + +#ifndef HAVE_ASM_POLY1305_BLOCKS + +static unsigned int +poly1305_blocks (poly1305_context_t *ctx, const byte *buf, size_t len, + byte high_pad) +{ + POLY1305_STATE *st = &ctx->state; + u32 r1_mult5, r2_mult5, r3_mult5; + u32 h0, h1, h2, h3, h4; + u32 m0, m1, m2, m3, m4; + + m4 = high_pad; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + r1_mult5 = (st->r[1] >> 2) + st->r[1]; + r2_mult5 = (st->r[2] >> 2) + st->r[2]; + r3_mult5 = (st->r[3] >> 2) + st->r[3]; + + while (len >= POLY1305_BLOCKSIZE) + { + m0 = buf_get_le32(buf + 0); + m1 = buf_get_le32(buf + 4); + m2 = buf_get_le32(buf + 8); + m3 = buf_get_le32(buf + 12); + + /* a = h + m */ + ADD_1305_32(h4, h3, h2, h1, h0, m4, m3, m2, m1, m0); + + /* h = a * r (partial mod 2^130-5) */ + MUL_MOD_1305_32(h4, h3, h2, h1, h0, + st->r[3], st->r[2], st->r[1], st->r[0], + r3_mult5, r2_mult5, r1_mult5); + + buf += POLY1305_BLOCKSIZE; + len -= POLY1305_BLOCKSIZE; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; + + return 6 * sizeof (void *) + 28 * sizeof (u32); +} + +#endif /* !HAVE_ASM_POLY1305_BLOCKS */ + +static unsigned int poly1305_final (poly1305_context_t *ctx, + byte mac[POLY1305_TAGLEN]) +{ + POLY1305_STATE *st = &ctx->state; + unsigned int burn = 0; + u32 carry, tmp0, tmp1, tmp2, u; + u32 h4, h3, h2, h1, h0; + + /* process the remaining block */ + if (ctx->leftover) + { + ctx->buffer[ctx->leftover++] = 1; + if (ctx->leftover < POLY1305_BLOCKSIZE) + { + memset (&ctx->buffer[ctx->leftover], 0, + POLY1305_BLOCKSIZE - ctx->leftover); + ctx->leftover = POLY1305_BLOCKSIZE; + } + burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 0); + } + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + /* check if h is more than 2^130-5, by adding 5. */ + add_ssaaaa(carry, tmp0, 0, h0, 0, 5); + add_ssaaaa(carry, tmp0, 0, carry, 0, h1); + add_ssaaaa(carry, tmp0, 0, carry, 0, h2); + add_ssaaaa(carry, tmp0, 0, carry, 0, h3); + u = (carry + h4) >> 2; /* u == 0 or 1 */ + + /* minus 2^130-5 ... 
(+5) */ + u = (-u) & 5; + add_ssaaaa(carry, h0, 0, h0, 0, u); + add_ssaaaa(carry, h1, 0, h1, 0, carry); + add_ssaaaa(carry, h2, 0, h2, 0, carry); + add_ssaaaa(carry, h3, 0, h3, 0, carry); + + /* add high part of key + h */ + add_ssaaaa(tmp0, h0, 0, h0, 0, st->k[0]); + add_ssaaaa(tmp1, h1, 0, h1, 0, st->k[1]); + add_ssaaaa(tmp1, h1, tmp1, h1, 0, tmp0); + add_ssaaaa(tmp2, h2, 0, h2, 0, st->k[2]); + add_ssaaaa(tmp2, h2, tmp2, h2, 0, tmp1); + add_ssaaaa(carry, h3, 0, h3, 0, st->k[3]); + h3 += tmp2; + + buf_put_le32(mac + 0, h0); + buf_put_le32(mac + 4, h1); + buf_put_le32(mac + 8, h2); + buf_put_le32(mac + 12, h3); + + /* burn_stack */ + return 4 * sizeof (void *) + 10 * sizeof (u32) + burn; +} + +#endif /* USE_MPI_32BIT */ + + +unsigned int +_gcry_poly1305_update_burn (poly1305_context_t *ctx, const byte *m, + size_t bytes) +{ + unsigned int burn = 0; + + /* handle leftover */ + if (ctx->leftover) + { + size_t want = (POLY1305_BLOCKSIZE - ctx->leftover); + if (want > bytes) + want = bytes; + buf_cpy (ctx->buffer + ctx->leftover, m, want); + bytes -= want; + m += want; + ctx->leftover += want; + if (ctx->leftover < POLY1305_BLOCKSIZE) + return 0; + burn = poly1305_blocks (ctx, ctx->buffer, POLY1305_BLOCKSIZE, 1); + ctx->leftover = 0; + } + + /* process full blocks */ + if (bytes >= POLY1305_BLOCKSIZE) + { + size_t nblks = bytes / POLY1305_BLOCKSIZE; + burn = poly1305_blocks (ctx, m, nblks * POLY1305_BLOCKSIZE, 1); + m += nblks * POLY1305_BLOCKSIZE; + bytes -= nblks * POLY1305_BLOCKSIZE; + } + + /* store leftover */ + if (bytes) + { + buf_cpy (ctx->buffer + ctx->leftover, m, bytes); + ctx->leftover += bytes; + } + + return burn; +} + + +void +_gcry_poly1305_update (poly1305_context_t *ctx, const byte *m, size_t bytes) +{ + unsigned int burn; + + burn = _gcry_poly1305_update_burn (ctx, m, bytes); + + if (burn) + _gcry_burn_stack (burn); +} + + +void +_gcry_poly1305_finish (poly1305_context_t *ctx, byte mac[POLY1305_TAGLEN]) +{ + unsigned int burn; + + burn = poly1305_final (ctx, mac); + + _gcry_burn_stack (burn); +} + + +gcry_err_code_t +_gcry_poly1305_init (poly1305_context_t * ctx, const byte * key, + size_t keylen) +{ + static int initialized; + static const char *selftest_failed; + + if (!initialized) + { + initialized = 1; + selftest_failed = selftest (); + if (selftest_failed) + log_error ("Poly1305 selftest failed (%s)\n", selftest_failed); + } + + if (keylen != POLY1305_KEYLEN) + return GPG_ERR_INV_KEYLEN; + + if (selftest_failed) + return GPG_ERR_SELFTEST_FAILED; + + poly1305_init (ctx, key); + + return 0; +} + + +static void +poly1305_auth (byte mac[POLY1305_TAGLEN], const byte * m, size_t bytes, + const byte * key) +{ + poly1305_context_t ctx; + + memset (&ctx, 0, sizeof (ctx)); + + _gcry_poly1305_init (&ctx, key, POLY1305_KEYLEN); + _gcry_poly1305_update (&ctx, m, bytes); + _gcry_poly1305_finish (&ctx, mac); + + wipememory (&ctx, sizeof (ctx)); +} + + +static const char * +selftest (void) +{ + /* example from nacl */ + static const byte nacl_key[POLY1305_KEYLEN] = { + 0xee, 0xa6, 0xa7, 0x25, 0x1c, 0x1e, 0x72, 0x91, + 0x6d, 0x11, 0xc2, 0xcb, 0x21, 0x4d, 0x3c, 0x25, + 0x25, 0x39, 0x12, 0x1d, 0x8e, 0x23, 0x4e, 0x65, + 0x2d, 0x65, 0x1f, 0xa4, 0xc8, 0xcf, 0xf8, 0x80, + }; + + static const byte nacl_msg[131] = { + 0x8e, 0x99, 0x3b, 0x9f, 0x48, 0x68, 0x12, 0x73, + 0xc2, 0x96, 0x50, 0xba, 0x32, 0xfc, 0x76, 0xce, + 0x48, 0x33, 0x2e, 0xa7, 0x16, 0x4d, 0x96, 0xa4, + 0x47, 0x6f, 0xb8, 0xc5, 0x31, 0xa1, 0x18, 0x6a, + 0xc0, 0xdf, 0xc1, 0x7c, 0x98, 0xdc, 0xe8, 0x7b, + 0x4d, 0xa7, 0xf0, 0x11, 0xec, 
0x48, 0xc9, 0x72, + 0x71, 0xd2, 0xc2, 0x0f, 0x9b, 0x92, 0x8f, 0xe2, + 0x27, 0x0d, 0x6f, 0xb8, 0x63, 0xd5, 0x17, 0x38, + 0xb4, 0x8e, 0xee, 0xe3, 0x14, 0xa7, 0xcc, 0x8a, + 0xb9, 0x32, 0x16, 0x45, 0x48, 0xe5, 0x26, 0xae, + 0x90, 0x22, 0x43, 0x68, 0x51, 0x7a, 0xcf, 0xea, + 0xbd, 0x6b, 0xb3, 0x73, 0x2b, 0xc0, 0xe9, 0xda, + 0x99, 0x83, 0x2b, 0x61, 0xca, 0x01, 0xb6, 0xde, + 0x56, 0x24, 0x4a, 0x9e, 0x88, 0xd5, 0xf9, 0xb3, + 0x79, 0x73, 0xf6, 0x22, 0xa4, 0x3d, 0x14, 0xa6, + 0x59, 0x9b, 0x1f, 0x65, 0x4c, 0xb4, 0x5a, 0x74, + 0xe3, 0x55, 0xa5 + }; + + static const byte nacl_mac[16] = { + 0xf3, 0xff, 0xc7, 0x70, 0x3f, 0x94, 0x00, 0xe5, + 0x2a, 0x7d, 0xfb, 0x4b, 0x3d, 0x33, 0x05, 0xd9 + }; + + /* generates a final value of (2^130 - 2) == 3 */ + static const byte wrap_key[POLY1305_KEYLEN] = { + 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + + static const byte wrap_msg[16] = { + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + }; + + static const byte wrap_mac[16] = { + 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + + /* mac of the macs of messages of length 0 to 256, where the key and messages + * have all their values set to the length + */ + static const byte total_key[POLY1305_KEYLEN] = { + 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, + 0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff + }; + + static const byte total_mac[16] = { + 0x64, 0xaf, 0xe2, 0xe8, 0xd6, 0xad, 0x7b, 0xbd, + 0xd2, 0x87, 0xf9, 0x7c, 0x44, 0x62, 0x3d, 0x39 + }; + + poly1305_context_t ctx; + poly1305_context_t total_ctx; + byte all_key[POLY1305_KEYLEN]; + byte all_msg[256]; + byte mac[16]; + size_t i, j; + + memset (&ctx, 0, sizeof (ctx)); + memset (&total_ctx, 0, sizeof (total_ctx)); + + memset (mac, 0, sizeof (mac)); + poly1305_auth (mac, nacl_msg, sizeof (nacl_msg), nacl_key); + if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) + return "Poly1305 test 1 failed."; + + /* SSE2/AVX have a 32 byte block size, but also support 64 byte blocks, so + * make sure everything still works varying between them */ + memset (mac, 0, sizeof (mac)); + _gcry_poly1305_init (&ctx, nacl_key, POLY1305_KEYLEN); + _gcry_poly1305_update (&ctx, nacl_msg + 0, 32); + _gcry_poly1305_update (&ctx, nacl_msg + 32, 64); + _gcry_poly1305_update (&ctx, nacl_msg + 96, 16); + _gcry_poly1305_update (&ctx, nacl_msg + 112, 8); + _gcry_poly1305_update (&ctx, nacl_msg + 120, 4); + _gcry_poly1305_update (&ctx, nacl_msg + 124, 2); + _gcry_poly1305_update (&ctx, nacl_msg + 126, 1); + _gcry_poly1305_update (&ctx, nacl_msg + 127, 1); + _gcry_poly1305_update (&ctx, nacl_msg + 128, 1); + _gcry_poly1305_update (&ctx, nacl_msg + 129, 1); + _gcry_poly1305_update (&ctx, nacl_msg + 130, 1); + _gcry_poly1305_finish (&ctx, mac); + if (memcmp (nacl_mac, mac, sizeof (nacl_mac)) != 0) + return "Poly1305 test 2 failed."; + + memset (mac, 0, sizeof (mac)); + poly1305_auth (mac, wrap_msg, sizeof (wrap_msg), wrap_key); + if (memcmp (wrap_mac, mac, sizeof (nacl_mac)) != 0) + return "Poly1305 test 3 failed."; + + _gcry_poly1305_init (&total_ctx, total_key, POLY1305_KEYLEN); + for (i = 0; i < 256; i++) + { + /* set key and message to 'i,i,i..' 
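+         (the key is all 32 bytes set to the value i, the message is
+         i bytes of i)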
 */
+      for (j = 0; j < sizeof (all_key); j++)
+        all_key[j] = i;
+      for (j = 0; j < i; j++)
+        all_msg[j] = i;
+      poly1305_auth (mac, all_msg, i, all_key);
+      _gcry_poly1305_update (&total_ctx, mac, 16);
+    }
+  _gcry_poly1305_finish (&total_ctx, mac);
+  if (memcmp (total_mac, mac, sizeof (total_mac)) != 0)
+    return "Poly1305 test 4 failed.";
+
+  return NULL;
+}
diff --git a/comm/third_party/libgcrypt/cipher/primegen.c b/comm/third_party/libgcrypt/cipher/primegen.c
new file mode 100644
index 0000000000..e24de4dc7c
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/primegen.c
@@ -0,0 +1,1878 @@
+/* primegen.c - prime number generator
+ * Copyright (C) 1998, 2000, 2001, 2002, 2003
+ *               2004, 2008 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+#include <config.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+
+static gcry_mpi_t gen_prime (unsigned int nbits, int secret, int randomlevel,
+                             int (*extra_check)(void *, gcry_mpi_t),
+                             void *extra_check_arg);
+static int check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int rm_rounds,
+                        gcry_prime_check_func_t cb_func, void *cb_arg );
+static int is_prime (gcry_mpi_t n, int steps, unsigned int *count);
+static void m_out_of_n( char *array, int m, int n );
+
+static void (*progress_cb) (void *,const char*,int,int, int );
+static void *progress_cb_data;
+
+/* Note: 2 is not included because it can be tested more easily by
+   looking at bit 0.
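+   Trial division by these small primes cheaply rejects most candidates
+   before the far more expensive Fermat and Rabin-Miller tests are run.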
The last entry in this list is marked by a zero */ +static ushort small_prime_numbers[] = { + 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, + 47, 53, 59, 61, 67, 71, 73, 79, 83, 89, 97, 101, + 103, 107, 109, 113, 127, 131, 137, 139, 149, 151, + 157, 163, 167, 173, 179, 181, 191, 193, 197, 199, + 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, + 269, 271, 277, 281, 283, 293, 307, 311, 313, 317, + 331, 337, 347, 349, 353, 359, 367, 373, 379, 383, + 389, 397, 401, 409, 419, 421, 431, 433, 439, 443, + 449, 457, 461, 463, 467, 479, 487, 491, 499, 503, + 509, 521, 523, 541, 547, 557, 563, 569, 571, 577, + 587, 593, 599, 601, 607, 613, 617, 619, 631, 641, + 643, 647, 653, 659, 661, 673, 677, 683, 691, 701, + 709, 719, 727, 733, 739, 743, 751, 757, 761, 769, + 773, 787, 797, 809, 811, 821, 823, 827, 829, 839, + 853, 857, 859, 863, 877, 881, 883, 887, 907, 911, + 919, 929, 937, 941, 947, 953, 967, 971, 977, 983, + 991, 997, 1009, 1013, 1019, 1021, 1031, 1033, + 1039, 1049, 1051, 1061, 1063, 1069, 1087, 1091, + 1093, 1097, 1103, 1109, 1117, 1123, 1129, 1151, + 1153, 1163, 1171, 1181, 1187, 1193, 1201, 1213, + 1217, 1223, 1229, 1231, 1237, 1249, 1259, 1277, + 1279, 1283, 1289, 1291, 1297, 1301, 1303, 1307, + 1319, 1321, 1327, 1361, 1367, 1373, 1381, 1399, + 1409, 1423, 1427, 1429, 1433, 1439, 1447, 1451, + 1453, 1459, 1471, 1481, 1483, 1487, 1489, 1493, + 1499, 1511, 1523, 1531, 1543, 1549, 1553, 1559, + 1567, 1571, 1579, 1583, 1597, 1601, 1607, 1609, + 1613, 1619, 1621, 1627, 1637, 1657, 1663, 1667, + 1669, 1693, 1697, 1699, 1709, 1721, 1723, 1733, + 1741, 1747, 1753, 1759, 1777, 1783, 1787, 1789, + 1801, 1811, 1823, 1831, 1847, 1861, 1867, 1871, + 1873, 1877, 1879, 1889, 1901, 1907, 1913, 1931, + 1933, 1949, 1951, 1973, 1979, 1987, 1993, 1997, + 1999, 2003, 2011, 2017, 2027, 2029, 2039, 2053, + 2063, 2069, 2081, 2083, 2087, 2089, 2099, 2111, + 2113, 2129, 2131, 2137, 2141, 2143, 2153, 2161, + 2179, 2203, 2207, 2213, 2221, 2237, 2239, 2243, + 2251, 2267, 2269, 2273, 2281, 2287, 2293, 2297, + 2309, 2311, 2333, 2339, 2341, 2347, 2351, 2357, + 2371, 2377, 2381, 2383, 2389, 2393, 2399, 2411, + 2417, 2423, 2437, 2441, 2447, 2459, 2467, 2473, + 2477, 2503, 2521, 2531, 2539, 2543, 2549, 2551, + 2557, 2579, 2591, 2593, 2609, 2617, 2621, 2633, + 2647, 2657, 2659, 2663, 2671, 2677, 2683, 2687, + 2689, 2693, 2699, 2707, 2711, 2713, 2719, 2729, + 2731, 2741, 2749, 2753, 2767, 2777, 2789, 2791, + 2797, 2801, 2803, 2819, 2833, 2837, 2843, 2851, + 2857, 2861, 2879, 2887, 2897, 2903, 2909, 2917, + 2927, 2939, 2953, 2957, 2963, 2969, 2971, 2999, + 3001, 3011, 3019, 3023, 3037, 3041, 3049, 3061, + 3067, 3079, 3083, 3089, 3109, 3119, 3121, 3137, + 3163, 3167, 3169, 3181, 3187, 3191, 3203, 3209, + 3217, 3221, 3229, 3251, 3253, 3257, 3259, 3271, + 3299, 3301, 3307, 3313, 3319, 3323, 3329, 3331, + 3343, 3347, 3359, 3361, 3371, 3373, 3389, 3391, + 3407, 3413, 3433, 3449, 3457, 3461, 3463, 3467, + 3469, 3491, 3499, 3511, 3517, 3527, 3529, 3533, + 3539, 3541, 3547, 3557, 3559, 3571, 3581, 3583, + 3593, 3607, 3613, 3617, 3623, 3631, 3637, 3643, + 3659, 3671, 3673, 3677, 3691, 3697, 3701, 3709, + 3719, 3727, 3733, 3739, 3761, 3767, 3769, 3779, + 3793, 3797, 3803, 3821, 3823, 3833, 3847, 3851, + 3853, 3863, 3877, 3881, 3889, 3907, 3911, 3917, + 3919, 3923, 3929, 3931, 3943, 3947, 3967, 3989, + 4001, 4003, 4007, 4013, 4019, 4021, 4027, 4049, + 4051, 4057, 4073, 4079, 4091, 4093, 4099, 4111, + 4127, 4129, 4133, 4139, 4153, 4157, 4159, 4177, + 4201, 4211, 4217, 4219, 4229, 4231, 4241, 4243, + 4253, 4259, 4261, 4271, 4273, 
4283, 4289, 4297,
+    4327, 4337, 4339, 4349, 4357, 4363, 4373, 4391,
+    4397, 4409, 4421, 4423, 4441, 4447, 4451, 4457,
+    4463, 4481, 4483, 4493, 4507, 4513, 4517, 4519,
+    4523, 4547, 4549, 4561, 4567, 4583, 4591, 4597,
+    4603, 4621, 4637, 4639, 4643, 4649, 4651, 4657,
+    4663, 4673, 4679, 4691, 4703, 4721, 4723, 4729,
+    4733, 4751, 4759, 4783, 4787, 4789, 4793, 4799,
+    4801, 4813, 4817, 4831, 4861, 4871, 4877, 4889,
+    4903, 4909, 4919, 4931, 4933, 4937, 4943, 4951,
+    4957, 4967, 4969, 4973, 4987, 4993, 4999,
+    0
+};
+static int no_of_small_prime_numbers = DIM (small_prime_numbers) - 1;
+
+
+
+/* An object and a list to build up a global pool of primes.  See
+   save_pool_prime and get_pool_prime. */
+struct primepool_s
+{
+  struct primepool_s *next;
+  gcry_mpi_t prime;      /* If this is NULL the entry is not used. */
+  unsigned int nbits;
+  gcry_random_level_t randomlevel;
+};
+struct primepool_s *primepool;
+/* Mutex used to protect access to the primepool. */
+GPGRT_LOCK_DEFINE (primepool_lock);
+
+
+gcry_err_code_t
+_gcry_primegen_init (void)
+{
+  /* This function was formerly used to initialize the primepool
+     Mutex.  This has been replaced by a static initialization. */
+  return 0;
+}
+
+
+/* Save PRIME, which has been generated at RANDOMLEVEL, for later
+   use.  Needs to be called while the primepool_lock is held.  Note
+   that PRIME should be considered released after calling this
+   function. */
+static void
+save_pool_prime (gcry_mpi_t prime, gcry_random_level_t randomlevel)
+{
+  struct primepool_s *item, *item2;
+  size_t n;
+
+  for (n=0, item = primepool; item; item = item->next, n++)
+    if (!item->prime)
+      break;
+  if (!item && n > 100)
+    {
+      /* Remove some of the entries.  Our strategy is removing
+         the last third from the list. */
+      int i;
+
+      for (i=0, item2 = primepool; item2; item2 = item2->next)
+        {
+          if (i >= n/3*2)
+            {
+              _gcry_mpi_release (item2->prime);
+              item2->prime = NULL;
+              if (!item)
+                item = item2;
+            }
+        }
+    }
+  if (!item)
+    {
+      item = xtrycalloc (1, sizeof *item);
+      if (!item)
+        {
+          /* Out of memory.  Silently giving up. */
+          _gcry_mpi_release (prime);
+          return;
+        }
+      item->next = primepool;
+      primepool = item;
+    }
+  item->prime = prime;
+  item->nbits = mpi_get_nbits (prime);
+  item->randomlevel = randomlevel;
+}
+
+
+/* Return a prime from the prime pool or NULL if none has been found.
+   The prime needs to match NBITS and RANDOMLEVEL.  This function
+   needs to be called with the primepool_lock held.
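+   Ownership of the returned prime passes to the caller; the pool slot
+   is cleared so that each pooled prime is handed out only once.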
 */
+static gcry_mpi_t
+get_pool_prime (unsigned int nbits, gcry_random_level_t randomlevel)
+{
+  struct primepool_s *item;
+
+  for (item = primepool; item; item = item->next)
+    if (item->prime
+        && item->nbits == nbits && item->randomlevel == randomlevel)
+      {
+        gcry_mpi_t prime = item->prime;
+        item->prime = NULL;
+        gcry_assert (nbits == mpi_get_nbits (prime));
+        return prime;
+      }
+  return NULL;
+}
+
+
+
+
+
+
+void
+_gcry_register_primegen_progress ( void (*cb)(void *,const char*,int,int,int),
+                                   void *cb_data )
+{
+  progress_cb = cb;
+  progress_cb_data = cb_data;
+}
+
+
+static void
+progress( int c )
+{
+  if ( progress_cb )
+    progress_cb ( progress_cb_data, "primegen", c, 0, 0 );
+}
+
+
+/****************
+ * Generate a prime number (stored in secure memory)
+ */
+gcry_mpi_t
+_gcry_generate_secret_prime (unsigned int nbits,
+                             gcry_random_level_t random_level,
+                             int (*extra_check)(void*, gcry_mpi_t),
+                             void *extra_check_arg)
+{
+  gcry_mpi_t prime;
+
+  prime = gen_prime (nbits, 1, random_level, extra_check, extra_check_arg);
+  progress('\n');
+  return prime;
+}
+
+
+/* Generate a prime number which may be public, i.e. not allocated in
+   secure memory.  */
+gcry_mpi_t
+_gcry_generate_public_prime (unsigned int nbits,
+                             gcry_random_level_t random_level,
+                             int (*extra_check)(void*, gcry_mpi_t),
+                             void *extra_check_arg)
+{
+  gcry_mpi_t prime;
+
+  prime = gen_prime (nbits, 0, random_level, extra_check, extra_check_arg);
+  progress('\n');
+  return prime;
+}
+
+
+/* Core prime generation function.  The algorithm used to generate
+   practically safe primes is due to Lim and Lee as described in the
+   CRYPTO '97 proceedings (ISBN3540633847) page 260.
+
+   NEED_Q_FACTOR: If true make sure that at least one factor is of
+     size qbits.  This is for example required for DSA.
+   PRIME_GENERATED: Address of a variable where the resulting prime
+     number will be stored.
+   PBITS: Requested size of the prime number.  At least 48.
+   QBITS: One factor of the prime needs to be of this size.  May be 0
+     if this is not required.  See also MODE.
+   G: If not NULL an MPI which will receive a generator for the prime
+     for use with Elgamal.
+   RET_FACTORS: if not NULL, an array with all factors are stored at
+     that address.
+   ALL_FACTORS: If set to true all factors of prime-1 are returned.
+   RANDOMLEVEL:  How strong should the random numbers be.
+   FLAGS: Prime generation bit flags. Currently supported:
+     GCRY_PRIME_FLAG_SECRET - The prime needs to be kept secret.
+   CB_FUNC, CB_ARG:  Callback to be used for extra checks.
+
+ */
+static gcry_err_code_t
+prime_generate_internal (int need_q_factor,
+                         gcry_mpi_t *prime_generated, unsigned int pbits,
+                         unsigned int qbits, gcry_mpi_t g,
+                         gcry_mpi_t **ret_factors,
+                         gcry_random_level_t randomlevel, unsigned int flags,
+                         int all_factors,
+                         gcry_prime_check_func_t cb_func, void *cb_arg)
+{
+  gcry_err_code_t err = 0;
+  gcry_mpi_t *factors_new = NULL; /* Factors to return to the
+                                     caller.  */
+  gcry_mpi_t *factors = NULL;   /* Current factors.  */
+  gcry_random_level_t poolrandomlevel; /* Random level used for pool primes. */
+  gcry_mpi_t *pool = NULL;      /* Pool of primes.  */
+  int *pool_in_use = NULL;      /* Array with currently used POOL elements. */
+  unsigned char *perms = NULL;  /* Permutations of POOL.  */
+  gcry_mpi_t q_factor = NULL;   /* Used if QBITS is non-zero.  */
+  unsigned int fbits = 0;       /* Length of prime factors.  */
+  unsigned int n = 0;           /* Number of factors.  */
+  unsigned int m = 0;           /* Number of primes in pool.  */
+  gcry_mpi_t q = NULL;          /* First prime factor.  */
+  gcry_mpi_t prime = NULL;      /* Prime candidate.  */
+  unsigned int nprime = 0;      /* Bits of PRIME.  */
+  unsigned int req_qbits;       /* The original QBITS value.  */
+  gcry_mpi_t val_2;             /* For check_prime().  */
+  int is_locked = 0;            /* Flag to help unlocking the primepool. */
+  unsigned int is_secret = (flags & GCRY_PRIME_FLAG_SECRET);
+  unsigned int count1 = 0, count2 = 0;
+  unsigned int i = 0, j = 0;
+
+  if (pbits < 48)
+    return GPG_ERR_INV_ARG;
+
+  /* We won't use too strong a random level for the pooled subprimes. */
+  poolrandomlevel = (randomlevel > GCRY_STRONG_RANDOM?
+                     GCRY_STRONG_RANDOM : randomlevel);
+
+
+  /* If QBITS is not given, assume a reasonable value. */
+  if (!qbits)
+    qbits = pbits / 3;
+
+  req_qbits = qbits;
+
+  /* Find number of needed prime factors N. */
+  for (n = 1; (pbits - qbits - 1) / n >= qbits; n++)
+    ;
+  n--;
+
+  val_2 = mpi_alloc_set_ui (2);
+
+  if ((! n) || ((need_q_factor) && (n < 2)))
+    {
+      err = GPG_ERR_INV_ARG;
+      goto leave;
+    }
+
+  if (need_q_factor)
+    {
+      n--;  /* Need one factor less because we want a specific Q-FACTOR. */
+      fbits = (pbits - 2 * req_qbits -1) / n;
+      qbits = pbits - req_qbits - n * fbits;
+    }
+  else
+    {
+      fbits = (pbits - req_qbits -1) / n;
+      qbits = pbits - n * fbits;
+    }
+
+  if (DBG_CIPHER)
+    log_debug ("gen prime: pbits=%u qbits=%u fbits=%u/%u n=%d\n",
+               pbits, req_qbits, qbits, fbits, n);
+
+  /* Allocate an integer to hold the new prime. */
+  prime = mpi_new (pbits);
+
+  /* Generate first prime factor. */
+  q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL);
+
+  /* Generate a specific Q-Factor if requested. */
+  if (need_q_factor)
+    q_factor = gen_prime (req_qbits, is_secret, randomlevel, NULL, NULL);
+
+  /* Allocate an array to hold all factors + 2 for later usage. */
+  factors = xtrycalloc (n + 2, sizeof (*factors));
+  if (!factors)
+    {
+      err = gpg_err_code_from_errno (errno);
+      goto leave;
+    }
+
+  /* Allocate an array to track pool usage. */
+  pool_in_use = xtrymalloc (n * sizeof *pool_in_use);
+  if (!pool_in_use)
+    {
+      err = gpg_err_code_from_errno (errno);
+      goto leave;
+    }
+  for (i=0; i < n; i++)
+    pool_in_use[i] = -1;
+
+  /* Make a pool of 3n+5 primes (this is an arbitrary value).  We
+     require at least 30 primes for a useful selection process.
+
+     Fixme: We need to research the best formula for sizing the pool.
+  */
+  m = n * 3 + 5;
+  if (need_q_factor) /* Need some more in this case. */
+    m += 5;
+  if (m < 30)
+    m = 30;
+  pool = xtrycalloc (m, sizeof (*pool));
+  if (! pool)
+    {
+      err = gpg_err_code_from_errno (errno);
+      goto leave;
+    }
+
+  /* Permute over the pool of primes until we find a prime of the
+     requested length. */
+  do
+    {
+    next_try:
+      for (i=0; i < n; i++)
+        pool_in_use[i] = -1;
+
+      if (!perms)
+        {
+          /* Allocate new primes.  This is done right at the beginning
+             of the loop and if we have later run out of primes. */
+          for (i = 0; i < m; i++)
+            {
+              mpi_free (pool[i]);
+              pool[i] = NULL;
+            }
+
+          /* Init m_out_of_n(). */
+          perms = xtrycalloc (1, m);
+          if (!perms)
+            {
+              err = gpg_err_code_from_errno (errno);
+              goto leave;
+            }
+
+          err = gpgrt_lock_lock (&primepool_lock);
+          if (err)
+            goto leave;
+          is_locked = 1;
+
+          for (i = 0; i < n; i++)
+            {
+              perms[i] = 1;
+              /* At a maximum we use strong random for the factors.
+                 This saves us a lot of entropy. Given that Q and
+                 possible Q-factor are also used in the final prime
+                 this should be acceptable.  We also don't allocate in
+                 secure memory to save on that scarce resource too.  If
+                 Q has been allocated in secure memory, the final
+                 prime will be saved there anyway.
This is because + our MPI routines take care of that. GnuPG has worked + this way ever since. */ + pool[i] = NULL; + if (is_locked) + { + pool[i] = get_pool_prime (fbits, poolrandomlevel); + if (!pool[i]) + { + err = gpgrt_lock_unlock (&primepool_lock); + if (err) + goto leave; + is_locked = 0; + } + } + if (!pool[i]) + pool[i] = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL); + pool_in_use[i] = i; + factors[i] = pool[i]; + } + + if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock))) + goto leave; + is_locked = 0; + } + else + { + /* Get next permutation. */ + m_out_of_n ( (char*)perms, n, m); + + if ((err = gpgrt_lock_lock (&primepool_lock))) + goto leave; + is_locked = 1; + + for (i = j = 0; (i < m) && (j < n); i++) + if (perms[i]) + { + /* If the subprime has not yet beed generated do it now. */ + if (!pool[i] && is_locked) + { + pool[i] = get_pool_prime (fbits, poolrandomlevel); + if (!pool[i]) + { + if ((err = gpgrt_lock_unlock (&primepool_lock))) + goto leave; + is_locked = 0; + } + } + if (!pool[i]) + pool[i] = gen_prime (fbits, 0, poolrandomlevel, NULL, NULL); + pool_in_use[j] = i; + factors[j++] = pool[i]; + } + + if (is_locked && (err = gpgrt_lock_unlock (&primepool_lock))) + goto leave; + is_locked = 0; + + if (i == n) + { + /* Ran out of permutations: Allocate new primes. */ + xfree (perms); + perms = NULL; + progress ('!'); + goto next_try; + } + } + + /* Generate next prime candidate: + p = 2 * q [ * q_factor] * factor_0 * factor_1 * ... * factor_n + 1. + */ + mpi_set (prime, q); + mpi_mul_ui (prime, prime, 2); + if (need_q_factor) + mpi_mul (prime, prime, q_factor); + for(i = 0; i < n; i++) + mpi_mul (prime, prime, factors[i]); + mpi_add_ui (prime, prime, 1); + nprime = mpi_get_nbits (prime); + + if (nprime < pbits) + { + if (++count1 > 20) + { + count1 = 0; + qbits++; + progress('>'); + mpi_free (q); + q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL); + goto next_try; + } + } + else + count1 = 0; + + if (nprime > pbits) + { + if (++count2 > 20) + { + count2 = 0; + qbits--; + progress('<'); + mpi_free (q); + q = gen_prime (qbits, is_secret, randomlevel, NULL, NULL); + goto next_try; + } + } + else + count2 = 0; + } + while (! ((nprime == pbits) && check_prime (prime, val_2, 5, + cb_func, cb_arg))); + + if (DBG_CIPHER) + { + progress ('\n'); + log_mpidump ("prime ", prime); + log_mpidump ("factor q", q); + if (need_q_factor) + log_mpidump ("factor q0", q_factor); + for (i = 0; i < n; i++) + log_mpidump ("factor pi", factors[i]); + log_debug ("bit sizes: prime=%u, q=%u", + mpi_get_nbits (prime), mpi_get_nbits (q)); + if (need_q_factor) + log_printf (", q0=%u", mpi_get_nbits (q_factor)); + for (i = 0; i < n; i++) + log_printf (", p%d=%u", i, mpi_get_nbits (factors[i])); + log_printf ("\n"); + } + + if (ret_factors) + { + /* Caller wants the factors. */ + factors_new = xtrycalloc (n + 4, sizeof (*factors_new)); + if (! 
factors_new) + { + err = gpg_err_code_from_errno (errno); + goto leave; + } + + if (all_factors) + { + i = 0; + factors_new[i++] = mpi_set_ui (NULL, 2); + factors_new[i++] = mpi_copy (q); + if (need_q_factor) + factors_new[i++] = mpi_copy (q_factor); + for(j=0; j < n; j++) + factors_new[i++] = mpi_copy (factors[j]); + } + else + { + i = 0; + if (need_q_factor) + { + factors_new[i++] = mpi_copy (q_factor); + for (; i <= n; i++) + factors_new[i] = mpi_copy (factors[i]); + } + else + for (; i < n; i++ ) + factors_new[i] = mpi_copy (factors[i]); + } + } + + if (g && need_q_factor) + err = GPG_ERR_NOT_IMPLEMENTED; + else if (g) + { + /* Create a generator (start with 3). */ + gcry_mpi_t tmp = mpi_alloc (mpi_get_nlimbs (prime)); + gcry_mpi_t b = mpi_alloc (mpi_get_nlimbs (prime)); + gcry_mpi_t pmin1 = mpi_alloc (mpi_get_nlimbs (prime)); + + factors[n] = q; + factors[n + 1] = mpi_alloc_set_ui (2); + mpi_sub_ui (pmin1, prime, 1); + mpi_set_ui (g, 2); + do + { + mpi_add_ui (g, g, 1); + if (DBG_CIPHER) + log_printmpi ("checking g", g); + else + progress('^'); + for (i = 0; i < n + 2; i++) + { + mpi_fdiv_q (tmp, pmin1, factors[i]); + /* No mpi_pow(), but it is okay to use this with mod + prime. */ + mpi_powm (b, g, tmp, prime); + if (! mpi_cmp_ui (b, 1)) + break; + } + if (DBG_CIPHER) + progress('\n'); + } + while (i < n + 2); + + mpi_free (factors[n+1]); + mpi_free (tmp); + mpi_free (b); + mpi_free (pmin1); + } + + if (! DBG_CIPHER) + progress ('\n'); + + + leave: + if (pool) + { + is_locked = !gpgrt_lock_lock (&primepool_lock); + for(i = 0; i < m; i++) + { + if (pool[i]) + { + for (j=0; j < n; j++) + if (pool_in_use[j] == i) + break; + if (j == n && is_locked) + { + /* This pooled subprime has not been used. */ + save_pool_prime (pool[i], poolrandomlevel); + } + else + mpi_free (pool[i]); + } + } + if (is_locked) + err = gpgrt_lock_unlock (&primepool_lock); + is_locked = 0; + xfree (pool); + } + xfree (pool_in_use); + if (factors) + xfree (factors); /* Factors are shallow copies. */ + if (perms) + xfree (perms); + + mpi_free (val_2); + mpi_free (q); + mpi_free (q_factor); + + if (! err) + { + *prime_generated = prime; + if (ret_factors) + *ret_factors = factors_new; + } + else + { + if (factors_new) + { + for (i = 0; factors_new[i]; i++) + mpi_free (factors_new[i]); + xfree (factors_new); + } + mpi_free (prime); + } + + return err; +} + + +/* Generate a prime used for discrete logarithm algorithms; i.e. this + prime will be public and no strong random is required. On success + R_PRIME receives a new MPI with the prime. On error R_PRIME is set + to NULL and an error code is returned. If RET_FACTORS is not NULL + it is set to an allocated array of factors on success or to NULL on + error. 
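+   MODE 1 requests an extra prime factor of size QBITS (the
+   NEED_Q_FACTOR case of prime_generate_internal above).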
*/ +gcry_err_code_t +_gcry_generate_elg_prime (int mode, unsigned pbits, unsigned qbits, + gcry_mpi_t g, + gcry_mpi_t *r_prime, gcry_mpi_t **ret_factors) +{ + *r_prime = NULL; + if (ret_factors) + *ret_factors = NULL; + return prime_generate_internal ((mode == 1), r_prime, pbits, qbits, g, + ret_factors, GCRY_WEAK_RANDOM, 0, 0, + NULL, NULL); +} + + +static gcry_mpi_t +gen_prime (unsigned int nbits, int secret, int randomlevel, + int (*extra_check)(void *, gcry_mpi_t), void *extra_check_arg) +{ + gcry_mpi_t prime, ptest, pminus1, val_2, val_3, result; + int i; + unsigned int x, step; + unsigned int count1, count2; + int *mods; + +/* if ( DBG_CIPHER ) */ +/* log_debug ("generate a prime of %u bits ", nbits ); */ + + if (nbits < 16) + log_fatal ("can't generate a prime with less than %d bits\n", 16); + + mods = (secret? xmalloc_secure (no_of_small_prime_numbers * sizeof *mods) + /* */ : xmalloc (no_of_small_prime_numbers * sizeof *mods)); + /* Make nbits fit into gcry_mpi_t implementation. */ + val_2 = mpi_alloc_set_ui( 2 ); + val_3 = mpi_alloc_set_ui( 3); + prime = secret? mpi_snew (nbits): mpi_new (nbits); + result = mpi_alloc_like( prime ); + pminus1= mpi_alloc_like( prime ); + ptest = mpi_alloc_like( prime ); + count1 = count2 = 0; + for (;;) + { /* try forvever */ + int dotcount=0; + + /* generate a random number */ + _gcry_mpi_randomize( prime, nbits, randomlevel ); + + /* Set high order bit to 1, set low order bit to 1. If we are + generating a secret prime we are most probably doing that + for RSA, to make sure that the modulus does have the + requested key size we set the 2 high order bits. */ + mpi_set_highbit (prime, nbits-1); + if (secret) + mpi_set_bit (prime, nbits-2); + mpi_set_bit(prime, 0); + + /* Calculate all remainders. */ + for (i=0; (x = small_prime_numbers[i]); i++ ) + mods[i] = mpi_fdiv_r_ui(NULL, prime, x); + + /* Now try some primes starting with prime. */ + for(step=0; step < 20000; step += 2 ) + { + /* Check against all the small primes we have in mods. */ + count1++; + for (i=0; (x = small_prime_numbers[i]); i++ ) + { + while ( mods[i] + step >= x ) + mods[i] -= x; + if ( !(mods[i] + step) ) + break; + } + if ( x ) + continue; /* Found a multiple of an already known prime. */ + + mpi_add_ui( ptest, prime, step ); + + /* Do a fast Fermat test now. */ + count2++; + mpi_sub_ui( pminus1, ptest, 1); + mpi_powm( result, val_2, pminus1, ptest ); + if ( !mpi_cmp_ui( result, 1 ) ) + { + /* Not composite, perform stronger tests */ + if (is_prime(ptest, 5, &count2 )) + { + if (!mpi_test_bit( ptest, nbits-1-secret )) + { + progress('\n'); + log_debug ("overflow in prime generation\n"); + break; /* Stop loop, continue with a new prime. */ + } + + if (extra_check && extra_check (extra_check_arg, ptest)) + { + /* The extra check told us that this prime is + not of the caller's taste. */ + progress ('/'); + } + else + { + /* Got it. */ + mpi_free(val_2); + mpi_free(val_3); + mpi_free(result); + mpi_free(pminus1); + mpi_free(prime); + xfree(mods); + return ptest; + } + } + } + if (++dotcount == 10 ) + { + progress('.'); + dotcount = 0; + } + } + progress(':'); /* restart with a new random value */ + } +} + +/**************** + * Returns: true if this may be a prime + * RM_ROUNDS gives the number of Rabin-Miller tests to run. + */ +static int +check_prime( gcry_mpi_t prime, gcry_mpi_t val_2, int rm_rounds, + gcry_prime_check_func_t cb_func, void *cb_arg) +{ + int i; + unsigned int x; + unsigned int count=0; + + /* Check against small primes. 
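+     (if PRIME is itself one of the small primes it is of course prime)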
*/ + for (i=0; (x = small_prime_numbers[i]); i++ ) + { + if ( mpi_divisible_ui( prime, x ) ) + return !mpi_cmp_ui (prime, x); + } + + /* A quick Fermat test. */ + { + gcry_mpi_t result = mpi_alloc_like( prime ); + gcry_mpi_t pminus1 = mpi_alloc_like( prime ); + mpi_sub_ui( pminus1, prime, 1); + mpi_powm( result, val_2, pminus1, prime ); + mpi_free( pminus1 ); + if ( mpi_cmp_ui( result, 1 ) ) + { + /* Is composite. */ + mpi_free( result ); + progress('.'); + return 0; + } + mpi_free( result ); + } + + if (!cb_func || cb_func (cb_arg, GCRY_PRIME_CHECK_AT_MAYBE_PRIME, prime)) + { + /* Perform stronger tests. */ + if ( is_prime( prime, rm_rounds, &count ) ) + { + if (!cb_func + || cb_func (cb_arg, GCRY_PRIME_CHECK_AT_GOT_PRIME, prime)) + return 1; /* Probably a prime. */ + } + } + progress('.'); + return 0; +} + + +/* + * Return true if n is probably a prime + */ +static int +is_prime (gcry_mpi_t n, int steps, unsigned int *count) +{ + gcry_mpi_t x = mpi_alloc( mpi_get_nlimbs( n ) ); + gcry_mpi_t y = mpi_alloc( mpi_get_nlimbs( n ) ); + gcry_mpi_t z = mpi_alloc( mpi_get_nlimbs( n ) ); + gcry_mpi_t nminus1 = mpi_alloc( mpi_get_nlimbs( n ) ); + gcry_mpi_t a2 = mpi_alloc_set_ui( 2 ); + gcry_mpi_t q; + unsigned i, j, k; + int rc = 0; + unsigned nbits = mpi_get_nbits( n ); + + if (steps < 5) /* Make sure that we do at least 5 rounds. */ + steps = 5; + + mpi_sub_ui( nminus1, n, 1 ); + + /* Find q and k, so that n = 1 + 2^k * q . */ + q = mpi_copy ( nminus1 ); + k = mpi_trailing_zeros ( q ); + mpi_tdiv_q_2exp (q, q, k); + + for (i=0 ; i < steps; i++ ) + { + ++*count; + if( !i ) + { + mpi_set_ui( x, 2 ); + } + else + { + /* We need to loop to avoid an X with value 0 or 1. */ + do + { + _gcry_mpi_randomize (x, nbits, GCRY_WEAK_RANDOM); + + /* Make sure that the number is smaller than the prime + * and keep the randomness of the high bit. */ + if (mpi_test_bit (x, nbits-2)) + { + mpi_set_highbit (x, nbits-2); /* Clear all higher bits. */ + } + else + { + mpi_set_highbit (x, nbits-2); + mpi_clear_bit (x, nbits-2); + } + } + while (mpi_cmp_ui (x, 1) <= 0); + gcry_assert (mpi_cmp (x, nminus1) < 0); + } + mpi_powm ( y, x, q, n); + if ( mpi_cmp_ui(y, 1) && mpi_cmp( y, nminus1 ) ) + { + for ( j=1; j < k && mpi_cmp( y, nminus1 ); j++ ) + { + mpi_powm(y, y, a2, n); + if( !mpi_cmp_ui( y, 1 ) ) + goto leave; /* Not a prime. */ + } + if (mpi_cmp( y, nminus1 ) ) + goto leave; /* Not a prime. */ + } + progress('+'); + } + rc = 1; /* May be a prime. */ + + leave: + mpi_free( x ); + mpi_free( y ); + mpi_free( z ); + mpi_free( nminus1 ); + mpi_free( q ); + mpi_free( a2 ); + + return rc; +} + + +/* Given ARRAY of size N with M elements set to true produce a + modified array with the next permutation of M elements. Note, that + ARRAY is used in a one-bit-per-byte approach. To detected the last + permutation it is useful to initialize the array with the first M + element set to true and use this test: + m_out_of_n (array, m, n); + for (i = j = 0; i < n && j < m; i++) + if (array[i]) + j++; + if (j == m) + goto ready; + + This code is based on the algorithm 452 from the "Collected + Algorithms From ACM, Volume II" by C. N. Liu and D. T. Tang. +*/ +static void +m_out_of_n ( char *array, int m, int n ) +{ + int i=0, i1=0, j=0, jp=0, j1=0, k1=0, k2=0; + + if( !m || m >= n ) + return; + + /* Need to handle this simple case separately. 
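+     For M == 1 the next permutation simply moves the single set bit
+     one position to the right, wrapping around at the end of ARRAY.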
*/ + if( m == 1 ) + { + for (i=0; i < n; i++ ) + { + if ( array[i] ) + { + array[i++] = 0; + if( i >= n ) + i = 0; + array[i] = 1; + return; + } + } + BUG(); + } + + + for (j=1; j < n; j++ ) + { + if ( array[n-1] == array[n-j-1]) + continue; + j1 = j; + break; + } + + if ( (m & 1) ) + { + /* M is odd. */ + if( array[n-1] ) + { + if( j1 & 1 ) + { + k1 = n - j1; + k2 = k1+2; + if( k2 > n ) + k2 = n; + goto leave; + } + goto scan; + } + k2 = n - j1 - 1; + if( k2 == 0 ) + { + k1 = i; + k2 = n - j1; + } + else if( array[k2] && array[k2-1] ) + k1 = n; + else + k1 = k2 + 1; + } + else + { + /* M is even. */ + if( !array[n-1] ) + { + k1 = n - j1; + k2 = k1 + 1; + goto leave; + } + + if( !(j1 & 1) ) + { + k1 = n - j1; + k2 = k1+2; + if( k2 > n ) + k2 = n; + goto leave; + } + scan: + jp = n - j1 - 1; + for (i=1; i <= jp; i++ ) + { + i1 = jp + 2 - i; + if( array[i1-1] ) + { + if( array[i1-2] ) + { + k1 = i1 - 1; + k2 = n - j1; + } + else + { + k1 = i1 - 1; + k2 = n + 1 - j1; + } + goto leave; + } + } + k1 = 1; + k2 = n + 1 - m; + } + leave: + /* Now complement the two selected bits. */ + array[k1-1] = !array[k1-1]; + array[k2-1] = !array[k2-1]; +} + + +/* Generate a new prime number of PRIME_BITS bits and store it in + PRIME. If FACTOR_BITS is non-zero, one of the prime factors of + (prime - 1) / 2 must be FACTOR_BITS bits long. If FACTORS is + non-zero, allocate a new, NULL-terminated array holding the prime + factors and store it in FACTORS. FLAGS might be used to influence + the prime number generation process. */ +gcry_err_code_t +_gcry_prime_generate (gcry_mpi_t *prime, unsigned int prime_bits, + unsigned int factor_bits, gcry_mpi_t **factors, + gcry_prime_check_func_t cb_func, void *cb_arg, + gcry_random_level_t random_level, + unsigned int flags) +{ + gcry_err_code_t rc = 0; + gcry_mpi_t *factors_generated = NULL; + gcry_mpi_t prime_generated = NULL; + unsigned int mode = 0; + + if (!prime) + return GPG_ERR_INV_ARG; + *prime = NULL; + + if (flags & GCRY_PRIME_FLAG_SPECIAL_FACTOR) + mode = 1; + + /* Generate. */ + rc = prime_generate_internal ((mode==1), &prime_generated, prime_bits, + factor_bits, NULL, + factors? &factors_generated : NULL, + random_level, flags, 1, + cb_func, cb_arg); + + if (!rc && cb_func) + { + /* Additional check. */ + if ( !cb_func (cb_arg, GCRY_PRIME_CHECK_AT_FINISH, prime_generated)) + { + /* Failed, deallocate resources. */ + unsigned int i; + + mpi_free (prime_generated); + if (factors) + { + for (i = 0; factors_generated[i]; i++) + mpi_free (factors_generated[i]); + xfree (factors_generated); + } + rc = GPG_ERR_GENERAL; + } + } + + if (!rc) + { + if (factors) + *factors = factors_generated; + *prime = prime_generated; + } + + return rc; +} + +/* Check whether the number X is prime. */ +gcry_err_code_t +_gcry_prime_check (gcry_mpi_t x, unsigned int flags) +{ + (void)flags; + + switch (mpi_cmp_ui (x, 2)) + { + case 0: return 0; /* 2 is a prime */ + case -1: return GPG_ERR_NO_PRIME; /* Only numbers > 1 are primes. */ + } + + /* We use 64 rounds because the prime we are going to test is not + guaranteed to be a random one. */ + if (check_prime (x, mpi_const (MPI_C_TWO), 64, NULL, NULL)) + return 0; + + return GPG_ERR_NO_PRIME; +} + + +/* Check whether the number X is prime according to FIPS 186-4 table C.2. 
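+   Table C.2 permits fewer Rabin-Miller rounds for larger moduli,
+   hence the round count below depends on BITS.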
*/ +gcry_err_code_t +_gcry_fips186_4_prime_check (gcry_mpi_t x, unsigned int bits) +{ + gcry_err_code_t ec = GPG_ERR_NO_ERROR; + + switch (mpi_cmp_ui (x, 2)) + { + case 0: return ec; /* 2 is a prime */ + case -1: return GPG_ERR_NO_PRIME; /* Only numbers > 1 are primes. */ + } + + /* We use 5 or 4 rounds as specified in table C.2 */ + if (! check_prime (x, mpi_const (MPI_C_TWO), bits > 1024 ? 4 : 5, NULL, NULL)) + ec = GPG_ERR_NO_PRIME; + + return ec; +} + + +/* Find a generator for PRIME where the factorization of (prime-1) is + in the NULL terminated array FACTORS. Return the generator as a + newly allocated MPI in R_G. If START_G is not NULL, use this as s + atart for the search. Returns 0 on success.*/ +gcry_err_code_t +_gcry_prime_group_generator (gcry_mpi_t *r_g, + gcry_mpi_t prime, gcry_mpi_t *factors, + gcry_mpi_t start_g) +{ + gcry_mpi_t tmp, b, pmin1, g; + int first, i, n; + + if (!r_g) + return GPG_ERR_INV_ARG; + *r_g = NULL; + if (!factors || !prime) + return GPG_ERR_INV_ARG; + + for (n=0; factors[n]; n++) + ; + if (n < 2) + return GPG_ERR_INV_ARG; + + tmp = mpi_new (0); + b = mpi_new (0); + pmin1 = mpi_new (0); + g = start_g? mpi_copy (start_g) : mpi_set_ui (NULL, 3); + + /* Extra sanity check - usually disabled. */ +/* mpi_set (tmp, factors[0]); */ +/* for(i = 1; i < n; i++) */ +/* mpi_mul (tmp, tmp, factors[i]); */ +/* mpi_add_ui (tmp, tmp, 1); */ +/* if (mpi_cmp (prime, tmp)) */ +/* return gpg_error (GPG_ERR_INV_ARG); */ + + mpi_sub_ui (pmin1, prime, 1); + first = 1; + do + { + if (first) + first = 0; + else + mpi_add_ui (g, g, 1); + + if (DBG_CIPHER) + log_printmpi ("checking g", g); + else + progress('^'); + + for (i = 0; i < n; i++) + { + mpi_fdiv_q (tmp, pmin1, factors[i]); + mpi_powm (b, g, tmp, prime); + if (! mpi_cmp_ui (b, 1)) + break; + } + if (DBG_CIPHER) + progress('\n'); + } + while (i < n); + + _gcry_mpi_release (tmp); + _gcry_mpi_release (b); + _gcry_mpi_release (pmin1); + *r_g = g; + + return 0; +} + +/* Convenience function to release the factors array. */ +void +_gcry_prime_release_factors (gcry_mpi_t *factors) +{ + if (factors) + { + int i; + + for (i=0; factors[i]; i++) + mpi_free (factors[i]); + xfree (factors); + } +} + + + +/* Helper for _gcry_derive_x931_prime. */ +static gcry_mpi_t +find_x931_prime (const gcry_mpi_t pfirst) +{ + gcry_mpi_t val_2 = mpi_alloc_set_ui (2); + gcry_mpi_t prime; + + prime = mpi_copy (pfirst); + /* If P is even add 1. */ + mpi_set_bit (prime, 0); + + /* We use 64 Rabin-Miller rounds which is better and thus + sufficient. We do not have a Lucas test implementation thus we + can't do it in the X9.31 preferred way of running a few + Rabin-Miller followed by one Lucas test. */ + while ( !check_prime (prime, val_2, 64, NULL, NULL) ) + mpi_add_ui (prime, prime, 2); + + mpi_free (val_2); + + return prime; +} + + +/* Generate a prime using the algorithm from X9.31 appendix B.4. + + This function requires that the provided public exponent E is odd. + XP, XP1 and XP2 are the seed values. All values are mandatory. + + On success the prime is returned. If R_P1 or R_P2 are given the + internal values P1 and P2 are saved at these addresses. On error + NULL is returned. */ +gcry_mpi_t +_gcry_derive_x931_prime (const gcry_mpi_t xp, + const gcry_mpi_t xp1, const gcry_mpi_t xp2, + const gcry_mpi_t e, + gcry_mpi_t *r_p1, gcry_mpi_t *r_p2) +{ + gcry_mpi_t p1, p2, p1p2, yp0; + + if (!xp || !xp1 || !xp2) + return NULL; + if (!e || !mpi_test_bit (e, 0)) + return NULL; /* We support only odd values for E. 
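+                   (an even E can never be coprime to the even P-1)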
*/ + + p1 = find_x931_prime (xp1); + p2 = find_x931_prime (xp2); + p1p2 = mpi_alloc_like (xp); + mpi_mul (p1p2, p1, p2); + + { + gcry_mpi_t r1, tmp; + + /* r1 = (p2^{-1} mod p1)p2 - (p1^{-1} mod p2) */ + tmp = mpi_alloc_like (p1); + mpi_invm (tmp, p2, p1); + mpi_mul (tmp, tmp, p2); + r1 = tmp; + + tmp = mpi_alloc_like (p2); + mpi_invm (tmp, p1, p2); + mpi_mul (tmp, tmp, p1); + mpi_sub (r1, r1, tmp); + + /* Fixup a negative value. */ + if (mpi_has_sign (r1)) + mpi_add (r1, r1, p1p2); + + /* yp0 = xp + (r1 - xp mod p1*p2) */ + yp0 = tmp; tmp = NULL; + mpi_subm (yp0, r1, xp, p1p2); + mpi_add (yp0, yp0, xp); + mpi_free (r1); + + /* Fixup a negative value. */ + if (mpi_cmp (yp0, xp) < 0 ) + mpi_add (yp0, yp0, p1p2); + } + + /* yp0 is now the first integer greater than xp with p1 being a + large prime factor of yp0-1 and p2 a large prime factor of yp0+1. */ + + /* Note that the first example from X9.31 (D.1.1) which uses + (Xq1 #1A5CF72EE770DE50CB09ACCEA9#) + (Xq2 #134E4CAA16D2350A21D775C404#) + (Xq #CC1092495D867E64065DEE3E7955F2EBC7D47A2D + 7C9953388F97DDDC3E1CA19C35CA659EDC2FC325 + 6D29C2627479C086A699A49C4C9CEE7EF7BD1B34 + 321DE34A#)))) + returns an yp0 of + #CC1092495D867E64065DEE3E7955F2EBC7D47A2D + 7C9953388F97DDDC3E1CA19C35CA659EDC2FC4E3 + BF20CB896EE37E098A906313271422162CB6C642 + 75C1201F# + and not + #CC1092495D867E64065DEE3E7955F2EBC7D47A2D + 7C9953388F97DDDC3E1CA19C35CA659EDC2FC2E6 + C88FE299D52D78BE405A97E01FD71DD7819ECB91 + FA85A076# + as stated in the standard. This seems to be a bug in X9.31. + */ + + { + gcry_mpi_t val_2 = mpi_alloc_set_ui (2); + gcry_mpi_t gcdtmp = mpi_alloc_like (yp0); + int gcdres; + + mpi_sub_ui (p1p2, p1p2, 1); /* Adjust for loop body. */ + mpi_sub_ui (yp0, yp0, 1); /* Ditto. */ + for (;;) + { + gcdres = mpi_gcd (gcdtmp, e, yp0); + mpi_add_ui (yp0, yp0, 1); + if (!gcdres) + progress ('/'); /* gcd (e, yp0-1) != 1 */ + else if (check_prime (yp0, val_2, 64, NULL, NULL)) + break; /* Found. */ + /* We add p1p2-1 because yp0 is incremented after the gcd test. */ + mpi_add (yp0, yp0, p1p2); + } + mpi_free (gcdtmp); + mpi_free (val_2); + } + + mpi_free (p1p2); + + progress('\n'); + if (r_p1) + *r_p1 = p1; + else + mpi_free (p1); + if (r_p2) + *r_p2 = p2; + else + mpi_free (p2); + return yp0; +} + + + +/* Generate the two prime used for DSA using the algorithm specified + in FIPS 186-2. PBITS is the desired length of the prime P and a + QBITS the length of the prime Q. If SEED is not supplied and + SEEDLEN is 0 the function generates an appropriate SEED. On + success the generated primes are stored at R_Q and R_P, the counter + value is stored at R_COUNTER and the seed actually used for + generation is stored at R_SEED and R_SEEDVALUE. */ +gpg_err_code_t +_gcry_generate_fips186_2_prime (unsigned int pbits, unsigned int qbits, + const void *seed, size_t seedlen, + gcry_mpi_t *r_q, gcry_mpi_t *r_p, + int *r_counter, + void **r_seed, size_t *r_seedlen) +{ + gpg_err_code_t ec; + unsigned char seed_help_buffer[160/8]; /* Used to hold a generated SEED. */ + unsigned char *seed_plus; /* Malloced buffer to hold SEED+x. */ + unsigned char digest[160/8]; /* Helper buffer for SHA-1 digest. */ + gcry_mpi_t val_2 = NULL; /* Helper for the prime test. */ + gcry_mpi_t tmpval = NULL; /* Helper variable. */ + int i; + + unsigned char value_u[160/8]; + int value_n, value_b, value_k; + int counter; + gcry_mpi_t value_w = NULL; + gcry_mpi_t value_x = NULL; + gcry_mpi_t prime_q = NULL; + gcry_mpi_t prime_p = NULL; + + /* FIPS 186-2 allows only for 1024/160 bit. 
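+     Larger parameter sizes are handled by the FIPS 186-3 variant
+     further below.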
*/ + if (pbits != 1024 || qbits != 160) + return GPG_ERR_INV_KEYLEN; + + if (!seed && !seedlen) + ; /* No seed value given: We are asked to generate it. */ + else if (!seed || seedlen < qbits/8) + return GPG_ERR_INV_ARG; + + /* Allocate a buffer to later compute SEED+some_increment. */ + seed_plus = xtrymalloc (seedlen < 20? 20:seedlen); + if (!seed_plus) + { + ec = gpg_err_code_from_syserror (); + goto leave; + } + + val_2 = mpi_alloc_set_ui (2); + value_n = (pbits - 1) / qbits; + value_b = (pbits - 1) - value_n * qbits; + value_w = mpi_new (pbits); + value_x = mpi_new (pbits); + + restart: + /* Generate Q. */ + for (;;) + { + /* Step 1: Generate a (new) seed unless one has been supplied. */ + if (!seed) + { + seedlen = sizeof seed_help_buffer; + _gcry_create_nonce (seed_help_buffer, seedlen); + seed = seed_help_buffer; + } + + /* Step 2: U = sha1(seed) ^ sha1((seed+1) mod 2^{qbits}) */ + memcpy (seed_plus, seed, seedlen); + for (i=seedlen-1; i >= 0; i--) + { + seed_plus[i]++; + if (seed_plus[i]) + break; + } + _gcry_md_hash_buffer (GCRY_MD_SHA1, value_u, seed, seedlen); + _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen); + for (i=0; i < sizeof value_u; i++) + value_u[i] ^= digest[i]; + + /* Step 3: Form q from U */ + _gcry_mpi_release (prime_q); prime_q = NULL; + ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG, + value_u, sizeof value_u, NULL); + if (ec) + goto leave; + mpi_set_highbit (prime_q, qbits-1 ); + mpi_set_bit (prime_q, 0); + + /* Step 4: Test whether Q is prime using 64 round of Rabin-Miller. */ + if (check_prime (prime_q, val_2, 64, NULL, NULL)) + break; /* Yes, Q is prime. */ + + /* Step 5. */ + seed = NULL; /* Force a new seed at Step 1. */ + } + + /* Step 6. Note that we do no use an explicit offset but increment + SEED_PLUS accordingly. SEED_PLUS is currently SEED+1. */ + counter = 0; + + /* Generate P. */ + prime_p = mpi_new (pbits); + for (;;) + { + /* Step 7: For k = 0,...n let + V_k = sha1(seed+offset+k) mod 2^{qbits} + Step 8: W = V_0 + V_1*2^160 + + ... + + V_{n-1}*2^{(n-1)*160} + + (V_{n} mod 2^b)*2^{n*160} + */ + mpi_set_ui (value_w, 0); + for (value_k=0; value_k <= value_n; value_k++) + { + /* There is no need to have an explicit offset variable: In + the first round we shall have an offset of 2, this is + achieved by using SEED_PLUS which is already at SEED+1, + thus we just need to increment it once again. The + requirement for the next round is to update offset by N, + which we implictly did at the end of this loop, and then + to add one; this one is the same as in the first round. */ + for (i=seedlen-1; i >= 0; i--) + { + seed_plus[i]++; + if (seed_plus[i]) + break; + } + _gcry_md_hash_buffer (GCRY_MD_SHA1, digest, seed_plus, seedlen); + + _gcry_mpi_release (tmpval); tmpval = NULL; + ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG, + digest, sizeof digest, NULL); + if (ec) + goto leave; + if (value_k == value_n) + mpi_clear_highbit (tmpval, value_b); /* (V_n mod 2^b) */ + mpi_lshift (tmpval, tmpval, value_k*qbits); + mpi_add (value_w, value_w, tmpval); + } + + /* Step 8 continued: X = W + 2^{L-1} */ + mpi_set_ui (value_x, 0); + mpi_set_highbit (value_x, pbits-1); + mpi_add (value_x, value_x, value_w); + + /* Step 9: c = X mod 2q, p = X - (c - 1) */ + mpi_mul_2exp (tmpval, prime_q, 1); + mpi_mod (tmpval, value_x, tmpval); + mpi_sub_ui (tmpval, tmpval, 1); + mpi_sub (prime_p, value_x, tmpval); + + /* Step 10: If p < 2^{L-1} skip the primality test. */ + /* Step 11 and 12: Primality test. 
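+         The size check and the 64 Rabin-Miller rounds are folded into
+         the single condition below.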
+      /* Step 10: If p < 2^{L-1} skip the primality test.  */
+      /* Step 11 and 12:  Primality test.  */
+      if (mpi_get_nbits (prime_p) >= pbits-1
+          && check_prime (prime_p, val_2, 64, NULL, NULL) )
+        break; /* Yes, P is prime, continue with Step 15.  */
+
+      /* Step 13: counter = counter + 1, offset = offset + n + 1.  */
+      counter++;
+
+      /* Step 14: If counter >= 2^12 goto Step 1.  */
+      if (counter >= 4096)
+        goto restart;
+    }
+
+  /* Step 15:  Save p, q, counter and seed.  */
+/* log_debug ("fips186-2 pbits p=%u q=%u counter=%d\n", */
+/*            mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */
+/* log_printhex("fips186-2 seed:", seed, seedlen); */
+/* log_mpidump ("fips186-2 prime p", prime_p); */
+/* log_mpidump ("fips186-2 prime q", prime_q); */
+  if (r_q)
+    {
+      *r_q = prime_q;
+      prime_q = NULL;
+    }
+  if (r_p)
+    {
+      *r_p = prime_p;
+      prime_p = NULL;
+    }
+  if (r_counter)
+    *r_counter = counter;
+  if (r_seed && r_seedlen)
+    {
+      memcpy (seed_plus, seed, seedlen);
+      *r_seed = seed_plus;
+      seed_plus = NULL;
+      *r_seedlen = seedlen;
+    }
+
+
+ leave:
+  _gcry_mpi_release (tmpval);
+  _gcry_mpi_release (value_x);
+  _gcry_mpi_release (value_w);
+  _gcry_mpi_release (prime_p);
+  _gcry_mpi_release (prime_q);
+  xfree (seed_plus);
+  _gcry_mpi_release (val_2);
+  return ec;
+}
+
+
+
+/* WARNING: The code below has not yet been tested!
+ *
+ * Generate the two primes used for DSA using the algorithm specified
+ * in FIPS 186-3, A.1.1.2.  PBITS is the desired length of the prime P
+ * and QBITS the length of the prime Q.  If SEED is not supplied and
+ * SEEDLEN is 0 the function generates an appropriate SEED.  On
+ * success the generated primes are stored at R_Q and R_P, the counter
+ * value is stored at R_COUNTER and the seed actually used for
+ * generation is stored at R_SEED and R_SEEDLEN.  The hash algorithm
+ * used is stored at R_HASHALGO.
+ *
+ * Note that this function is very similar to the fips186_2 code.  Due
+ * to the minor differences, the different buffer sizes, and for
+ * documentation purposes, we use a separate function.
+ */
+gpg_err_code_t
+_gcry_generate_fips186_3_prime (unsigned int pbits, unsigned int qbits,
+                                const void *seed, size_t seedlen,
+                                gcry_mpi_t *r_q, gcry_mpi_t *r_p,
+                                int *r_counter,
+                                void **r_seed, size_t *r_seedlen,
+                                int *r_hashalgo)
+{
+  gpg_err_code_t ec;
+  unsigned char seed_help_buffer[256/8];  /* Used to hold a generated SEED. */
+  unsigned char *seed_plus;     /* Malloced buffer to hold SEED+x.  */
+  unsigned char digest[256/8];  /* Helper buffer for SHA-2 digest.  */
+  gcry_mpi_t val_2 = NULL;      /* Helper for the prime test.  */
+  gcry_mpi_t tmpval = NULL;     /* Helper variable.  */
+  int hashalgo;                 /* The id of the Approved Hash Function.  */
+  int i;
+
+  unsigned char value_u[256/8];
+  int value_n, value_b, value_j;
+  int counter;
+  gcry_mpi_t value_w = NULL;
+  gcry_mpi_t value_x = NULL;
+  gcry_mpi_t prime_q = NULL;
+  gcry_mpi_t prime_p = NULL;
+
+  gcry_assert (sizeof seed_help_buffer == sizeof digest
+               && sizeof seed_help_buffer == sizeof value_u);
+
+  /* Step 1:  Check the requested prime lengths.  */
+  /* Note that due to the size of our buffers QBITS is limited to 256.  */
+  if (pbits == 2048 && qbits == 224)
+    hashalgo = GCRY_MD_SHA224;
+  else if (pbits == 2048 && qbits == 256)
+    hashalgo = GCRY_MD_SHA256;
+  else if (pbits == 3072 && qbits == 256)
+    hashalgo = GCRY_MD_SHA256;
+  else
+    return GPG_ERR_INV_KEYLEN;
+
+  /* Also check that the hash algorithm is available.  */
+  ec = _gcry_md_test_algo (hashalgo);
+  if (ec)
+    return ec;
+  gcry_assert (qbits/8 <= sizeof digest);
+  gcry_assert (_gcry_md_get_algo_dlen (hashalgo) == qbits/8);
+
+
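Both FIPS 186 generators are internal and are normally reached through gcry_pk_genkey. One plausible caller-side request, sketched against the public API only (the flag spelling matches the use-fips186 flag handled by the flag parser added later in this patch; error handling is abbreviated):

#include <gcrypt.h>

gcry_sexp_t parms = NULL, keypair = NULL;
gcry_error_t err;

/* Ask for a 2048/256 DSA key generated with the FIPS 186 method.  */
err = gcry_sexp_build (&parms, NULL,
                       "(genkey (dsa (nbits 4:2048)(qbits 3:256)"
                       "(flags use-fips186)))");
if (!err)
  err = gcry_pk_genkey (&keypair, parms);
gcry_sexp_release (parms);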
+  /* Step 2:  Check seedlen.  */
+  if (!seed && !seedlen)
+    ; /* No seed value given:  We are asked to generate it.  */
+  else if (!seed || seedlen < qbits/8)
+    return GPG_ERR_INV_ARG;
+
+  /* Allocate a buffer to later compute SEED+some_increment and a few
+     helper variables.  */
+  seed_plus = xtrymalloc (seedlen < sizeof seed_help_buffer?
+                          sizeof seed_help_buffer : seedlen);
+  if (!seed_plus)
+    {
+      ec = gpg_err_code_from_syserror ();
+      goto leave;
+    }
+  val_2 = mpi_alloc_set_ui (2);
+  value_w = mpi_new (pbits);
+  value_x = mpi_new (pbits);
+
+  /* Step 3: n = \lceil L / outlen \rceil - 1  */
+  value_n = (pbits + qbits - 1) / qbits - 1;
+  /* Step 4: b = L - 1 - (n * outlen)  */
+  value_b = pbits - 1 - (value_n * qbits);
+
+ restart:
+  /* Generate Q.  */
+  for (;;)
+    {
+      /* Step 5:  Generate a (new) seed unless one has been supplied.  */
+      if (!seed)
+        {
+          seedlen = qbits/8;
+          gcry_assert (seedlen <= sizeof seed_help_buffer);
+          _gcry_create_nonce (seed_help_buffer, seedlen);
+          seed = seed_help_buffer;
+        }
+
+      /* Step 6:  U = hash(seed)  */
+      _gcry_md_hash_buffer (hashalgo, value_u, seed, seedlen);
+
+      /* Step 7:  q = 2^{N-1} + U + 1 - (U mod 2)  */
+      if ( !(value_u[qbits/8-1] & 0x01) )
+        {
+          for (i=qbits/8-1; i >= 0; i--)
+            {
+              value_u[i]++;
+              if (value_u[i])
+                break;
+            }
+        }
+      _gcry_mpi_release (prime_q); prime_q = NULL;
+      ec = _gcry_mpi_scan (&prime_q, GCRYMPI_FMT_USG,
+                           value_u, qbits/8, NULL);
+      if (ec)
+        goto leave;
+      mpi_set_highbit (prime_q, qbits-1 );
+
+      /* Step 8:  Test whether Q is prime using 64 rounds of Rabin-Miller.
+                  According to table C.1 this is sufficient for all
+                  supported prime sizes (i.e. up to 3072/256).  */
+      if (check_prime (prime_q, val_2, 64, NULL, NULL))
+        break; /* Yes, Q is prime.  */
+
+      /* Step 9.  */
+      seed = NULL;  /* Force a new seed at Step 5.  */
+    }
+
+  /* Step 11.  Note that we do not use an explicit offset but increment
+     SEED_PLUS accordingly.  */
+  memcpy (seed_plus, seed, seedlen);
+  counter = 0;
+
+  /* Generate P.  */
+  prime_p = mpi_new (pbits);
+  for (;;)
+    {
+      /* Step 11.1: For j = 0,...n let
+                      V_j = hash(seed+offset+j)
+         Step 11.2: W = V_0 + V_1*2^outlen +
+                        ... +
+                        V_{n-1}*2^{(n-1)*outlen} +
+                        (V_{n} mod 2^b)*2^{n*outlen}
+       */
+      mpi_set_ui (value_w, 0);
+      for (value_j=0; value_j <= value_n; value_j++)
+        {
+          /* There is no need to have an explicit offset variable:  In
+             the first round we shall have an offset of 1 and a j of
+             0.  This is achieved by incrementing SEED_PLUS here.  For
+             the next round offset is implicitly updated by using
+             SEED_PLUS again.  */
+          for (i=seedlen-1; i >= 0; i--)
+            {
+              seed_plus[i]++;
+              if (seed_plus[i])
+                break;
+            }
+          _gcry_md_hash_buffer (hashalgo, digest, seed_plus, seedlen);
+
+          _gcry_mpi_release (tmpval); tmpval = NULL;
+          ec = _gcry_mpi_scan (&tmpval, GCRYMPI_FMT_USG,
+                               digest, qbits/8, NULL);
+          if (ec)
+            goto leave;
+          if (value_j == value_n)
+            mpi_clear_highbit (tmpval, value_b); /* (V_n mod 2^b) */
+          mpi_lshift (tmpval, tmpval, value_j*qbits);
+          mpi_add (value_w, value_w, tmpval);
+        }
+
+      /* Step 11.3: X = W + 2^{L-1}  */
+      mpi_set_ui (value_x, 0);
+      mpi_set_highbit (value_x, pbits-1);
+      mpi_add (value_x, value_x, value_w);
+
+      /* Step 11.4:  c = X mod 2q  */
+      mpi_mul_2exp (tmpval, prime_q, 1);
+      mpi_mod (tmpval, value_x, tmpval);
+
+      /* Step 11.5:  p = X - (c - 1)  */
+      mpi_sub_ui (tmpval, tmpval, 1);
+      mpi_sub (prime_p, value_x, tmpval);
+
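A worked instance of Steps 3, 4 and 11.1-11.5 (arithmetic added for illustration, not from the original file): for L = 2048 and outlen = qbits = 256, the code computes n = ceil(2048/256) - 1 = 7 and b = 2048 - 1 - 7*256 = 255. W is therefore assembled from eight 256-bit digests V_0,...,V_7, with the top digest V_7 reduced mod 2^255, and forcing the high bit in Step 11.3 makes X exactly 2048 bits long.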
+      /* Step 11.6: If p < 2^{L-1} skip the primality test.  */
+      /* Step 11.7 and 11.8:  Primality test.  */
+      if (mpi_get_nbits (prime_p) >= pbits-1
+          && check_prime (prime_p, val_2, 64, NULL, NULL) )
+        break; /* Yes, P is prime, continue with Step 15.  */
+
+      /* Step 11.9: counter = counter + 1, offset = offset + n + 1.
+                    If counter >= 4L goto Step 5.  */
+      counter++;
+      if (counter >= 4*pbits)
+        goto restart;
+    }
+
+  /* Step 12:  Save p, q, counter and seed.  */
+  /* log_debug ("fips186-3 pbits p=%u q=%u counter=%d\n", */
+  /*            mpi_get_nbits (prime_p), mpi_get_nbits (prime_q), counter); */
+  /* log_printhex ("fips186-3 seed", seed, seedlen); */
+  /* log_printmpi ("fips186-3 p", prime_p); */
+  /* log_printmpi ("fips186-3 q", prime_q); */
+
+  if (r_q)
+    {
+      *r_q = prime_q;
+      prime_q = NULL;
+    }
+  if (r_p)
+    {
+      *r_p = prime_p;
+      prime_p = NULL;
+    }
+  if (r_counter)
+    *r_counter = counter;
+  if (r_seed && r_seedlen)
+    {
+      memcpy (seed_plus, seed, seedlen);
+      *r_seed = seed_plus;
+      seed_plus = NULL;
+      *r_seedlen = seedlen;
+    }
+  if (r_hashalgo)
+    *r_hashalgo = hashalgo;
+
+ leave:
+  _gcry_mpi_release (tmpval);
+  _gcry_mpi_release (value_x);
+  _gcry_mpi_release (value_w);
+  _gcry_mpi_release (prime_p);
+  _gcry_mpi_release (prime_q);
+  xfree (seed_plus);
+  _gcry_mpi_release (val_2);
+  return ec;
+}
diff --git a/comm/third_party/libgcrypt/cipher/pubkey-internal.h b/comm/third_party/libgcrypt/cipher/pubkey-internal.h
new file mode 100644
index 0000000000..d31e26f392
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/pubkey-internal.h
@@ -0,0 +1,105 @@
+/* pubkey-internal.h - Internal defs for pubkey.c
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef GCRY_PUBKEY_INTERNAL_H
+#define GCRY_PUBKEY_INTERNAL_H
+
+/*-- pubkey-util.c --*/
+gpg_err_code_t _gcry_pk_util_parse_flaglist (gcry_sexp_t list,
+                                             int *r_flags,
+                                             enum pk_encoding *r_encoding);
+gpg_err_code_t _gcry_pk_util_get_nbits (gcry_sexp_t list,
+                                        unsigned int *r_nbits);
+gpg_err_code_t _gcry_pk_util_get_rsa_use_e (gcry_sexp_t list,
+                                            unsigned long *r_e);
+gpg_err_code_t _gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig,
+                                              const char **algo_names,
+                                              gcry_sexp_t *r_parms,
+                                              int *r_eccflags);
+gpg_err_code_t _gcry_pk_util_preparse_encval (gcry_sexp_t sexp,
+                                              const char **algo_names,
+                                              gcry_sexp_t *r_parms,
+                                              struct pk_encoding_ctx *ctx);
+void _gcry_pk_util_init_encoding_ctx (struct pk_encoding_ctx *ctx,
+                                      enum pk_operation op,
+                                      unsigned int nbits);
+void _gcry_pk_util_free_encoding_ctx (struct pk_encoding_ctx *ctx);
+gcry_err_code_t _gcry_pk_util_data_to_mpi (gcry_sexp_t input,
+                                           gcry_mpi_t *ret_mpi,
+                                           struct pk_encoding_ctx *ctx);
+
+
+
+/*-- rsa-common.c --*/
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_enc (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen,
+                                const unsigned char *random_override,
+                                size_t random_override_len);
+gpg_err_code_t
+_gcry_rsa_pkcs1_decode_for_enc (unsigned char **r_result, size_t *r_resultlen,
+                                unsigned int nbits, gcry_mpi_t value);
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_raw_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+                                    const unsigned char *value,
+                                    size_t valuelen);
+
+gpg_err_code_t
+_gcry_rsa_pkcs1_encode_for_sig (gcry_mpi_t *r_result, unsigned int nbits,
+                                const unsigned char *value, size_t valuelen,
+                                int algo);
+gpg_err_code_t
+_gcry_rsa_oaep_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+                       const unsigned char *value, size_t valuelen,
+                       const unsigned char *label, size_t labellen,
+                       const void *random_override,
+                       size_t random_override_len);
+gpg_err_code_t
+_gcry_rsa_oaep_decode (unsigned char **r_result, size_t *r_resultlen,
+                       unsigned int nbits, int algo,
+                       gcry_mpi_t value,
+                       const unsigned char *label, size_t labellen);
+gpg_err_code_t
+_gcry_rsa_pss_encode (gcry_mpi_t *r_result, unsigned int nbits, int algo,
+                      const unsigned char *value, size_t valuelen, int saltlen,
+                      const void *random_override,
+                      size_t random_override_len);
+gpg_err_code_t
+_gcry_rsa_pss_verify (gcry_mpi_t value, gcry_mpi_t encoded,
+                      unsigned int nbits, int algo, size_t saltlen);
+
+
+
+/*-- dsa-common.c --*/
+void _gcry_dsa_modify_k (gcry_mpi_t k, gcry_mpi_t q, int qbits);
+gcry_mpi_t _gcry_dsa_gen_k (gcry_mpi_t q, int security_level);
+gpg_err_code_t _gcry_dsa_gen_rfc6979_k (gcry_mpi_t *r_k,
+                                        gcry_mpi_t dsa_q, gcry_mpi_t dsa_x,
+                                        const unsigned char *h1,
+                                        unsigned int h1len,
+                                        int halgo,
+                                        unsigned int extraloops);
+
+gpg_err_code_t _gcry_dsa_normalize_hash (gcry_mpi_t input,
+                                         gcry_mpi_t *out,
+                                         unsigned int qbits);
+
+/*-- ecc.c --*/
+gpg_err_code_t _gcry_pk_ecc_get_sexp (gcry_sexp_t *r_sexp, int mode,
+                                      mpi_ec_t ec);
+
+
+#endif /*GCRY_PUBKEY_INTERNAL_H*/
diff --git a/comm/third_party/libgcrypt/cipher/pubkey-util.c b/comm/third_party/libgcrypt/cipher/pubkey-util.c
new file mode 100644
index 0000000000..7ddef7dc31
--- /dev/null
+++ b/comm/third_party/libgcrypt/cipher/pubkey-util.c
@@ -0,0 +1,1160 @@
+/* pubkey-util.c - Supporting functions for all pubkey modules.
+ * Copyright (C) 1998, 1999, 2000, 2002, 2003, 2005,
+ *               2007, 2008, 2011 Free Software Foundation, Inc.
+ * Copyright (C) 2013, 2015 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <config.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "g10lib.h"
+#include "mpi.h"
+#include "cipher.h"
+#include "pubkey-internal.h"
+
+
+/* Callback for the pubkey algorithm code to verify PSS signatures.
+   OPAQUE is the data provided by the actual caller.  The meaning of
+   TMP depends on the actual algorithm (currently only RSA); for RSA
+   it is the output of running the public key function on the
+   input.  */
+static int
+pss_verify_cmp (void *opaque, gcry_mpi_t tmp)
+{
+  struct pk_encoding_ctx *ctx = opaque;
+  gcry_mpi_t hash = ctx->verify_arg;
+
+  return _gcry_rsa_pss_verify (hash, tmp, ctx->nbits - 1,
+                               ctx->hash_algo, ctx->saltlen);
+}
+
+
+/* Parser for a flag list.  On return the encoding is stored at
+   R_ENCODING and the flags are stored at R_FLAGS.  If any of them is
+   not needed, NULL may be passed.  The function returns 0 on success
+   or an error code.  */
+gpg_err_code_t
+_gcry_pk_util_parse_flaglist (gcry_sexp_t list,
+                              int *r_flags, enum pk_encoding *r_encoding)
+{
+  gpg_err_code_t rc = 0;
+  const char *s;
+  size_t n;
+  int i;
+  int encoding = PUBKEY_ENC_UNKNOWN;
+  int flags = 0;
+  int igninvflag = 0;
+
+  for (i = list ? sexp_length (list)-1 : 0; i > 0; i--)
+    {
+      s = sexp_nth_data (list, i, &n);
+      if (!s)
+        continue; /* Not a data element.  */
+
+      switch (n)
+        {
+        case 3:
+          if (!memcmp (s, "pss", 3) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_PSS;
+              flags |= PUBKEY_FLAG_FIXEDLEN;
+            }
+          else if (!memcmp (s, "raw", 3) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_RAW_FLAG; /* Explicitly given.  */
+            }
+          else if (!memcmp (s, "sm2", 3))
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_SM2 | PUBKEY_FLAG_RAW_FLAG;
+            }
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 4:
+          if (!memcmp (s, "comp", 4))
+            flags |= PUBKEY_FLAG_COMP;
+          else if (!memcmp (s, "oaep", 4) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_OAEP;
+              flags |= PUBKEY_FLAG_FIXEDLEN;
+            }
+          else if (!memcmp (s, "gost", 4))
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_GOST;
+            }
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 5:
+          if (!memcmp (s, "eddsa", 5))
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_EDDSA;
+              flags |= PUBKEY_FLAG_DJB_TWEAK;
+            }
+          else if (!memcmp (s, "pkcs1", 5) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_PKCS1;
+              flags |= PUBKEY_FLAG_FIXEDLEN;
+            }
+          else if (!memcmp (s, "param", 5))
+            flags |= PUBKEY_FLAG_PARAM;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 6:
+          if (!memcmp (s, "nocomp", 6))
+            flags |= PUBKEY_FLAG_NOCOMP;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 7:
+          if (!memcmp (s, "rfc6979", 7))
+            flags |= PUBKEY_FLAG_RFC6979;
+          else if (!memcmp (s, "noparam", 7))
+            ; /* Ignore - it is the default.  */
+          else if (!memcmp (s, "prehash", 7))
+            flags |= PUBKEY_FLAG_PREHASH;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 8:
+          if (!memcmp (s, "use-x931", 8))
+            flags |= PUBKEY_FLAG_USE_X931;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 9:
+          if (!memcmp (s, "pkcs1-raw", 9) && encoding == PUBKEY_ENC_UNKNOWN)
+            {
+              encoding = PUBKEY_ENC_PKCS1_RAW;
+              flags |= PUBKEY_FLAG_FIXEDLEN;
+            }
+          else if (!memcmp (s, "djb-tweak", 9))
+            {
+              encoding = PUBKEY_ENC_RAW;
+              flags |= PUBKEY_FLAG_DJB_TWEAK;
+            }
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 10:
+          if (!memcmp (s, "igninvflag", 10))
+            igninvflag = 1;
+          else if (!memcmp (s, "no-keytest", 10))
+            flags |= PUBKEY_FLAG_NO_KEYTEST;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 11:
+          if (!memcmp (s, "no-blinding", 11))
+            flags |= PUBKEY_FLAG_NO_BLINDING;
+          else if (!memcmp (s, "use-fips186", 11))
+            flags |= PUBKEY_FLAG_USE_FIPS186;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        case 13:
+          if (!memcmp (s, "use-fips186-2", 13))
+            flags |= PUBKEY_FLAG_USE_FIPS186_2;
+          else if (!memcmp (s, "transient-key", 13))
+            flags |= PUBKEY_FLAG_TRANSIENT_KEY;
+          else if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+
+        default:
+          if (!igninvflag)
+            rc = GPG_ERR_INV_FLAG;
+          break;
+        }
+    }
+
+  if (r_flags)
+    *r_flags = flags;
+  if (r_encoding)
+    *r_encoding = encoding;
+
+  return rc;
+}
+
+
+static int
+get_hash_algo (const char *s, size_t n)
+{
+  static const struct { const char *name; int algo; } hashnames[] = {
+    { "sha1",      GCRY_MD_SHA1 },
+    { "md5",       GCRY_MD_MD5 },
+    { "sha256",    GCRY_MD_SHA256 },
+    { "ripemd160", GCRY_MD_RMD160 },
+    { "rmd160",    GCRY_MD_RMD160 },
+    { "sha384",    GCRY_MD_SHA384 },
+    { "sha512",    GCRY_MD_SHA512 },
+    { "sha224",    GCRY_MD_SHA224 },
+    { "md2",       GCRY_MD_MD2 },
+    { "md4",       GCRY_MD_MD4 },
+    { "tiger",     GCRY_MD_TIGER },
+    { "haval",     GCRY_MD_HAVAL },
+    { "sha3-224",  GCRY_MD_SHA3_224 },
+    { "sha3-256",  GCRY_MD_SHA3_256 },
+    { "sha3-384",  GCRY_MD_SHA3_384 },
+    { "sha3-512",  GCRY_MD_SHA3_512 },
+    { "sm3",       GCRY_MD_SM3 },
+    { "shake128",  GCRY_MD_SHAKE128 },
+    { "shake256",  GCRY_MD_SHAKE256 },
+    { NULL, 0 }
+  };
+  int algo;
+  int i;
+
+  for (i=0; hashnames[i].name; i++)
+    {
+      if ( strlen (hashnames[i].name) == n
+           && !memcmp (hashnames[i].name, s, n))
+        break;
+    }
+  if (hashnames[i].name)
+    algo = hashnames[i].algo;
+  else
+    {
+      /* In case of a not listed or dynamically allocated hash
+         algorithm we fall back to this somewhat slower method.
+         Further, it also allows using OIDs as algorithm names.  */
+      char *tmpname;
+
+      tmpname = xtrymalloc (n+1);
+      if (!tmpname)
+        algo = 0;  /* Out of core - silently give up.  */
+      else
+        {
+          memcpy (tmpname, s, n);
+          tmpname[n] = 0;
+          algo = _gcry_md_map_name (tmpname);
+          xfree (tmpname);
+        }
+    }
+  return algo;
+}
+
+
+/* Get the "nbits" parameter from an s-expression of the format:
+ *
+ *   (algo
+ *     (parameter_name_1 ....)
+ *     ....
+ *     (parameter_name_n ....))
+ *
+ * Example:
+ *
+ *   (rsa
+ *     (nbits 4:2048))
+ *
+ * On success the value for nbits is stored at R_NBITS.  If no nbits
+ * parameter is found, the function returns success and stores 0 at
+ * R_NBITS.  For parsing errors the function returns an error code and
+ * stores 0 at R_NBITS.
+ */
+gpg_err_code_t
+_gcry_pk_util_get_nbits (gcry_sexp_t list, unsigned int *r_nbits)
+{
+  char buf[50];
+  const char *s;
+  size_t n;
+
+  *r_nbits = 0;
+
+  list = sexp_find_token (list, "nbits", 0);
+  if (!list)
+    return 0; /* No NBITS found.  */
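Two concrete inputs may make the preceding parsers tangible (hypothetical callers; gcry_sexp_build and the S-expression formats are public API, the _gcry_pk_util_* calls are library-internal; error handling and releases omitted):

unsigned char hashbuf[32];  /* e.g. a SHA-256 digest to be signed */
gcry_sexp_t data, parms;

/* A flags list as it appears inside a "data" object; the parser
   above maps "pss" to PUBKEY_ENC_PSS and sets PUBKEY_FLAG_FIXEDLEN.  */
gcry_sexp_build (&data, NULL,
                 "(data (flags pss)(hash sha256 %b))",
                 (int)sizeof hashbuf, hashbuf);

/* The docstring's example for this function:  */
gcry_sexp_build (&parms, NULL, "(rsa (nbits 4:2048))");
/* Internally, _gcry_pk_util_get_nbits (parms, &nbits) stores 2048.  */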
+
+  s = sexp_nth_data (list, 1, &n);
+  if (!s || n >= DIM (buf) - 1 )
+    {
+      /* NBITS given without a cdr.  */
+      sexp_release (list);
+      return GPG_ERR_INV_OBJ;
+    }
+  memcpy (buf, s, n);
+  buf[n] = 0;
+  *r_nbits = (unsigned int)strtoul (buf, NULL, 0);
+  sexp_release (list);
+  return 0;
+}
+
+
+/* Get the optional "rsa-use-e" parameter from an s-expression of the
+ * format:
+ *
+ *   (algo
+ *     (parameter_name_1 ....)
+ *     ....
+ *     (parameter_name_n ....))
+ *
+ * Example:
+ *
+ *   (rsa
+ *     (nbits 4:2048)
+ *     (rsa-use-e 2:41))
+ *
+ * On success the value for e is stored at R_E.  If no rsa-use-e
+ * parameter is found, the function returns success and stores 65537 at
+ * R_E.  For parsing errors the function returns an error code and
+ * stores 0 at R_E.
+ */
+gpg_err_code_t
+_gcry_pk_util_get_rsa_use_e (gcry_sexp_t list, unsigned long *r_e)
+{
+  char buf[50];
+  const char *s;
+  size_t n;
+
+  *r_e = 0;
+
+  list = sexp_find_token (list, "rsa-use-e", 0);
+  if (!list)
+    {
+      *r_e = 65537; /* Not given, use the value generated by old versions. */
+      return 0;
+    }
+
+  s = sexp_nth_data (list, 1, &n);
+  if (!s || n >= DIM (buf) - 1 )
+    {
+      /* No value or value too large.  */
+      sexp_release (list);
+      return GPG_ERR_INV_OBJ;
+    }
+  memcpy (buf, s, n);
+  buf[n] = 0;
+  *r_e = strtoul (buf, NULL, 0);
+  sexp_release (list);
+  return 0;
+}
+
+
+/* Parse a "sig-val" s-expression and store the inner parameter list at
+   R_PARMS.  ALGO_NAMES is used to verify that the algorithm in
+   "sig-val" is valid.  Returns 0 on success and stores a new list at
+   R_PARMS which must be freed by the caller.  On error R_PARMS is set
+   to NULL and an error code returned.  If R_ECCFLAGS is not NULL flag
+   values are set into it; as of now they are only used with ecc
+   algorithms.  */
+gpg_err_code_t
+_gcry_pk_util_preparse_sigval (gcry_sexp_t s_sig, const char **algo_names,
+                               gcry_sexp_t *r_parms, int *r_eccflags)
+{
+  gpg_err_code_t rc;
+  gcry_sexp_t l1 = NULL;
+  gcry_sexp_t l2 = NULL;
+  char *name = NULL;
+  int i;
+
+  *r_parms = NULL;
+  if (r_eccflags)
+    *r_eccflags = 0;
+
+  /* Extract the signature value.  */
+  l1 = sexp_find_token (s_sig, "sig-val", 0);
+  if (!l1)
+    {
+      rc = GPG_ERR_INV_OBJ; /* Does not contain a signature value object.  */
+      goto leave;
+    }
+
+  l2 = sexp_nth (l1, 1);
+  if (!l2)
+    {
+      rc = GPG_ERR_NO_OBJ; /* No cadr for the sig object.  */
+      goto leave;
+    }
+  name = sexp_nth_string (l2, 0);
+  if (!name)
+    {
+      rc = GPG_ERR_INV_OBJ; /* Invalid structure of object.  */
+      goto leave;
+    }
+  else if (!strcmp (name, "flags"))
+    {
+      /* Skip a "flags" parameter and look again for the algorithm
+         name.  The flags are not used here; we handle them merely for
+         the sake of consistent S-expressions.  */
+      sexp_release (l2);
+      l2 = sexp_nth (l1, 2);
+      if (!l2)
+        {
+          rc = GPG_ERR_INV_OBJ;
+          goto leave;
+        }
+      xfree (name);
+      name = sexp_nth_string (l2, 0);
+      if (!name)
+        {
+          rc = GPG_ERR_INV_OBJ; /* Invalid structure of object.  */
+          goto leave;
+        }
+    }
+
+  for (i=0; algo_names[i]; i++)
+    if (!stricmp (name, algo_names[i]))
+      break;
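For reference, typical inputs that reach this preparser have one of the following shapes (an illustrative sketch built with the public gcry_sexp_build; %m is the standard format directive for a gcry_mpi_t, and the stand-in values are not from the original file):

gcry_mpi_t r = gcry_mpi_set_ui (NULL, 123);  /* stand-ins for real values */
gcry_mpi_t s = gcry_mpi_set_ui (NULL, 456);
gcry_sexp_t s_sig;

/* Without a flags list:  (sig-val (dsa (r ...) (s ...)))  */
gcry_sexp_build (&s_sig, NULL, "(sig-val (dsa (r %m)(s %m)))", r, s);

/* With a flags list the first element is skipped as coded above:
     (sig-val (flags pss) (rsa (s ...)))
   After preparsing, *R_PARMS holds the inner list, e.g. (dsa (r ...) (s ...)). */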
+  if (!algo_names[i])
+    {
+      rc = GPG_ERR_CONFLICT; /* "sig-val" uses an unexpected algo.  */
+      goto leave;
+    }
+  if (r_eccflags)
+    {
+      if (!strcmp (name, "eddsa"))
+        *r_eccflags = PUBKEY_FLAG_EDDSA;
+      if (!strcmp (name, "gost"))
+        *r_eccflags = PUBKEY_FLAG_GOST;
+      if (!strcmp (name, "sm2"))
+        *r_eccflags = PUBKEY_FLAG_SM2;
+    }
+
+  *r_parms = l2;
+  l2 = NULL;
+  rc = 0;
+
+ leave:
+  xfree (name);
+  sexp_release (l2);
+  sexp_release (l1);
+  return rc;
+}
+
+
+/* Parse an "enc-val" s-expression and store the inner parameter list
+   at R_PARMS.  ALGO_NAMES is used to verify that the algorithm in
+   "enc-val" is valid.  Returns 0 on success and stores a new list at
+   R_PARMS which must be freed by the caller.  On error R_PARMS is set
+   to NULL and an error code returned.  If R_ECCFLAGS is not NULL flag
+   values are set into it; as of now they are only used with ecc
+   algorithms.
+
+     (enc-val
+       [(flags [raw, pkcs1, oaep, no-blinding])]
+       [(hash-algo <algo>)]
+       [(label